From 793b883ed12a6ae6e2901ddb5e038b77d6f0c0ac Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 14 Sep 2005 16:06:14 -0700 Subject: [PATCH] sky2: driver update. Here is revised patch against netdev sky2 branch. It includes whitespace fixes, all the changes from the previous review as well as some optimizations and timing fixes to solve some of the hangs. The stall problem is better but not perfect. It appears that under stress the chip can't keep up with the bus and sends a pause frame, then hangs. This version is for testing, and hopefully other eyes might see the root cause of the problem. I don't want to reinvent the ugly watchdog code in the syskonnect version of sk98lin. If you read it you will see, the original driver writer and the hardware developer obviously didn't understand each other. Dual port support is included, but not tested yet. It did require small change to NAPI since both ports share same IRQ. Signed-off-by: Jeff Garzik --- include/linux/netdevice.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7c717907896d..5e90557715ab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -780,11 +780,15 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) } /* Schedule rx intr now? */ +static inline int netif_rx_schedule_test(struct net_device *dev) +{ + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); +} +/* Schedule only if device is up */ static inline int netif_rx_schedule_prep(struct net_device *dev) { - return netif_running(dev) && - !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); + return netif_running(dev) && netif_rx_schedule_test(dev); } /* Add interface to tail of rx poll list. 
This assumes that _prep has -- cgit v1.2.3-71-gd317 From c2373ee98982a1c842dfb213c398f388d4227e63 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Wed, 9 Nov 2005 10:34:45 -0800 Subject: [PATCH] net: make dev_valid_name public dev_valid_name() is a useful function. Make it public. Signed-off-by: Mitch Williams Acked-by: Jay Vosburgh Signed-off-by: John W. Linville --- include/linux/netdevice.h | 1 + net/core/dev.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 936f8b76114e..4fceff0e59ec 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -684,6 +684,7 @@ extern int netif_rx(struct sk_buff *skb); extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); +extern int dev_valid_name(const char *name); extern int dev_ioctl(unsigned int cmd, void __user *); extern int dev_ethtool(struct ifreq *); extern unsigned dev_get_flags(const struct net_device *); diff --git a/net/core/dev.c b/net/core/dev.c index 0b48e294aafe..94e642ee6e2b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -626,7 +626,7 @@ struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mas * Network device names need to be valid file names to * to allow sysfs to work */ -static int dev_valid_name(const char *name) +int dev_valid_name(const char *name) { return !(*name == '\0' || !strcmp(name, ".") @@ -3269,6 +3269,7 @@ EXPORT_SYMBOL(__dev_get_by_index); EXPORT_SYMBOL(__dev_get_by_name); EXPORT_SYMBOL(__dev_remove_pack); EXPORT_SYMBOL(__skb_linearize); +EXPORT_SYMBOL(dev_valid_name); EXPORT_SYMBOL(dev_add_pack); EXPORT_SYMBOL(dev_alloc_name); EXPORT_SYMBOL(dev_close); -- cgit v1.2.3-71-gd317 From e74ac79956ecb56e71a398c57eb10fab8c58a562 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 14 Nov 2005 18:16:37 -0500 Subject: [libata] remove two unused fields from struct ata_port 
--- include/linux/libata.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index f2dbb684ce9e..83a83babff84 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -330,8 +330,6 @@ struct ata_port { u8 ctl; /* cache of ATA control register */ u8 last_ctl; /* Cache last written value */ - unsigned int bus_state; - unsigned int port_state; unsigned int pio_mask; unsigned int mwdma_mask; unsigned int udma_mask; -- cgit v1.2.3-71-gd317 From 0a1225769763779288d759e904c4f5a660844ce4 Mon Sep 17 00:00:00 2001 From: "shemminger@osdl.org" Date: Wed, 30 Nov 2005 11:45:17 -0800 Subject: [PATCH] sky2: change netif_rx_schedule_test to __netif_schedule_prep I didn't like the name netif_rx_schedule_test(), in earlier patches and changed to __netif_rx_schedule_prep to be more consistent. Signed-off-by: Stephen Hemminger Signed-off-by: Jeff Garzik --- drivers/net/sky2.c | 5 +++-- include/linux/netdevice.h | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 2253140ff4dc..f56de9894208 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -1961,10 +1961,11 @@ static irqreturn_t sky2_intr(int irq, void *dev_id, struct pt_regs *regs) if (status & Y2_IS_STAT_BMU) { hw->intr_mask &= ~Y2_IS_STAT_BMU; sky2_write32(hw, B0_IMSK, hw->intr_mask); - prefetch(&hw->st_le[hw->st_idx]); - if (netif_rx_schedule_test(dev0)) + if (likely(__netif_rx_schedule_prep(dev0))) { + prefetch(&hw->st_le[hw->st_idx]); __netif_rx_schedule(dev0); + } } if (status & Y2_IS_IRQ_PHY1) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 07e114d48bbb..7fda03d338d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -802,16 +802,16 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) return (1 << debug_value) - 1; } -/* Schedule rx intr now? 
*/ -static inline int netif_rx_schedule_test(struct net_device *dev) +/* Test if receive needs to be scheduled */ +static inline int __netif_rx_schedule_prep(struct net_device *dev) { return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); } -/* Schedule only if device is up */ +/* Test if receive needs to be scheduled but only if up */ static inline int netif_rx_schedule_prep(struct net_device *dev) { - return netif_running(dev) && netif_rx_schedule_test(dev); + return netif_running(dev) && __netif_rx_schedule_prep(dev); } /* Add interface to tail of rx poll list. This assumes that _prep has -- cgit v1.2.3-71-gd317 From a22e2eb0710798009b8e696ae911aef745089dd6 Mon Sep 17 00:00:00 2001 From: Albert Lee Date: Mon, 5 Dec 2005 15:38:02 +0800 Subject: [PATCH] libata: move err_mask to ata_queued_cmd - remove err_mask from the parameter list of the complete functions - move err_mask to ata_queued_cmd - initialize qc->err_mask when needed - for each function call to ata_qc_complete(), replace the err_mask parameter with qc->err_mask. Signed-off-by: Albert Lee =============== Signed-off-by: Jeff Garzik --- drivers/scsi/ahci.c | 12 ++++++++---- drivers/scsi/libata-core.c | 32 +++++++++++++++++++------------- drivers/scsi/libata-scsi.c | 18 ++++++++++-------- drivers/scsi/libata.h | 2 +- drivers/scsi/pdc_adma.c | 11 +++++------ drivers/scsi/sata_mv.c | 9 ++++++--- drivers/scsi/sata_promise.c | 14 ++++++++------ drivers/scsi/sata_qstor.c | 7 ++++--- drivers/scsi/sata_sil24.c | 15 ++++++++++----- drivers/scsi/sata_sx4.c | 15 ++++++++++----- include/linux/libata.h | 7 +++++-- 11 files changed, 86 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c index cfbdd3f071b6..887eaa2a3ebf 100644 --- a/drivers/scsi/ahci.c +++ b/drivers/scsi/ahci.c @@ -643,7 +643,8 @@ static void ahci_eng_timeout(struct ata_port *ap) * not being called from the SCSI EH. 
*/ qc->scsidone = scsi_finish_command; - ata_qc_complete(qc, AC_ERR_OTHER); + qc->err_mask |= AC_ERR_OTHER; + ata_qc_complete(qc); } spin_unlock_irqrestore(&host_set->lock, flags); @@ -664,7 +665,8 @@ static inline int ahci_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc) ci = readl(port_mmio + PORT_CMD_ISSUE); if (likely((ci & 0x1) == 0)) { if (qc) { - ata_qc_complete(qc, 0); + assert(qc->err_mask == 0); + ata_qc_complete(qc); qc = NULL; } } @@ -681,8 +683,10 @@ static inline int ahci_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc) /* command processing has stopped due to error; restart */ ahci_restart_port(ap, status); - if (qc) - ata_qc_complete(qc, err_mask); + if (qc) { + qc->err_mask |= AC_ERR_OTHER; + ata_qc_complete(qc); + } } return 1; diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 0a959566f964..f56b4daf4189 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -1053,9 +1053,9 @@ static int ata_qc_wait_err(struct ata_queued_cmd *qc, if (wait_for_completion_timeout(wait, 30 * HZ) < 1) { /* timeout handling */ - unsigned int err_mask = ac_err_mask(ata_chk_status(qc->ap)); + qc->err_mask |= ac_err_mask(ata_chk_status(qc->ap)); - if (!err_mask) { + if (!qc->err_mask) { printk(KERN_WARNING "ata%u: slow completion (cmd %x)\n", qc->ap->id, qc->tf.command); } else { @@ -1064,7 +1064,7 @@ static int ata_qc_wait_err(struct ata_queued_cmd *qc, rc = -EIO; } - ata_qc_complete(qc, err_mask); + ata_qc_complete(qc); } return rc; @@ -1175,6 +1175,7 @@ retry: qc->cursg_ofs = 0; qc->cursect = 0; qc->nsect = 1; + qc->err_mask = 0; goto retry; } } @@ -2777,7 +2778,7 @@ skip_map: * None. 
(grabs host lock) */ -void ata_poll_qc_complete(struct ata_queued_cmd *qc, unsigned int err_mask) +void ata_poll_qc_complete(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; unsigned long flags; @@ -2785,7 +2786,7 @@ void ata_poll_qc_complete(struct ata_queued_cmd *qc, unsigned int err_mask) spin_lock_irqsave(&ap->host_set->lock, flags); ap->flags &= ~ATA_FLAG_NOINTR; ata_irq_on(ap); - ata_qc_complete(qc, err_mask); + ata_qc_complete(qc); spin_unlock_irqrestore(&ap->host_set->lock, flags); } @@ -2885,7 +2886,8 @@ static int ata_pio_complete (struct ata_port *ap) ap->hsm_task_state = HSM_ST_IDLE; - ata_poll_qc_complete(qc, 0); + assert(qc->err_mask == 0); + ata_poll_qc_complete(qc); /* another command may start at this point */ @@ -3261,7 +3263,8 @@ static void ata_pio_error(struct ata_port *ap) ap->hsm_task_state = HSM_ST_IDLE; - ata_poll_qc_complete(qc, AC_ERR_ATA_BUS); + qc->err_mask |= AC_ERR_ATA_BUS; + ata_poll_qc_complete(qc); } static void ata_pio_task(void *_data) @@ -3363,7 +3366,8 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc) ap->id, qc->tf.command, drv_stat, host_stat); /* complete taskfile transaction */ - ata_qc_complete(qc, ac_err_mask(drv_stat)); + qc->err_mask |= ac_err_mask(drv_stat); + ata_qc_complete(qc); break; } @@ -3462,7 +3466,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap, return qc; } -int ata_qc_complete_noop(struct ata_queued_cmd *qc, unsigned int err_mask) +int ata_qc_complete_noop(struct ata_queued_cmd *qc) { return 0; } @@ -3521,7 +3525,7 @@ void ata_qc_free(struct ata_queued_cmd *qc) * spin_lock_irqsave(host_set lock) */ -void ata_qc_complete(struct ata_queued_cmd *qc, unsigned int err_mask) +void ata_qc_complete(struct ata_queued_cmd *qc) { int rc; @@ -3538,7 +3542,7 @@ void ata_qc_complete(struct ata_queued_cmd *qc, unsigned int err_mask) qc->flags &= ~ATA_QCFLAG_ACTIVE; /* call completion callback */ - rc = qc->complete_fn(qc, err_mask); + rc = qc->complete_fn(qc); /* if callback indicates not 
to complete command (non-zero), * return immediately @@ -3976,7 +3980,8 @@ inline unsigned int ata_host_intr (struct ata_port *ap, ap->ops->irq_clear(ap); /* complete taskfile transaction */ - ata_qc_complete(qc, ac_err_mask(status)); + qc->err_mask |= ac_err_mask(status); + ata_qc_complete(qc); break; default: @@ -4111,7 +4116,8 @@ static void atapi_packet_task(void *_data) err_out_status: status = ata_chk_status(ap); err_out: - ata_poll_qc_complete(qc, __ac_err_mask(status)); + qc->err_mask |= __ac_err_mask(status); + ata_poll_qc_complete(qc); } diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index ef763ed9a0e5..2aef41112c43 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -1203,12 +1203,11 @@ nothing_to_do: return 1; } -static int ata_scsi_qc_complete(struct ata_queued_cmd *qc, - unsigned int err_mask) +static int ata_scsi_qc_complete(struct ata_queued_cmd *qc) { struct scsi_cmnd *cmd = qc->scsicmd; u8 *cdb = cmd->cmnd; - int need_sense = (err_mask != 0); + int need_sense = (qc->err_mask != 0); /* For ATA pass thru (SAT) commands, generate a sense block if * user mandated it or if there's an error. 
Note that if we @@ -1955,9 +1954,9 @@ void ata_scsi_badcmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *), u8 done(cmd); } -static int atapi_sense_complete(struct ata_queued_cmd *qc,unsigned int err_mask) +static int atapi_sense_complete(struct ata_queued_cmd *qc) { - if (err_mask && ((err_mask & AC_ERR_DEV) == 0)) + if (qc->err_mask && ((qc->err_mask & AC_ERR_DEV) == 0)) /* FIXME: not quite right; we don't want the * translation of taskfile registers into * a sense descriptors, since that's only @@ -2015,15 +2014,18 @@ static void atapi_request_sense(struct ata_queued_cmd *qc) qc->complete_fn = atapi_sense_complete; - if (ata_qc_issue(qc)) - ata_qc_complete(qc, AC_ERR_OTHER); + if (ata_qc_issue(qc)) { + qc->err_mask |= AC_ERR_OTHER; + ata_qc_complete(qc); + } DPRINTK("EXIT\n"); } -static int atapi_qc_complete(struct ata_queued_cmd *qc, unsigned int err_mask) +static int atapi_qc_complete(struct ata_queued_cmd *qc) { struct scsi_cmnd *cmd = qc->scsicmd; + unsigned int err_mask = qc->err_mask; VPRINTK("ENTER, err_mask 0x%X\n", err_mask); diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h index 8ebaa694d18e..686255df76b8 100644 --- a/drivers/scsi/libata.h +++ b/drivers/scsi/libata.h @@ -39,7 +39,7 @@ struct ata_scsi_args { /* libata-core.c */ extern int atapi_enabled; -extern int ata_qc_complete_noop(struct ata_queued_cmd *qc, unsigned int err_mask); +extern int ata_qc_complete_noop(struct ata_queued_cmd *qc); extern struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap, struct ata_device *dev); extern void ata_rwcmd_protocol(struct ata_queued_cmd *qc); diff --git a/drivers/scsi/pdc_adma.c b/drivers/scsi/pdc_adma.c index f557f17ca00c..e8df0c9ec1e6 100644 --- a/drivers/scsi/pdc_adma.c +++ b/drivers/scsi/pdc_adma.c @@ -464,14 +464,12 @@ static inline unsigned int adma_intr_pkt(struct ata_host_set *host_set) continue; qc = ata_qc_from_tag(ap, ap->active_tag); if (qc && (!(qc->tf.ctl & ATA_NIEN))) { - unsigned int err_mask = 0; - if ((status & 
(aPERR | aPSD | aUIRQ))) - err_mask = AC_ERR_OTHER; + qc->err_mask |= AC_ERR_OTHER; else if (pp->pkt[0] != cDONE) - err_mask = AC_ERR_OTHER; + qc->err_mask |= AC_ERR_OTHER; - ata_qc_complete(qc, err_mask); + ata_qc_complete(qc); } } return handled; @@ -501,7 +499,8 @@ static inline unsigned int adma_intr_mmio(struct ata_host_set *host_set) /* complete taskfile transaction */ pp->state = adma_state_idle; - ata_qc_complete(qc, ac_err_mask(status)); + qc->err_mask |= ac_err_mask(status); + ata_qc_complete(qc); handled = 1; } } diff --git a/drivers/scsi/sata_mv.c b/drivers/scsi/sata_mv.c index c94176693d1f..3e7866b51ac6 100644 --- a/drivers/scsi/sata_mv.c +++ b/drivers/scsi/sata_mv.c @@ -1242,8 +1242,10 @@ static void mv_host_intr(struct ata_host_set *host_set, u32 relevant, VPRINTK("port %u IRQ found for qc, " "ata_status 0x%x\n", port,ata_status); /* mark qc status appropriately */ - if (!(qc->tf.ctl & ATA_NIEN)) - ata_qc_complete(qc, err_mask); + if (!(qc->tf.ctl & ATA_NIEN)) { + qc->err_mask |= err_mask; + ata_qc_complete(qc); + } } } } @@ -1864,7 +1866,8 @@ static void mv_eng_timeout(struct ata_port *ap) */ spin_lock_irqsave(&ap->host_set->lock, flags); qc->scsidone = scsi_finish_command; - ata_qc_complete(qc, AC_ERR_OTHER); + qc->err_mask |= AC_ERR_OTHER; + ata_qc_complete(qc); spin_unlock_irqrestore(&ap->host_set->lock, flags); } } diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c index 02089069b0f6..e2e146a14f97 100644 --- a/drivers/scsi/sata_promise.c +++ b/drivers/scsi/sata_promise.c @@ -401,7 +401,8 @@ static void pdc_eng_timeout(struct ata_port *ap) case ATA_PROT_NODATA: printk(KERN_ERR "ata%u: command timeout\n", ap->id); drv_stat = ata_wait_idle(ap); - ata_qc_complete(qc, __ac_err_mask(drv_stat)); + qc->err_mask |= __ac_err_mask(drv_stat); + ata_qc_complete(qc); break; default: @@ -410,7 +411,8 @@ static void pdc_eng_timeout(struct ata_port *ap) printk(KERN_ERR "ata%u: unknown timeout, cmd 0x%x stat 0x%x\n", ap->id, qc->tf.command, 
drv_stat); - ata_qc_complete(qc, ac_err_mask(drv_stat)); + qc->err_mask |= ac_err_mask(drv_stat); + ata_qc_complete(qc); break; } @@ -422,21 +424,21 @@ out: static inline unsigned int pdc_host_intr( struct ata_port *ap, struct ata_queued_cmd *qc) { - unsigned int handled = 0, err_mask = 0; + unsigned int handled = 0; u32 tmp; void __iomem *mmio = (void __iomem *) ap->ioaddr.cmd_addr + PDC_GLOBAL_CTL; tmp = readl(mmio); if (tmp & PDC_ERR_MASK) { - err_mask = AC_ERR_DEV; + qc->err_mask |= AC_ERR_DEV; pdc_reset_port(ap); } switch (qc->tf.protocol) { case ATA_PROT_DMA: case ATA_PROT_NODATA: - err_mask |= ac_err_mask(ata_wait_idle(ap)); - ata_qc_complete(qc, err_mask); + qc->err_mask |= ac_err_mask(ata_wait_idle(ap)); + ata_qc_complete(qc); handled = 1; break; diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c index 6b9c3ae07cb3..de05e2883f9c 100644 --- a/drivers/scsi/sata_qstor.c +++ b/drivers/scsi/sata_qstor.c @@ -409,8 +409,8 @@ static inline unsigned int qs_intr_pkt(struct ata_host_set *host_set) case 3: /* device error */ pp->state = qs_state_idle; qs_enter_reg_mode(qc->ap); - ata_qc_complete(qc, - ac_err_mask(sDST)); + qc->err_mask |= ac_err_mask(sDST); + ata_qc_complete(qc); break; default: break; @@ -447,7 +447,8 @@ static inline unsigned int qs_intr_mmio(struct ata_host_set *host_set) /* complete taskfile transaction */ pp->state = qs_state_idle; - ata_qc_complete(qc, ac_err_mask(status)); + qc->err_mask |= ac_err_mask(status); + ata_qc_complete(qc); handled = 1; } } diff --git a/drivers/scsi/sata_sil24.c b/drivers/scsi/sata_sil24.c index e0d6f194f54f..a0ad3ed2200a 100644 --- a/drivers/scsi/sata_sil24.c +++ b/drivers/scsi/sata_sil24.c @@ -654,7 +654,8 @@ static void sil24_eng_timeout(struct ata_port *ap) */ printk(KERN_ERR "ata%u: command timeout\n", ap->id); qc->scsidone = scsi_finish_command; - ata_qc_complete(qc, AC_ERR_OTHER); + qc->err_mask |= AC_ERR_OTHER; + ata_qc_complete(qc); sil24_reset_controller(ap); } @@ -711,8 +712,10 @@ static 
void sil24_error_intr(struct ata_port *ap, u32 slot_stat) sil24_reset_controller(ap); } - if (qc) - ata_qc_complete(qc, err_mask); + if (qc) { + qc->err_mask |= err_mask; + ata_qc_complete(qc); + } } static inline void sil24_host_intr(struct ata_port *ap) @@ -734,8 +737,10 @@ static inline void sil24_host_intr(struct ata_port *ap) */ sil24_update_tf(ap); - if (qc) - ata_qc_complete(qc, ac_err_mask(pp->tf.command)); + if (qc) { + qc->err_mask |= ac_err_mask(pp->tf.command); + ata_qc_complete(qc); + } } else sil24_error_intr(ap, slot_stat); } diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c index 7c4b53575510..58da854a7c68 100644 --- a/drivers/scsi/sata_sx4.c +++ b/drivers/scsi/sata_sx4.c @@ -718,7 +718,8 @@ static inline unsigned int pdc20621_host_intr( struct ata_port *ap, VPRINTK("ata%u: read hdma, 0x%x 0x%x\n", ap->id, readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT)); /* get drive status; clear intr; complete txn */ - ata_qc_complete(qc, ac_err_mask(ata_wait_idle(ap))); + qc->err_mask |= ac_err_mask(ata_wait_idle(ap)); + ata_qc_complete(qc); pdc20621_pop_hdma(qc); } @@ -756,7 +757,8 @@ static inline unsigned int pdc20621_host_intr( struct ata_port *ap, VPRINTK("ata%u: write ata, 0x%x 0x%x\n", ap->id, readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT)); /* get drive status; clear intr; complete txn */ - ata_qc_complete(qc, ac_err_mask(ata_wait_idle(ap))); + qc->err_mask |= ac_err_mask(ata_wait_idle(ap)); + ata_qc_complete(qc); pdc20621_pop_hdma(qc); } handled = 1; @@ -766,7 +768,8 @@ static inline unsigned int pdc20621_host_intr( struct ata_port *ap, status = ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000); DPRINTK("BUS_NODATA (drv_stat 0x%X)\n", status); - ata_qc_complete(qc, ac_err_mask(status)); + qc->err_mask |= ac_err_mask(status); + ata_qc_complete(qc); handled = 1; } else { @@ -881,7 +884,8 @@ static void pdc_eng_timeout(struct ata_port *ap) case ATA_PROT_DMA: case ATA_PROT_NODATA: printk(KERN_ERR "ata%u: command timeout\n", ap->id); - 
ata_qc_complete(qc, __ac_err_mask(ata_wait_idle(ap))); + qc->err_mask |= __ac_err_mask(ata_wait_idle(ap)); + ata_qc_complete(qc); break; default: @@ -890,7 +894,8 @@ static void pdc_eng_timeout(struct ata_port *ap) printk(KERN_ERR "ata%u: unknown timeout, cmd 0x%x stat 0x%x\n", ap->id, qc->tf.command, drv_stat); - ata_qc_complete(qc, ac_err_mask(drv_stat)); + qc->err_mask |= ac_err_mask(drv_stat); + ata_qc_complete(qc); break; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 83a83babff84..e18ce039cdfd 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -194,7 +194,7 @@ struct ata_port; struct ata_queued_cmd; /* typedefs */ -typedef int (*ata_qc_cb_t) (struct ata_queued_cmd *qc, unsigned int err_mask); +typedef int (*ata_qc_cb_t) (struct ata_queued_cmd *qc); struct ata_ioports { unsigned long cmd_addr; @@ -279,6 +279,8 @@ struct ata_queued_cmd { /* DO NOT iterate over __sg manually, use ata_for_each_sg() */ struct scatterlist *__sg; + unsigned int err_mask; + ata_qc_cb_t complete_fn; struct completion *waiting; @@ -475,7 +477,7 @@ extern void ata_bmdma_start (struct ata_queued_cmd *qc); extern void ata_bmdma_stop(struct ata_queued_cmd *qc); extern u8 ata_bmdma_status(struct ata_port *ap); extern void ata_bmdma_irq_clear(struct ata_port *ap); -extern void ata_qc_complete(struct ata_queued_cmd *qc, unsigned int err_mask); +extern void ata_qc_complete(struct ata_queued_cmd *qc); extern void ata_eng_timeout(struct ata_port *ap); extern void ata_scsi_simulate(u16 *id, struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)); @@ -667,6 +669,7 @@ static inline void ata_qc_reinit(struct ata_queued_cmd *qc) qc->cursect = qc->cursg = qc->cursg_ofs = 0; qc->nsect = 0; qc->nbytes = qc->curbytes = 0; + qc->err_mask = 0; ata_tf_init(qc->ap, &qc->tf, qc->dev->devno); } -- cgit v1.2.3-71-gd317 From 95235ca2c20ac0b31a8eb39e2d599bcc3e9c9a10 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 2 Dec 2005 10:43:20 -0800 Subject: [CPUFREQ] 
CPU frequency display in /proc/cpuinfo What is the value shown in "cpu MHz" of /proc/cpuinfo when CPUs are capable of changing frequency? Today the answer is: It depends. On i386: SMP kernel - It is always the boot frequency UP kernel - Scales with the frequency change and shows the frequency that was last set. On x86_64: There is one single variable cpu_khz that gets written by all the CPUs. So, the frequency set by last CPU will be seen on /proc/cpuinfo of all the CPUs in the system. What you see also depends on whether you have constant_tsc capable CPU or not. On ia64: It is always boot time frequency of a particular CPU that gets displayed. The patch below changes this to: Show the last known frequency of the particular CPU, when cpufreq is present. If cpu does not support changing of frequency through cpufreq, then boot frequency will be shown. The patch affects i386, x86_64 and ia64 architectures. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/proc.c | 6 +++++- arch/ia64/kernel/setup.c | 8 +++++++- arch/x86_64/kernel/setup.c | 6 +++++- drivers/cpufreq/cpufreq.c | 24 ++++++++++++++++++++++++ include/linux/cpufreq.h | 10 ++++++++++ 5 files changed, 51 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index e7921315ae9d..6d91b274589c 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -3,6 +3,7 @@ #include #include #include +#include /* * Get CPU information for use by the procfs. 
@@ -86,8 +87,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if ( cpu_has(c, X86_FEATURE_TSC) ) { + unsigned int freq = cpufreq_quick_get(n); + if (!freq) + freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - cpu_khz / 1000, (cpu_khz % 1000)); + freq / 1000, (freq % 1000)); } /* Cache size */ diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 5add0bcf87a7..088e5dded8dc 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -517,6 +518,7 @@ show_cpuinfo (struct seq_file *m, void *v) char family[32], features[128], *cp, sep; struct cpuinfo_ia64 *c = v; unsigned long mask; + unsigned int proc_freq; int i; mask = c->features; @@ -549,6 +551,10 @@ show_cpuinfo (struct seq_file *m, void *v) sprintf(cp, " 0x%lx", mask); } + proc_freq = cpufreq_quick_get(cpunum); + if (!proc_freq) + proc_freq = c->proc_freq / 1000; + seq_printf(m, "processor : %d\n" "vendor : %s\n" @@ -565,7 +571,7 @@ show_cpuinfo (struct seq_file *m, void *v) "BogoMIPS : %lu.%02lu\n", cpunum, c->vendor, family, c->model, c->revision, c->archrev, features, c->ppn, c->number, - c->proc_freq / 1000000, c->proc_freq % 1000000, + proc_freq / 1000, proc_freq % 1000, c->itc_freq / 1000000, c->itc_freq % 1000000, lpj*HZ/500000, (lpj*HZ/5000) % 100); #ifdef CONFIG_SMP diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 750e01dcbdf4..64c4534b930c 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -1256,8 +1257,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if (cpu_has(c,X86_FEATURE_TSC)) { + unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data)); + if (!freq) + freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - cpu_khz / 1000, (cpu_khz % 1000)); + freq / 1000, (freq % 
1000)); } /* Cache size */ diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 815902c2c856..a9163d02983a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -822,6 +822,30 @@ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, unsigne } +/** + * cpufreq_quick_get - get the CPU frequency (in kHz) frpm policy->cur + * @cpu: CPU number + * + * This is the last known freq, without actually getting it from the driver. + * Return value will be same as what is shown in scaling_cur_freq in sysfs. + */ +unsigned int cpufreq_quick_get(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + unsigned int ret = 0; + + if (policy) { + down(&policy->lock); + ret = policy->cur; + up(&policy->lock); + cpufreq_cpu_put(policy); + } + + return (ret); +} +EXPORT_SYMBOL(cpufreq_quick_get); + + /** * cpufreq_get - get the current CPU frequency (in kHz) * @cpu: CPU number diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d068176b7ad7..c31650df9241 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -256,6 +256,16 @@ int cpufreq_update_policy(unsigned int cpu); /* query the current CPU frequency (in kHz). If zero, cpufreq couldn't detect it */ unsigned int cpufreq_get(unsigned int cpu); +/* query the last known CPU freq (in kHz). If zero, cpufreq couldn't detect it */ +#ifdef CONFIG_CPU_FREQ +unsigned int cpufreq_quick_get(unsigned int cpu); +#else +static inline unsigned int cpufreq_quick_get(unsigned int cpu) +{ + return 0; +} +#endif + /********************************************************************* * CPUFREQ DEFAULT GOVERNOR * -- cgit v1.2.3-71-gd317 From a2a7a662f80d8b7f2295a36de1f9b033ed0b910c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Dec 2005 14:48:31 +0900 Subject: [PATCH] libata: implement ata_exec_internal() This patch implements ata_exec_internal() function which performs libata internal command execution. 
Previously, this was done by each user by manually initializing a qc, issuing it, waiting for its completion and handling errors. In addition to obvious code factoring, using ata_exec_internal() fixes the following bugs. * qc not freed on issue failure * ap->qactive clearing could race with the next internal command * race between timeout handling and irq * ignoring error condition not represented in tf->status Also, qc & hardware are not accessed anymore once it's completed, making internal commands more conformant with general semantics. ata_exec_internal() also makes it easy to issue internal commands from multiple threads if that becomes necessary. This patch only implements ata_exec_internal(). A following patch will convert all users. Signed-off-by: Tejun Heo -- Jeff, all patches have been regenerated against upstream branch as of today. (575ab52a218e4ff0667a6cbd972c3af443ee8713) Also, I took out a debug printk from ata_exec_internal (don't know how that one got left there). Other than that, all patches are identical to the previous posting. Thanks. 
:-) Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/libata.h | 2 + 2 files changed, 101 insertions(+) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index a0060cf31e0d..de80abeab065 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -1046,6 +1046,105 @@ static unsigned int ata_pio_modes(const struct ata_device *adev) return modes; } +struct ata_exec_internal_arg { + unsigned int err_mask; + struct ata_taskfile *tf; + struct completion *waiting; +}; + +int ata_qc_complete_internal(struct ata_queued_cmd *qc) +{ + struct ata_exec_internal_arg *arg = qc->private_data; + struct completion *waiting = arg->waiting; + + if (!(qc->err_mask & ~AC_ERR_DEV)) + qc->ap->ops->tf_read(qc->ap, arg->tf); + arg->err_mask = qc->err_mask; + arg->waiting = NULL; + complete(waiting); + + return 0; +} + +/** + * ata_exec_internal - execute libata internal command + * @ap: Port to which the command is sent + * @dev: Device to which the command is sent + * @tf: Taskfile registers for the command and the result + * @dma_dir: Data tranfer direction of the command + * @buf: Data buffer of the command + * @buflen: Length of data buffer + * + * Executes libata internal command with timeout. @tf contains + * command on entry and result on return. Timeout and error + * conditions are reported via return value. No recovery action + * is taken after a command times out. It's caller's duty to + * clean up after timeout. + * + * LOCKING: + * None. Should be called with kernel context, might sleep. 
+ */ + +static unsigned +ata_exec_internal(struct ata_port *ap, struct ata_device *dev, + struct ata_taskfile *tf, + int dma_dir, void *buf, unsigned int buflen) +{ + u8 command = tf->command; + struct ata_queued_cmd *qc; + DECLARE_COMPLETION(wait); + unsigned long flags; + struct ata_exec_internal_arg arg; + + spin_lock_irqsave(&ap->host_set->lock, flags); + + qc = ata_qc_new_init(ap, dev); + BUG_ON(qc == NULL); + + qc->tf = *tf; + qc->dma_dir = dma_dir; + if (dma_dir != DMA_NONE) { + ata_sg_init_one(qc, buf, buflen); + qc->nsect = buflen / ATA_SECT_SIZE; + } + + arg.waiting = &wait; + arg.tf = tf; + qc->private_data = &arg; + qc->complete_fn = ata_qc_complete_internal; + + if (ata_qc_issue(qc)) + goto issue_fail; + + spin_unlock_irqrestore(&ap->host_set->lock, flags); + + if (!wait_for_completion_timeout(&wait, ATA_TMOUT_INTERNAL)) { + spin_lock_irqsave(&ap->host_set->lock, flags); + + /* We're racing with irq here. If we lose, the + * following test prevents us from completing the qc + * again. If completion irq occurs after here but + * before the caller cleans up, it will result in a + * spurious interrupt. We can live with that. 
+ */ + if (arg.waiting) { + qc->err_mask = AC_ERR_OTHER; + ata_qc_complete(qc); + printk(KERN_WARNING "ata%u: qc timeout (cmd 0x%x)\n", + ap->id, command); + } + + spin_unlock_irqrestore(&ap->host_set->lock, flags); + } + + return arg.err_mask; + + issue_fail: + ata_qc_free(qc); + spin_unlock_irqrestore(&ap->host_set->lock, flags); + return AC_ERR_OTHER; +} + static int ata_qc_wait_err(struct ata_queued_cmd *qc, struct completion *wait) { diff --git a/include/linux/libata.h b/include/linux/libata.h index e18ce039cdfd..833e57afd54c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -135,6 +135,8 @@ enum { ATA_TMOUT_BOOT_QUICK = 7 * HZ, /* hueristic */ ATA_TMOUT_CDB = 30 * HZ, ATA_TMOUT_CDB_QUICK = 5 * HZ, + ATA_TMOUT_INTERNAL = 30 * HZ, + ATA_TMOUT_INTERNAL_QUICK = 5 * HZ, /* ATA bus states */ BUS_UNKNOWN = 0, -- cgit v1.2.3-71-gd317 From b5632303401c231bf270ef36f1013e52caf4caf9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Dec 2005 14:51:25 +0900 Subject: [PATCH] libata: remove unused qc->waiting There is no user of qc->waiting left after ata_exec_internal() changes. Kill the field. 
Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 14 ++------------ include/linux/libata.h | 2 -- 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 1c4dbf3e9818..9ea102587914 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -3503,7 +3503,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap, static void __ata_qc_complete(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; - unsigned int tag, do_clear = 0; + unsigned int tag; qc->flags = 0; tag = qc->tag; @@ -3511,17 +3511,8 @@ static void __ata_qc_complete(struct ata_queued_cmd *qc) if (tag == ap->active_tag) ap->active_tag = ATA_TAG_POISON; qc->tag = ATA_TAG_POISON; - do_clear = 1; - } - - if (qc->waiting) { - struct completion *waiting = qc->waiting; - qc->waiting = NULL; - complete(waiting); - } - - if (likely(do_clear)) clear_bit(tag, &ap->qactive); + } } /** @@ -3537,7 +3528,6 @@ static void __ata_qc_complete(struct ata_queued_cmd *qc) void ata_qc_free(struct ata_queued_cmd *qc) { assert(qc != NULL); /* ata_qc_from_tag _might_ return NULL */ - assert(qc->waiting == NULL); /* nothing should be waiting */ __ata_qc_complete(qc); } diff --git a/include/linux/libata.h b/include/linux/libata.h index 833e57afd54c..46337e71613e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -285,8 +285,6 @@ struct ata_queued_cmd { ata_qc_cb_t complete_fn; - struct completion *waiting; - void *private_data; }; -- cgit v1.2.3-71-gd317 From 4e06cbd42c41f9e49fcfe5ee45c749eefaae9cf4 Mon Sep 17 00:00:00 2001 From: "Moore, Eric Dean" Date: Thu, 1 Dec 2005 16:51:02 -0700 Subject: [SCSI] pci_ids.h: add subclass code for SAS Controllers Signed-off-by: Eric Moore Signed-off-by: James Bottomley --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h 
b/include/linux/pci_ids.h index 1e737e269db9..aa76fc4e38c2 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -15,6 +15,7 @@ #define PCI_CLASS_STORAGE_FLOPPY 0x0102 #define PCI_CLASS_STORAGE_IPI 0x0103 #define PCI_CLASS_STORAGE_RAID 0x0104 +#define PCI_CLASS_STORAGE_SAS 0x0107 #define PCI_CLASS_STORAGE_OTHER 0x0180 #define PCI_BASE_CLASS_NETWORK 0x02 -- cgit v1.2.3-71-gd317 From 6e39b69e7ea9205c5f80aeac3ef999ab8fb1a4cc Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 11 Nov 2005 05:30:24 -0600 Subject: [SCSI] export blk layer functions needed for blk_execute_rq_nowait To send async requests we need these two functions exported. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- block/ll_rw_blk.c | 6 +++++- include/linux/blkdev.h | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 99c9ca6d5992..c525b5a2b598 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -2306,6 +2306,8 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, generic_unplug_device(q); } +EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); + /** * blk_execute_rq - insert a request into queue for execution * @q: queue to insert the request in @@ -2444,7 +2446,7 @@ void disk_round_stats(struct gendisk *disk) /* * queue lock must be held */ -static void __blk_put_request(request_queue_t *q, struct request *req) +void __blk_put_request(request_queue_t *q, struct request *req) { struct request_list *rl = req->rl; @@ -2473,6 +2475,8 @@ static void __blk_put_request(request_queue_t *q, struct request *req) } } +EXPORT_SYMBOL_GPL(__blk_put_request); + void blk_put_request(struct request *req) { unsigned long flags; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a33a31e71bbc..9a68716dcf75 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -558,6 +558,7 @@ extern void blk_unregister_queue(struct gendisk *disk); extern void 
register_disk(struct gendisk *dev); extern void generic_make_request(struct bio *bio); extern void blk_put_request(struct request *); +extern void __blk_put_request(request_queue_t *, struct request *); extern void blk_end_sync_rq(struct request *rq); extern void blk_attempt_remerge(request_queue_t *, struct request *); extern struct request *blk_get_request(request_queue_t *, int, gfp_t); @@ -579,6 +580,10 @@ extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int); extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *, int); +extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *, + struct request *, int, + void (*done)(struct request *)); + static inline request_queue_t *bdev_get_queue(struct block_device *bdev) { return bdev->bd_disk->queue; -- cgit v1.2.3-71-gd317 From 6e68af666f5336254b5715dca591026b7324499a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 11 Nov 2005 05:30:27 -0600 Subject: [SCSI] Convert SCSI mid-layer to scsi_execute_async Add scsi helpers to create really-large-requests and convert scsi-ml to scsi_execute_async(). Per Jens's previous comments, I placed this function in scsi_lib.c. I made it follow all the queue's limits - I think I did at least :), so I removed the warning on the function header. I think the scsi_execute_* functions should eventually take a request_queue and be placed some place where the dm-multipath hw_handler can use them if that failover code is going to stay in the kernel. That conversion patch will be sent in another mail though. 
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/scsi_error.c | 47 ++------- drivers/scsi/scsi_lib.c | 230 ++++++++++++++++++++++++++++++++++++++------- drivers/scsi/scsi_priv.h | 1 - fs/bio.c | 20 ++++ include/linux/bio.h | 2 + include/scsi/scsi_device.h | 6 ++ 6 files changed, 233 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 18c5d2523014..53ea62d3b53d 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1314,23 +1314,6 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd) } } -/** - * scsi_eh_lock_done - done function for eh door lock request - * @scmd: SCSI command block for the door lock request - * - * Notes: - * We completed the asynchronous door lock request, and it has either - * locked the door or failed. We must free the command structures - * associated with this request. - **/ -static void scsi_eh_lock_done(struct scsi_cmnd *scmd) -{ - struct scsi_request *sreq = scmd->sc_request; - - scsi_release_request(sreq); -} - - /** * scsi_eh_lock_door - Prevent medium removal for the specified device * @sdev: SCSI device to prevent medium removal @@ -1353,29 +1336,17 @@ static void scsi_eh_lock_done(struct scsi_cmnd *scmd) **/ static void scsi_eh_lock_door(struct scsi_device *sdev) { - struct scsi_request *sreq = scsi_allocate_request(sdev, GFP_KERNEL); + unsigned char cmnd[MAX_COMMAND_SIZE]; - if (unlikely(!sreq)) { - printk(KERN_ERR "%s: request allocate failed," - "prevent media removal cmd not sent\n", __FUNCTION__); - return; - } + cmnd[0] = ALLOW_MEDIUM_REMOVAL; + cmnd[1] = 0; + cmnd[2] = 0; + cmnd[3] = 0; + cmnd[4] = SCSI_REMOVAL_PREVENT; + cmnd[5] = 0; - sreq->sr_cmnd[0] = ALLOW_MEDIUM_REMOVAL; - sreq->sr_cmnd[1] = 0; - sreq->sr_cmnd[2] = 0; - sreq->sr_cmnd[3] = 0; - sreq->sr_cmnd[4] = SCSI_REMOVAL_PREVENT; - sreq->sr_cmnd[5] = 0; - sreq->sr_data_direction = DMA_NONE; - sreq->sr_bufflen = 0; - sreq->sr_buffer = NULL; 
- sreq->sr_allowed = 5; - sreq->sr_done = scsi_eh_lock_done; - sreq->sr_timeout_per_command = 10 * HZ; - sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]); - - scsi_insert_special_req(sreq, 1); + scsi_execute_async(sdev, cmnd, DMA_NONE, NULL, 0, 0, 10 * HZ, + 5, NULL, NULL, GFP_KERNEL); } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 1f2782767ca9..eb0cfbfbcf8f 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -63,39 +63,6 @@ static struct scsi_host_sg_pool scsi_sg_pools[] = { }; #undef SP - -/* - * Function: scsi_insert_special_req() - * - * Purpose: Insert pre-formed request into request queue. - * - * Arguments: sreq - request that is ready to be queued. - * at_head - boolean. True if we should insert at head - * of queue, false if we should insert at tail. - * - * Lock status: Assumed that lock is not held upon entry. - * - * Returns: Nothing - * - * Notes: This function is called from character device and from - * ioctl types of functions where the caller knows exactly - * what SCSI command needs to be issued. The idea is that - * we merely inject the command into the queue (at the head - * for now), and then call the queue request function to actually - * process it. 
- */ -int scsi_insert_special_req(struct scsi_request *sreq, int at_head) -{ - /* - * Because users of this function are apt to reuse requests with no - * modification, we have to sanitise the request flags here - */ - sreq->sr_request->flags &= ~REQ_DONTPREP; - blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request, - at_head, sreq); - return 0; -} - static void scsi_run_queue(struct request_queue *q); /* @@ -249,8 +216,13 @@ void scsi_do_req(struct scsi_request *sreq, const void *cmnd, /* * head injection *required* here otherwise quiesce won't work + * + * Because users of this function are apt to reuse requests with no + * modification, we have to sanitise the request flags here */ - scsi_insert_special_req(sreq, 1); + sreq->sr_request->flags &= ~REQ_DONTPREP; + blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request, + 1, sreq); } EXPORT_SYMBOL(scsi_do_req); @@ -327,6 +299,196 @@ int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd, } EXPORT_SYMBOL(scsi_execute_req); +struct scsi_io_context { + void *data; + void (*done)(void *data, char *sense, int result, int resid); + char sense[SCSI_SENSE_BUFFERSIZE]; +}; + +static void scsi_end_async(struct request *req) +{ + struct scsi_io_context *sioc = req->end_io_data; + + if (sioc->done) + sioc->done(sioc->data, sioc->sense, req->errors, req->data_len); + + kfree(sioc); + __blk_put_request(req->q, req); +} + +static int scsi_merge_bio(struct request *rq, struct bio *bio) +{ + struct request_queue *q = rq->q; + + bio->bi_flags &= ~(1 << BIO_SEG_VALID); + if (rq_data_dir(rq) == WRITE) + bio->bi_rw |= (1 << BIO_RW); + blk_queue_bounce(q, &bio); + + if (!rq->bio) + blk_rq_bio_prep(q, rq, bio); + else if (!q->back_merge_fn(q, rq, bio)) + return -EINVAL; + else { + rq->biotail->bi_next = bio; + rq->biotail = bio; + rq->hard_nr_sectors += bio_sectors(bio); + rq->nr_sectors = rq->hard_nr_sectors; + } + + return 0; +} + +static int scsi_bi_endio(struct bio *bio, unsigned int 
bytes_done, int error) +{ + if (bio->bi_size) + return 1; + + bio_put(bio); + return 0; +} + +/** + * scsi_req_map_sg - map a scatterlist into a request + * @rq: request to fill + * @sg: scatterlist + * @nsegs: number of elements + * @bufflen: len of buffer + * @gfp: memory allocation flags + * + * scsi_req_map_sg maps a scatterlist into a request so that the + * request can be sent to the block layer. We do not trust the scatterlist + * sent to use, as some ULDs use that struct to only organize the pages. + */ +static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl, + int nsegs, unsigned bufflen, gfp_t gfp) +{ + struct request_queue *q = rq->q; + int nr_pages = (bufflen + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned int data_len = 0, len, bytes, off; + struct page *page; + struct bio *bio = NULL; + int i, err, nr_vecs = 0; + + for (i = 0; i < nsegs; i++) { + page = sgl[i].page; + off = sgl[i].offset; + len = sgl[i].length; + data_len += len; + + while (len > 0) { + bytes = min_t(unsigned int, len, PAGE_SIZE - off); + + if (!bio) { + nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages); + nr_pages -= nr_vecs; + + bio = bio_alloc(gfp, nr_vecs); + if (!bio) { + err = -ENOMEM; + goto free_bios; + } + bio->bi_end_io = scsi_bi_endio; + } + + if (bio_add_pc_page(q, bio, page, bytes, off) != + bytes) { + bio_put(bio); + err = -EINVAL; + goto free_bios; + } + + if (bio->bi_vcnt >= nr_vecs) { + err = scsi_merge_bio(rq, bio); + if (err) { + bio_endio(bio, bio->bi_size, 0); + goto free_bios; + } + bio = NULL; + } + + page++; + len -= bytes; + off = 0; + } + } + + rq->buffer = rq->data = NULL; + rq->data_len = data_len; + return 0; + +free_bios: + while ((bio = rq->bio) != NULL) { + rq->bio = bio->bi_next; + /* + * call endio instead of bio_put incase it was bounced + */ + bio_endio(bio, bio->bi_size, 0); + } + + return err; +} + +/** + * scsi_execute_async - insert request + * @sdev: scsi device + * @cmd: scsi command + * @data_direction: data direction + * @buffer: data 
buffer (this can be a kernel buffer or scatterlist) + * @bufflen: len of buffer + * @use_sg: if buffer is a scatterlist this is the number of elements + * @timeout: request timeout in seconds + * @retries: number of times to retry request + * @flags: or into request flags + **/ +int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd, + int data_direction, void *buffer, unsigned bufflen, + int use_sg, int timeout, int retries, void *privdata, + void (*done)(void *, char *, int, int), gfp_t gfp) +{ + struct request *req; + struct scsi_io_context *sioc; + int err = 0; + int write = (data_direction == DMA_TO_DEVICE); + + sioc = kzalloc(sizeof(*sioc), gfp); + if (!sioc) + return DRIVER_ERROR << 24; + + req = blk_get_request(sdev->request_queue, write, gfp); + if (!req) + goto free_sense; + + if (use_sg) + err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp); + else if (bufflen) + err = blk_rq_map_kern(req->q, req, buffer, bufflen, gfp); + + if (err) + goto free_req; + + req->cmd_len = COMMAND_SIZE(cmd[0]); + memcpy(req->cmd, cmd, req->cmd_len); + req->sense = sioc->sense; + req->sense_len = 0; + req->timeout = timeout; + req->flags |= REQ_BLOCK_PC | REQ_QUIET; + req->end_io_data = sioc; + + sioc->data = privdata; + sioc->done = done; + + blk_execute_rq_nowait(req->q, NULL, req, 1, scsi_end_async); + return 0; + +free_req: + blk_put_request(req); +free_sense: + kfree(sioc); + return DRIVER_ERROR << 24; +} +EXPORT_SYMBOL_GPL(scsi_execute_async); + /* * Function: scsi_init_cmd_errh() * diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index a8d121c8fbcd..f04e7e11f57a 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -40,7 +40,6 @@ extern void scsi_exit_hosts(void); extern int scsi_dispatch_cmd(struct scsi_cmnd *cmd); extern int scsi_setup_command_freelist(struct Scsi_Host *shost); extern void scsi_destroy_command_freelist(struct Scsi_Host *shost); -extern int scsi_insert_special_req(struct scsi_request *sreq, 
int); extern void scsi_init_cmd_from_req(struct scsi_cmnd *cmd, struct scsi_request *sreq); extern void __scsi_release_request(struct scsi_request *sreq); diff --git a/fs/bio.c b/fs/bio.c index 460554b07ff9..4d21ee3873ec 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -385,6 +385,25 @@ static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page return len; } +/** + * bio_add_pc_page - attempt to add page to bio + * @bio: destination bio + * @page: page to add + * @len: vec entry length + * @offset: vec entry offset + * + * Attempt to add a page to the bio_vec maplist. This can fail for a + * number of reasons, such as the bio being full or target block + * device limitations. The target block device must allow bio's + * smaller than PAGE_SIZE, so it is always possible to add a single + * page to an empty bio. This should only be used by REQ_PC bios. + */ +int bio_add_pc_page(request_queue_t *q, struct bio *bio, struct page *page, + unsigned int len, unsigned int offset) +{ + return __bio_add_page(q, bio, page, len, offset); +} + /** * bio_add_page - attempt to add page to bio * @bio: destination bio @@ -1228,6 +1247,7 @@ EXPORT_SYMBOL(bio_clone); EXPORT_SYMBOL(bio_phys_segments); EXPORT_SYMBOL(bio_hw_segments); EXPORT_SYMBOL(bio_add_page); +EXPORT_SYMBOL(bio_add_pc_page); EXPORT_SYMBOL(bio_get_nr_vecs); EXPORT_SYMBOL(bio_map_user); EXPORT_SYMBOL(bio_unmap_user); diff --git a/include/linux/bio.h b/include/linux/bio.h index 685fd3720df5..b60ffe32cd21 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -292,6 +292,8 @@ extern struct bio *bio_clone(struct bio *, gfp_t); extern void bio_init(struct bio *); extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); +extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, + unsigned int, unsigned int); extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, 
int); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 063e32fe036c..e94ca4d36035 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -274,6 +274,12 @@ extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, extern int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *, int timeout, int retries); +extern int scsi_execute_async(struct scsi_device *sdev, + const unsigned char *cmd, int data_direction, + void *buffer, unsigned bufflen, int use_sg, + int timeout, int retries, void *privdata, + void (*done)(void *, char *, int, int), + gfp_t gfp); static inline unsigned int sdev_channel(struct scsi_device *sdev) { -- cgit v1.2.3-71-gd317 From 17e01f216b611fc46956dcd9063aec4de75991e3 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 11 Nov 2005 05:31:37 -0600 Subject: [SCSI] add retries field to request for REQ_BLOCK_PC use For tape we need to control the retries. This patch adds a retries counter on the request for REQ_BLOCK_PC commands originating from scsi_execute* to use. REQ_BLOCK_PC commands coming from the block layer SG_IO path continue to use the retries set in the ULD init_command. (scsi_execute* does not set the gendisk so we do not execute the init_command in that path).
Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/scsi_lib.c | 4 +++- include/linux/blkdev.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index eb0cfbfbcf8f..365843a1561f 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -259,6 +259,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, memcpy(req->cmd, cmd, req->cmd_len); req->sense = sense; req->sense_len = 0; + req->retries = retries; req->timeout = timeout; req->flags |= flags | REQ_BLOCK_PC | REQ_SPECIAL | REQ_QUIET; @@ -472,6 +473,7 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd, req->sense = sioc->sense; req->sense_len = 0; req->timeout = timeout; + req->retries = retries; req->flags |= REQ_BLOCK_PC | REQ_QUIET; req->end_io_data = sioc; @@ -1393,7 +1395,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) cmd->sc_data_direction = DMA_NONE; cmd->transfersize = req->data_len; - cmd->allowed = 3; + cmd->allowed = req->retries; cmd->timeout_per_command = req->timeout; cmd->done = scsi_generic_done; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9a68716dcf75..509e9a03a328 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -184,6 +184,7 @@ struct request { void *sense; unsigned int timeout; + int retries; /* * For Power Management requests -- cgit v1.2.3-71-gd317 From defd94b75409b983f94548ea2f52ff5787ddb848 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 5 Dec 2005 02:37:06 -0600 Subject: [SCSI] seperate max_sectors from max_hw_sectors - export __blk_put_request and blk_execute_rq_nowait needed for async REQ_BLOCK_PC requests - seperate max_hw_sectors and max_sectors for block/scsi_ioctl.c and SG_IO bio.c helpers per Jens's last comments. 
Since block/scsi_ioctl.c SG_IO was already testing against max_sectors and SCSI-ml was setting max_sectors and max_hw_sectors to the same value, this does not change any scsi SG_IO behavior. It only prepares ll_rw_blk.c, scsi_ioctl.c and bio.c for when SCSI-ml begins to set a valid max_hw_sectors for all LLDs. Today, if an LLD does not set it, SCSI-ml sets it to a safe default, and some LLDs set it to an artificially low value to overcome memory and feedback issues. Note: Since we now cap max_sectors to BLK_DEF_MAX_SECTORS, which is 1024, drivers that used to call blk_queue_max_sectors with a large value of max_sectors will now see the fs requests capped to BLK_DEF_MAX_SECTORS. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- block/ll_rw_blk.c | 34 ++++++++++++++++++++++++++-------- block/scsi_ioctl.c | 2 +- drivers/md/dm-table.c | 2 +- drivers/scsi/scsi_lib.c | 2 +- fs/bio.c | 20 +++++++++++--------- include/linux/blkdev.h | 3 ++- 6 files changed, 42 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index c525b5a2b598..d4beb9a89ee0 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -239,7 +239,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; q->backing_dev_info.state = 0; q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; - blk_queue_max_sectors(q, MAX_SECTORS); + blk_queue_max_sectors(q, SAFE_MAX_SECTORS); blk_queue_hardsect_size(q, 512); blk_queue_dma_alignment(q, 511); blk_queue_congestion_threshold(q); @@ -555,7 +555,12 @@ void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors) printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); } - q->max_sectors = q->max_hw_sectors = max_sectors; + if (BLK_DEF_MAX_SECTORS > max_sectors) + q->max_hw_sectors = q->max_sectors = max_sectors; + else { + q->max_sectors = BLK_DEF_MAX_SECTORS; + q->max_hw_sectors =
max_sectors; + } } EXPORT_SYMBOL(blk_queue_max_sectors); @@ -657,8 +662,8 @@ EXPORT_SYMBOL(blk_queue_hardsect_size); void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b) { /* zero is "infinity" */ - t->max_sectors = t->max_hw_sectors = - min_not_zero(t->max_sectors,b->max_sectors); + t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); + t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors); t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); @@ -1293,9 +1298,15 @@ static inline int ll_new_hw_segment(request_queue_t *q, static int ll_back_merge_fn(request_queue_t *q, struct request *req, struct bio *bio) { + unsigned short max_sectors; int len; - if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { + if (unlikely(blk_pc_request(req))) + max_sectors = q->max_hw_sectors; + else + max_sectors = q->max_sectors; + + if (req->nr_sectors + bio_sectors(bio) > max_sectors) { req->flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -1325,9 +1336,16 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req, static int ll_front_merge_fn(request_queue_t *q, struct request *req, struct bio *bio) { + unsigned short max_sectors; int len; - if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { + if (unlikely(blk_pc_request(req))) + max_sectors = q->max_hw_sectors; + else + max_sectors = q->max_sectors; + + + if (req->nr_sectors + bio_sectors(bio) > max_sectors) { req->flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -2144,7 +2162,7 @@ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, struct bio *bio; int reading; - if (len > (q->max_sectors << 9)) + if (len > (q->max_hw_sectors << 9)) return -EINVAL; if (!len || !ubuf) return -EINVAL; @@ -2259,7 +2277,7 @@ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, { struct bio *bio; - if (len > 
(q->max_sectors << 9)) + if (len > (q->max_hw_sectors << 9)) return -EINVAL; if (!len || !kbuf) return -EINVAL; diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 382dea7b224c..4e390dfd3157 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -233,7 +233,7 @@ static int sg_io(struct file *file, request_queue_t *q, if (verify_command(file, cmd)) return -EPERM; - if (hdr->dxfer_len > (q->max_sectors << 9)) + if (hdr->dxfer_len > (q->max_hw_sectors << 9)) return -EIO; if (hdr->dxfer_len) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index a6d3baa46f61..a6f2dc66c3db 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -638,7 +638,7 @@ int dm_split_args(int *argc, char ***argvp, char *input) static void check_for_valid_limits(struct io_restrictions *rs) { if (!rs->max_sectors) - rs->max_sectors = MAX_SECTORS; + rs->max_sectors = SAFE_MAX_SECTORS; if (!rs->max_phys_segments) rs->max_phys_segments = MAX_PHYS_SEGMENTS; if (!rs->max_hw_segments) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 54a72f197487..14ad2a785a34 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -462,6 +462,7 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd, req = blk_get_request(sdev->request_queue, write, gfp); if (!req) goto free_sense; + req->flags |= REQ_BLOCK_PC | REQ_QUIET; if (use_sg) err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp); @@ -477,7 +478,6 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd, req->sense_len = 0; req->timeout = timeout; req->retries = retries; - req->flags |= REQ_BLOCK_PC | REQ_QUIET; req->end_io_data = sioc; sioc->data = privdata; diff --git a/fs/bio.c b/fs/bio.c index 4d21ee3873ec..38d3e8023a07 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -313,7 +313,8 @@ int bio_get_nr_vecs(struct block_device *bdev) } static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page - *page, unsigned int len, unsigned int offset) + 
*page, unsigned int len, unsigned int offset, + unsigned short max_sectors) { int retried_segments = 0; struct bio_vec *bvec; @@ -327,7 +328,7 @@ static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page if (bio->bi_vcnt >= bio->bi_max_vecs) return 0; - if (((bio->bi_size + len) >> 9) > q->max_sectors) + if (((bio->bi_size + len) >> 9) > max_sectors) return 0; /* @@ -401,7 +402,7 @@ static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page int bio_add_pc_page(request_queue_t *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset) { - return __bio_add_page(q, bio, page, len, offset); + return __bio_add_page(q, bio, page, len, offset, q->max_hw_sectors); } /** @@ -420,8 +421,8 @@ int bio_add_pc_page(request_queue_t *q, struct bio *bio, struct page *page, int bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) { - return __bio_add_page(bdev_get_queue(bio->bi_bdev), bio, page, - len, offset); + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + return __bio_add_page(q, bio, page, len, offset, q->max_sectors); } struct bio_map_data { @@ -533,7 +534,7 @@ struct bio *bio_copy_user(request_queue_t *q, unsigned long uaddr, break; } - if (__bio_add_page(q, bio, page, bytes, 0) < bytes) { + if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) { ret = -EINVAL; break; } @@ -647,7 +648,8 @@ static struct bio *__bio_map_user_iov(request_queue_t *q, /* * sorry... 
*/ - if (__bio_add_page(q, bio, pages[j], bytes, offset) < bytes) + if (bio_add_pc_page(q, bio, pages[j], bytes, offset) < + bytes) break; len -= bytes; @@ -820,8 +822,8 @@ static struct bio *__bio_map_kern(request_queue_t *q, void *data, if (bytes > len) bytes = len; - if (__bio_add_page(q, bio, virt_to_page(data), bytes, - offset) < bytes) + if (bio_add_pc_page(q, bio, virt_to_page(data), bytes, + offset) < bytes) break; data += bytes; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 509e9a03a328..a18500d196e1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -702,7 +702,8 @@ extern int blkdev_issue_flush(struct block_device *, sector_t *); #define MAX_PHYS_SEGMENTS 128 #define MAX_HW_SEGMENTS 128 -#define MAX_SECTORS 255 +#define SAFE_MAX_SECTORS 255 +#define BLK_DEF_MAX_SECTORS 1024 #define MAX_SEGMENT_SIZE 65536 -- cgit v1.2.3-71-gd317 From f83b5e323f57d6e1f35a839d663e91cebe985e54 Mon Sep 17 00:00:00 2001 From: Ustyugov Roman Date: Fri, 23 Sep 2005 08:42:11 +0400 Subject: kbuild: set correct KBUILD_MODNAME when using well known kernel symbols as module names This patch fixes a problem when we use well known kernel symbols as module names. For example, if module source name is current.c, idle_stack.c, etc., we have a bad KBUILD_MODNAME value. For example, KBUILD_MODNAME will be "get_current()" instead of "current", or "(init_thread_union.stack)" instead of "idle_stack". The trick is to define a stringify macro on the command line - named KBUILD_STR for namespace reasons - and then to stringify the module name. There are a few uses of KBUILD_MODNAME throughout the tree but the usage is for debug and will not be harmed by this change so they are left untouched for now. While at it KBUILD_BASENAME was changed too. Any spinlock usage in the unix module would have created wrong section names without it. Usage in spinlock.h fixed so it no longer stringifies KBUILD_BASENAME. Original patch from Ustyugov Roman - all bugs introduced by me.
Signed-off-by: Sam Ravnborg --- include/linux/spinlock.h | 3 +-- scripts/Makefile.lib | 8 +++++--- scripts/mod/modpost.c | 3 +-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 0e9682c9def5..799be6747944 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -59,8 +59,7 @@ /* * Must define these before including other files, inline functions need them */ -#define LOCK_SECTION_NAME \ - ".text.lock." __stringify(KBUILD_BASENAME) +#define LOCK_SECTION_NAME ".text.lock."KBUILD_BASENAME #define LOCK_SECTION_START(extra) \ ".subsection 1\n\t" \ diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 0f81dcfd6909..550798f57da5 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -81,8 +81,10 @@ obj-dirs := $(addprefix $(obj)/,$(obj-dirs)) # Note: It's possible that one object gets potentially linked into more # than one module. In that case KBUILD_MODNAME will be set to foo_bar, # where foo and bar are the name of the modules. 
-basename_flags = -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F))) -modname_flags = $(if $(filter 1,$(words $(modname))),-DKBUILD_MODNAME=$(subst $(comma),_,$(subst -,_,$(modname)))) +name-fix = $(subst $(comma),_,$(subst -,_,$1)) +basename_flags = -D"KBUILD_BASENAME=KBUILD_STR($(call name-fix,$(*F)))" +modname_flags = $(if $(filter 1,$(words $(modname))),\ + -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))") _c_flags = $(CFLAGS) $(EXTRA_CFLAGS) $(CFLAGS_$(*F).o) _a_flags = $(AFLAGS) $(EXTRA_AFLAGS) $(AFLAGS_$(*F).o) @@ -113,7 +115,7 @@ endif c_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(CPPFLAGS) \ $(__c_flags) $(modkern_cflags) \ - $(basename_flags) $(modname_flags) + -D"KBUILD_STR(s)=\#s" $(basename_flags) $(modname_flags) a_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(CPPFLAGS) \ $(__a_flags) $(modkern_aflags) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 8ce5a6318684..f70ff13d4818 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -539,10 +539,9 @@ add_header(struct buffer *b, struct module *mod) buf_printf(b, "\n"); buf_printf(b, "MODULE_INFO(vermagic, VERMAGIC_STRING);\n"); buf_printf(b, "\n"); - buf_printf(b, "#undef unix\n"); /* We have a module called "unix" */ buf_printf(b, "struct module __this_module\n"); buf_printf(b, "__attribute__((section(\".gnu.linkonce.this_module\"))) = {\n"); - buf_printf(b, " .name = __stringify(KBUILD_MODNAME),\n"); + buf_printf(b, " .name = KBUILD_MODNAME,\n"); if (mod->has_init) buf_printf(b, " .init = init_module,\n"); if (mod->has_cleanup) -- cgit v1.2.3-71-gd317 From 9b4ffa48ae855c8657a36014c5b0243ff69f4722 Mon Sep 17 00:00:00 2001 From: Jaya Kumar Date: Thu, 17 Nov 2005 10:12:23 +0100 Subject: [ALSA] Add support for the CS5535 Audio device Add support for the CS5535 Audio device. 
I've fixed up some errors as per Takashi's advice from the thread: http://lkml.org/lkml/2005/9/15/119 From: Alan Cox cs5535 is a 32bit x86 only device using weird CPU features Signed-off-by: Jaya Kumar Signed-off-by: Andrew Morton Signed-off-by: Takashi Iwai --- CREDITS | 1 + MAINTAINERS | 5 + include/linux/pci_ids.h | 4 + sound/pci/Kconfig | 13 + sound/pci/Makefile | 1 + sound/pci/cs5535audio/Makefile | 8 + sound/pci/cs5535audio/cs5535audio.c | 410 ++++++++++++++++++++++++++++++ sound/pci/cs5535audio/cs5535audio.h | 123 +++++++++ sound/pci/cs5535audio/cs5535audio_pcm.c | 430 ++++++++++++++++++++++++++++++++ 9 files changed, 995 insertions(+) create mode 100644 sound/pci/cs5535audio/Makefile create mode 100644 sound/pci/cs5535audio/cs5535audio.c create mode 100644 sound/pci/cs5535audio/cs5535audio.h create mode 100644 sound/pci/cs5535audio/cs5535audio_pcm.c (limited to 'include/linux') diff --git a/CREDITS b/CREDITS index 1b4f8694fa48..521f00d1b549 100644 --- a/CREDITS +++ b/CREDITS @@ -1883,6 +1883,7 @@ N: Jaya Kumar E: jayalk@intworks.biz W: http://www.intworks.biz D: Arc monochrome LCD framebuffer driver, x86 reboot fixups +D: pirq addr, CS5535 alsa audio driver S: Gurgaon, India S: Kuala Lumpur, Malaysia diff --git a/MAINTAINERS b/MAINTAINERS index 6af683025ae0..93f97b3afac5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -650,6 +650,11 @@ L: linux-crypto@vger.kernel.org T: git kernel.org:/pub/scm/linux/kernel/git/herbert/crypto-2.6.git S: Maintained +CS5535 Audio ALSA driver +P: Jaya Kumar +M: jayakumar.alsa@gmail.com +S: Maintained + CYBERPRO FB DRIVER P: Russell King M: rmk@arm.linux.org.uk diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4db67b3b05cc..9093f118f99d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -376,6 +376,10 @@ #define PCI_DEVICE_ID_NS_87560_USB 0x0012 #define PCI_DEVICE_ID_NS_83815 0x0020 #define PCI_DEVICE_ID_NS_83820 0x0022 +#define PCI_DEVICE_ID_NS_CS5535_IDE 0x002d +#define 
PCI_DEVICE_ID_NS_CS5535_AUDIO 0x002e +#define PCI_DEVICE_ID_NS_CS5535_USB 0x002f +#define PCI_DEVICE_ID_NS_CS5535_VIDEO 0x0030 #define PCI_DEVICE_ID_NS_SATURN 0x0035 #define PCI_DEVICE_ID_NS_SCx200_BRIDGE 0x0500 #define PCI_DEVICE_ID_NS_SCx200_SMI 0x0501 diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index 0fb16cf335ea..920305c7402f 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -359,6 +359,19 @@ config SND_ENS1370 To compile this driver as a module, choose M here: the module will be called snd-ens1370. +config SND_CS5535AUDIO + tristate "CS5535 Audio" + depends on SND && X86 && !X86_64 + select SND_PCM + select SND_AC97_CODEC + help + Say Y here to include support for audio on CS5535 chips. It is + referred to as NS CS5535 IO or AMD CS5535 IO companion in + various literature. + + To compile this driver as a module, choose M here: the module + will be called snd-cs5535audio. + config SND_ENS1371 tristate "(Creative) Ensoniq AudioPCI 1371/1373" depends on SND diff --git a/sound/pci/Makefile b/sound/pci/Makefile index 42fabfcfc2a9..82a9c734f84d 100644 --- a/sound/pci/Makefile +++ b/sound/pci/Makefile @@ -54,6 +54,7 @@ obj-$(CONFIG_SND) += \ au88x0/ \ ca0106/ \ cs46xx/ \ + cs5535audio/ \ emu10k1/ \ hda/ \ ice1712/ \ diff --git a/sound/pci/cs5535audio/Makefile b/sound/pci/cs5535audio/Makefile new file mode 100644 index 000000000000..08d8ee6547d3 --- /dev/null +++ b/sound/pci/cs5535audio/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for cs5535audio +# + +snd-cs5535audio-objs := cs5535audio.o cs5535audio_pcm.o + +# Toplevel Module Dependency +obj-$(CONFIG_SND_CS5535AUDIO) += snd-cs5535audio.o diff --git a/sound/pci/cs5535audio/cs5535audio.c b/sound/pci/cs5535audio/cs5535audio.c new file mode 100644 index 000000000000..920c857fc223 --- /dev/null +++ b/sound/pci/cs5535audio/cs5535audio.c @@ -0,0 +1,410 @@ +/* + * Driver for audio on multifunction CS5535 companion device + * Copyright (C) Jaya Kumar + * + * Based on Jaroslav Kysela and Takashi Iwai's examples. 
+ * This work was sponsored by CIS(M) Sdn Bhd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cs5535audio.h" + +#define DRIVER_NAME "cs5535audio" + + +static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; +static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; +static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; + +static struct pci_device_id snd_cs5535audio_ids[] = { + { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_CS5535_AUDIO, PCI_ANY_ID, + PCI_ANY_ID, 0, 0, 0, }, + {} +}; + +MODULE_DEVICE_TABLE(pci, snd_cs5535audio_ids); + +static void wait_till_cmd_acked(cs5535audio_t *cs5535au, unsigned long timeout) +{ + unsigned long tmp; + do { + tmp = cs_readl(cs5535au, ACC_CODEC_CNTL); + if (!(tmp & CMD_NEW)) + break; + msleep(10); + } while (--timeout); + if (!timeout) + snd_printk(KERN_ERR "Failure writing to cs5535 codec\n"); +} + +static unsigned short snd_cs5535audio_codec_read(cs5535audio_t *cs5535au, + unsigned short reg) +{ + unsigned long regdata; + unsigned long timeout; + unsigned long val; + + regdata = ((unsigned long) reg) << 24; + regdata |= ACC_CODEC_CNTL_RD_CMD; + regdata |= CMD_NEW; + + cs_writel(cs5535au, ACC_CODEC_CNTL, 
regdata); + wait_till_cmd_acked(cs5535au, 500); + + timeout = 50; + do { + val = cs_readl(cs5535au, ACC_CODEC_STATUS); + if ( (val & STS_NEW) && + ((unsigned long) reg == ((0xFF000000 & val)>>24)) ) + break; + msleep(10); + } while (--timeout); + if (!timeout) + snd_printk(KERN_ERR "Failure reading cs5535 codec\n"); + + return ((unsigned short) val); +} + +static void snd_cs5535audio_codec_write(cs5535audio_t *cs5535au, + unsigned short reg, unsigned short val) +{ + unsigned long regdata; + + regdata = ((unsigned long) reg) << 24; + regdata |= (unsigned long) val; + regdata &= CMD_MASK; + regdata |= CMD_NEW; + regdata &= ACC_CODEC_CNTL_WR_CMD; + + cs_writel(cs5535au, ACC_CODEC_CNTL, regdata); + wait_till_cmd_acked(cs5535au, 50); +} + +static void snd_cs5535audio_ac97_codec_write(ac97_t *ac97, + unsigned short reg, unsigned short val) +{ + cs5535audio_t *cs5535au = ac97->private_data; + snd_cs5535audio_codec_write(cs5535au, reg, val); +} + +static unsigned short snd_cs5535audio_ac97_codec_read(ac97_t *ac97, + unsigned short reg) +{ + cs5535audio_t *cs5535au = ac97->private_data; + return snd_cs5535audio_codec_read(cs5535au, reg); +} + +static void snd_cs5535audio_mixer_free_ac97(ac97_t *ac97) +{ + cs5535audio_t *cs5535audio = ac97->private_data; + cs5535audio->ac97 = NULL; +} + +static int snd_cs5535audio_mixer(cs5535audio_t *cs5535au) +{ + snd_card_t *card = cs5535au->card; + ac97_bus_t *pbus; + ac97_template_t ac97; + int err; + static ac97_bus_ops_t ops = { + .write = snd_cs5535audio_ac97_codec_write, + .read = snd_cs5535audio_ac97_codec_read, + }; + + if ((err = snd_ac97_bus(card, 0, &ops, NULL, &pbus)) < 0) + return err; + + memset(&ac97, 0, sizeof(ac97)); + ac97.scaps = AC97_SCAP_AUDIO|AC97_SCAP_SKIP_MODEM; + ac97.private_data = cs5535au; + ac97.pci = cs5535au->pci; + ac97.private_free = snd_cs5535audio_mixer_free_ac97; + + if ((err = snd_ac97_mixer(pbus, &ac97, &cs5535au->ac97)) < 0) { + snd_printk("mixer failed\n"); + return err; + } + + return 0; +} + 
+static void process_bm0_irq(cs5535audio_t *cs5535au) +{ + u8 bm_stat; + spin_lock(&cs5535au->reg_lock); + bm_stat = cs_readb(cs5535au, ACC_BM0_STATUS); + spin_unlock(&cs5535au->reg_lock); + if (bm_stat & EOP) { + cs5535audio_dma_t *dma; + dma = cs5535au->playback_substream->runtime->private_data; + snd_pcm_period_elapsed(cs5535au->playback_substream); + } else { + snd_printk(KERN_ERR "unexpected bm0 irq src, bm_stat=%x\n", + bm_stat); + } +} + +static void process_bm1_irq(cs5535audio_t *cs5535au) +{ + u8 bm_stat; + spin_lock(&cs5535au->reg_lock); + bm_stat = cs_readb(cs5535au, ACC_BM1_STATUS); + spin_unlock(&cs5535au->reg_lock); + if (bm_stat & EOP) { + cs5535audio_dma_t *dma; + dma = cs5535au->capture_substream->runtime->private_data; + snd_pcm_period_elapsed(cs5535au->capture_substream); + } +} + +static irqreturn_t snd_cs5535audio_interrupt(int irq, void *dev_id, + struct pt_regs *regs) +{ + u16 acc_irq_stat; + u8 bm_stat; + unsigned char count; + cs5535audio_t *cs5535au = dev_id; + + if (cs5535au == NULL) + return IRQ_NONE; + + acc_irq_stat = cs_readw(cs5535au, ACC_IRQ_STATUS); + + if (!acc_irq_stat) + return IRQ_NONE; + for (count=0; count < 10; count++) { + if (acc_irq_stat & (1<irq); + pci_set_power_state(cs5535au->pci, 3); + + if (cs5535au->irq >= 0) + free_irq(cs5535au->irq, cs5535au); + + pci_release_regions(cs5535au->pci); + pci_disable_device(cs5535au->pci); + kfree(cs5535au); + return 0; +} + +static int snd_cs5535audio_dev_free(snd_device_t *device) +{ + cs5535audio_t *cs5535au = device->device_data; + return snd_cs5535audio_free(cs5535au); +} + +static int __devinit snd_cs5535audio_create(snd_card_t *card, + struct pci_dev *pci, + cs5535audio_t **rcs5535au) +{ + cs5535audio_t *cs5535au; + + int err; + static snd_device_ops_t ops = { + .dev_free = snd_cs5535audio_dev_free, + }; + + *rcs5535au = NULL; + if ((err = pci_enable_device(pci)) < 0) + return err; + + if (pci_set_dma_mask(pci, DMA_32BIT_MASK) < 0 || + pci_set_consistent_dma_mask(pci, 
DMA_32BIT_MASK) < 0) { + printk(KERN_WARNING "unable to get 32bit dma\n"); + err = -ENXIO; + goto pcifail; + } + + cs5535au = kzalloc(sizeof(*cs5535au), GFP_KERNEL); + if (cs5535au == NULL) { + err = -ENOMEM; + goto pcifail; + } + + spin_lock_init(&cs5535au->reg_lock); + cs5535au->card = card; + cs5535au->pci = pci; + cs5535au->irq = -1; + + if ((err = pci_request_regions(pci, "CS5535 Audio")) < 0) { + kfree(cs5535au); + goto pcifail; + } + + cs5535au->port = pci_resource_start(pci, 0); + + if (request_irq(pci->irq, snd_cs5535audio_interrupt, + SA_INTERRUPT|SA_SHIRQ, "CS5535 Audio", cs5535au)) { + snd_printk("unable to grab IRQ %d\n", pci->irq); + err = -EBUSY; + goto sndfail; + } + + cs5535au->irq = pci->irq; + pci_set_master(pci); + + if ((err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, + cs5535au, &ops)) < 0) + goto sndfail; + + snd_card_set_dev(card, &pci->dev); + + *rcs5535au = cs5535au; + return 0; + +sndfail: /* leave the device alive, just kill the snd */ + snd_cs5535audio_free(cs5535au); + return err; + +pcifail: + pci_disable_device(pci); + return err; +} + +static int __devinit snd_cs5535audio_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + static int dev; + snd_card_t *card; + cs5535audio_t *cs5535au; + int err; + + if (dev >= SNDRV_CARDS) + return -ENODEV; + if (!enable[dev]) { + dev++; + return -ENOENT; + } + + card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0); + if (card == NULL) + return -ENOMEM; + + if ((err = snd_cs5535audio_create(card, pci, &cs5535au)) < 0) + goto probefail_out; + + if ((err = snd_cs5535audio_mixer(cs5535au)) < 0) + goto probefail_out; + + if ((err = snd_cs5535audio_pcm(cs5535au)) < 0) + goto probefail_out; + + strcpy(card->driver, DRIVER_NAME); + + strcpy(card->shortname, "CS5535 Audio"); + sprintf(card->longname, "%s %s at 0x%lx, irq %i", + card->shortname, card->driver, + cs5535au->port, cs5535au->irq); + + if ((err = snd_card_register(card)) < 0) + goto probefail_out; + + pci_set_drvdata(pci, 
card); + dev++; + return 0; + +probefail_out: + snd_card_free(card); + return err; +} + +static void __devexit snd_cs5535audio_remove(struct pci_dev *pci) +{ + snd_card_free(pci_get_drvdata(pci)); + pci_set_drvdata(pci, NULL); +} + +static struct pci_driver driver = { + .name = DRIVER_NAME, + .id_table = snd_cs5535audio_ids, + .probe = snd_cs5535audio_probe, + .remove = __devexit_p(snd_cs5535audio_remove), +}; + +static int __init alsa_card_cs5535audio_init(void) +{ + return pci_module_init(&driver); +} + +static void __exit alsa_card_cs5535audio_exit(void) +{ + pci_unregister_driver(&driver); +} + +module_init(alsa_card_cs5535audio_init) +module_exit(alsa_card_cs5535audio_exit) + +MODULE_AUTHOR("Jaya Kumar"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CS5535 Audio"); +MODULE_SUPPORTED_DEVICE("CS5535 Audio"); diff --git a/sound/pci/cs5535audio/cs5535audio.h b/sound/pci/cs5535audio/cs5535audio.h new file mode 100644 index 000000000000..e28177fb0991 --- /dev/null +++ b/sound/pci/cs5535audio/cs5535audio.h @@ -0,0 +1,123 @@ +#ifndef __SOUND_CS5535AUDIO_H +#define __SOUND_CS5535AUDIO_H + +#define cs_writel(cs5535au, reg, val) outl(val, (int) cs5535au->port + reg) +#define cs_writeb(cs5535au, reg, val) outb(val, (int) cs5535au->port + reg) +#define cs_readl(cs5535au, reg) inl((unsigned short) (cs5535au->port + reg)) +#define cs_readw(cs5535au, reg) inw((unsigned short) (cs5535au->port + reg)) +#define cs_readb(cs5535au, reg) inb((unsigned short) (cs5535au->port + reg)) + +#define CS5535AUDIO_MAX_DESCRIPTORS 128 + +/* acc_codec bar0 reg addrs */ +#define ACC_GPIO_STATUS 0x00 +#define ACC_CODEC_STATUS 0x08 +#define ACC_CODEC_CNTL 0x0C +#define ACC_IRQ_STATUS 0x12 +#define ACC_BM0_CMD 0x20 +#define ACC_BM1_CMD 0x28 +#define ACC_BM2_CMD 0x30 +#define ACC_BM3_CMD 0x38 +#define ACC_BM4_CMD 0x40 +#define ACC_BM5_CMD 0x48 +#define ACC_BM6_CMD 0x50 +#define ACC_BM7_CMD 0x58 +#define ACC_BM0_PRD 0x24 +#define ACC_BM1_PRD 0x2C +#define ACC_BM2_PRD 0x34 +#define ACC_BM3_PRD 0x3C 
+#define ACC_BM4_PRD 0x44 +#define ACC_BM5_PRD 0x4C +#define ACC_BM6_PRD 0x54 +#define ACC_BM7_PRD 0x5C +#define ACC_BM0_STATUS 0x21 +#define ACC_BM1_STATUS 0x29 +#define ACC_BM2_STATUS 0x31 +#define ACC_BM3_STATUS 0x39 +#define ACC_BM4_STATUS 0x41 +#define ACC_BM5_STATUS 0x49 +#define ACC_BM6_STATUS 0x51 +#define ACC_BM7_STATUS 0x59 +#define ACC_BM0_PNTR 0x60 +#define ACC_BM1_PNTR 0x64 +#define ACC_BM2_PNTR 0x68 +#define ACC_BM3_PNTR 0x6C +#define ACC_BM4_PNTR 0x70 +#define ACC_BM5_PNTR 0x74 +#define ACC_BM6_PNTR 0x78 +#define ACC_BM7_PNTR 0x7C +/* acc_codec bar0 reg bits */ +/* ACC_IRQ_STATUS */ +#define IRQ_STS 0 +#define WU_IRQ_STS 1 +#define BM0_IRQ_STS 2 +#define BM1_IRQ_STS 3 +#define BM2_IRQ_STS 4 +#define BM3_IRQ_STS 5 +#define BM4_IRQ_STS 6 +#define BM5_IRQ_STS 7 +#define BM6_IRQ_STS 8 +#define BM7_IRQ_STS 9 +/* ACC_BMX_STATUS */ +#define EOP (1<<0) +#define BM_EOP_ERR (1<<1) +/* ACC_BMX_CTL */ +#define BM_CTL_EN 0x00000001 +#define BM_CTL_PAUSE 0x00000011 +#define BM_CTL_DIS 0x00000000 +#define BM_CTL_BYTE_ORD_LE 0x00000000 +#define BM_CTL_BYTE_ORD_BE 0x00000100 +/* cs5535 specific ac97 codec register defines */ +#define CMD_MASK 0xFF00FFFF +#define CMD_NEW 0x00010000 +#define STS_NEW 0x00020000 +#define PRM_RDY_STS 0x00800000 +#define ACC_CODEC_CNTL_WR_CMD (~0x80000000) +#define ACC_CODEC_CNTL_RD_CMD 0x80000000 +#define PRD_JMP 0x2000 +#define PRD_EOP 0x4000 +#define PRD_EOT 0x8000 + +typedef struct _snd_cs5535audio cs5535audio_t; +typedef struct snd_cs5535audio_dma cs5535audio_dma_t; +typedef struct snd_cs5535audio_dma_ops cs5535audio_dma_ops_t; + +enum { CS5535AUDIO_DMA_PLAYBACK, CS5535AUDIO_DMA_CAPTURE, NUM_CS5535AUDIO_DMAS }; +struct snd_cs5535audio_dma_ops { + int type; + void (*enable_dma)(cs5535audio_t *cs5535au); + void (*disable_dma)(cs5535audio_t *cs5535au); + void (*pause_dma)(cs5535audio_t *cs5535au); + void (*setup_prd)(cs5535audio_t *cs5535au, u32 prd_addr); + u32 (*read_dma_pntr)(cs5535audio_t *cs5535au); +}; + +typedef struct 
cs5535audio_dma_desc { + u32 addr; + u16 size; + u16 ctlreserved; +} cs5535audio_dma_desc_t; + +struct snd_cs5535audio_dma { + const cs5535audio_dma_ops_t *ops; + struct snd_dma_buffer desc_buf; + snd_pcm_substream_t *substream; + unsigned int buf_addr, buf_bytes; + unsigned int period_bytes, periods; +}; + +struct _snd_cs5535audio { + snd_card_t *card; + ac97_t *ac97; + int irq; + struct pci_dev *pci; + unsigned long port; + spinlock_t reg_lock; + snd_pcm_substream_t *playback_substream; + snd_pcm_substream_t *capture_substream; + cs5535audio_dma_t dmas[NUM_CS5535AUDIO_DMAS]; +}; + +int __devinit snd_cs5535audio_pcm(cs5535audio_t *cs5535audio); +#endif /* __SOUND_CS5535AUDIO_H */ + diff --git a/sound/pci/cs5535audio/cs5535audio_pcm.c b/sound/pci/cs5535audio/cs5535audio_pcm.c new file mode 100644 index 000000000000..5802ed9d57be --- /dev/null +++ b/sound/pci/cs5535audio/cs5535audio_pcm.c @@ -0,0 +1,430 @@ +/* + * Driver for audio on multifunction CS5535 companion device + * Copyright (C) Jaya Kumar + * + * Based on Jaroslav Kysela and Takashi Iwai's examples. + * This work was sponsored by CIS(M) Sdn Bhd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * todo: add be fmt support, spdif, pm + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cs5535audio.h" + +static snd_pcm_hardware_t snd_cs5535audio_playback = +{ + .info = ( + SNDRV_PCM_INFO_MMAP | + SNDRV_PCM_INFO_INTERLEAVED | + SNDRV_PCM_INFO_BLOCK_TRANSFER | + SNDRV_PCM_INFO_MMAP_VALID | + SNDRV_PCM_INFO_PAUSE | + SNDRV_PCM_INFO_SYNC_START + ), + .formats = ( + SNDRV_PCM_FMTBIT_S16_LE + ), + .rates = ( + SNDRV_PCM_RATE_CONTINUOUS | + SNDRV_PCM_RATE_8000_48000 + ), + .rate_min = 4000, + .rate_max = 48000, + .channels_min = 2, + .channels_max = 2, + .buffer_bytes_max = (128*1024), + .period_bytes_min = 64, + .period_bytes_max = (64*1024 - 16), + .periods_min = 1, + .periods_max = CS5535AUDIO_MAX_DESCRIPTORS, + .fifo_size = 0, +}; + +static snd_pcm_hardware_t snd_cs5535audio_capture = +{ + .info = ( + SNDRV_PCM_INFO_MMAP | + SNDRV_PCM_INFO_INTERLEAVED | + SNDRV_PCM_INFO_BLOCK_TRANSFER | + SNDRV_PCM_INFO_MMAP_VALID | + SNDRV_PCM_INFO_SYNC_START + ), + .formats = ( + SNDRV_PCM_FMTBIT_S16_LE + ), + .rates = ( + SNDRV_PCM_RATE_CONTINUOUS | + SNDRV_PCM_RATE_8000_48000 + ), + .rate_min = 4000, + .rate_max = 48000, + .channels_min = 2, + .channels_max = 2, + .buffer_bytes_max = (128*1024), + .period_bytes_min = 64, + .period_bytes_max = (64*1024 - 16), + .periods_min = 1, + .periods_max = CS5535AUDIO_MAX_DESCRIPTORS, + .fifo_size = 0, +}; + +static int snd_cs5535audio_playback_open(snd_pcm_substream_t *substream) +{ + int err; + cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream); + snd_pcm_runtime_t *runtime = substream->runtime; + + runtime->hw = snd_cs5535audio_playback; + cs5535au->playback_substream = substream; + runtime->private_data = 
&(cs5535au->dmas[CS5535AUDIO_DMA_PLAYBACK]); + snd_pcm_set_sync(substream); + if ((err = snd_pcm_hw_constraint_integer(runtime, + SNDRV_PCM_HW_PARAM_PERIODS)) < 0) + return err; + + return 0; +} + +static int snd_cs5535audio_playback_close(snd_pcm_substream_t *substream) +{ + return 0; +} + +#define CS5535AUDIO_DESC_LIST_SIZE \ + PAGE_ALIGN(CS5535AUDIO_MAX_DESCRIPTORS * sizeof(cs5535audio_dma_desc_t)) + +static int cs5535audio_build_dma_packets(cs5535audio_t *cs5535au, + cs5535audio_dma_t *dma, + snd_pcm_substream_t *substream, + unsigned int periods, + unsigned int period_bytes) +{ + unsigned int i; + u32 addr, desc_addr, jmpprd_addr; + cs5535audio_dma_desc_t *lastdesc; + + if (periods > CS5535AUDIO_MAX_DESCRIPTORS) + return -ENOMEM; + + if (dma->desc_buf.area == NULL) { + if (snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, + snd_dma_pci_data(cs5535au->pci), + CS5535AUDIO_DESC_LIST_SIZE+1, + &dma->desc_buf) < 0) + return -ENOMEM; + dma->period_bytes = dma->periods = 0; + } + + if (dma->periods == periods && dma->period_bytes == period_bytes) + return 0; + + /* the u32 cast is okay because in snd*create we succesfully told + pci alloc that we're only 32 bit capable so the uppper will be 0 */ + addr = (u32) substream->runtime->dma_addr; + desc_addr = (u32) dma->desc_buf.addr; + for (i = 0; i < periods; i++) { + cs5535audio_dma_desc_t *desc = + &((cs5535audio_dma_desc_t *) dma->desc_buf.area)[i]; + desc->addr = cpu_to_le32(addr); + desc->size = period_bytes; + desc->ctlreserved = PRD_EOP; + desc_addr += sizeof(cs5535audio_dma_desc_t); + addr += period_bytes; + } + /* we reserved one dummy descriptor at the end to do the PRD jump */ + lastdesc = &((cs5535audio_dma_desc_t *) dma->desc_buf.area)[periods]; + lastdesc->addr = cpu_to_le32((u32) dma->desc_buf.addr); + lastdesc->size = 0; + lastdesc->ctlreserved = PRD_JMP; + jmpprd_addr = cpu_to_le32(lastdesc->addr + + (sizeof(cs5535audio_dma_desc_t)*periods)); + + dma->period_bytes = period_bytes; + dma->periods = periods; + 
spin_lock_irq(&cs5535au->reg_lock); + dma->ops->disable_dma(cs5535au); + dma->ops->setup_prd(cs5535au, jmpprd_addr); + spin_unlock_irq(&cs5535au->reg_lock); + return 0; +} + +static void cs5535audio_playback_enable_dma(cs5535audio_t *cs5535au) +{ + cs_writeb(cs5535au, ACC_BM0_CMD, BM_CTL_EN); +} + +static void cs5535audio_playback_disable_dma(cs5535audio_t *cs5535au) +{ + cs_writeb(cs5535au, ACC_BM0_CMD, 0); +} + +static void cs5535audio_playback_pause_dma(cs5535audio_t *cs5535au) +{ + cs_writeb(cs5535au, ACC_BM0_CMD, BM_CTL_PAUSE); +} + +static void cs5535audio_playback_setup_prd(cs5535audio_t *cs5535au, + u32 prd_addr) +{ + cs_writel(cs5535au, ACC_BM0_PRD, prd_addr); +} + +static u32 cs5535audio_playback_read_dma_pntr(cs5535audio_t *cs5535au) +{ + return cs_readl(cs5535au, ACC_BM0_PNTR); +} + +static void cs5535audio_capture_enable_dma(cs5535audio_t *cs5535au) +{ + cs_writeb(cs5535au, ACC_BM1_CMD, BM_CTL_EN); +} + +static void cs5535audio_capture_disable_dma(cs5535audio_t *cs5535au) +{ + cs_writeb(cs5535au, ACC_BM1_CMD, 0); +} + +static void cs5535audio_capture_pause_dma(cs5535audio_t *cs5535au) +{ + cs_writeb(cs5535au, ACC_BM1_CMD, BM_CTL_PAUSE); +} + +static void cs5535audio_capture_setup_prd(cs5535audio_t *cs5535au, + u32 prd_addr) +{ + cs_writel(cs5535au, ACC_BM1_PRD, prd_addr); +} + +static u32 cs5535audio_capture_read_dma_pntr(cs5535audio_t *cs5535au) +{ + return cs_readl(cs5535au, ACC_BM1_PNTR); +} + +static void cs5535audio_clear_dma_packets(cs5535audio_t *cs5535au, + cs5535audio_dma_t *dma, + snd_pcm_substream_t *substream) +{ + snd_dma_free_pages(&dma->desc_buf); + dma->desc_buf.area = NULL; +} + +static int snd_cs5535audio_hw_params(snd_pcm_substream_t *substream, + snd_pcm_hw_params_t *hw_params) +{ + cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream); + cs5535audio_dma_t *dma = substream->runtime->private_data; + int err; + + err = snd_pcm_lib_malloc_pages(substream, + params_buffer_bytes(hw_params)); + if (err < 0) + return err; + 
dma->buf_addr = substream->runtime->dma_addr; + dma->buf_bytes = params_buffer_bytes(hw_params); + + err = cs5535audio_build_dma_packets(cs5535au, dma, substream, + params_periods(hw_params), + params_period_bytes(hw_params)); + return err; +} + +static int snd_cs5535audio_hw_free(snd_pcm_substream_t *substream) +{ + cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream); + cs5535audio_dma_t *dma = substream->runtime->private_data; + + cs5535audio_clear_dma_packets(cs5535au, dma, substream); + return snd_pcm_lib_free_pages(substream); +} + +static int snd_cs5535audio_playback_prepare(snd_pcm_substream_t *substream) +{ + cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream); + return snd_ac97_set_rate(cs5535au->ac97, AC97_PCM_FRONT_DAC_RATE, + substream->runtime->rate); +} + +static int snd_cs5535audio_trigger(snd_pcm_substream_t *substream, int cmd) +{ + cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream); + cs5535audio_dma_t *dma = substream->runtime->private_data; + + switch (cmd) { + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + spin_lock_irq(&cs5535au->reg_lock); + dma->ops->pause_dma(cs5535au); + spin_unlock_irq(&cs5535au->reg_lock); + break; + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + spin_lock_irq(&cs5535au->reg_lock); + dma->ops->enable_dma(cs5535au); + spin_unlock_irq(&cs5535au->reg_lock); + break; + case SNDRV_PCM_TRIGGER_START: + spin_lock_irq(&cs5535au->reg_lock); + dma->ops->enable_dma(cs5535au); + spin_unlock_irq(&cs5535au->reg_lock); + break; + case SNDRV_PCM_TRIGGER_STOP: + spin_lock_irq(&cs5535au->reg_lock); + dma->ops->disable_dma(cs5535au); + spin_unlock_irq(&cs5535au->reg_lock); + break; + default: + snd_printk(KERN_ERR "unhandled trigger\n"); + return -EINVAL; + break; + } + return 0; +} + +static snd_pcm_uframes_t snd_cs5535audio_pcm_pointer(snd_pcm_substream_t + *substream) +{ + cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream); + u32 curdma; + cs5535audio_dma_t *dma; + + dma = substream->runtime->private_data; + curdma = 
dma->ops->read_dma_pntr(cs5535au); + if (curdma < dma->buf_addr) { + snd_printk(KERN_ERR "curdma=%x < %x bufaddr.\n", + curdma, dma->buf_addr); + return 0; + } + curdma -= dma->buf_addr; + if (curdma >= dma->buf_bytes) { + snd_printk(KERN_ERR "diff=%x >= %x buf_bytes.\n", + curdma, dma->buf_bytes); + return 0; + } + return bytes_to_frames(substream->runtime, curdma); +} + +static int snd_cs5535audio_capture_open(snd_pcm_substream_t *substream) +{ + int err; + cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream); + snd_pcm_runtime_t *runtime = substream->runtime; + + runtime->hw = snd_cs5535audio_capture; + cs5535au->capture_substream = substream; + runtime->private_data = &(cs5535au->dmas[CS5535AUDIO_DMA_CAPTURE]); + snd_pcm_set_sync(substream); + if ((err = snd_pcm_hw_constraint_integer(runtime, + SNDRV_PCM_HW_PARAM_PERIODS)) < 0) + return err; + return 0; +} + +static int snd_cs5535audio_capture_close(snd_pcm_substream_t *substream) +{ + return 0; +} + +static int snd_cs5535audio_capture_prepare(snd_pcm_substream_t *substream) +{ + cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream); + return snd_ac97_set_rate(cs5535au->ac97, AC97_PCM_LR_ADC_RATE, + substream->runtime->rate); +} + +static snd_pcm_ops_t snd_cs5535audio_playback_ops = { + .open = snd_cs5535audio_playback_open, + .close = snd_cs5535audio_playback_close, + .ioctl = snd_pcm_lib_ioctl, + .hw_params = snd_cs5535audio_hw_params, + .hw_free = snd_cs5535audio_hw_free, + .prepare = snd_cs5535audio_playback_prepare, + .trigger = snd_cs5535audio_trigger, + .pointer = snd_cs5535audio_pcm_pointer, +}; + +static snd_pcm_ops_t snd_cs5535audio_capture_ops = { + .open = snd_cs5535audio_capture_open, + .close = snd_cs5535audio_capture_close, + .ioctl = snd_pcm_lib_ioctl, + .hw_params = snd_cs5535audio_hw_params, + .hw_free = snd_cs5535audio_hw_free, + .prepare = snd_cs5535audio_capture_prepare, + .trigger = snd_cs5535audio_trigger, + .pointer = snd_cs5535audio_pcm_pointer, +}; + +static void 
snd_cs5535audio_pcm_free(snd_pcm_t *pcm) +{ + snd_pcm_lib_preallocate_free_for_all(pcm); +} + +static cs5535audio_dma_ops_t snd_cs5535audio_playback_dma_ops = { + .type = CS5535AUDIO_DMA_PLAYBACK, + .enable_dma = cs5535audio_playback_enable_dma, + .disable_dma = cs5535audio_playback_disable_dma, + .setup_prd = cs5535audio_playback_setup_prd, + .pause_dma = cs5535audio_playback_pause_dma, + .read_dma_pntr = cs5535audio_playback_read_dma_pntr, +}; + +static cs5535audio_dma_ops_t snd_cs5535audio_capture_dma_ops = { + .type = CS5535AUDIO_DMA_CAPTURE, + .enable_dma = cs5535audio_capture_enable_dma, + .disable_dma = cs5535audio_capture_disable_dma, + .setup_prd = cs5535audio_capture_setup_prd, + .pause_dma = cs5535audio_capture_pause_dma, + .read_dma_pntr = cs5535audio_capture_read_dma_pntr, +}; + +int __devinit snd_cs5535audio_pcm(cs5535audio_t *cs5535au) +{ + snd_pcm_t *pcm; + int err; + + err = snd_pcm_new(cs5535au->card, "CS5535 Audio", 0, 1, 1, &pcm); + if (err < 0) + return err; + + cs5535au->dmas[CS5535AUDIO_DMA_PLAYBACK].ops = + &snd_cs5535audio_playback_dma_ops; + cs5535au->dmas[CS5535AUDIO_DMA_CAPTURE].ops = + &snd_cs5535audio_capture_dma_ops; + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, + &snd_cs5535audio_playback_ops); + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, + &snd_cs5535audio_capture_ops); + + pcm->private_data = cs5535au; + pcm->private_free = snd_cs5535audio_pcm_free; + pcm->info_flags = 0; + strcpy(pcm->name, "CS5535 Audio"); + + snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, + snd_dma_pci_data(cs5535au->pci), + 64*1024, 128*1024); + + return 0; +} + -- cgit v1.2.3-71-gd317 From 4c98cfef2efa6b6662ac28c4f0069964bbd9fdf9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 29 Nov 2005 09:09:32 +0100 Subject: [ALSA] PATCH] Add PM support to PnP drivers Add suspend/resume callback to pnp_driver and pnp_card_driver. 
Signed-off-by: Takashi Iwai --- drivers/pnp/card.c | 25 +++++++++++++++++++++++++ drivers/pnp/driver.c | 20 ++++++++++++++++++++ include/linux/pnp.h | 5 +++++ 3 files changed, 50 insertions(+) (limited to 'include/linux') diff --git a/drivers/pnp/card.c b/drivers/pnp/card.c index bd7c966ea2d7..0ecbe4edbec1 100644 --- a/drivers/pnp/card.c +++ b/drivers/pnp/card.c @@ -69,6 +69,7 @@ static int card_probe(struct pnp_card * card, struct pnp_card_driver * drv) return 0; clink->card = card; clink->driver = drv; + clink->pm_state = PMSG_ON; if (drv->probe) { if (drv->probe(clink, id)>=0) return 1; @@ -333,6 +334,28 @@ void pnp_release_card_device(struct pnp_dev * dev) up_write(&dev->dev.bus->subsys.rwsem); } +/* + * suspend/resume callbacks + */ +static int card_suspend(struct pnp_dev *dev, pm_message_t state) +{ + struct pnp_card_link *link = dev->card_link; + if (link->pm_state.event == state.event) + return 0; + link->pm_state = state; + return link->driver->suspend(link, state); +} + +static int card_resume(struct pnp_dev *dev) +{ + struct pnp_card_link *link = dev->card_link; + if (link->pm_state.event == PM_EVENT_ON) + return 0; + link->pm_state = PMSG_ON; + link->driver->resume(link); + return 0; +} + /** * pnp_register_card_driver - registers a PnP card driver with the PnP Layer * @drv: pointer to the driver to register @@ -348,6 +371,8 @@ int pnp_register_card_driver(struct pnp_card_driver * drv) drv->link.flags = drv->flags; drv->link.probe = NULL; drv->link.remove = &card_remove_first; + drv->link.suspend = drv->suspend ? card_suspend : NULL; + drv->link.resume = drv->resume ? 
card_resume : NULL; spin_lock(&pnp_lock); list_add_tail(&drv->global_list, &pnp_card_drivers); diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c index d3ccce706ab4..ea2cb9a8b21d 100644 --- a/drivers/pnp/driver.c +++ b/drivers/pnp/driver.c @@ -146,10 +146,30 @@ static int pnp_bus_match(struct device *dev, struct device_driver *drv) return 1; } +static int pnp_bus_suspend(struct device *dev, pm_message_t state) +{ + struct pnp_dev * pnp_dev = to_pnp_dev(dev); + struct pnp_driver * pnp_drv = pnp_dev->driver; + + if (pnp_drv && pnp_drv->suspend) + return pnp_drv->suspend(pnp_dev, state); + return 0; +} + +static void pnp_bus_resume(struct device *dev) +{ + struct pnp_dev * pnp_dev = to_pnp_dev(dev); + struct pnp_driver * pnp_drv = pnp_dev->driver; + + if (pnp_drv && pnp_drv->resume) + pnp_drv->resume(pnp_dev); +} struct bus_type pnp_bus_type = { .name = "pnp", .match = pnp_bus_match, + .suspend = pnp_bus_suspend, + .resume = pnp_bus_resume, }; diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 584d57cb393a..472319fcf631 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -162,6 +162,7 @@ struct pnp_card_link { struct pnp_card * card; struct pnp_card_driver * driver; void * driver_data; + pm_message_t pm_state; }; static inline void *pnp_get_card_drvdata (struct pnp_card_link *pcard) @@ -294,6 +295,8 @@ struct pnp_driver { unsigned int flags; int (*probe) (struct pnp_dev *dev, const struct pnp_device_id *dev_id); void (*remove) (struct pnp_dev *dev); + int (*suspend) (struct pnp_dev *dev, pm_message_t state); + int (*resume) (struct pnp_dev *dev); struct device_driver driver; }; @@ -306,6 +309,8 @@ struct pnp_card_driver { unsigned int flags; int (*probe) (struct pnp_card_link *card, const struct pnp_card_device_id *card_id); void (*remove) (struct pnp_card_link *card); + int (*suspend) (struct pnp_card_link *card, pm_message_t state); + int (*resume) (struct pnp_card_link *card); struct pnp_driver link; }; -- cgit v1.2.3-71-gd317 From 
68094e3251a664ee1389fcf179497237cbf78331 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Tue, 29 Nov 2005 09:09:32 +0100 Subject: [ALSA] [PATCH] alsa: Improved PnP suspend support Also use the PnP functions to start/stop the devices during the suspend so that drivers will not have to duplicate this code. Cc: Adam Belay Cc: Jaroslav Kysela Cc: Takashi Iwai Signed-off-by: Pierre Ossman Signed-off-by: Andrew Morton Signed-off-by: Takashi Iwai --- drivers/pnp/driver.c | 37 ++++++++++++++++++++---- drivers/pnp/manager.c | 78 ++++++++++++++++++++++++++++++++++++++------------- include/linux/pnp.h | 4 +++ 3 files changed, 95 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c index ea2cb9a8b21d..15fb758a9e52 100644 --- a/drivers/pnp/driver.c +++ b/drivers/pnp/driver.c @@ -150,19 +150,46 @@ static int pnp_bus_suspend(struct device *dev, pm_message_t state) { struct pnp_dev * pnp_dev = to_pnp_dev(dev); struct pnp_driver * pnp_drv = pnp_dev->driver; + int error; + + if (!pnp_drv) + return 0; + + if (pnp_drv->suspend) { + error = pnp_drv->suspend(pnp_dev, state); + if (error) + return error; + } + + if (!(pnp_drv->flags & PNP_DRIVER_RES_DO_NOT_CHANGE) && + pnp_can_disable(pnp_dev)) { + error = pnp_stop_dev(pnp_dev); + if (error) + return error; + } - if (pnp_drv && pnp_drv->suspend) - return pnp_drv->suspend(pnp_dev, state); return 0; } -static void pnp_bus_resume(struct device *dev) +static int pnp_bus_resume(struct device *dev) { struct pnp_dev * pnp_dev = to_pnp_dev(dev); struct pnp_driver * pnp_drv = pnp_dev->driver; + int error; + + if (!pnp_drv) + return 0; + + if (!(pnp_drv->flags & PNP_DRIVER_RES_DO_NOT_CHANGE)) { + error = pnp_start_dev(pnp_dev); + if (error) + return error; + } - if (pnp_drv && pnp_drv->resume) - pnp_drv->resume(pnp_dev); + if (pnp_drv->resume) + return pnp_drv->resume(pnp_dev); + + return 0; } struct bus_type pnp_bus_type = { diff --git a/drivers/pnp/manager.c 
b/drivers/pnp/manager.c index 261668618b2d..c4256aa32bcb 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -469,6 +469,53 @@ int pnp_auto_config_dev(struct pnp_dev *dev) return -EBUSY; } +/** + * pnp_start_dev - low-level start of the PnP device + * @dev: pointer to the desired device + * + * assumes that resources have alread been allocated + */ + +int pnp_start_dev(struct pnp_dev *dev) +{ + if (!pnp_can_write(dev)) { + pnp_info("Device %s does not supported activation.", dev->dev.bus_id); + return -EINVAL; + } + + if (dev->protocol->set(dev, &dev->res)<0) { + pnp_err("Failed to activate device %s.", dev->dev.bus_id); + return -EIO; + } + + pnp_info("Device %s activated.", dev->dev.bus_id); + + return 0; +} + +/** + * pnp_stop_dev - low-level disable of the PnP device + * @dev: pointer to the desired device + * + * does not free resources + */ + +int pnp_stop_dev(struct pnp_dev *dev) +{ + if (!pnp_can_disable(dev)) { + pnp_info("Device %s does not supported disabling.", dev->dev.bus_id); + return -EINVAL; + } + if (dev->protocol->disable(dev)<0) { + pnp_err("Failed to disable device %s.", dev->dev.bus_id); + return -EIO; + } + + pnp_info("Device %s disabled.", dev->dev.bus_id); + + return 0; +} + /** * pnp_activate_dev - activates a PnP device for use * @dev: pointer to the desired device @@ -477,6 +524,8 @@ int pnp_auto_config_dev(struct pnp_dev *dev) */ int pnp_activate_dev(struct pnp_dev *dev) { + int error; + if (!dev) return -EINVAL; if (dev->active) { @@ -487,18 +536,11 @@ int pnp_activate_dev(struct pnp_dev *dev) if (pnp_auto_config_dev(dev)) return -EBUSY; - if (!pnp_can_write(dev)) { - pnp_info("Device %s does not supported activation.", dev->dev.bus_id); - return -EINVAL; - } - - if (dev->protocol->set(dev, &dev->res)<0) { - pnp_err("Failed to activate device %s.", dev->dev.bus_id); - return -EIO; - } + error = pnp_start_dev(dev); + if (error) + return error; dev->active = 1; - pnp_info("Device %s activated.", dev->dev.bus_id); return 1; 
} @@ -511,23 +553,19 @@ int pnp_activate_dev(struct pnp_dev *dev) */ int pnp_disable_dev(struct pnp_dev *dev) { + int error; + if (!dev) return -EINVAL; if (!dev->active) { return 0; /* the device is already disabled */ } - if (!pnp_can_disable(dev)) { - pnp_info("Device %s does not supported disabling.", dev->dev.bus_id); - return -EINVAL; - } - if (dev->protocol->disable(dev)<0) { - pnp_err("Failed to disable device %s.", dev->dev.bus_id); - return -EIO; - } + error = pnp_stop_dev(dev); + if (error) + return error; dev->active = 0; - pnp_info("Device %s disabled.", dev->dev.bus_id); /* release the resources so that other devices can use them */ down(&pnp_res_mutex); @@ -558,6 +596,8 @@ EXPORT_SYMBOL(pnp_manual_config_dev); #if 0 EXPORT_SYMBOL(pnp_auto_config_dev); #endif +EXPORT_SYMBOL(pnp_start_dev); +EXPORT_SYMBOL(pnp_stop_dev); EXPORT_SYMBOL(pnp_activate_dev); EXPORT_SYMBOL(pnp_disable_dev); EXPORT_SYMBOL(pnp_resource_change); diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 472319fcf631..93b0959eb40f 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -385,6 +385,8 @@ void pnp_init_resource_table(struct pnp_resource_table *table); int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res, int mode); int pnp_auto_config_dev(struct pnp_dev *dev); int pnp_validate_config(struct pnp_dev *dev); +int pnp_start_dev(struct pnp_dev *dev); +int pnp_stop_dev(struct pnp_dev *dev); int pnp_activate_dev(struct pnp_dev *dev); int pnp_disable_dev(struct pnp_dev *dev); void pnp_resource_change(struct resource *resource, unsigned long start, unsigned long size); @@ -428,6 +430,8 @@ static inline void pnp_init_resource_table(struct pnp_resource_table *table) { } static inline int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res, int mode) { return -ENODEV; } static inline int pnp_auto_config_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_validate_config(struct pnp_dev *dev) { return -ENODEV; 
} +static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; } +static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_disable_dev(struct pnp_dev *dev) { return -ENODEV; } static inline void pnp_resource_change(struct resource *resource, unsigned long start, unsigned long size) { } -- cgit v1.2.3-71-gd317 From 4d399cae3f5ec1f59b9e88084aae09c4f00760c9 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 3 Jan 2006 13:19:13 +0100 Subject: remove pointers to the defunct UDF mailing list This patch removes pointers to the defunct UDF mailing list. Signed-off-by: Adrian Bunk --- MAINTAINERS | 1 - fs/udf/balloc.c | 5 ----- fs/udf/crc.c | 5 ----- fs/udf/dir.c | 5 ----- fs/udf/directory.c | 5 ----- fs/udf/file.c | 5 ----- fs/udf/fsync.c | 5 ----- fs/udf/ialloc.c | 5 ----- fs/udf/inode.c | 5 ----- fs/udf/lowlevel.c | 5 ----- fs/udf/misc.c | 5 ----- fs/udf/namei.c | 5 ----- fs/udf/partition.c | 5 ----- fs/udf/super.c | 5 ----- fs/udf/symlink.c | 5 ----- fs/udf/truncate.c | 5 ----- fs/udf/unicode.c | 5 ----- include/linux/udf_fs.h | 5 ----- include/linux/udf_fs_i.h | 5 ----- include/linux/udf_fs_sb.h | 5 ----- 20 files changed, 96 deletions(-) (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 6af683025ae0..bbe5f04f59ec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2587,7 +2587,6 @@ S: Maintained UDF FILESYSTEM P: Ben Fennema M: bfennema@falcon.csc.calpoly.edu -L: linux_udf@hpesjro.fc.hp.com W: http://linux-udf.sourceforge.net S: Maintained diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index b9ded26b10a9..6598a5037ac8 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -4,11 +4,6 @@ * PURPOSE * Block allocation handling routines for the OSTA-UDF(tm) filesystem. 
* - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/fs/udf/crc.c b/fs/udf/crc.c index d95c6e38a455..1b82a4adc2f7 100644 --- a/fs/udf/crc.c +++ b/fs/udf/crc.c @@ -14,11 +14,6 @@ * * AT&T gives permission for the free use of the CRC source code. * - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 82440b731142..f5222527fe39 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -4,11 +4,6 @@ * PURPOSE * Directory handling routines for the OSTA-UDF(tm) filesystem. * - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 9a61ecc5451b..fe751a2a0e47 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -4,11 +4,6 @@ * PURPOSE * Directory related functions * - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). 
Copies of the GPL can be obtained from: diff --git a/fs/udf/file.c b/fs/udf/file.c index 01f520c71dc1..8a388289040d 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -4,11 +4,6 @@ * PURPOSE * File handling routines for the OSTA-UDF(tm) filesystem. * - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/fs/udf/fsync.c b/fs/udf/fsync.c index 2dde6b888c2b..5887d78cde43 100644 --- a/fs/udf/fsync.c +++ b/fs/udf/fsync.c @@ -4,11 +4,6 @@ * PURPOSE * Fsync handling routines for the OSTA-UDF(tm) filesystem. * - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index a7e5d40f1ebc..c9b707b470ca 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -4,11 +4,6 @@ * PURPOSE * Inode allocation handling routines for the OSTA-UDF(tm) filesystem. * - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/fs/udf/inode.c b/fs/udf/inode.c index b83890beaaac..4014f17d382e 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -4,11 +4,6 @@ * PURPOSE * Inode handling routines for the OSTA-UDF(tm) filesystem. 
* - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c index 2da5087dfe05..084216107667 100644 --- a/fs/udf/lowlevel.c +++ b/fs/udf/lowlevel.c @@ -4,11 +4,6 @@ * PURPOSE * Low Level Device Routines for the UDF filesystem * - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/fs/udf/misc.c b/fs/udf/misc.c index fd321f9ace83..cc8ca3254db1 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -4,11 +4,6 @@ * PURPOSE * Miscellaneous routines for the OSTA-UDF(tm) filesystem. * - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/fs/udf/namei.c b/fs/udf/namei.c index ac191ed7df0a..ca732e79c48b 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -4,11 +4,6 @@ * PURPOSE * Inode name handling routines for the OSTA-UDF(tm) filesystem. * - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). 
Copies of the GPL can be obtained from: diff --git a/fs/udf/partition.c b/fs/udf/partition.c index 4d36f264be0d..dabf2b841db8 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -4,11 +4,6 @@ * PURPOSE * Partition handling routines for the OSTA-UDF(tm) filesystem. * - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/fs/udf/super.c b/fs/udf/super.c index 15bd4f24c5b7..4a6f49adc609 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -14,11 +14,6 @@ * http://www.ecma.ch/ * http://www.iso.org/ * - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index 43f3051ef756..674bb40edc83 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -4,11 +4,6 @@ * PURPOSE * Symlink handling routines for the OSTA-UDF(tm) filesystem. * - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 7dc8a5572ca1..e1b0e8cfecb4 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -4,11 +4,6 @@ * PURPOSE * Truncate handling routines for the OSTA-UDF(tm) filesystem. 
* - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 5a80efd8debc..706c92e1dcc9 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -11,11 +11,6 @@ * UTF-8 is explained in the IETF RFC XXXX. * ftp://ftp.internic.net/rfc/rfcxxxx.txt * - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team's mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/include/linux/udf_fs.h b/include/linux/udf_fs.h index 46e2bb945353..36c684e1b110 100644 --- a/include/linux/udf_fs.h +++ b/include/linux/udf_fs.h @@ -13,11 +13,6 @@ * http://www.osta.org/ * http://www.ecma.ch/ * http://www.iso.org/ * - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/include/linux/udf_fs_i.h b/include/linux/udf_fs_i.h index 62b15a4214e6..1e7508420fcf 100644 --- a/include/linux/udf_fs_i.h +++ b/include/linux/udf_fs_i.h @@ -3,11 +3,6 @@ * * This file is intended for the Linux kernel/module. 
* - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: diff --git a/include/linux/udf_fs_sb.h b/include/linux/udf_fs_sb.h index 1966a6dbb4b6..b15ff2e99c91 100644 --- a/include/linux/udf_fs_sb.h +++ b/include/linux/udf_fs_sb.h @@ -3,11 +3,6 @@ * * This include file is for the Linux kernel/module. * - * CONTACTS - * E-mail regarding any portion of the Linux UDF file system should be - * directed to the development team mailing list (run by majordomo): - * linux_udf@hpesjro.fc.hp.com - * * COPYRIGHT * This file is distributed under the terms of the GNU General Public * License (GPL). Copies of the GPL can be obtained from: -- cgit v1.2.3-71-gd317 From 7063fbf2261194f72ee75afca67b3b38b554b5fa Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 15 Dec 2005 14:29:43 -0800 Subject: [PATCH] configfs: User-driven configuration filesystem Configfs, a file system for userspace-driven kernel object configuration. The OCFS2 stack makes extensive use of this for propagation of cluster configuration information into kernel. 
Signed-off-by: Joel Becker --- Documentation/filesystems/00-INDEX | 2 + Documentation/filesystems/configfs/configfs.txt | 434 ++++++++ .../filesystems/configfs/configfs_example.c | 474 +++++++++ MAINTAINERS | 5 + fs/Kconfig | 14 + fs/Makefile | 1 + fs/configfs/Makefile | 7 + fs/configfs/configfs_internal.h | 142 +++ fs/configfs/dir.c | 1102 ++++++++++++++++++++ fs/configfs/file.c | 360 +++++++ fs/configfs/inode.c | 162 +++ fs/configfs/item.c | 227 ++++ fs/configfs/mount.c | 159 +++ fs/configfs/symlink.c | 281 +++++ include/linux/configfs.h | 205 ++++ 15 files changed, 3575 insertions(+) create mode 100644 Documentation/filesystems/configfs/configfs.txt create mode 100644 Documentation/filesystems/configfs/configfs_example.c create mode 100644 fs/configfs/Makefile create mode 100644 fs/configfs/configfs_internal.h create mode 100644 fs/configfs/dir.c create mode 100644 fs/configfs/file.c create mode 100644 fs/configfs/inode.c create mode 100644 fs/configfs/item.c create mode 100644 fs/configfs/mount.c create mode 100644 fs/configfs/symlink.c create mode 100644 include/linux/configfs.h (limited to 'include/linux') diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index bcfbab899b37..628f8a7adb85 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -12,6 +12,8 @@ cifs.txt - description of the CIFS filesystem coda.txt - description of the CODA filesystem. +configfs/ + - directory containing configfs documentation and example code. cramfs.txt - info on the cram filesystem for small storage (ROMs etc) devfs/ diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt new file mode 100644 index 000000000000..c4ff96b7c4e0 --- /dev/null +++ b/Documentation/filesystems/configfs/configfs.txt @@ -0,0 +1,434 @@ + +configfs - Userspace-driven kernel object configuration.
+ +Joel Becker + +Updated: 31 March 2005 + +Copyright (c) 2005 Oracle Corporation, + Joel Becker + + +[What is configfs?] + +configfs is a ram-based filesystem that provides the converse of +sysfs's functionality. Where sysfs is a filesystem-based view of +kernel objects, configfs is a filesystem-based manager of kernel +objects, or config_items. + +With sysfs, an object is created in kernel (for example, when a device +is discovered) and it is registered with sysfs. Its attributes then +appear in sysfs, allowing userspace to read the attributes via +readdir(3)/read(2). It may allow some attributes to be modified via +write(2). The important point is that the object is created and +destroyed in kernel, the kernel controls the lifecycle of the sysfs +representation, and sysfs is merely a window on all this. + +A configfs config_item is created via an explicit userspace operation: +mkdir(2). It is destroyed via rmdir(2). The attributes appear at +mkdir(2) time, and can be read or modified via read(2) and write(2). +As with sysfs, readdir(3) queries the list of items and/or attributes. +symlink(2) can be used to group items together. Unlike sysfs, the +lifetime of the representation is completely driven by userspace. The +kernel modules backing the items must respond to this. + +Both sysfs and configfs can and should exist together on the same +system. One is not a replacement for the other. + +[Using configfs] + +configfs can be compiled as a module or into the kernel. You can access +it by doing + + mount -t configfs none /config + +The configfs tree will be empty unless client modules are also loaded. +These are modules that register their item types with configfs as +subsystems. Once a client subsystem is loaded, it will appear as a +subdirectory (or more than one) under /config. Like sysfs, the +configfs tree is always there, whether mounted on /config or not. + +An item is created via mkdir(2). The item's attributes will also +appear at this time. 
readdir(3) can determine what the attributes are, +read(2) can query their default values, and write(2) can store new +values. Like sysfs, attributes should be ASCII text files, preferably +with only one value per file. The same efficiency caveats from sysfs +apply. Don't mix more than one attribute in one attribute file. + +Like sysfs, configfs expects write(2) to store the entire buffer at +once. When writing to configfs attributes, userspace processes should +first read the entire file, modify the portions they wish to change, and +then write the entire buffer back. Attribute files have a maximum size +of one page (PAGE_SIZE, 4096 on i386). + +When an item needs to be destroyed, remove it with rmdir(2). An +item cannot be destroyed if any other item has a link to it (via +symlink(2)). Links can be removed via unlink(2). + +[Configuring FakeNBD: an Example] + +Imagine there's a Network Block Device (NBD) driver that allows you to +access remote block devices. Call it FakeNBD. FakeNBD uses configfs +for its configuration. Obviously, there will be a nice program that +sysadmins use to configure FakeNBD, but somehow that program has to tell +the driver about it. Here's where configfs comes in. + +When the FakeNBD driver is loaded, it registers itself with configfs. +readdir(3) sees this just fine: + + # ls /config + fakenbd + +A fakenbd connection can be created with mkdir(2). The name is +arbitrary, but likely the tool will make some use of the name. Perhaps +it is a uuid or a disk name: + + # mkdir /config/fakenbd/disk1 + # ls /config/fakenbd/disk1 + target device rw + +The target attribute contains the IP address of the server FakeNBD will +connect to. The device attribute is the device on the server. +Predictably, the rw attribute determines whether the connection is +read-only or read-write. + + # echo 10.0.0.1 > /config/fakenbd/disk1/target + # echo /dev/sda1 > /config/fakenbd/disk1/device + # echo 1 > /config/fakenbd/disk1/rw + +That's it. 
That's all there is. Now the device is configured, via the +shell no less. + +[Coding With configfs] + +Every object in configfs is a config_item. A config_item reflects an +object in the subsystem. It has attributes that match values on that +object. configfs handles the filesystem representation of that object +and its attributes, allowing the subsystem to ignore all but the +basic show/store interaction. + +Items are created and destroyed inside a config_group. A group is a +collection of items that share the same attributes and operations. +Items are created by mkdir(2) and removed by rmdir(2), but configfs +handles that. The group has a set of operations to perform these tasks. + +A subsystem is the top level of a client module. During initialization, +the client module registers the subsystem with configfs, the subsystem +appears as a directory at the top of the configfs filesystem. A +subsystem is also a config_group, and can do everything a config_group +can. + +[struct config_item] + + struct config_item { + char *ci_name; + char ci_namebuf[UOBJ_NAME_LEN]; + struct kref ci_kref; + struct list_head ci_entry; + struct config_item *ci_parent; + struct config_group *ci_group; + struct config_item_type *ci_type; + struct dentry *ci_dentry; + }; + + void config_item_init(struct config_item *); + void config_item_init_type_name(struct config_item *, + const char *name, + struct config_item_type *type); + struct config_item *config_item_get(struct config_item *); + void config_item_put(struct config_item *); + +Generally, struct config_item is embedded in a container structure, a +structure that actually represents what the subsystem is doing. The +config_item portion of that structure is how the object interacts with +configfs. + +Whether statically defined in a source file or created by a parent +config_group, a config_item must have one of the _init() functions +called on it. This initializes the reference count and sets up the +appropriate fields.
+ +All users of a config_item should have a reference on it via +config_item_get(), and drop the reference when they are done via +config_item_put(). + +By itself, a config_item cannot do much more than appear in configfs. +Usually a subsystem wants the item to display and/or store attributes, +among other things. For that, it needs a type. + +[struct config_item_type] + + struct configfs_item_operations { + void (*release)(struct config_item *); + ssize_t (*show_attribute)(struct config_item *, + struct configfs_attribute *, + char *); + ssize_t (*store_attribute)(struct config_item *, + struct configfs_attribute *, + const char *, size_t); + int (*allow_link)(struct config_item *src, + struct config_item *target); + int (*drop_link)(struct config_item *src, + struct config_item *target); + }; + + struct config_item_type { + struct module *ct_owner; + struct configfs_item_operations *ct_item_ops; + struct configfs_group_operations *ct_group_ops; + struct configfs_attribute **ct_attrs; + }; + +The most basic function of a config_item_type is to define what +operations can be performed on a config_item. All items that have been +allocated dynamically will need to provide the ct_item_ops->release() +method. This method is called when the config_item's reference count +reaches zero. Items that wish to display an attribute need to provide +the ct_item_ops->show_attribute() method. Similarly, storing a new +attribute value uses the store_attribute() method. + +[struct configfs_attribute] + + struct configfs_attribute { + char *ca_name; + struct module *ca_owner; + mode_t ca_mode; + }; + +When a config_item wants an attribute to appear as a file in the item's +configfs directory, it must define a configfs_attribute describing it. +It then adds the attribute to the NULL-terminated array +config_item_type->ct_attrs. When the item appears in configfs, the +attribute file will appear with the configfs_attribute->ca_name +filename. 
configfs_attribute->ca_mode specifies the file permissions. + +If an attribute is readable and the config_item provides a +ct_item_ops->show_attribute() method, that method will be called +whenever userspace asks for a read(2) on the attribute. The converse +will happen for write(2). + +[struct config_group] + +A config_item cannot live in a vacuum. The only way one can be created +is via mkdir(2) on a config_group. This will trigger creation of a +child item. + + struct config_group { + struct config_item cg_item; + struct list_head cg_children; + struct configfs_subsystem *cg_subsys; + struct config_group **default_groups; + }; + + void config_group_init(struct config_group *group); + void config_group_init_type_name(struct config_group *group, + const char *name, + struct config_item_type *type); + + +The config_group structure contains a config_item. Properly configuring +that item means that a group can behave as an item in its own right. +However, it can do more: it can create child items or groups. This is +accomplished via the group operations specified on the group's +config_item_type. + + struct configfs_group_operations { + struct config_item *(*make_item)(struct config_group *group, + const char *name); + struct config_group *(*make_group)(struct config_group *group, + const char *name); + int (*commit_item)(struct config_item *item); + void (*drop_item)(struct config_group *group, + struct config_item *item); + }; + +A group creates child items by providing the +ct_group_ops->make_item() method. If provided, this method is called from mkdir(2) in the group's directory. The subsystem allocates a new +config_item (or more likely, its container structure), initializes it, +and returns it to configfs. Configfs will then populate the filesystem +tree to reflect the new item. + +If the subsystem wants the child to be a group itself, the subsystem +provides ct_group_ops->make_group().
Everything else behaves the same, +using the group _init() functions on the group. + +Finally, when userspace calls rmdir(2) on the item or group, +ct_group_ops->drop_item() is called. As a config_group is also a +config_item, it is not necessary for a separate drop_group() method. +The subsystem must config_item_put() the reference that was initialized +upon item allocation. If a subsystem has no work to do, it may omit +the ct_group_ops->drop_item() method, and configfs will call +config_item_put() on the item on behalf of the subsystem. + +IMPORTANT: drop_item() is void, and as such cannot fail. When rmdir(2) +is called, configfs WILL remove the item from the filesystem tree +(assuming that it has no children to keep it busy). The subsystem is +responsible for responding to this. If the subsystem has references to +the item in other threads, the memory is safe. It may take some time +for the item to actually disappear from the subsystem's usage. But it +is gone from configfs. + +A config_group cannot be removed while it still has child items. This +is implemented in the configfs rmdir(2) code. ->drop_item() will not be +called, as the item has not been dropped. rmdir(2) will fail, as the +directory is not empty. + +[struct configfs_subsystem] + +A subsystem must register itself, usually at module_init time. This +tells configfs to make the subsystem appear in the file tree. + + struct configfs_subsystem { + struct config_group su_group; + struct semaphore su_sem; + }; + + int configfs_register_subsystem(struct configfs_subsystem *subsys); + void configfs_unregister_subsystem(struct configfs_subsystem *subsys); + + A subsystem consists of a toplevel config_group and a semaphore. +The group is where child config_items are created. For a subsystem, +this group is usually defined statically.
Before calling +configfs_register_subsystem(), the subsystem must have initialized the +group via the usual group _init() functions, and it must also have +initialized the semaphore. + When the register call returns, the subsystem is live, and it +will be visible via configfs. At that point, mkdir(2) can be called and +the subsystem must be ready for it. + +[An Example] + +The best example of these basic concepts is the simple_children +subsystem/group and the simple_child item in configfs_example.c. It +shows a trivial object displaying and storing an attribute, and a simple +group creating and destroying these children. + +[Hierarchy Navigation and the Subsystem Semaphore] + +There is an extra bonus that configfs provides. The config_groups and +config_items are arranged in a hierarchy due to the fact that they +appear in a filesystem. A subsystem is NEVER to touch the filesystem +parts, but the subsystem might be interested in this hierarchy. For +this reason, the hierarchy is mirrored via the config_group->cg_children +and config_item->ci_parent structure members. + +A subsystem can navigate the cg_children list and the ci_parent pointer +to see the tree created by the subsystem. This can race with configfs' +management of the hierarchy, so configfs uses the subsystem semaphore to +protect modifications. Whenever a subsystem wants to navigate the +hierarchy, it must do so under the protection of the subsystem +semaphore. + +A subsystem will be prevented from acquiring the semaphore while a newly +allocated item has not been linked into this hierarchy. Similarly, it +will not be able to acquire the semaphore while a dropping item has not +yet been unlinked. This means that an item's ci_parent pointer will +never be NULL while the item is in configfs, and that an item will only +be in its parent's cg_children list for the same duration. This allows +a subsystem to trust ci_parent and cg_children while they hold the +semaphore. 
+ +[Item Aggregation Via symlink(2)] + +configfs provides a simple group via the group->item parent/child +relationship. Often, however, a larger environment requires aggregation +outside of the parent/child connection. This is implemented via +symlink(2). + +A config_item may provide the ct_item_ops->allow_link() and +ct_item_ops->drop_link() methods. If the ->allow_link() method exists, +symlink(2) may be called with the config_item as the source of the link. +These links are only allowed between configfs config_items. Any +symlink(2) attempt outside the configfs filesystem will be denied. + +When symlink(2) is called, the source config_item's ->allow_link() +method is called with itself and a target item. If the source item +allows linking to target item, it returns 0. A source item may wish to +reject a link if it only wants links to a certain type of object (say, +in its own subsystem). + +When unlink(2) is called on the symbolic link, the source item is +notified via the ->drop_link() method. Like the ->drop_item() method, +this is a void function and cannot return failure. The subsystem is +responsible for responding to the change. + +A config_item cannot be removed while it links to any other item, nor +can it be removed while an item links to it. Dangling symlinks are not +allowed in configfs. + +[Automatically Created Subgroups] + +A new config_group may want to have two types of child config_items. +While this could be codified by magic names in ->make_item(), it is much +more explicit to have a method whereby userspace sees this divergence. + +Rather than have a group where some items behave differently than +others, configfs provides a method whereby one or many subgroups are +automatically created inside the parent at its creation. Thus, +mkdir("parent") results in "parent", "parent/subgroup1", up through +"parent/subgroupN". Items of type 1 can now be created in +"parent/subgroup1", and items of type N can be created in +"parent/subgroupN". 
+ +These automatic subgroups, or default groups, do not preclude other +children of the parent group. If ct_group_ops->make_group() exists, +other child groups can be created on the parent group directly. + +A configfs subsystem specifies default groups by filling in the +NULL-terminated array default_groups on the config_group structure. +Each group in that array is populated in the configfs tree at the same +time as the parent group. Similarly, they are removed at the same time +as the parent. No extra notification is provided. When a ->drop_item() +method call notifies the subsystem the parent group is going away, it +also means every default group child associated with that parent group. + +As a consequence of this, default_groups cannot be removed directly via +rmdir(2). They also are not considered when rmdir(2) on the parent +group is checking for children. + +[Committable Items] + +NOTE: Committable items are currently unimplemented. + +Some config_items cannot have a valid initial state. That is, no +default values can be specified for the item's attributes such that the +item can do its work. Userspace must configure one or more attributes, +after which the subsystem can start whatever entity this item +represents. + +Consider the FakeNBD device from above. Without a target address *and* +a target device, the subsystem has no idea what block device to import. +The simple example assumes that the subsystem merely waits until all the +appropriate attributes are configured, and then connects. This will, +indeed, work, but now every attribute store must check if the attributes +are initialized. Every attribute store must fire off the connection if +that condition is met. + +Far better would be an explicit action notifying the subsystem that the +config_item is ready to go. More importantly, an explicit action allows +the subsystem to provide feedback as to whether the attributes are +initialized in a way that makes sense. 
configfs provides this as +committable items. + +configfs still uses only normal filesystem operations. An item is +committed via rename(2). The item is moved from a directory where it +can be modified to a directory where it cannot. + +Any group that provides the ct_group_ops->commit_item() method has +committable items. When this group appears in configfs, mkdir(2) will +not work directly in the group. Instead, the group will have two +subdirectories: "live" and "pending". The "live" directory does not +support mkdir(2) or rmdir(2) either. It only allows rename(2). The +"pending" directory does allow mkdir(2) and rmdir(2). An item is +created in the "pending" directory. Its attributes can be modified at +will. Userspace commits the item by renaming it into the "live" +directory. At this point, the subsystem receives the ->commit_item() +callback. If all required attributes are filled to satisfaction, the +method returns zero and the item is moved to the "live" directory. + +As rmdir(2) does not work in the "live" directory, an item must be +shutdown, or "uncommitted". Again, this is done via rename(2), this +time from the "live" directory back to the "pending" one. The subsystem +is notified by the ct_group_ops->uncommit_object() method. + + diff --git a/Documentation/filesystems/configfs/configfs_example.c b/Documentation/filesystems/configfs/configfs_example.c new file mode 100644 index 000000000000..f3c6e4946f98 --- /dev/null +++ b/Documentation/filesystems/configfs/configfs_example.c @@ -0,0 +1,474 @@ +/* + * vim: noexpandtab ts=8 sts=0 sw=8: + * + * configfs_example.c - This file is a demonstration module containing + * a number of configfs subsystems. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. + */ + +#include +#include +#include + +#include + + + +/* + * 01-childless + * + * This first example is a childless subsystem. It cannot create + * any config_items. It just has attributes. + * + * Note that we are enclosing the configfs_subsystem inside a container. + * This is not necessary if a subsystem has no attributes directly + * on the subsystem. See the next example, 02-simple-children, for + * such a subsystem. + */ + +struct childless { + struct configfs_subsystem subsys; + int showme; + int storeme; +}; + +struct childless_attribute { + struct configfs_attribute attr; + ssize_t (*show)(struct childless *, char *); + ssize_t (*store)(struct childless *, const char *, size_t); +}; + +static inline struct childless *to_childless(struct config_item *item) +{ + return item ? 
container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL; +} + +static ssize_t childless_showme_read(struct childless *childless, + char *page) +{ + ssize_t pos; + + pos = sprintf(page, "%d\n", childless->showme); + childless->showme++; + + return pos; +} + +static ssize_t childless_storeme_read(struct childless *childless, + char *page) +{ + return sprintf(page, "%d\n", childless->storeme); +} + +static ssize_t childless_storeme_write(struct childless *childless, + const char *page, + size_t count) +{ + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + childless->storeme = tmp; + + return count; +} + +static ssize_t childless_description_read(struct childless *childless, + char *page) +{ + return sprintf(page, +"[01-childless]\n" +"\n" +"The childless subsystem is the simplest possible subsystem in\n" +"configfs. It does not support the creation of child config_items.\n" +"It only has a few attributes. 
In fact, it isn't much different\n" +"than a directory in /proc.\n"); +} + +static struct childless_attribute childless_attr_showme = { + .attr = { .ca_owner = THIS_MODULE, .ca_name = "showme", .ca_mode = S_IRUGO }, + .show = childless_showme_read, +}; +static struct childless_attribute childless_attr_storeme = { + .attr = { .ca_owner = THIS_MODULE, .ca_name = "storeme", .ca_mode = S_IRUGO | S_IWUSR }, + .show = childless_storeme_read, + .store = childless_storeme_write, +}; +static struct childless_attribute childless_attr_description = { + .attr = { .ca_owner = THIS_MODULE, .ca_name = "description", .ca_mode = S_IRUGO }, + .show = childless_description_read, +}; + +static struct configfs_attribute *childless_attrs[] = { + &childless_attr_showme.attr, + &childless_attr_storeme.attr, + &childless_attr_description.attr, + NULL, +}; + +static ssize_t childless_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + struct childless *childless = to_childless(item); + struct childless_attribute *childless_attr = + container_of(attr, struct childless_attribute, attr); + ssize_t ret = 0; + + if (childless_attr->show) + ret = childless_attr->show(childless, page); + return ret; +} + +static ssize_t childless_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t count) +{ + struct childless *childless = to_childless(item); + struct childless_attribute *childless_attr = + container_of(attr, struct childless_attribute, attr); + ssize_t ret = -EINVAL; + + if (childless_attr->store) + ret = childless_attr->store(childless, page, count); + return ret; +} + +static struct configfs_item_operations childless_item_ops = { + .show_attribute = childless_attr_show, + .store_attribute = childless_attr_store, +}; + +static struct config_item_type childless_type = { + .ct_item_ops = &childless_item_ops, + .ct_attrs = childless_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct childless childless_subsys = 
{ + .subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "01-childless", + .ci_type = &childless_type, + }, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 02-simple-children + * + * This example merely has a simple one-attribute child. Note that + * there is no extra attribute structure, as the child's attribute is + * known from the get-go. Also, there is no container for the + * subsystem, as it has no attributes of its own. + */ + +struct simple_child { + struct config_item item; + int storeme; +}; + +static inline struct simple_child *to_simple_child(struct config_item *item) +{ + return item ? container_of(item, struct simple_child, item) : NULL; +} + +static struct configfs_attribute simple_child_attr_storeme = { + .ca_owner = THIS_MODULE, + .ca_name = "storeme", + .ca_mode = S_IRUGO | S_IWUSR, +}; + +static struct configfs_attribute *simple_child_attrs[] = { + &simple_child_attr_storeme, + NULL, +}; + +static ssize_t simple_child_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + ssize_t count; + struct simple_child *simple_child = to_simple_child(item); + + count = sprintf(page, "%d\n", simple_child->storeme); + + return count; +} + +static ssize_t simple_child_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t count) +{ + struct simple_child *simple_child = to_simple_child(item); + unsigned long tmp; + char *p = (char *) page; + + tmp = simple_strtoul(p, &p, 10); + if (!p || (*p && (*p != '\n'))) + return -EINVAL; + + if (tmp > INT_MAX) + return -ERANGE; + + simple_child->storeme = tmp; + + return count; +} + +static void simple_child_release(struct config_item *item) +{ + kfree(to_simple_child(item)); +} + +static struct configfs_item_operations simple_child_item_ops = { + .release = simple_child_release, + .show_attribute = simple_child_attr_show, + .store_attribute = simple_child_attr_store, +}; + +static 
struct config_item_type simple_child_type = { + .ct_item_ops = &simple_child_item_ops, + .ct_attrs = simple_child_attrs, + .ct_owner = THIS_MODULE, +}; + + +static struct config_item *simple_children_make_item(struct config_group *group, const char *name) +{ + struct simple_child *simple_child; + + simple_child = kmalloc(sizeof(struct simple_child), GFP_KERNEL); + if (!simple_child) + return NULL; + + memset(simple_child, 0, sizeof(struct simple_child)); + + config_item_init_type_name(&simple_child->item, name, + &simple_child_type); + + simple_child->storeme = 0; + + return &simple_child->item; +} + +static struct configfs_attribute simple_children_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "description", + .ca_mode = S_IRUGO, +}; + +static struct configfs_attribute *simple_children_attrs[] = { + &simple_children_attr_description, + NULL, +}; + +static ssize_t simple_children_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + return sprintf(page, +"[02-simple-children]\n" +"\n" +"This subsystem allows the creation of child config_items. These\n" +"items have only one attribute that is readable and writeable.\n"); +} + +static struct configfs_item_operations simple_children_item_ops = { + .show_attribute = simple_children_attr_show, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. 
+ */ +static struct configfs_group_operations simple_children_group_ops = { + .make_item = simple_children_make_item, +}; + +static struct config_item_type simple_children_type = { + .ct_item_ops = &simple_children_item_ops, + .ct_group_ops = &simple_children_group_ops, + .ct_attrs = simple_children_attrs, +}; + +static struct configfs_subsystem simple_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "02-simple-children", + .ci_type = &simple_children_type, + }, + }, +}; + + +/* ----------------------------------------------------------------- */ + +/* + * 03-group-children + * + * This example reuses the simple_children group from above. However, + * the simple_children group is not the subsystem itself, it is a + * child of the subsystem. Creation of a group in the subsystem creates + * a new simple_children group. That group can then have simple_child + * children of its own. + */ + +struct simple_children { + struct config_group group; +}; + +static struct config_group *group_children_make_group(struct config_group *group, const char *name) +{ + struct simple_children *simple_children; + + simple_children = kmalloc(sizeof(struct simple_children), + GFP_KERNEL); + if (!simple_children) + return NULL; + + memset(simple_children, 0, sizeof(struct simple_children)); + + config_group_init_type_name(&simple_children->group, name, + &simple_children_type); + + return &simple_children->group; +} + +static struct configfs_attribute group_children_attr_description = { + .ca_owner = THIS_MODULE, + .ca_name = "description", + .ca_mode = S_IRUGO, +}; + +static struct configfs_attribute *group_children_attrs[] = { + &group_children_attr_description, + NULL, +}; + +static ssize_t group_children_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + return sprintf(page, +"[03-group-children]\n" +"\n" +"This subsystem allows the creation of child config_groups. 
These\n" +"groups are like the subsystem simple-children.\n"); +} + +static struct configfs_item_operations group_children_item_ops = { + .show_attribute = group_children_attr_show, +}; + +/* + * Note that, since no extra work is required on ->drop_item(), + * no ->drop_item() is provided. + */ +static struct configfs_group_operations group_children_group_ops = { + .make_group = group_children_make_group, +}; + +static struct config_item_type group_children_type = { + .ct_item_ops = &group_children_item_ops, + .ct_group_ops = &group_children_group_ops, + .ct_attrs = group_children_attrs, +}; + +static struct configfs_subsystem group_children_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "03-group-children", + .ci_type = &group_children_type, + }, + }, +}; + +/* ----------------------------------------------------------------- */ + +/* + * We're now done with our subsystem definitions. + * For convenience in this module, here's a list of them all. It + * allows the init function to easily register them. Most modules + * will only have one subsystem, and will only call register_subsystem + * on it directly. 
+ */ +static struct configfs_subsystem *example_subsys[] = { + &childless_subsys.subsys, + &simple_children_subsys, + &group_children_subsys, + NULL, +}; + +static int __init configfs_example_init(void) +{ + int ret; + int i; + struct configfs_subsystem *subsys; + + for (i = 0; example_subsys[i]; i++) { + subsys = example_subsys[i]; + + config_group_init(&subsys->su_group); + init_MUTEX(&subsys->su_sem); + ret = configfs_register_subsystem(subsys); + if (ret) { + printk(KERN_ERR "Error %d while registering subsystem %s\n", + ret, + subsys->su_group.cg_item.ci_namebuf); + goto out_unregister; + } + } + + return 0; + +out_unregister: + for (; i >= 0; i--) { + configfs_unregister_subsystem(example_subsys[i]); + } + + return ret; +} + +static void __exit configfs_example_exit(void) +{ + int i; + + for (i = 0; example_subsys[i]; i++) { + configfs_unregister_subsystem(example_subsys[i]); + } +} + +module_init(configfs_example_init); +module_exit(configfs_example_exit); +MODULE_LICENSE("GPL"); diff --git a/MAINTAINERS b/MAINTAINERS index 6af683025ae0..86ee06f43794 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -554,6 +554,11 @@ W: http://us1.samba.org/samba/Linux_CIFS_client.html T: git kernel.org:/pub/scm/linux/kernel/git/sfrench/cifs-2.6.git S: Supported +CONFIGFS +P: Joel Becker +M: Joel Becker +S: Supported + CIRRUS LOGIC GENERIC FBDEV DRIVER P: Jeff Garzik M: jgarzik@pobox.com diff --git a/fs/Kconfig b/fs/Kconfig index d5255e627b5f..ba1dbe2b2202 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -841,6 +841,20 @@ config RELAYFS_FS If unsure, say N. +config CONFIGFS_FS + tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + configfs is a ram-based filesystem that provides the converse + of sysfs's functionality. Where sysfs is a filesystem-based + view of kernel objects, configfs is a filesystem-based manager + of kernel objects, or config_items. + + Both sysfs and configfs can and should exist together on the + same system. 
One is not a replacement for the other. + + If unsure, say N. + endmenu menu "Miscellaneous filesystems" diff --git a/fs/Makefile b/fs/Makefile index 4c2655759078..ff3d48a744f5 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -101,3 +101,4 @@ obj-$(CONFIG_BEFS_FS) += befs/ obj-$(CONFIG_HOSTFS) += hostfs/ obj-$(CONFIG_HPPFS) += hppfs/ obj-$(CONFIG_DEBUG_FS) += debugfs/ +obj-$(CONFIG_CONFIGFS_FS) += configfs/ diff --git a/fs/configfs/Makefile b/fs/configfs/Makefile new file mode 100644 index 000000000000..00ffb278e98c --- /dev/null +++ b/fs/configfs/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the configfs virtual filesystem +# + +obj-$(CONFIG_CONFIGFS_FS) += configfs.o + +configfs-objs := inode.o file.o dir.o symlink.o mount.o item.o diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h new file mode 100644 index 000000000000..8899d9c5f6bf --- /dev/null +++ b/fs/configfs/configfs_internal.h @@ -0,0 +1,142 @@ +/* -*- mode: c; c-basic-offset:8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * configfs_internal.h - Internal stuff for configfs + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. 
+ */ + +#include +#include + +struct configfs_dirent { + atomic_t s_count; + struct list_head s_sibling; + struct list_head s_children; + struct list_head s_links; + void * s_element; + int s_type; + umode_t s_mode; + struct dentry * s_dentry; +}; + +#define CONFIGFS_ROOT 0x0001 +#define CONFIGFS_DIR 0x0002 +#define CONFIGFS_ITEM_ATTR 0x0004 +#define CONFIGFS_ITEM_LINK 0x0020 +#define CONFIGFS_USET_DIR 0x0040 +#define CONFIGFS_USET_DEFAULT 0x0080 +#define CONFIGFS_USET_DROPPING 0x0100 +#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) + +extern struct vfsmount * configfs_mount; + +extern int configfs_is_root(struct config_item *item); + +extern struct inode * configfs_new_inode(mode_t mode); +extern int configfs_create(struct dentry *, int mode, int (*init)(struct inode *)); + +extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); +extern int configfs_make_dirent(struct configfs_dirent *, + struct dentry *, void *, umode_t, int); + +extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int); +extern void configfs_hash_and_remove(struct dentry * dir, const char * name); + +extern const unsigned char * configfs_get_name(struct configfs_dirent *sd); +extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent); + +extern int configfs_pin_fs(void); +extern void configfs_release_fs(void); + +extern struct rw_semaphore configfs_rename_sem; +extern struct super_block * configfs_sb; +extern struct file_operations configfs_dir_operations; +extern struct file_operations configfs_file_operations; +extern struct file_operations bin_fops; +extern struct inode_operations configfs_dir_inode_operations; +extern struct inode_operations configfs_symlink_inode_operations; + +extern int configfs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname); +extern int configfs_unlink(struct inode *dir, struct dentry *dentry); + +struct configfs_symlink { + struct list_head sl_list; + 
struct config_item *sl_target; +}; + +extern int configfs_create_link(struct configfs_symlink *sl, + struct dentry *parent, + struct dentry *dentry); + +static inline struct config_item * to_item(struct dentry * dentry) +{ + struct configfs_dirent * sd = dentry->d_fsdata; + return ((struct config_item *) sd->s_element); +} + +static inline struct configfs_attribute * to_attr(struct dentry * dentry) +{ + struct configfs_dirent * sd = dentry->d_fsdata; + return ((struct configfs_attribute *) sd->s_element); +} + +static inline struct config_item *configfs_get_config_item(struct dentry *dentry) +{ + struct config_item * item = NULL; + + spin_lock(&dcache_lock); + if (!d_unhashed(dentry)) { + struct configfs_dirent * sd = dentry->d_fsdata; + if (sd->s_type & CONFIGFS_ITEM_LINK) { + struct configfs_symlink * sl = sd->s_element; + item = config_item_get(sl->sl_target); + } else + item = config_item_get(sd->s_element); + } + spin_unlock(&dcache_lock); + + return item; +} + +static inline void release_configfs_dirent(struct configfs_dirent * sd) +{ + if (!(sd->s_type & CONFIGFS_ROOT)) + kfree(sd); +} + +static inline struct configfs_dirent * configfs_get(struct configfs_dirent * sd) +{ + if (sd) { + WARN_ON(!atomic_read(&sd->s_count)); + atomic_inc(&sd->s_count); + } + return sd; +} + +static inline void configfs_put(struct configfs_dirent * sd) +{ + WARN_ON(!atomic_read(&sd->s_count)); + if (atomic_dec_and_test(&sd->s_count)) + release_configfs_dirent(sd); +} + diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c new file mode 100644 index 000000000000..e48b539243a1 --- /dev/null +++ b/fs/configfs/dir.c @@ -0,0 +1,1102 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * dir.c - Operations for configfs directories. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. + */ + +#undef DEBUG + +#include +#include +#include +#include + +#include +#include "configfs_internal.h" + +DECLARE_RWSEM(configfs_rename_sem); + +static void configfs_d_iput(struct dentry * dentry, + struct inode * inode) +{ + struct configfs_dirent * sd = dentry->d_fsdata; + + if (sd) { + BUG_ON(sd->s_dentry != dentry); + sd->s_dentry = NULL; + configfs_put(sd); + } + iput(inode); +} + +/* + * We _must_ delete our dentries on last dput, as the chain-to-parent + * behavior is required to clear the parents of default_groups. 
+ */ +static int configfs_d_delete(struct dentry *dentry) +{ + return 1; +} + +static struct dentry_operations configfs_dentry_ops = { + .d_iput = configfs_d_iput, + /* simple_delete_dentry() isn't exported */ + .d_delete = configfs_d_delete, +}; + +/* + * Allocates a new configfs_dirent and links it to the parent configfs_dirent + */ +static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * parent_sd, + void * element) +{ + struct configfs_dirent * sd; + + sd = kmalloc(sizeof(*sd), GFP_KERNEL); + if (!sd) + return NULL; + + memset(sd, 0, sizeof(*sd)); + atomic_set(&sd->s_count, 1); + INIT_LIST_HEAD(&sd->s_links); + INIT_LIST_HEAD(&sd->s_children); + list_add(&sd->s_sibling, &parent_sd->s_children); + sd->s_element = element; + + return sd; +} + +int configfs_make_dirent(struct configfs_dirent * parent_sd, + struct dentry * dentry, void * element, + umode_t mode, int type) +{ + struct configfs_dirent * sd; + + sd = configfs_new_dirent(parent_sd, element); + if (!sd) + return -ENOMEM; + + sd->s_mode = mode; + sd->s_type = type; + sd->s_dentry = dentry; + if (dentry) { + dentry->d_fsdata = configfs_get(sd); + dentry->d_op = &configfs_dentry_ops; + } + + return 0; +} + +static int init_dir(struct inode * inode) +{ + inode->i_op = &configfs_dir_inode_operations; + inode->i_fop = &configfs_dir_operations; + + /* directory inodes start off with i_nlink == 2 (for "." 
entry) */ + inode->i_nlink++; + return 0; +} + +static int init_file(struct inode * inode) +{ + inode->i_size = PAGE_SIZE; + inode->i_fop = &configfs_file_operations; + return 0; +} + +static int init_symlink(struct inode * inode) +{ + inode->i_op = &configfs_symlink_inode_operations; + return 0; +} + +static int create_dir(struct config_item * k, struct dentry * p, + struct dentry * d) +{ + int error; + umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; + + error = configfs_create(d, mode, init_dir); + if (!error) { + error = configfs_make_dirent(p->d_fsdata, d, k, mode, + CONFIGFS_DIR); + if (!error) { + p->d_inode->i_nlink++; + (d)->d_op = &configfs_dentry_ops; + } + } + return error; +} + + +/** + * configfs_create_dir - create a directory for an config_item. + * @item: config_itemwe're creating directory for. + * @dentry: config_item's dentry. + */ + +static int configfs_create_dir(struct config_item * item, struct dentry *dentry) +{ + struct dentry * parent; + int error = 0; + + BUG_ON(!item); + + if (item->ci_parent) + parent = item->ci_parent->ci_dentry; + else if (configfs_mount && configfs_mount->mnt_sb) + parent = configfs_mount->mnt_sb->s_root; + else + return -EFAULT; + + error = create_dir(item,parent,dentry); + if (!error) + item->ci_dentry = dentry; + return error; +} + +int configfs_create_link(struct configfs_symlink *sl, + struct dentry *parent, + struct dentry *dentry) +{ + int err = 0; + umode_t mode = S_IFLNK | S_IRWXUGO; + + err = configfs_create(dentry, mode, init_symlink); + if (!err) { + err = configfs_make_dirent(parent->d_fsdata, dentry, sl, + mode, CONFIGFS_ITEM_LINK); + if (!err) + dentry->d_op = &configfs_dentry_ops; + } + return err; +} + +static void remove_dir(struct dentry * d) +{ + struct dentry * parent = dget(d->d_parent); + struct configfs_dirent * sd; + + sd = d->d_fsdata; + list_del_init(&sd->s_sibling); + configfs_put(sd); + if (d->d_inode) + simple_rmdir(parent->d_inode,d); + + pr_debug(" o %s removing done 
(%d)\n",d->d_name.name, + atomic_read(&d->d_count)); + + dput(parent); +} + +/** + * configfs_remove_dir - remove an config_item's directory. + * @item: config_item we're removing. + * + * The only thing special about this is that we remove any files in + * the directory before we remove the directory, and we've inlined + * what used to be configfs_rmdir() below, instead of calling separately. + */ + +static void configfs_remove_dir(struct config_item * item) +{ + struct dentry * dentry = dget(item->ci_dentry); + + if (!dentry) + return; + + remove_dir(dentry); + /** + * Drop reference from dget() on entrance. + */ + dput(dentry); +} + + +/* attaches attribute's configfs_dirent to the dentry corresponding to the + * attribute file + */ +static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * dentry) +{ + struct configfs_attribute * attr = sd->s_element; + int error; + + error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, init_file); + if (error) + return error; + + dentry->d_op = &configfs_dentry_ops; + dentry->d_fsdata = configfs_get(sd); + sd->s_dentry = dentry; + d_rehash(dentry); + + return 0; +} + +static struct dentry * configfs_lookup(struct inode *dir, + struct dentry *dentry, + struct nameidata *nd) +{ + struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; + struct configfs_dirent * sd; + int found = 0; + int err = 0; + + list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { + if (sd->s_type & CONFIGFS_NOT_PINNED) { + const unsigned char * name = configfs_get_name(sd); + + if (strcmp(name, dentry->d_name.name)) + continue; + + found = 1; + err = configfs_attach_attr(sd, dentry); + break; + } + } + + if (!found) { + /* + * If it doesn't exist and it isn't a NOT_PINNED item, + * it must be negative. + */ + return simple_lookup(dir, dentry, nd); + } + + return ERR_PTR(err); +} + +/* + * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are + * attributes and are removed by rmdir(). 
We recurse, taking i_sem
+ * on all children that are candidates for default detach. If the
+ * result is clean, then configfs_detach_group() will handle dropping
+ * i_sem. If there is an error, the caller will clean up the i_sem
+ * holders via configfs_detach_rollback().
+ *
+ * Returns 0 when every pinned child is a default group that itself
+ * passes this check, -EBUSY when an s_links list met on the walk is
+ * not empty, and -ENOTEMPTY when a pinned child is not a default group.
+ */
+static int configfs_detach_prep(struct dentry *dentry)
+{
+	struct configfs_dirent *parent_sd = dentry->d_fsdata;
+	struct configfs_dirent *sd;
+	int ret;
+
+	/* Refuse outright while this directory's s_links list has entries. */
+	ret = -EBUSY;
+	if (!list_empty(&parent_sd->s_links))
+		goto out;
+
+	ret = 0;
+	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+		/* NOT_PINNED children never block the detach. */
+		if (sd->s_type & CONFIGFS_NOT_PINNED)
+			continue;
+		if (sd->s_type & CONFIGFS_USET_DEFAULT) {
+			down(&sd->s_dentry->d_inode->i_sem);
+			/* Mark that we've taken i_sem */
+			sd->s_type |= CONFIGFS_USET_DROPPING;
+
+			ret = configfs_detach_prep(sd->s_dentry);
+			if (!ret)
+				continue;
+		} else
+			ret = -ENOTEMPTY;
+
+		/* First failure ends the walk; i_sems taken so far stay held. */
+		break;
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Walk the tree, dropping i_sem wherever CONFIGFS_USET_DROPPING is
+ * set.
+ */
+static void configfs_detach_rollback(struct dentry *dentry)
+{
+	struct configfs_dirent *parent_sd = dentry->d_fsdata;
+	struct configfs_dirent *sd;
+
+	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+		if (sd->s_type & CONFIGFS_USET_DEFAULT) {
+			/* Unwind the subtree first, then this child itself. */
+			configfs_detach_rollback(sd->s_dentry);
+
+			if (sd->s_type & CONFIGFS_USET_DROPPING) {
+				sd->s_type &= ~CONFIGFS_USET_DROPPING;
+				up(&sd->s_dentry->d_inode->i_sem);
+			}
+		}
+	}
+}
+
+/*
+ * Remove every CONFIGFS_NOT_PINNED dirent that has an s_element from
+ * @item's directory: unlink it from the sibling list, drop its dentry
+ * via configfs_drop_dentry() and put the dirent. Does nothing when
+ * @item has no dentry.
+ */
+static void detach_attrs(struct config_item * item)
+{
+	struct dentry * dentry = dget(item->ci_dentry);
+	struct configfs_dirent * parent_sd;
+	struct configfs_dirent * sd, * tmp;
+
+	if (!dentry)
+		return;
+
+	pr_debug("configfs %s: dropping attrs for dir\n",
+		 dentry->d_name.name);
+
+	parent_sd = dentry->d_fsdata;
+	/* _safe variant: entries are unlinked while we iterate. */
+	list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
+		if (!sd->s_element || !(sd->s_type & CONFIGFS_NOT_PINNED))
+			continue;
+		list_del_init(&sd->s_sibling);
+		configfs_drop_dentry(sd, dentry);
+		configfs_put(sd);
+	}
+
+	/**
+	 * Drop reference from dget() on entrance.
+	 */
+	dput(dentry);
+}
+
+/*
+ * Create one attribute file for each entry of @item's
+ * ci_type->ct_attrs, a NULL-terminated array. Returns -EINVAL when
+ * @item has no ci_type. If configfs_create_file() fails, the
+ * attributes are removed again with detach_attrs() and that error is
+ * returned; otherwise returns 0.
+ */
+static int populate_attrs(struct config_item *item)
+{
+	struct config_item_type *t = item->ci_type;
+	struct configfs_attribute *attr;
+	int error = 0;
+	int i;
+
+	if (!t)
+		return -EINVAL;
+	if (t->ct_attrs) {
+		for (i = 0; (attr = t->ct_attrs[i]) != NULL; i++) {
+			if ((error = configfs_create_file(item, attr)))
+				break;
+		}
+	}
+
+	if (error)
+		detach_attrs(item);
+
+	return error;
+}
+
+/* Forward declarations: detach_groups() and the attach/detach pair recurse. */
+static int configfs_attach_group(struct config_item *parent_item,
+				 struct config_item *item,
+				 struct dentry *dentry);
+static void configfs_detach_group(struct config_item *item);
+
+/*
+ * Tear down every child of @group's directory that is flagged
+ * CONFIGFS_USET_DEFAULT: detach the child group, mark its inode
+ * S_DEAD, release its i_sem if CONFIGFS_USET_DROPPING is set, then
+ * d_delete() and dput() the child dentry. Does nothing when @group
+ * has no dentry.
+ */
+static void detach_groups(struct config_group *group)
+{
+	struct dentry * dentry = dget(group->cg_item.ci_dentry);
+	struct dentry *child;
+	struct configfs_dirent *parent_sd;
+	struct configfs_dirent *sd, *tmp;
+
+	if (!dentry)
+		return;
+
+	parent_sd = dentry->d_fsdata;
+	list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
+		if (!sd->s_element ||
+		    !(sd->s_type & CONFIGFS_USET_DEFAULT))
+			continue;
+
+		child = sd->s_dentry;
+
+		configfs_detach_group(sd->s_element);
+		child->d_inode->i_flags |= S_DEAD;
+
+		/*
+		 * From rmdir/unregister, a configfs_detach_prep() pass
+		 * has taken our i_sem for us. Drop it.
+		 * From mkdir/register cleanup, there is no sem held.
+		 */
+		if (sd->s_type & CONFIGFS_USET_DROPPING)
+			up(&child->d_inode->i_sem);
+
+		d_delete(child);
+		dput(child);
+	}
+
+	/**
+	 * Drop reference from dget() on entrance.
+	 */
+	dput(dentry);
+}
+
+/*
+ * This fakes mkdir(2) on a default_groups[] entry. It
+ * creates a dentry, attaches it, and then does fixup
+ * on the sd->s_type.
+ *
+ * We could, perhaps, tweak our parent's ->mkdir for a minute and
+ * try using vfs_mkdir. Just a thought.
+ */
+static int create_default_group(struct config_group *parent_group,
+				struct config_group *group)
+{
+	int ret;
+	struct qstr name;
+	struct configfs_dirent *sd;
+	/* We trust the caller holds a reference to parent */
+	struct dentry *child, *parent = parent_group->cg_item.ci_dentry;
+
+	/* Fall back to the embedded name buffer when no name was set. */
+	if (!group->cg_item.ci_name)
+		group->cg_item.ci_name = group->cg_item.ci_namebuf;
+	name.name = group->cg_item.ci_name;
+	name.len = strlen(name.name);
+	name.hash = full_name_hash(name.name, name.len);
+
+	ret = -ENOMEM;
+	child = d_alloc(parent, &name);
+	if (child) {
+		d_add(child, NULL);
+
+		ret = configfs_attach_group(&parent_group->cg_item,
+					    &group->cg_item, child);
+		if (!ret) {
+			sd = child->d_fsdata;
+			sd->s_type |= CONFIGFS_USET_DEFAULT;
+		} else {
+			/* Attach failed: undo the d_add()/d_alloc() above. */
+			d_delete(child);
+			dput(child);
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Create a directory for every entry of @group->default_groups, a
+ * NULL-terminated array, while holding @group's own i_sem.
+ *
+ * Returns 0 when there is nothing to do or every default group was
+ * created. On the first create_default_group() failure the groups
+ * are torn down again with detach_groups() and that error is returned.
+ */
+static int populate_groups(struct config_group *group)
+{
+	struct config_group *new_group;
+	struct dentry *dentry;
+	int ret = 0;
+	int i;
+
+	/* Test @group before touching it; no default groups means no work. */
+	if (!group || !group->default_groups)
+		return 0;
+
+	dentry = group->cg_item.ci_dentry;
+
+	/* FYI, we're faking mkdir here
+	 * I'm not sure we need this semaphore, as we're called
+	 * from our parent's mkdir. That holds our parent's
+	 * i_sem, so afaik lookup cannot continue through our
+	 * parent to find us, let alone mess with our tree.
+	 * That said, taking our i_sem is closer to mkdir
+	 * emulation, and shouldn't hurt. */
+	down(&dentry->d_inode->i_sem);
+
+	for (i = 0; group->default_groups[i]; i++) {
+		new_group = group->default_groups[i];
+
+		ret = create_default_group(group, new_group);
+		if (ret)
+			break;
+	}
+
+	up(&dentry->d_inode->i_sem);
+
+	if (ret)
+		detach_groups(group);
+
+	return ret;
+}
+
+/*
+ * All of link_obj/unlink_obj/link_group/unlink_group require that
+ * subsys->su_sem is held.
+ */
+
+/* Detach @item from its group, dropping the references link_obj() took. */
+static void unlink_obj(struct config_item *item)
+{
+	struct config_group *owner = item->ci_group;
+
+	/* Never linked, or already unlinked. */
+	if (!owner)
+		return;
+
+	list_del_init(&item->ci_entry);
+
+	item->ci_group = NULL;
+	item->ci_parent = NULL;
+	config_item_put(item);
+
+	config_group_put(owner);
+}
+
+/* Hook @item under @parent_item, taking a reference on both. */
+static void link_obj(struct config_item *parent_item, struct config_item *item)
+{
+	struct config_group *parent_group;
+
+	/* ci_parent duplicates ci_group but keeps tree walks simple. */
+	item->ci_parent = parent_item;
+
+	parent_group = config_group_get(to_config_group(parent_item));
+	item->ci_group = parent_group;
+	list_add_tail(&item->ci_entry, &parent_group->cg_children);
+
+	config_item_get(item);
+}
+
+/* Unlink @group's default groups depth-first, then @group itself. */
+static void unlink_group(struct config_group *group)
+{
+	struct config_group **child;
+
+	if (group->default_groups)
+		for (child = group->default_groups; *child; child++)
+			unlink_group(*child);
+
+	group->cg_subsys = NULL;
+	unlink_obj(&group->cg_item);
+}
+
+/*
+ * Link @group under @parent_group, set its subsystem pointer, then
+ * link its default groups beneath it.
+ */
+static void link_group(struct config_group *parent_group, struct config_group *group)
+{
+	struct config_group **child;
+	struct configfs_subsystem *owner;
+
+	link_obj(&parent_group->cg_item, &group->cg_item);
+
+	/* Use the parent's subsystem; directly under the root, @group is one. */
+	owner = parent_group->cg_subsys;
+	if (!owner) {
+		if (configfs_is_root(&parent_group->cg_item))
+			owner = to_configfs_subsystem(group);
+		else
+			BUG();
+	}
+	group->cg_subsys = owner;
+
+	if (!group->default_groups)
+		return;
+
+	for (child = group->default_groups; *child; child++)
+		link_group(group, *child);
+}
+
+/*
+ * The goal is that configfs_attach_item() (and
+ * configfs_attach_group()) can be called from either the VFS or this
+ * module. That is, they assume that the items have been created,
+ * the dentry allocated, and the dcache is all ready to go.
+ *
+ * If they fail, they must clean up after themselves as if they
+ * had never been called.
The caller (VFS or local function) will
+ * handle cleaning up the dcache bits.
+ *
+ * configfs_detach_group() and configfs_detach_item() behave similarly on
+ * the way out. They assume that the proper semaphores are held, they
+ * clean up the configfs items, and they expect their callers will
+ * handle the dcache bits.
+ */
+static int configfs_attach_item(struct config_item *parent_item,
+				struct config_item *item,
+				struct dentry *dentry)
+{
+	int err = configfs_create_dir(item, dentry);
+
+	if (err)
+		return err;
+
+	err = populate_attrs(item);
+	if (!err)
+		return 0;
+
+	/* Attribute setup failed: take the new directory down again. */
+	configfs_remove_dir(item);
+	d_delete(dentry);
+	return err;
+}
+
+/* Reverse of configfs_attach_item(): attributes first, then the directory. */
+static void configfs_detach_item(struct config_item *item)
+{
+	detach_attrs(item);
+	configfs_remove_dir(item);
+}
+
+/*
+ * Attach @item as a plain item, flag its dirent CONFIGFS_USET_DIR,
+ * then create its default groups. On failure the item is detached
+ * again and the error is returned.
+ */
+static int configfs_attach_group(struct config_item *parent_item,
+				 struct config_item *item,
+				 struct dentry *dentry)
+{
+	struct configfs_dirent *dirent;
+	int err = configfs_attach_item(parent_item, item, dentry);
+
+	if (err)
+		return err;
+
+	dirent = dentry->d_fsdata;
+	dirent->s_type |= CONFIGFS_USET_DIR;
+
+	err = populate_groups(to_config_group(item));
+	if (!err)
+		return 0;
+
+	/* Default groups could not be created: undo the item attach. */
+	configfs_detach_item(item);
+	d_delete(dentry);
+	return err;
+}
+
+/* Reverse of configfs_attach_group(): default groups first, then the item. */
+static void configfs_detach_group(struct config_item *item)
+{
+	detach_groups(to_config_group(item));
+	configfs_detach_item(item);
+}
+
+/*
+ * Drop the initial reference from make_item()/make_group()
+ * This function assumes that reference is held on item
+ * and that item holds a valid reference to the parent. Also, it
+ * assumes the caller has validated ci_type.
+ */ +static void client_drop_item(struct config_item *parent_item, + struct config_item *item) +{ + struct config_item_type *type; + + type = parent_item->ci_type; + BUG_ON(!type); + + if (type->ct_group_ops && type->ct_group_ops->drop_item) + type->ct_group_ops->drop_item(to_config_group(parent_item), + item); + else + config_item_put(item); +} + + +static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + int ret; + struct config_group *group; + struct config_item *item; + struct config_item *parent_item; + struct configfs_subsystem *subsys; + struct configfs_dirent *sd; + struct config_item_type *type; + struct module *owner; + char *name; + + if (dentry->d_parent == configfs_sb->s_root) + return -EPERM; + + sd = dentry->d_parent->d_fsdata; + if (!(sd->s_type & CONFIGFS_USET_DIR)) + return -EPERM; + + parent_item = configfs_get_config_item(dentry->d_parent); + type = parent_item->ci_type; + subsys = to_config_group(parent_item)->cg_subsys; + BUG_ON(!subsys); + + if (!type || !type->ct_group_ops || + (!type->ct_group_ops->make_group && + !type->ct_group_ops->make_item)) { + config_item_put(parent_item); + return -EPERM; /* What lack-of-mkdir returns */ + } + + name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); + if (!name) { + config_item_put(parent_item); + return -ENOMEM; + } + snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); + + down(&subsys->su_sem); + group = NULL; + item = NULL; + if (type->ct_group_ops->make_group) { + group = type->ct_group_ops->make_group(to_config_group(parent_item), name); + if (group) { + link_group(to_config_group(parent_item), group); + item = &group->cg_item; + } + } else { + item = type->ct_group_ops->make_item(to_config_group(parent_item), name); + if (item) + link_obj(parent_item, item); + } + up(&subsys->su_sem); + + kfree(name); + if (!item) { + config_item_put(parent_item); + return -ENOMEM; + } + + ret = -EINVAL; + type = item->ci_type; + if (type) { + owner = type->ct_owner; + if 
(try_module_get(owner)) { + if (group) { + ret = configfs_attach_group(parent_item, + item, + dentry); + } else { + ret = configfs_attach_item(parent_item, + item, + dentry); + } + + if (ret) { + down(&subsys->su_sem); + if (group) + unlink_group(group); + else + unlink_obj(item); + client_drop_item(parent_item, item); + up(&subsys->su_sem); + + config_item_put(parent_item); + module_put(owner); + } + } + } + + return ret; +} + +static int configfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct config_item *parent_item; + struct config_item *item; + struct configfs_subsystem *subsys; + struct configfs_dirent *sd; + struct module *owner = NULL; + int ret; + + if (dentry->d_parent == configfs_sb->s_root) + return -EPERM; + + sd = dentry->d_fsdata; + if (sd->s_type & CONFIGFS_USET_DEFAULT) + return -EPERM; + + parent_item = configfs_get_config_item(dentry->d_parent); + subsys = to_config_group(parent_item)->cg_subsys; + BUG_ON(!subsys); + + if (!parent_item->ci_type) { + config_item_put(parent_item); + return -EINVAL; + } + + ret = configfs_detach_prep(dentry); + if (ret) { + configfs_detach_rollback(dentry); + config_item_put(parent_item); + return ret; + } + + item = configfs_get_config_item(dentry); + + /* Drop reference from above, item already holds one. 
*/ + config_item_put(parent_item); + + if (item->ci_type) + owner = item->ci_type->ct_owner; + + if (sd->s_type & CONFIGFS_USET_DIR) { + configfs_detach_group(item); + + down(&subsys->su_sem); + unlink_group(to_config_group(item)); + } else { + configfs_detach_item(item); + + down(&subsys->su_sem); + unlink_obj(item); + } + + client_drop_item(parent_item, item); + up(&subsys->su_sem); + + /* Drop our reference from above */ + config_item_put(item); + + module_put(owner); + + return 0; +} + +struct inode_operations configfs_dir_inode_operations = { + .mkdir = configfs_mkdir, + .rmdir = configfs_rmdir, + .symlink = configfs_symlink, + .unlink = configfs_unlink, + .lookup = configfs_lookup, +}; + +#if 0 +int configfs_rename_dir(struct config_item * item, const char *new_name) +{ + int error = 0; + struct dentry * new_dentry, * parent; + + if (!strcmp(config_item_name(item), new_name)) + return -EINVAL; + + if (!item->parent) + return -EINVAL; + + down_write(&configfs_rename_sem); + parent = item->parent->dentry; + + down(&parent->d_inode->i_sem); + + new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); + if (!IS_ERR(new_dentry)) { + if (!new_dentry->d_inode) { + error = config_item_set_name(item, "%s", new_name); + if (!error) { + d_add(new_dentry, NULL); + d_move(item->dentry, new_dentry); + } + else + d_delete(new_dentry); + } else + error = -EEXIST; + dput(new_dentry); + } + up(&parent->d_inode->i_sem); + up_write(&configfs_rename_sem); + + return error; +} +#endif + +static int configfs_dir_open(struct inode *inode, struct file *file) +{ + struct dentry * dentry = file->f_dentry; + struct configfs_dirent * parent_sd = dentry->d_fsdata; + + down(&dentry->d_inode->i_sem); + file->private_data = configfs_new_dirent(parent_sd, NULL); + up(&dentry->d_inode->i_sem); + + return file->private_data ? 
0 : -ENOMEM; + +} + +static int configfs_dir_close(struct inode *inode, struct file *file) +{ + struct dentry * dentry = file->f_dentry; + struct configfs_dirent * cursor = file->private_data; + + down(&dentry->d_inode->i_sem); + list_del_init(&cursor->s_sibling); + up(&dentry->d_inode->i_sem); + + release_configfs_dirent(cursor); + + return 0; +} + +/* Relationship between s_mode and the DT_xxx types */ +static inline unsigned char dt_type(struct configfs_dirent *sd) +{ + return (sd->s_mode >> 12) & 15; +} + +static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + struct dentry *dentry = filp->f_dentry; + struct configfs_dirent * parent_sd = dentry->d_fsdata; + struct configfs_dirent *cursor = filp->private_data; + struct list_head *p, *q = &cursor->s_sibling; + ino_t ino; + int i = filp->f_pos; + + switch (i) { + case 0: + ino = dentry->d_inode->i_ino; + if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) + break; + filp->f_pos++; + i++; + /* fallthrough */ + case 1: + ino = parent_ino(dentry); + if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) + break; + filp->f_pos++; + i++; + /* fallthrough */ + default: + if (filp->f_pos == 2) { + list_del(q); + list_add(q, &parent_sd->s_children); + } + for (p=q->next; p!= &parent_sd->s_children; p=p->next) { + struct configfs_dirent *next; + const char * name; + int len; + + next = list_entry(p, struct configfs_dirent, + s_sibling); + if (!next->s_element) + continue; + + name = configfs_get_name(next); + len = strlen(name); + if (next->s_dentry) + ino = next->s_dentry->d_inode->i_ino; + else + ino = iunique(configfs_sb, 2); + + if (filldir(dirent, name, len, filp->f_pos, ino, + dt_type(next)) < 0) + return 0; + + list_del(q); + list_add(q, p); + p = q; + filp->f_pos++; + } + } + return 0; +} + +static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin) +{ + struct dentry * dentry = file->f_dentry; + + down(&dentry->d_inode->i_sem); + switch (origin) { + case 1: + offset += 
file->f_pos; + case 0: + if (offset >= 0) + break; + default: + up(&file->f_dentry->d_inode->i_sem); + return -EINVAL; + } + if (offset != file->f_pos) { + file->f_pos = offset; + if (file->f_pos >= 2) { + struct configfs_dirent *sd = dentry->d_fsdata; + struct configfs_dirent *cursor = file->private_data; + struct list_head *p; + loff_t n = file->f_pos - 2; + + list_del(&cursor->s_sibling); + p = sd->s_children.next; + while (n && p != &sd->s_children) { + struct configfs_dirent *next; + next = list_entry(p, struct configfs_dirent, + s_sibling); + if (next->s_element) + n--; + p = p->next; + } + list_add_tail(&cursor->s_sibling, p); + } + } + up(&dentry->d_inode->i_sem); + return offset; +} + +struct file_operations configfs_dir_operations = { + .open = configfs_dir_open, + .release = configfs_dir_close, + .llseek = configfs_dir_lseek, + .read = generic_read_dir, + .readdir = configfs_readdir, +}; + +int configfs_register_subsystem(struct configfs_subsystem *subsys) +{ + int err; + struct config_group *group = &subsys->su_group; + struct qstr name; + struct dentry *dentry; + struct configfs_dirent *sd; + + err = configfs_pin_fs(); + if (err) + return err; + + if (!group->cg_item.ci_name) + group->cg_item.ci_name = group->cg_item.ci_namebuf; + + sd = configfs_sb->s_root->d_fsdata; + link_group(to_config_group(sd->s_element), group); + + down(&configfs_sb->s_root->d_inode->i_sem); + + name.name = group->cg_item.ci_name; + name.len = strlen(name.name); + name.hash = full_name_hash(name.name, name.len); + + err = -ENOMEM; + dentry = d_alloc(configfs_sb->s_root, &name); + if (!dentry) + goto out_release; + + d_add(dentry, NULL); + + err = configfs_attach_group(sd->s_element, &group->cg_item, + dentry); + if (!err) + dentry = NULL; + else + d_delete(dentry); + + up(&configfs_sb->s_root->d_inode->i_sem); + + if (dentry) { + dput(dentry); +out_release: + unlink_group(group); + configfs_release_fs(); + } + + return err; +} + +void configfs_unregister_subsystem(struct 
configfs_subsystem *subsys) +{ + struct config_group *group = &subsys->su_group; + struct dentry *dentry = group->cg_item.ci_dentry; + + if (dentry->d_parent != configfs_sb->s_root) { + printk(KERN_ERR "configfs: Tried to unregister non-subsystem!\n"); + return; + } + + down(&configfs_sb->s_root->d_inode->i_sem); + down(&dentry->d_inode->i_sem); + if (configfs_detach_prep(dentry)) { + printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); + } + configfs_detach_group(&group->cg_item); + dentry->d_inode->i_flags |= S_DEAD; + up(&dentry->d_inode->i_sem); + + d_delete(dentry); + + up(&configfs_sb->s_root->d_inode->i_sem); + + dput(dentry); + + unlink_group(group); + configfs_release_fs(); +} + +EXPORT_SYMBOL(configfs_register_subsystem); +EXPORT_SYMBOL(configfs_unregister_subsystem); diff --git a/fs/configfs/file.c b/fs/configfs/file.c new file mode 100644 index 000000000000..af1ffc9a15c0 --- /dev/null +++ b/fs/configfs/file.c @@ -0,0 +1,360 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * file.c - operations for regular (text) files. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include "configfs_internal.h"
+
+
+/*
+ * Per-open state of an attribute file; allocated in check_perm() and
+ * freed in configfs_release().
+ */
+struct configfs_buffer {
+	size_t			count;	/* valid bytes in page, set by fill_read_buffer() */
+	loff_t			pos;	/* not referenced by the code visible in this file section */
+	char			* page;	/* one zeroed page, allocated on first read or write */
+	struct configfs_item_operations	* ops;	/* show/store methods, cached by check_perm() */
+	struct semaphore	sem;	/* held across each read and each write */
+	int			needs_read_fill;	/* non-zero: page must be refilled before reading */
+};
+
+
+/**
+ *	fill_read_buffer - allocate and fill buffer from item.
+ *	@dentry:	dentry pointer.
+ *	@buffer:	data buffer for file.
+ *
+ *	Allocate @buffer->page, if it hasn't been already, then call the
+ *	config_item's show() method to fill the buffer with this attribute's
+ *	data.
+ *	Called from configfs_read_file() whenever @buffer->needs_read_fill
+ *	is set; the flag is cleared here.
+ *
+ *	Returns 0 on success, -ENOMEM if no page could be allocated, or
+ *	the negative value returned by the show() method.
+ */
+static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buffer)
+{
+	struct configfs_attribute * attr = to_attr(dentry);
+	struct config_item * item = to_item(dentry->d_parent);
+	struct configfs_item_operations * ops = buffer->ops;
+	int ret = 0;
+	ssize_t count;
+
+	if (!buffer->page)
+		buffer->page = (char *) get_zeroed_page(GFP_KERNEL);
+	if (!buffer->page)
+		return -ENOMEM;
+
+	count = ops->show_attribute(item,attr,buffer->page);
+	buffer->needs_read_fill = 0;
+	/* A show() method must never report more than the page it was given. */
+	BUG_ON(count > (ssize_t)PAGE_SIZE);
+	if (count >= 0)
+		buffer->count = count;
+	else
+		ret = count;
+	return ret;
+}
+
+
+/**
+ *	flush_read_buffer - push buffer to userspace.
+ *	@buffer:	data buffer for file.
+ *	@buf:		user-passed buffer.
+ *	@count:		number of bytes requested.
+ *	@ppos:		file position.
+ *
+ *	Copy the buffer we filled in fill_read_buffer() to userspace.
+ *	This is done at the reader's leisure, copying and advancing
+ *	the amount they specify each time.
+ *	This may be called continuously until the buffer is empty.
+ */
+static int flush_read_buffer(struct configfs_buffer * buffer, char __user * buf,
+			     size_t count, loff_t * ppos)
+{
+	int error;
+
+	/* Reading past the end of the filled data yields EOF. */
+	if (*ppos > buffer->count)
+		return 0;
+
+	/* Clamp the request to what is left in the buffer. */
+	if (count > (buffer->count - *ppos))
+		count = buffer->count - *ppos;
+
+	error = copy_to_user(buf,buffer->page + *ppos,count);
+	if (!error)
+		*ppos += count;
+	return error ? -EFAULT : count;
+}
+
+/**
+ *	configfs_read_file - read an attribute.
+ *	@file:	file pointer.
+ *	@buf:	buffer to fill.
+ *	@count:	number of bytes to read.
+ *	@ppos:	starting offset in file.
+ *
+ *	Userspace wants to read an attribute file. The attribute descriptor
+ *	is in the file's ->d_fsdata. The target item is in the directory's
+ *	->d_fsdata.
+ *
+ *	We call fill_read_buffer() to allocate and fill the buffer from the
+ *	item's show() method exactly once (if the read is happening from
+ *	the beginning of the file). That should fill the entire buffer with
+ *	all the data the item has to offer for that attribute.
+ *	We then call flush_read_buffer() to copy the buffer to userspace
+ *	in the increments specified.
+ *
+ *	Returns the number of bytes copied, 0 at end of data, or a
+ *	negative errno from fill_read_buffer()/flush_read_buffer().
+ */
+
+static ssize_t
+configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+	struct configfs_buffer * buffer = file->private_data;
+	ssize_t retval = 0;
+
+	down(&buffer->sem);
+	if (buffer->needs_read_fill) {
+		if ((retval = fill_read_buffer(file->f_dentry,buffer)))
+			goto out;
+	}
+	/* count is a size_t, so it must be printed with %zu, not %d. */
+	pr_debug("%s: count = %zu, ppos = %lld, buf = %s\n",
+		 __FUNCTION__,count,*ppos,buffer->page);
+	retval = flush_read_buffer(buffer,buf,count,ppos);
+out:
+	up(&buffer->sem);
+	return retval;
+}
+
+
+/**
+ *	fill_write_buffer - copy buffer from userspace.
+ *	@buffer:	data buffer for file.
+ *	@buf:		data from user.
+ *	@count:		number of bytes in @buf.
+ *
+ *	Allocate @buffer->page if it hasn't been already, then
+ *	copy the user-supplied buffer into it.
+ *
+ *	At most PAGE_SIZE - 1 bytes are accepted and the copied data is
+ *	always NUL-terminated, so a store() method may parse it as a string.
+ *	Returns the number of bytes buffered, -ENOMEM or -EFAULT.
+ */
+
+static int
+fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size_t count)
+{
+	int error;
+
+	if (!buffer->page)
+		buffer->page = (char *)get_zeroed_page(GFP_KERNEL);
+	if (!buffer->page)
+		return -ENOMEM;
+
+	/* Leave room for the terminating NUL written below. */
+	if (count >= PAGE_SIZE)
+		count = PAGE_SIZE - 1;
+	error = copy_from_user(buffer->page,buf,count);
+	buffer->needs_read_fill = 1;
+	/*
+	 * The page is reused between reads and writes, so bytes from an
+	 * earlier, longer use may follow the new data; cut them off.
+	 */
+	buffer->page[count] = 0;
+	return error ? -EFAULT : count;
+}
+
+
+/**
+ *	flush_write_buffer - push buffer to config_item.
+ *	@dentry:	dentry of the attribute file.
+ *	@buffer:	data buffer for file.
+ *	@count:		number of bytes held in @buffer->page.
+ *
+ *	Get the correct pointers for the config_item and the attribute we're
+ *	dealing with, then call the store() method for the attribute,
+ *	passing the buffer that we acquired in fill_write_buffer().
+ *	Returns whatever the store() method returns.
+ */
+
+static int
+flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size_t count)
+{
+	struct configfs_attribute * attr = to_attr(dentry);
+	struct config_item * item = to_item(dentry->d_parent);
+	struct configfs_item_operations * ops = buffer->ops;
+
+	return ops->store_attribute(item,attr,buffer->page,count);
+}
+
+
+/**
+ *	configfs_write_file - write an attribute.
+ *	@file:	file pointer
+ *	@buf:	data to write
+ *	@count:	number of bytes
+ *	@ppos:	starting offset
+ *
+ *	Similar to configfs_read_file(), though working in the opposite direction.
+ *	We allocate and fill the data from the user in fill_write_buffer(),
+ *	then push it to the config_item in flush_write_buffer().
+ *	There is no easy way for us to know if userspace is only doing a partial
+ *	write, so we don't support them. We expect the entire buffer to come
+ *	on the first write.
+ *	Hint: if you're writing a value, first read the file, modify only the
+ *	value you're changing, then write entire buffer back.
+ */
+
+static ssize_t
+configfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+{
+	struct configfs_buffer * buffer = file->private_data;
+	ssize_t len;
+
+	down(&buffer->sem);
+	/*
+	 * Keep the result in a signed variable: fill_write_buffer() and
+	 * flush_write_buffer() report errors as negative values, which an
+	 * unsigned size_t would turn into a huge "length".
+	 */
+	len = fill_write_buffer(buffer,buf,count);
+	if (len > 0)
+		len = flush_write_buffer(file->f_dentry,buffer,len);
+	if (len > 0)
+		*ppos += len;
+	up(&buffer->sem);
+	return len;
+}
+
+/*
+ * check_perm - validate an open of an attribute file and set up its buffer.
+ *
+ * Takes a reference on the parent config_item and on the attribute's
+ * owning module; both are kept on success and released again by
+ * configfs_release(). On any failure every reference taken here is
+ * dropped before returning.
+ *
+ * Returns 0 on success, -EINVAL if the item or attribute is missing,
+ * -ENODEV if the module reference cannot be taken, -EACCES if the
+ * inode mode or the item's operations do not allow the requested
+ * access, and -ENOMEM if the buffer cannot be allocated.
+ */
+static int check_perm(struct inode * inode, struct file * file)
+{
+	struct config_item *item = configfs_get_config_item(file->f_dentry->d_parent);
+	struct configfs_attribute * attr = to_attr(file->f_dentry);
+	struct configfs_buffer * buffer;
+	struct configfs_item_operations * ops = NULL;
+	int error = 0;
+
+	if (!item || !attr)
+		goto Einval;
+
+	/* Grab the module reference for this attribute if we have one */
+	if (!try_module_get(attr->ca_owner)) {
+		error = -ENODEV;
+		goto Done;
+	}
+
+	if (item->ci_type)
+		ops = item->ci_type->ct_item_ops;
+	else
+		goto Eaccess;
+
+	/* File needs write support.
+	 * The inode's perms must say it's ok,
+	 * and we must have a store method.
+	 */
+	if (file->f_mode & FMODE_WRITE) {
+		if (!(inode->i_mode & S_IWUGO) || !ops || !ops->store_attribute)
+			goto Eaccess;
+	}
+
+	/* File needs read support.
+	 * The inode's perms must say it's ok, and there
+	 * must be a show method for it.
+	 */
+	if (file->f_mode & FMODE_READ) {
+		if (!(inode->i_mode & S_IRUGO) || !ops || !ops->show_attribute)
+			goto Eaccess;
+	}
+
+	/* No error? Great, allocate a buffer for the file, and store
+	 * it in file->private_data for easy access.
+	 */
+	buffer = kmalloc(sizeof(struct configfs_buffer),GFP_KERNEL);
+	if (!buffer) {
+		error = -ENOMEM;
+		goto Enomem;
+	}
+	memset(buffer,0,sizeof(struct configfs_buffer));
+	init_MUTEX(&buffer->sem);
+	buffer->needs_read_fill = 1;
+	buffer->ops = ops;
+	file->private_data = buffer;
+	goto Done;
+
+ Einval:
+	error = -EINVAL;
+	goto Done;
+ Eaccess:
+	error = -EACCES;
+ Enomem:
+	/* Both failure paths hold the module reference taken above. */
+	module_put(attr->ca_owner);
+ Done:
+	if (error && item)
+		config_item_put(item);
+	return error;
+}
+
+/* open() for attribute files: all the work is done by check_perm(). */
+static int configfs_open_file(struct inode * inode, struct file * filp)
+{
+	return check_perm(inode,filp);
+}
+
+/*
+ * release() for attribute files: drop the item and module references
+ * taken in check_perm(), then free the buffer and its page.
+ */
+static int configfs_release(struct inode * inode, struct file * filp)
+{
+	struct config_item * item = to_item(filp->f_dentry->d_parent);
+	struct configfs_attribute * attr = to_attr(filp->f_dentry);
+	struct module * owner = attr->ca_owner;
+	struct configfs_buffer * buffer = filp->private_data;
+
+	if (item)
+		config_item_put(item);
+	/* After this point, attr should not be accessed. */
+	module_put(owner);
+
+	if (buffer) {
+		if (buffer->page)
+			free_page((unsigned long)buffer->page);
+		kfree(buffer);
+	}
+	return 0;
+}
+
+struct file_operations configfs_file_operations = {
+	.read		= configfs_read_file,
+	.write		= configfs_write_file,
+	.llseek		= generic_file_llseek,
+	.open		= configfs_open_file,
+	.release	= configfs_release,
+};
+
+
+/*
+ * configfs_add_file - add a dirent for @attr under directory @dir.
+ *
+ * Builds a regular-file mode from @attr->ca_mode and calls
+ * configfs_make_dirent() with @dir's i_sem held. Returns that
+ * function's result.
+ */
+int configfs_add_file(struct dentry * dir, const struct configfs_attribute * attr, int type)
+{
+	struct configfs_dirent * parent_sd = dir->d_fsdata;
+	umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
+	int error = 0;
+
+	down(&dir->d_inode->i_sem);
+	error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type);
+	up(&dir->d_inode->i_sem);
+
+	return error;
+}
+
+
+/**
+ *	configfs_create_file - create an attribute file for an item.
+ *	@item:	item we're creating for.
+ *	@attr:	attribute descriptor.
+ */ + +int configfs_create_file(struct config_item * item, const struct configfs_attribute * attr) +{ + BUG_ON(!item || !item->ci_dentry || !attr); + + return configfs_add_file(item->ci_dentry, attr, + CONFIGFS_ITEM_ATTR); +} + diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c new file mode 100644 index 000000000000..6b274c6d428f --- /dev/null +++ b/fs/configfs/inode.c @@ -0,0 +1,162 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * inode.c - basic inode and dentry operations. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. + * + * Please see Documentation/filesystems/configfs.txt for more information. 
+ */ + +#undef DEBUG + +#include +#include +#include + +#include +#include "configfs_internal.h" + +extern struct super_block * configfs_sb; + +static struct address_space_operations configfs_aops = { + .readpage = simple_readpage, + .prepare_write = simple_prepare_write, + .commit_write = simple_commit_write +}; + +static struct backing_dev_info configfs_backing_dev_info = { + .ra_pages = 0, /* No readahead */ + .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, +}; + +struct inode * configfs_new_inode(mode_t mode) +{ + struct inode * inode = new_inode(configfs_sb); + if (inode) { + inode->i_mode = mode; + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_mapping->a_ops = &configfs_aops; + inode->i_mapping->backing_dev_info = &configfs_backing_dev_info; + } + return inode; +} + +int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *)) +{ + int error = 0; + struct inode * inode = NULL; + if (dentry) { + if (!dentry->d_inode) { + if ((inode = configfs_new_inode(mode))) { + if (dentry->d_parent && dentry->d_parent->d_inode) { + struct inode *p_inode = dentry->d_parent->d_inode; + p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; + } + goto Proceed; + } + else + error = -ENOMEM; + } else + error = -EEXIST; + } else + error = -ENOENT; + goto Done; + + Proceed: + if (init) + error = init(inode); + if (!error) { + d_instantiate(dentry, inode); + if (S_ISDIR(mode) || S_ISLNK(mode)) + dget(dentry); /* pin link and directory dentries in core */ + } else + iput(inode); + Done: + return error; +} + +/* + * Get the name for corresponding element represented by the given configfs_dirent + */ +const unsigned char * configfs_get_name(struct configfs_dirent *sd) +{ + struct attribute * attr; + + if (!sd || !sd->s_element) + BUG(); + + /* These always have a dentry, so use that */ + if (sd->s_type & (CONFIGFS_DIR | 
CONFIGFS_ITEM_LINK)) + return sd->s_dentry->d_name.name; + + if (sd->s_type & CONFIGFS_ITEM_ATTR) { + attr = sd->s_element; + return attr->name; + } + return NULL; +} + + +/* + * Unhashes the dentry corresponding to given configfs_dirent + * Called with parent inode's i_sem held. + */ +void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) +{ + struct dentry * dentry = sd->s_dentry; + + if (dentry) { + spin_lock(&dcache_lock); + if (!(d_unhashed(dentry) && dentry->d_inode)) { + dget_locked(dentry); + __d_drop(dentry); + spin_unlock(&dcache_lock); + simple_unlink(parent->d_inode, dentry); + } else + spin_unlock(&dcache_lock); + } +} + +void configfs_hash_and_remove(struct dentry * dir, const char * name) +{ + struct configfs_dirent * sd; + struct configfs_dirent * parent_sd = dir->d_fsdata; + + down(&dir->d_inode->i_sem); + list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { + if (!sd->s_element) + continue; + if (!strcmp(configfs_get_name(sd), name)) { + list_del_init(&sd->s_sibling); + configfs_drop_dentry(sd, dir); + configfs_put(sd); + break; + } + } + up(&dir->d_inode->i_sem); +} + + diff --git a/fs/configfs/item.c b/fs/configfs/item.c new file mode 100644 index 000000000000..e07485ac50ad --- /dev/null +++ b/fs/configfs/item.c @@ -0,0 +1,227 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * item.c - library routines for handling generic config items + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on kobject: + * kobject is Copyright (c) 2002-2003 Patrick Mochel + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. + * + * Please see the file Documentation/filesystems/configfs.txt for + * critical information about using the config_item interface. + */ + +#include +#include +#include +#include + +#include + + +static inline struct config_item * to_item(struct list_head * entry) +{ + return container_of(entry,struct config_item,ci_entry); +} + +/* Evil kernel */ +static void config_item_release(struct kref *kref); + +/** + * config_item_init - initialize item. + * @item: item in question. + */ +void config_item_init(struct config_item * item) +{ + kref_init(&item->ci_kref); + INIT_LIST_HEAD(&item->ci_entry); +} + +/** + * config_item_set_name - Set the name of an item + * @item: item. + * @name: name. + * + * If strlen(name) >= CONFIGFS_ITEM_NAME_LEN, then use a + * dynamically allocated string that @item->ci_name points to. + * Otherwise, use the static @item->ci_namebuf array. + */ + +int config_item_set_name(struct config_item * item, const char * fmt, ...) +{ + int error = 0; + int limit = CONFIGFS_ITEM_NAME_LEN; + int need; + va_list args; + char * name; + + /* + * First, try the static array + */ + va_start(args,fmt); + need = vsnprintf(item->ci_namebuf,limit,fmt,args); + va_end(args); + if (need < limit) + name = item->ci_namebuf; + else { + /* + * Need more space? Allocate it and try again + */ + limit = need + 1; + name = kmalloc(limit,GFP_KERNEL); + if (!name) { + error = -ENOMEM; + goto Done; + } + va_start(args,fmt); + need = vsnprintf(name,limit,fmt,args); + va_end(args); + + /* Still? Give up. 
*/ + if (need >= limit) { + kfree(name); + error = -EFAULT; + goto Done; + } + } + + /* Free the old name, if necessary. */ + if (item->ci_name && item->ci_name != item->ci_namebuf) + kfree(item->ci_name); + + /* Now, set the new name */ + item->ci_name = name; + Done: + return error; +} + +EXPORT_SYMBOL(config_item_set_name); + +void config_item_init_type_name(struct config_item *item, + const char *name, + struct config_item_type *type) +{ + config_item_set_name(item, name); + item->ci_type = type; + config_item_init(item); +} +EXPORT_SYMBOL(config_item_init_type_name); + +void config_group_init_type_name(struct config_group *group, const char *name, + struct config_item_type *type) +{ + config_item_set_name(&group->cg_item, name); + group->cg_item.ci_type = type; + config_group_init(group); +} +EXPORT_SYMBOL(config_group_init_type_name); + +struct config_item * config_item_get(struct config_item * item) +{ + if (item) + kref_get(&item->ci_kref); + return item; +} + +/** + * config_item_cleanup - free config_item resources. + * @item: item. + */ + +void config_item_cleanup(struct config_item * item) +{ + struct config_item_type * t = item->ci_type; + struct config_group * s = item->ci_group; + struct config_item * parent = item->ci_parent; + + pr_debug("config_item %s: cleaning up\n",config_item_name(item)); + if (item->ci_name != item->ci_namebuf) + kfree(item->ci_name); + item->ci_name = NULL; + if (t && t->ct_item_ops && t->ct_item_ops->release) + t->ct_item_ops->release(item); + if (s) + config_group_put(s); + if (parent) + config_item_put(parent); +} + +static void config_item_release(struct kref *kref) +{ + config_item_cleanup(container_of(kref, struct config_item, ci_kref)); +} + +/** + * config_item_put - decrement refcount for item. + * @item: item. + * + * Decrement the refcount, and if 0, call config_item_cleanup(). 
+ */ +void config_item_put(struct config_item * item) +{ + if (item) + kref_put(&item->ci_kref, config_item_release); +} + + +/** + * config_group_init - initialize a group for use + * @group: group to initialize + */ + +void config_group_init(struct config_group *group) +{ + config_item_init(&group->cg_item); + INIT_LIST_HEAD(&group->cg_children); +} + + +/** + * config_group_find_obj - search for item in group. + * @group: group we're looking in. + * @name: item's name. + * + * Iterate over @group->cg_children looking for a config_item whose name + * matches @name. No lock is taken here (see the XXX in the body). + * If a matching item is found, take a reference and return the item; + * otherwise return NULL. + */ + +struct config_item * config_group_find_obj(struct config_group * group, const char * name) +{ + struct list_head * entry; + struct config_item * ret = NULL; + + /* XXX LOCKING! */ + list_for_each(entry,&group->cg_children) { + struct config_item * item = to_item(entry); + if (config_item_name(item) && + !strcmp(config_item_name(item), name)) { + ret = config_item_get(item); + break; + } + } + return ret; +} + + +EXPORT_SYMBOL(config_item_init); +EXPORT_SYMBOL(config_group_init); +EXPORT_SYMBOL(config_item_get); +EXPORT_SYMBOL(config_item_put); + diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c new file mode 100644 index 000000000000..1a2f6f6a4d91 --- /dev/null +++ b/fs/configfs/mount.c @@ -0,0 +1,159 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * mount.c - operations for initializing and mounting configfs. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. + */ + +#include +#include +#include +#include +#include + +#include +#include "configfs_internal.h" + +/* Random magic number */ +#define CONFIGFS_MAGIC 0x62656570 + +struct vfsmount * configfs_mount = NULL; +struct super_block * configfs_sb = NULL; +static int configfs_mnt_count = 0; + +static struct super_operations configfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +}; + +static struct config_group configfs_root_group = { + .cg_item = { + .ci_namebuf = "root", + .ci_name = configfs_root_group.cg_item.ci_namebuf, + }, +}; + +int configfs_is_root(struct config_item *item) +{ + return item == &configfs_root_group.cg_item; +} + +static struct configfs_dirent configfs_root = { + .s_sibling = LIST_HEAD_INIT(configfs_root.s_sibling), + .s_children = LIST_HEAD_INIT(configfs_root.s_children), + .s_element = &configfs_root_group.cg_item, + .s_type = CONFIGFS_ROOT, +}; + +static int configfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *inode; + struct dentry *root; + + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = CONFIGFS_MAGIC; + sb->s_op = &configfs_ops; + configfs_sb = sb; + + inode = configfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO); + if (inode) { + inode->i_op = &configfs_dir_inode_operations; + inode->i_fop = &configfs_dir_operations; + /* directory inodes start off with i_nlink == 2 (for "." 
entry) */ + inode->i_nlink++; + } else { + pr_debug("configfs: could not get root inode\n"); + return -ENOMEM; + } + + root = d_alloc_root(inode); + if (!root) { + pr_debug("%s: could not get root dentry!\n",__FUNCTION__); + iput(inode); + return -ENOMEM; + } + config_group_init(&configfs_root_group); + configfs_root_group.cg_item.ci_dentry = root; + root->d_fsdata = &configfs_root; + sb->s_root = root; + return 0; +} + +static struct super_block *configfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return get_sb_single(fs_type, flags, data, configfs_fill_super); +} + +static struct file_system_type configfs_fs_type = { + .owner = THIS_MODULE, + .name = "configfs", + .get_sb = configfs_get_sb, + .kill_sb = kill_litter_super, +}; + +int configfs_pin_fs(void) +{ + return simple_pin_fs("configfs", &configfs_mount, + &configfs_mnt_count); +} + +void configfs_release_fs(void) +{ + simple_release_fs(&configfs_mount, &configfs_mnt_count); +} + + +static decl_subsys(config, NULL, NULL); + +static int __init configfs_init(void) +{ + int err; + + kset_set_kset_s(&config_subsys, kernel_subsys); + err = subsystem_register(&config_subsys); + if (err) + return err; + + err = register_filesystem(&configfs_fs_type); + if (err) { + printk(KERN_ERR "configfs: Unable to register filesystem!\n"); + subsystem_unregister(&config_subsys); + } + + return err; +} + +static void __exit configfs_exit(void) +{ + unregister_filesystem(&configfs_fs_type); + subsystem_unregister(&config_subsys); +} + +MODULE_AUTHOR("Oracle"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("0.0.1"); +MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration."); + +module_init(configfs_init); +module_exit(configfs_exit); diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c new file mode 100644 index 000000000000..50f5840521a9 --- /dev/null +++ b/fs/configfs/symlink.c @@ -0,0 +1,281 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: 
noexpandtab sw=8 ts=8 sts=0: + * + * symlink.c - operations for configfs symlinks. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. + */ + +#include +#include +#include + +#include +#include "configfs_internal.h" + +static int item_depth(struct config_item * item) +{ + struct config_item * p = item; + int depth = 0; + do { depth++; } while ((p = p->ci_parent) && !configfs_is_root(p)); + return depth; +} + +static int item_path_length(struct config_item * item) +{ + struct config_item * p = item; + int length = 1; + do { + length += strlen(config_item_name(p)) + 1; + p = p->ci_parent; + } while (p && !configfs_is_root(p)); + return length; +} + +static void fill_item_path(struct config_item * item, char * buffer, int length) +{ + struct config_item * p; + + --length; + for (p = item; p && !configfs_is_root(p); p = p->ci_parent) { + int cur = strlen(config_item_name(p)); + + /* back up enough to print this bus id with '/' */ + length -= cur; + strncpy(buffer + length,config_item_name(p),cur); + *(buffer + --length) = '/'; + } +} + +static int create_link(struct config_item *parent_item, + struct config_item *item, + struct dentry *dentry) +{ + 
struct configfs_dirent *target_sd = item->ci_dentry->d_fsdata; + struct configfs_symlink *sl; + int ret; + + ret = -ENOMEM; + sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); + if (sl) { + sl->sl_target = config_item_get(item); + /* FIXME: needs a lock, I'd bet */ + list_add(&sl->sl_list, &target_sd->s_links); + ret = configfs_create_link(sl, parent_item->ci_dentry, + dentry); + if (ret) { + list_del_init(&sl->sl_list); + config_item_put(item); + kfree(sl); + } + } + + return ret; +} + + +static int get_target(const char *symname, struct nameidata *nd, + struct config_item **target) +{ + int ret; + + ret = path_lookup(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, nd); + if (!ret) { + if (nd->dentry->d_sb == configfs_sb) { + *target = configfs_get_config_item(nd->dentry); + if (!*target) { + ret = -ENOENT; + path_release(nd); + } + } else + ret = -EPERM; + } + + return ret; +} + + +int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +{ + int ret; + struct nameidata nd; + struct config_item *parent_item; + struct config_item *target_item; + struct config_item_type *type; + + ret = -EPERM; /* What lack-of-symlink returns */ + if (dentry->d_parent == configfs_sb->s_root) + goto out; + + parent_item = configfs_get_config_item(dentry->d_parent); + type = parent_item->ci_type; + + if (!type || !type->ct_item_ops || + !type->ct_item_ops->allow_link) + goto out_put; + + ret = get_target(symname, &nd, &target_item); + if (ret) + goto out_put; + + ret = type->ct_item_ops->allow_link(parent_item, target_item); + if (!ret) + ret = create_link(parent_item, target_item, dentry); + + config_item_put(target_item); + path_release(&nd); + +out_put: + config_item_put(parent_item); + +out: + return ret; +} + +int configfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct configfs_dirent *sd = dentry->d_fsdata; + struct configfs_symlink *sl; + struct config_item *parent_item; + struct config_item_type *type; + int ret; + + ret = -EPERM; /* 
What lack-of-symlink returns */ + if (!(sd->s_type & CONFIGFS_ITEM_LINK)) + goto out; + + if (dentry->d_parent == configfs_sb->s_root) + BUG(); + + sl = sd->s_element; + + parent_item = configfs_get_config_item(dentry->d_parent); + type = parent_item->ci_type; + + list_del_init(&sd->s_sibling); + configfs_drop_dentry(sd, dentry->d_parent); + dput(dentry); + configfs_put(sd); + + /* + * drop_link() must be called before + * list_del_init(&sl->sl_list), so that the order of + * drop_link(this, target) and drop_item(target) is preserved. + */ + if (type && type->ct_item_ops && + type->ct_item_ops->drop_link) + type->ct_item_ops->drop_link(parent_item, + sl->sl_target); + + /* FIXME: Needs lock */ + list_del_init(&sl->sl_list); + + /* Put reference from create_link() */ + config_item_put(sl->sl_target); + kfree(sl); + + config_item_put(parent_item); + + ret = 0; + +out: + return ret; +} + +static int configfs_get_target_path(struct config_item * item, struct config_item * target, + char *path) +{ + char * s; + int depth, size; + + depth = item_depth(item); + size = item_path_length(target) + depth * 3 - 1; + if (size > PATH_MAX) + return -ENAMETOOLONG; + + pr_debug("%s: depth = %d, size = %d\n", __FUNCTION__, depth, size); + + for (s = path; depth--; s += 3) + strcpy(s,"../"); + + fill_item_path(target, path, size); + pr_debug("%s: path = '%s'\n", __FUNCTION__, path); + + return 0; +} + +static int configfs_getlink(struct dentry *dentry, char * path) +{ + struct config_item *item, *target_item; + int error = 0; + + item = configfs_get_config_item(dentry->d_parent); + if (!item) + return -EINVAL; + + target_item = configfs_get_config_item(dentry); + if (!target_item) { + config_item_put(item); + return -EINVAL; + } + + down_read(&configfs_rename_sem); + error = configfs_get_target_path(item, target_item, path); + up_read(&configfs_rename_sem); + + config_item_put(item); + config_item_put(target_item); + return error; + +} + +static void *configfs_follow_link(struct 
dentry *dentry, struct nameidata *nd) +{ + int error = -ENOMEM; + unsigned long page = get_zeroed_page(GFP_KERNEL); + + if (page) { + error = configfs_getlink(dentry, (char *)page); + if (!error) { + nd_set_link(nd, (char *)page); + return (void *)page; + } + } + + nd_set_link(nd, ERR_PTR(error)); + return NULL; +} + +static void configfs_put_link(struct dentry *dentry, struct nameidata *nd, + void *cookie) +{ + if (cookie) { + unsigned long page = (unsigned long)cookie; + free_page(page); + } +} + +struct inode_operations configfs_symlink_inode_operations = { + .follow_link = configfs_follow_link, + .readlink = generic_readlink, + .put_link = configfs_put_link, +}; + diff --git a/include/linux/configfs.h b/include/linux/configfs.h new file mode 100644 index 000000000000..acffb8c9073a --- /dev/null +++ b/include/linux/configfs.h @@ -0,0 +1,205 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * configfs.h - definitions for the device driver filesystem + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * Based on kobject.h: + * Copyright (c) 2002-2003 Patrick Mochel + * Copyright (c) 2002-2003 Open Source Development Labs + * + * configfs Copyright (C) 2005 Oracle. 
All rights reserved. + * + * Please read Documentation/filesystems/configfs.txt before using the + * configfs interface, ESPECIALLY the parts about reference counts and + * item destructors. + */ + +#ifndef _CONFIGFS_H_ +#define _CONFIGFS_H_ + +#ifdef __KERNEL__ + +#include +#include +#include + +#include +#include + +#define CONFIGFS_ITEM_NAME_LEN 20 + +struct module; + +struct configfs_item_operations; +struct configfs_group_operations; +struct configfs_attribute; +struct configfs_subsystem; + +struct config_item { + char *ci_name; + char ci_namebuf[CONFIGFS_ITEM_NAME_LEN]; + struct kref ci_kref; + struct list_head ci_entry; + struct config_item *ci_parent; + struct config_group *ci_group; + struct config_item_type *ci_type; + struct dentry *ci_dentry; +}; + +extern int config_item_set_name(struct config_item *, const char *, ...); + +static inline char *config_item_name(struct config_item * item) +{ + return item->ci_name; +} + +extern void config_item_init(struct config_item *); +extern void config_item_init_type_name(struct config_item *item, + const char *name, + struct config_item_type *type); +extern void config_item_cleanup(struct config_item *); + +extern struct config_item * config_item_get(struct config_item *); +extern void config_item_put(struct config_item *); + +struct config_item_type { + struct module *ct_owner; + struct configfs_item_operations *ct_item_ops; + struct configfs_group_operations *ct_group_ops; + struct configfs_attribute **ct_attrs; +}; + + +/** + * group - a group of config_items of a specific type, belonging + * to a specific subsystem. 
+ */ + +struct config_group { + struct config_item cg_item; + struct list_head cg_children; + struct configfs_subsystem *cg_subsys; + struct config_group **default_groups; +}; + + +extern void config_group_init(struct config_group *group); +extern void config_group_init_type_name(struct config_group *group, + const char *name, + struct config_item_type *type); + + +static inline struct config_group *to_config_group(struct config_item *item) +{ + return item ? container_of(item,struct config_group,cg_item) : NULL; +} + +static inline struct config_group *config_group_get(struct config_group *group) +{ + return group ? to_config_group(config_item_get(&group->cg_item)) : NULL; +} + +static inline void config_group_put(struct config_group *group) +{ + config_item_put(&group->cg_item); +} + +extern struct config_item *config_group_find_obj(struct config_group *, const char *); + + +struct configfs_attribute { + char *ca_name; + struct module *ca_owner; + mode_t ca_mode; +}; + + +/* + * If allow_link() exists, the item can symlink(2) out to other + * items. If the item is a group, it may support mkdir(2). + * Groups supply one of make_group() and make_item(). If the + * group supports make_group(), one can create group children. If it + * supports make_item(), one can create config_item children. If it has + * default_groups on group->default_groups, it has automatically created + * group children. default_groups may coexist alongside make_group() or + * make_item(), but if the group wishes to have only default_groups + * children (disallowing mkdir(2)), it need not provide either function. + * If the group has commit_item(), it supports pending and committed (active) + * items. 
+ */ +struct configfs_item_operations { + void (*release)(struct config_item *); + ssize_t (*show_attribute)(struct config_item *, struct configfs_attribute *,char *); + ssize_t (*store_attribute)(struct config_item *,struct configfs_attribute *,const char *, size_t); + int (*allow_link)(struct config_item *src, struct config_item *target); + int (*drop_link)(struct config_item *src, struct config_item *target); +}; + +struct configfs_group_operations { + struct config_item *(*make_item)(struct config_group *group, const char *name); + struct config_group *(*make_group)(struct config_group *group, const char *name); + int (*commit_item)(struct config_item *item); + void (*drop_item)(struct config_group *group, struct config_item *item); +}; + + + +/** + * Use these macros to make defining attributes easier. See include/linux/device.h + * for examples.. + */ + +#if 0 +#define __ATTR(_name,_mode,_show,_store) { \ + .attr = {.ca_name = __stringify(_name), .ca_mode = _mode, .ca_owner = THIS_MODULE }, \ + .show = _show, \ + .store = _store, \ +} + +#define __ATTR_RO(_name) { \ + .attr = { .ca_name = __stringify(_name), .ca_mode = 0444, .ca_owner = THIS_MODULE }, \ + .show = _name##_show, \ +} + +#define __ATTR_NULL { .attr = { .name = NULL } } + +#define attr_name(_attr) (_attr).attr.name +#endif + + +struct configfs_subsystem { + struct config_group su_group; + struct semaphore su_sem; +}; + +static inline struct configfs_subsystem *to_configfs_subsystem(struct config_group *group) +{ + return group ? 
+ container_of(group, struct configfs_subsystem, su_group) : + NULL; +} + +int configfs_register_subsystem(struct configfs_subsystem *subsys); +void configfs_unregister_subsystem(struct configfs_subsystem *subsys); + +#endif /* __KERNEL__ */ + +#endif /* _CONFIGFS_H_ */ -- cgit v1.2.3-71-gd317 From 994fc28c7b1e697ac56befe4aecabf23f0689f46 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Thu, 15 Dec 2005 14:28:17 -0800 Subject: [PATCH] add AOP_TRUNCATED_PAGE, prepend AOP_ to WRITEPAGE_ACTIVATE readpage(), prepare_write(), and commit_write() callers are updated to understand the special return code AOP_TRUNCATED_PAGE in the style of writepage() and WRITEPAGE_ACTIVATE. AOP_TRUNCATED_PAGE tells the caller that the callee has unlocked the page and that the operation should be tried again with a new page. OCFS2 uses this to detect and work around a lock inversion in its aop methods. There should be no change in behaviour for methods that don't return AOP_TRUNCATED_PAGE. WRITEPAGE_ACTIVATE is also prepended with AOP_ for consistency and they are made enums so that kerneldoc can be used to document their semantics. 
Signed-off-by: Zach Brown --- drivers/block/loop.c | 23 +++++++++++---- drivers/block/rd.c | 4 +-- fs/mpage.c | 2 +- include/linux/fs.h | 31 ++++++++++++++++++++ include/linux/writeback.h | 6 ---- mm/filemap.c | 73 ++++++++++++++++++++++++++++++++--------------- mm/readahead.c | 15 ++++++---- mm/shmem.c | 2 +- mm/vmscan.c | 2 +- 9 files changed, 113 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 96c664af8d06..a452b13620a2 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -213,7 +213,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, struct address_space_operations *aops = mapping->a_ops; pgoff_t index; unsigned offset, bv_offs; - int len, ret = 0; + int len, ret; down(&mapping->host->i_sem); index = pos >> PAGE_CACHE_SHIFT; @@ -232,9 +232,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, page = grab_cache_page(mapping, index); if (unlikely(!page)) goto fail; - if (unlikely(aops->prepare_write(file, page, offset, - offset + size))) + ret = aops->prepare_write(file, page, offset, + offset + size); + if (unlikely(ret)) { + if (ret == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + continue; + } goto unlock; + } transfer_result = lo_do_transfer(lo, WRITE, page, offset, bvec->bv_page, bv_offs, size, IV); if (unlikely(transfer_result)) { @@ -251,9 +257,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, kunmap_atomic(kaddr, KM_USER0); } flush_dcache_page(page); - if (unlikely(aops->commit_write(file, page, offset, - offset + size))) + ret = aops->commit_write(file, page, offset, + offset + size); + if (unlikely(ret)) { + if (ret == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + continue; + } goto unlock; + } if (unlikely(transfer_result)) goto unlock; bv_offs += size; @@ -264,6 +276,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, unlock_page(page); 
page_cache_release(page); } + ret = 0; out: up(&mapping->host->i_sem); return ret; diff --git a/drivers/block/rd.c b/drivers/block/rd.c index 68c60a5bcdab..ffd6abd6d5a0 100644 --- a/drivers/block/rd.c +++ b/drivers/block/rd.c @@ -154,7 +154,7 @@ static int ramdisk_commit_write(struct file *file, struct page *page, /* * ->writepage to the the blockdev's mapping has to redirty the page so that the - * VM doesn't go and steal it. We return WRITEPAGE_ACTIVATE so that the VM + * VM doesn't go and steal it. We return AOP_WRITEPAGE_ACTIVATE so that the VM * won't try to (pointlessly) write the page again for a while. * * Really, these pages should not be on the LRU at all. @@ -165,7 +165,7 @@ static int ramdisk_writepage(struct page *page, struct writeback_control *wbc) make_page_uptodate(page); SetPageDirty(page); if (wbc->for_reclaim) - return WRITEPAGE_ACTIVATE; + return AOP_WRITEPAGE_ACTIVATE; unlock_page(page); return 0; } diff --git a/fs/mpage.c b/fs/mpage.c index c5adcdddf3cc..f1d2d02bd4c8 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -721,7 +721,7 @@ retry: &last_block_in_bio, &ret, wbc, page->mapping->a_ops->writepage); } - if (unlikely(ret == WRITEPAGE_ACTIVATE)) + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) unlock_page(page); if (ret || (--(wbc->nr_to_write) <= 0)) done = 1; diff --git a/include/linux/fs.h b/include/linux/fs.h index cc35b6ac778d..ed9a41a71e8b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -302,6 +302,37 @@ struct iattr { */ #include +/** + * enum positive_aop_returns - aop return codes with specific semantics + * + * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has + * completed, that the page is still locked, and + * should be considered active. The VM uses this hint + * to return the page to the active list -- it won't + * be a candidate for writeback again in the near + * future. Other callers must be careful to unlock + * the page if they get this return. 
Returned by + * writepage(); + * + * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has + * unlocked it and the page might have been truncated. + * The caller should back up to acquiring a new page and + * trying again. The aop will be taking reasonable + * precautions not to livelock. If the caller held a page + * reference, it should drop it before retrying. Returned + * by readpage(), prepare_write(), and commit_write(). + * + * address_space_operation functions return these large constants to indicate + * special semantics to the caller. These are much larger than the bytes in a + * page to allow for functions that return the number of bytes operated on in a + * given page. + */ + +enum positive_aop_returns { + AOP_WRITEPAGE_ACTIVATE = 0x80000, + AOP_TRUNCATED_PAGE = 0x80001, +}; + /* * oh the beauties of C type declarations. */ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 343d883d69c5..64a36ba43b2f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -59,12 +59,6 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ }; -/* - * ->writepage() return values (make these much larger than a pagesize, in - * case some fs is returning number-of-bytes-written from writepage) - */ -#define WRITEPAGE_ACTIVATE 0x80000 /* IO was not started: activate page */ - /* * fs/fs-writeback.c */ diff --git a/mm/filemap.c b/mm/filemap.c index 33a28bfde158..6e1d08a2b8b9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -831,8 +831,13 @@ readpage: /* Start the actual read. The read will unlock the page. 
*/ error = mapping->a_ops->readpage(filp, page); - if (unlikely(error)) + if (unlikely(error)) { + if (error == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto find_page; + } goto readpage_error; + } if (!PageUptodate(page)) { lock_page(page); @@ -1152,26 +1157,24 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset) { struct address_space *mapping = file->f_mapping; struct page *page; - int error; + int ret; - page = page_cache_alloc_cold(mapping); - if (!page) - return -ENOMEM; + do { + page = page_cache_alloc_cold(mapping); + if (!page) + return -ENOMEM; + + ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL); + if (ret == 0) + ret = mapping->a_ops->readpage(file, page); + else if (ret == -EEXIST) + ret = 0; /* losing race to add is OK */ - error = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL); - if (!error) { - error = mapping->a_ops->readpage(file, page); page_cache_release(page); - return error; - } - /* - * We arrive here in the unlikely event that someone - * raced with us and added our page to the cache first - * or we are out of memory for radix-tree nodes. - */ - page_cache_release(page); - return error == -EEXIST ? 
0 : error; + } while (ret == AOP_TRUNCATED_PAGE); + + return ret; } #define MMAP_LOTSAMISS (100) @@ -1331,10 +1334,14 @@ page_not_uptodate: goto success; } - if (!mapping->a_ops->readpage(file, page)) { + error = mapping->a_ops->readpage(file, page); + if (!error) { wait_on_page_locked(page); if (PageUptodate(page)) goto success; + } else if (error == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto retry_find; } /* @@ -1358,10 +1365,14 @@ page_not_uptodate: goto success; } ClearPageError(page); - if (!mapping->a_ops->readpage(file, page)) { + error = mapping->a_ops->readpage(file, page); + if (!error) { wait_on_page_locked(page); if (PageUptodate(page)) goto success; + } else if (error == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto retry_find; } /* @@ -1444,10 +1455,14 @@ page_not_uptodate: goto success; } - if (!mapping->a_ops->readpage(file, page)) { + error = mapping->a_ops->readpage(file, page); + if (!error) { wait_on_page_locked(page); if (PageUptodate(page)) goto success; + } else if (error == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto retry_find; } /* @@ -1470,10 +1485,14 @@ page_not_uptodate: } ClearPageError(page); - if (!mapping->a_ops->readpage(file, page)) { + error = mapping->a_ops->readpage(file, page); + if (!error) { wait_on_page_locked(page); if (PageUptodate(page)) goto success; + } else if (error == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto retry_find; } /* @@ -1934,12 +1953,16 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, status = a_ops->prepare_write(file, page, offset, offset+bytes); if (unlikely(status)) { loff_t isize = i_size_read(inode); + + if (status != AOP_TRUNCATED_PAGE) + unlock_page(page); + page_cache_release(page); + if (status == AOP_TRUNCATED_PAGE) + continue; /* * prepare_write() may have instantiated a few blocks * outside i_size. Trim these off again. 
*/ - unlock_page(page); - page_cache_release(page); if (pos + bytes > isize) vmtruncate(inode, isize); break; @@ -1952,6 +1975,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, cur_iov, iov_base, bytes); flush_dcache_page(page); status = a_ops->commit_write(file, page, offset, offset+bytes); + if (status == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + continue; + } if (likely(copied > 0)) { if (!status) status = copied; diff --git a/mm/readahead.c b/mm/readahead.c index 72e7adbb87c7..8d6eeaaa6296 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -158,7 +158,7 @@ static int read_pages(struct address_space *mapping, struct file *filp, { unsigned page_idx; struct pagevec lru_pvec; - int ret = 0; + int ret; if (mapping->a_ops->readpages) { ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages); @@ -171,14 +171,17 @@ static int read_pages(struct address_space *mapping, struct file *filp, list_del(&page->lru); if (!add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) { - mapping->a_ops->readpage(filp, page); - if (!pagevec_add(&lru_pvec, page)) - __pagevec_lru_add(&lru_pvec); - } else { - page_cache_release(page); + ret = mapping->a_ops->readpage(filp, page); + if (ret != AOP_TRUNCATED_PAGE) { + if (!pagevec_add(&lru_pvec, page)) + __pagevec_lru_add(&lru_pvec); + continue; + } /* else fall through to release */ } + page_cache_release(page); } pagevec_lru_add(&lru_pvec); + ret = 0; out: return ret; } diff --git a/mm/shmem.c b/mm/shmem.c index dc25565a61e9..d9fc277940da 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -855,7 +855,7 @@ unlock: swap_free(swap); redirty: set_page_dirty(page); - return WRITEPAGE_ACTIVATE; /* Return with the page locked */ + return AOP_WRITEPAGE_ACTIVATE; /* Return with the page locked */ } #ifdef CONFIG_NUMA diff --git a/mm/vmscan.c b/mm/vmscan.c index b0cd81c32de6..795a050fe471 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -367,7 +367,7 @@ static pageout_t pageout(struct page *page, struct 
address_space *mapping) res = mapping->a_ops->writepage(page, &wbc); if (res < 0) handle_write_error(mapping, page, res); - if (res == WRITEPAGE_ACTIVATE) { + if (res == AOP_WRITEPAGE_ACTIVATE) { ClearPageReclaim(page); return PAGE_ACTIVATE; } -- cgit v1.2.3-71-gd317 From df71837d5024e2524cd51c93621e558aa7dd9f3f Mon Sep 17 00:00:00 2001 From: Trent Jaeger Date: Tue, 13 Dec 2005 23:12:27 -0800 Subject: [LSM-IPSec]: Security association restriction. This patch series implements per packet access control via the extension of the Linux Security Modules (LSM) interface by hooks in the XFRM and pfkey subsystems that leverage IPSec security associations to label packets. Extensions to the SELinux LSM are included that leverage the patch for this purpose. This patch implements the changes necessary to the XFRM subsystem, pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a socket to use only authorized security associations (or no security association) to send/receive network packets. Patch purpose: The patch is designed to enable access control per packets based on the strongly authenticated IPSec security association. Such access controls augment the existing ones based on network interface and IP address. The former are very coarse-grained, and the latter can be spoofed. By using IPSec, the system can control access to remote hosts based on cryptographic keys generated using the IPSec mechanism. This enables access control on a per-machine basis or per-application if the remote machine is running the same mechanism and trusted to enforce the access control policy. Patch design approach: The overall approach is that policy (xfrm_policy) entries set by user-level programs (e.g., setkey for ipsec-tools) are extended with a security context that is used at policy selection time in the XFRM subsystem to restrict the sockets that can send/receive packets via security associations (xfrm_states) that are built from those policies. 
A presentation available at www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf from the SELinux symposium describes the overall approach. Patch implementation details: On output, the policy retrieved (via xfrm_policy_lookup or xfrm_sk_policy_lookup) must be authorized for the security context of the socket, and the same security context is required for the resultant security association (retrieved or negotiated via racoon in ipsec-tools). This is enforced in xfrm_state_find. On input, the policy retrieved must also be authorized for the socket (at __xfrm_policy_check), and the security context of the policy must also match the security association being used. The patch has virtually no impact on packets that do not use IPSec. The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as before. Also, if IPSec is used without security contexts, the impact is minimal. The LSM must allow such policies to be selected for the combination of socket and remote machine, but subsequent IPSec processing proceeds as in the original case. Testing: The pfkey interface is tested using ipsec-tools. ipsec-tools has been modified (a separate ipsec-tools patch is available for version 0.5) to support assignment of xfrm_policy entries and security associations with security contexts via setkey, and negotiation using the security contexts via racoon. The xfrm_user interface is tested via ad hoc programs that set security contexts. These programs are also available from me, and include programs for setting, getting, and deleting policy for testing this interface. Testing of SA functions was done by tracing kernel behavior. Signed-off-by: Trent Jaeger Signed-off-by: Herbert Xu Signed-off-by: David S.
Miller --- include/linux/pfkeyv2.h | 13 +++- include/linux/security.h | 132 +++++++++++++++++++++++++++++++ include/linux/xfrm.h | 29 +++++++ include/net/flow.h | 7 +- include/net/xfrm.h | 27 ++++++- net/core/flow.c | 8 +- net/key/af_key.c | 197 +++++++++++++++++++++++++++++++++++++++++++++-- net/xfrm/xfrm_policy.c | 88 +++++++++++++-------- net/xfrm/xfrm_state.c | 9 ++- net/xfrm/xfrm_user.c | 148 +++++++++++++++++++++++++++++++++-- security/Kconfig | 13 ++++ security/dummy.c | 45 ++++++++++- 12 files changed, 655 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index 724066778aff..6351c4055ace 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -216,6 +216,16 @@ struct sadb_x_nat_t_port { } __attribute__((packed)); /* sizeof(struct sadb_x_nat_t_port) == 8 */ +/* Generic LSM security context */ +struct sadb_x_sec_ctx { + uint16_t sadb_x_sec_len; + uint16_t sadb_x_sec_exttype; + uint8_t sadb_x_ctx_alg; /* LSMs: e.g., selinux == 1 */ + uint8_t sadb_x_ctx_doi; + uint16_t sadb_x_ctx_len; +} __attribute__((packed)); +/* sizeof(struct sadb_sec_ctx) = 8 */ + /* Message types */ #define SADB_RESERVED 0 #define SADB_GETSPI 1 @@ -325,7 +335,8 @@ struct sadb_x_nat_t_port { #define SADB_X_EXT_NAT_T_SPORT 21 #define SADB_X_EXT_NAT_T_DPORT 22 #define SADB_X_EXT_NAT_T_OA 23 -#define SADB_EXT_MAX 23 +#define SADB_X_EXT_SEC_CTX 24 +#define SADB_EXT_MAX 24 /* Identity Extension values */ #define SADB_IDENTTYPE_RESERVED 0 diff --git a/include/linux/security.h b/include/linux/security.h index f7e0ae018712..ef753654daa5 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -59,6 +59,12 @@ struct sk_buff; struct sock; struct sockaddr; struct socket; +struct flowi; +struct dst_entry; +struct xfrm_selector; +struct xfrm_policy; +struct xfrm_state; +struct xfrm_user_sec_ctx; extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); extern int cap_netlink_recv(struct 
sk_buff *skb); @@ -788,6 +794,52 @@ struct swap_info_struct; * which is used to copy security attributes between local stream sockets. * @sk_free_security: * Deallocate security structure. + * @sk_getsid: + * Retrieve the LSM-specific sid for the sock to enable caching of network + * authorizations. + * + * Security hooks for XFRM operations. + * + * @xfrm_policy_alloc_security: + * @xp contains the xfrm_policy being added to Security Policy Database + * used by the XFRM system. + * @sec_ctx contains the security context information being provided by + * the user-level policy update program (e.g., setkey). + * Allocate a security structure to the xp->selector.security field. + * The security field is initialized to NULL when the xfrm_policy is + * allocated. + * Return 0 if operation was successful (memory to allocate, legal context) + * @xfrm_policy_clone_security: + * @old contains an existing xfrm_policy in the SPD. + * @new contains a new xfrm_policy being cloned from old. + * Allocate a security structure to the new->selector.security field + * that contains the information from the old->selector.security field. + * Return 0 if operation was successful (memory to allocate). + * @xfrm_policy_free_security: + * @xp contains the xfrm_policy + * Deallocate xp->selector.security. + * @xfrm_state_alloc_security: + * @x contains the xfrm_state being added to the Security Association + * Database by the XFRM system. + * @sec_ctx contains the security context information being provided by + * the user-level SA generation program (e.g., setkey or racoon). + * Allocate a security structure to the x->sel.security field. The + * security field is initialized to NULL when the xfrm_state is + * allocated. + * Return 0 if operation was successful (memory to allocate, legal context). + * @xfrm_state_free_security: + * @x contains the xfrm_state. + * Deallocate x>sel.security. 
+ * @xfrm_policy_lookup: + * @xp contains the xfrm_policy for which the access control is being + * checked. + * @sk_sid contains the sock security label that is used to authorize + * access to the policy xp. + * @dir contains the direction of the flow (input or output). + * Check permission when a sock selects a xfrm_policy for processing + * XFRMs on a packet. The hook is called when selecting either a + * per-socket policy or a generic xfrm policy. + * Return 0 if permission is granted. * * Security hooks affecting all Key Management operations * @@ -1237,8 +1289,18 @@ struct security_operations { int (*socket_getpeersec) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len); int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); void (*sk_free_security) (struct sock *sk); + unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir); #endif /* CONFIG_SECURITY_NETWORK */ +#ifdef CONFIG_SECURITY_NETWORK_XFRM + int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new); + void (*xfrm_policy_free_security) (struct xfrm_policy *xp); + int (*xfrm_state_alloc_security) (struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx); + void (*xfrm_state_free_security) (struct xfrm_state *x); + int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 sk_sid, u8 dir); +#endif /* CONFIG_SECURITY_NETWORK_XFRM */ + /* key management security hooks */ #ifdef CONFIG_KEYS int (*key_alloc)(struct key *key); @@ -2679,6 +2741,11 @@ static inline void security_sk_free(struct sock *sk) { return security_ops->sk_free_security(sk); } + +static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) +{ + return security_ops->sk_getsid(sk, fl, dir); +} #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct socket * sock, struct socket * other, @@ -2795,8 +2862,73 @@ 
static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority) static inline void security_sk_free(struct sock *sk) { } + +static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) +{ + return 0; +} #endif /* CONFIG_SECURITY_NETWORK */ +#ifdef CONFIG_SECURITY_NETWORK_XFRM +static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) +{ + return security_ops->xfrm_policy_alloc_security(xp, sec_ctx); +} + +static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) +{ + return security_ops->xfrm_policy_clone_security(old, new); +} + +static inline void security_xfrm_policy_free(struct xfrm_policy *xp) +{ + security_ops->xfrm_policy_free_security(xp); +} + +static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +{ + return security_ops->xfrm_state_alloc_security(x, sec_ctx); +} + +static inline void security_xfrm_state_free(struct xfrm_state *x) +{ + security_ops->xfrm_state_free_security(x); +} + +static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +{ + return security_ops->xfrm_policy_lookup(xp, sk_sid, dir); +} +#else /* CONFIG_SECURITY_NETWORK_XFRM */ +static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) +{ + return 0; +} + +static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) +{ + return 0; +} + +static inline void security_xfrm_policy_free(struct xfrm_policy *xp) +{ +} + +static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +{ + return 0; +} + +static inline void security_xfrm_state_free(struct xfrm_state *x) +{ +} + +static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +{ + return 0; +} +#endif /* CONFIG_SECURITY_NETWORK_XFRM */ + #ifdef CONFIG_KEYS #ifdef CONFIG_SECURITY static 
inline int security_key_alloc(struct key *key) diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 0fb077d68441..82fbb758e28f 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -27,6 +27,22 @@ struct xfrm_id __u8 proto; }; +struct xfrm_sec_ctx { + __u8 ctx_doi; + __u8 ctx_alg; + __u16 ctx_len; + __u32 ctx_sid; + char ctx_str[0]; +}; + +/* Security Context Domains of Interpretation */ +#define XFRM_SC_DOI_RESERVED 0 +#define XFRM_SC_DOI_LSM 1 + +/* Security Context Algorithms */ +#define XFRM_SC_ALG_RESERVED 0 +#define XFRM_SC_ALG_SELINUX 1 + /* Selector, used as selector both on policy rules (SPD) and SAs. */ struct xfrm_selector @@ -146,6 +162,18 @@ enum { #define XFRM_NR_MSGTYPES (XFRM_MSG_MAX + 1 - XFRM_MSG_BASE) +/* + * Generic LSM security context for comunicating to user space + * NOTE: Same format as sadb_x_sec_ctx + */ +struct xfrm_user_sec_ctx { + __u16 len; + __u16 exttype; + __u8 ctx_alg; /* LSMs: e.g., selinux == 1 */ + __u8 ctx_doi; + __u16 ctx_len; +}; + struct xfrm_user_tmpl { struct xfrm_id id; __u16 family; @@ -176,6 +204,7 @@ enum xfrm_attr_type_t { XFRMA_TMPL, /* 1 or more struct xfrm_user_tmpl */ XFRMA_SA, XFRMA_POLICY, + XFRMA_SEC_CTX, /* struct xfrm_sec_ctx */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/flow.h b/include/net/flow.h index 9a5c94b1a0ec..ec7eb86eb203 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -84,11 +84,12 @@ struct flowi { #define FLOW_DIR_OUT 1 #define FLOW_DIR_FWD 2 -typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, +struct sock; +typedef void (*flow_resolve_t)(struct flowi *key, u32 sk_sid, u16 family, u8 dir, void **objp, atomic_t **obj_refp); -extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, - flow_resolve_t resolver); +extern void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, + flow_resolve_t resolver); extern void flow_cache_flush(void); extern atomic_t flow_cache_genid; diff --git 
a/include/net/xfrm.h b/include/net/xfrm.h index 1cdb87912137..487abca3ca6f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -144,6 +144,9 @@ struct xfrm_state * transformer. */ struct xfrm_type *type; + /* Security context */ + struct xfrm_sec_ctx *security; + /* Private data of this transformer, format is opaque, * interpreted by xfrm_type methods. */ void *data; @@ -298,6 +301,7 @@ struct xfrm_policy __u8 flags; __u8 dead; __u8 xfrm_nr; + struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; }; @@ -510,6 +514,25 @@ xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, return 0; } +#ifdef CONFIG_SECURITY_NETWORK_XFRM +/* If neither has a context --> match + * Otherwise, both must have a context and the sids, doi, alg must match + */ +static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2) +{ + return ((!s1 && !s2) || + (s1 && s2 && + (s1->ctx_sid == s2->ctx_sid) && + (s1->ctx_doi == s2->ctx_doi) && + (s1->ctx_alg == s2->ctx_alg))); +} +#else +static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2) +{ + return 1; +} +#endif + /* A struct encoding bundle of transformations to apply to some set of flow. * * dst->child points to the next element of bundle. 
@@ -878,8 +901,8 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel, - int delete); +struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, + struct xfrm_sec_ctx *ctx, int delete); struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete); void xfrm_policy_flush(void); u32 xfrm_get_acqseq(void); diff --git a/net/core/flow.c b/net/core/flow.c index 7e95b39de9fd..c4f25385029f 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -23,6 +23,7 @@ #include #include #include +#include struct flow_cache_entry { struct flow_cache_entry *next; @@ -30,6 +31,7 @@ struct flow_cache_entry { u8 dir; struct flowi key; u32 genid; + u32 sk_sid; void *object; atomic_t *object_ref; }; @@ -162,7 +164,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) return 0; } -void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, +void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, flow_resolve_t resolver) { struct flow_cache_entry *fle, **head; @@ -186,6 +188,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, for (fle = *head; fle; fle = fle->next) { if (fle->family == family && fle->dir == dir && + fle->sk_sid == sk_sid && flow_key_compare(key, &fle->key) == 0) { if (fle->genid == atomic_read(&flow_cache_genid)) { void *ret = fle->object; @@ -210,6 +213,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, *head = fle; fle->family = family; fle->dir = dir; + fle->sk_sid = sk_sid; memcpy(&fle->key, key, sizeof(*key)); fle->object = NULL; flow_count(cpu)++; @@ -221,7 +225,7 @@ nocache: void *obj; atomic_t *obj_ref; - resolver(key, family, dir, &obj, &obj_ref); + 
resolver(key, sk_sid, family, dir, &obj, &obj_ref); if (fle) { fle->genid = atomic_read(&flow_cache_genid); diff --git a/net/key/af_key.c b/net/key/af_key.c index 39031684b65c..d32f7791f1e4 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -336,6 +336,7 @@ static u8 sadb_ext_min_len[] = { [SADB_X_EXT_NAT_T_SPORT] = (u8) sizeof(struct sadb_x_nat_t_port), [SADB_X_EXT_NAT_T_DPORT] = (u8) sizeof(struct sadb_x_nat_t_port), [SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address), + [SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx), }; /* Verify sadb_address_{len,prefixlen} against sa_family. */ @@ -383,6 +384,55 @@ static int verify_address_len(void *p) return 0; } +static inline int pfkey_sec_ctx_len(struct sadb_x_sec_ctx *sec_ctx) +{ + int len = 0; + + len += sizeof(struct sadb_x_sec_ctx); + len += sec_ctx->sadb_x_ctx_len; + len += sizeof(uint64_t) - 1; + len /= sizeof(uint64_t); + + return len; +} + +static inline int verify_sec_ctx_len(void *p) +{ + struct sadb_x_sec_ctx *sec_ctx = (struct sadb_x_sec_ctx *)p; + int len; + + if (sec_ctx->sadb_x_ctx_len > PAGE_SIZE) + return -EINVAL; + + len = pfkey_sec_ctx_len(sec_ctx); + + if (sec_ctx->sadb_x_sec_len != len) + return -EINVAL; + + return 0; +} + +static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb_x_sec_ctx *sec_ctx) +{ + struct xfrm_user_sec_ctx *uctx = NULL; + int ctx_size = sec_ctx->sadb_x_ctx_len; + + uctx = kmalloc((sizeof(*uctx)+ctx_size), GFP_KERNEL); + + if (!uctx) + return NULL; + + uctx->len = pfkey_sec_ctx_len(sec_ctx); + uctx->exttype = sec_ctx->sadb_x_sec_exttype; + uctx->ctx_doi = sec_ctx->sadb_x_ctx_doi; + uctx->ctx_alg = sec_ctx->sadb_x_ctx_alg; + uctx->ctx_len = sec_ctx->sadb_x_ctx_len; + memcpy(uctx + 1, sec_ctx + 1, + uctx->ctx_len); + + return uctx; +} + static int present_and_same_family(struct sadb_address *src, struct sadb_address *dst) { @@ -438,6 +488,10 @@ static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_h if 
(verify_address_len(p)) return -EINVAL; } + if (ext_type == SADB_X_EXT_SEC_CTX) { + if (verify_sec_ctx_len(p)) + return -EINVAL; + } ext_hdrs[ext_type-1] = p; } p += ext_len; @@ -586,6 +640,9 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, struct sadb_key *key; struct sadb_x_sa2 *sa2; struct sockaddr_in *sin; + struct sadb_x_sec_ctx *sec_ctx; + struct xfrm_sec_ctx *xfrm_ctx; + int ctx_size = 0; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct sockaddr_in6 *sin6; #endif @@ -609,6 +666,12 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, sizeof(struct sadb_address)*2 + sockaddr_size*2 + sizeof(struct sadb_x_sa2); + + if ((xfrm_ctx = x->security)) { + ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); + size += sizeof(struct sadb_x_sec_ctx) + ctx_size; + } + /* identity & sensitivity */ if ((x->props.family == AF_INET && @@ -899,6 +962,20 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, n_port->sadb_x_nat_t_port_reserved = 0; } + /* security context */ + if (xfrm_ctx) { + sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, + sizeof(struct sadb_x_sec_ctx) + ctx_size); + sec_ctx->sadb_x_sec_len = + (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); + sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; + sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; + sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; + sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; + memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, + xfrm_ctx->ctx_len); + } + return skb; } @@ -909,6 +986,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, struct sadb_lifetime *lifetime; struct sadb_sa *sa; struct sadb_key *key; + struct sadb_x_sec_ctx *sec_ctx; uint16_t proto; int err; @@ -993,6 +1071,21 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, x->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime; x->lft.soft_use_expires_seconds = 
lifetime->sadb_lifetime_usetime; } + + sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; + if (sec_ctx != NULL) { + struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + + if (!uctx) + goto out; + + err = security_xfrm_state_alloc(x, uctx); + kfree(uctx); + + if (err) + goto out; + } + key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; if (sa->sadb_sa_auth) { int keysize = 0; @@ -1720,6 +1813,18 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol) return 0; } +static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp) +{ + struct xfrm_sec_ctx *xfrm_ctx = xp->security; + + if (xfrm_ctx) { + int len = sizeof(struct sadb_x_sec_ctx); + len += xfrm_ctx->ctx_len; + return PFKEY_ALIGN8(len); + } + return 0; +} + static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) { int sockaddr_size = pfkey_sockaddr_size(xp->family); @@ -1733,7 +1838,8 @@ static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) (sockaddr_size * 2) + sizeof(struct sadb_x_policy) + (xp->xfrm_nr * (sizeof(struct sadb_x_ipsecrequest) + - (socklen * 2))); + (socklen * 2))) + + pfkey_xfrm_policy2sec_ctx_size(xp); } static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp) @@ -1757,6 +1863,8 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i struct sadb_lifetime *lifetime; struct sadb_x_policy *pol; struct sockaddr_in *sin; + struct sadb_x_sec_ctx *sec_ctx; + struct xfrm_sec_ctx *xfrm_ctx; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct sockaddr_in6 *sin6; #endif @@ -1941,6 +2049,21 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i } } } + + /* security context */ + if ((xfrm_ctx = xp->security)) { + int ctx_size = pfkey_xfrm_policy2sec_ctx_size(xp); + + sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, ctx_size); + sec_ctx->sadb_x_sec_len = ctx_size / sizeof(uint64_t); + sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; + 
sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; + sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; + sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; + memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, + xfrm_ctx->ctx_len); + } + hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_reserved = atomic_read(&xp->refcnt); } @@ -1976,12 +2099,13 @@ out: static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { - int err; + int err = 0; struct sadb_lifetime *lifetime; struct sadb_address *sa; struct sadb_x_policy *pol; struct xfrm_policy *xp; struct km_event c; + struct sadb_x_sec_ctx *sec_ctx; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || @@ -2028,6 +2152,22 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (xp->selector.dport) xp->selector.dport_mask = ~0; + sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; + if (sec_ctx != NULL) { + struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + + if (!uctx) { + err = -ENOBUFS; + goto out; + } + + err = security_xfrm_policy_alloc(xp, uctx); + kfree(uctx); + + if (err) + goto out; + } + xp->lft.soft_byte_limit = XFRM_INF; xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; @@ -2051,10 +2191,9 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, hdr->sadb_msg_type != SADB_X_SPDUPDATE); - if (err) { - kfree(xp); - return err; - } + + if (err) + goto out; if (hdr->sadb_msg_type == SADB_X_SPDUPDATE) c.event = XFRM_MSG_UPDPOLICY; @@ -2069,6 +2208,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return 0; out: + security_xfrm_policy_free(xp); kfree(xp); return err; } @@ -2078,9 +2218,10 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg int err; struct sadb_address *sa; struct sadb_x_policy *pol; - struct 
xfrm_policy *xp; + struct xfrm_policy *xp, tmp; struct xfrm_selector sel; struct km_event c; + struct sadb_x_sec_ctx *sec_ctx; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || @@ -2109,7 +2250,24 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg if (sel.dport) sel.dport_mask = ~0; - xp = xfrm_policy_bysel(pol->sadb_x_policy_dir-1, &sel, 1); + sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; + memset(&tmp, 0, sizeof(struct xfrm_policy)); + + if (sec_ctx != NULL) { + struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + + if (!uctx) + return -ENOMEM; + + err = security_xfrm_policy_alloc(&tmp, uctx); + kfree(uctx); + + if (err) + return err; + } + + xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1); + security_xfrm_policy_free(&tmp); if (xp == NULL) return -ENOENT; @@ -2660,6 +2818,7 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, { struct xfrm_policy *xp; struct sadb_x_policy *pol = (struct sadb_x_policy*)data; + struct sadb_x_sec_ctx *sec_ctx; switch (family) { case AF_INET: @@ -2709,10 +2868,32 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, (*dir = parse_ipsecrequests(xp, pol)) < 0) goto out; + /* security context too */ + if (len >= (pol->sadb_x_policy_len*8 + + sizeof(struct sadb_x_sec_ctx))) { + char *p = (char *)pol; + struct xfrm_user_sec_ctx *uctx; + + p += pol->sadb_x_policy_len*8; + sec_ctx = (struct sadb_x_sec_ctx *)p; + if (len < pol->sadb_x_policy_len*8 + + sec_ctx->sadb_x_sec_len) + goto out; + if ((*dir = verify_sec_ctx_len(p))) + goto out; + uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + *dir = security_xfrm_policy_alloc(xp, uctx); + kfree(uctx); + + if (*dir) + goto out; + } + *dir = pol->sadb_x_policy_dir-1; return xp; out: + security_xfrm_policy_free(xp); kfree(xp); return NULL; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 
d19e274b9c4a..64a447375fdb 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -10,7 +10,7 @@ * YOSHIFUJI Hideaki * Split up af-specific portion * Derek Atkins Add the post_input processor - * + * */ #include @@ -256,6 +256,7 @@ void __xfrm_policy_destroy(struct xfrm_policy *policy) if (del_timer(&policy->timer)) BUG(); + security_xfrm_policy_free(policy); kfree(policy); } EXPORT_SYMBOL(__xfrm_policy_destroy); @@ -350,7 +351,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) write_lock_bh(&xfrm_policy_lock); for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { - if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) { + if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && + xfrm_sec_ctx_match(pol->security, policy->security)) { if (excl) { write_unlock_bh(&xfrm_policy_lock); return -EEXIST; @@ -416,14 +418,15 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel, - int delete) +struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, + struct xfrm_sec_ctx *ctx, int delete) { struct xfrm_policy *pol, **p; write_lock_bh(&xfrm_policy_lock); for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { - if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) { + if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && + (xfrm_sec_ctx_match(ctx, pol->security))) { xfrm_pol_hold(pol); if (delete) *p = pol->next; @@ -438,7 +441,7 @@ struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel, } return pol; } -EXPORT_SYMBOL(xfrm_policy_bysel); +EXPORT_SYMBOL(xfrm_policy_bysel_ctx); struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) { @@ -519,7 +522,7 @@ EXPORT_SYMBOL(xfrm_policy_walk); /* Find policy to apply to this flow. 
*/ -static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, +static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, void **objp, atomic_t **obj_refp) { struct xfrm_policy *pol; @@ -533,9 +536,12 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, continue; match = xfrm_selector_match(sel, fl, family); + if (match) { - xfrm_pol_hold(pol); - break; + if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) { + xfrm_pol_hold(pol); + break; + } } } read_unlock_bh(&xfrm_policy_lock); @@ -543,15 +549,37 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, *obj_refp = &pol->refcnt; } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) +static inline int policy_to_flow_dir(int dir) +{ + if (XFRM_POLICY_IN == FLOW_DIR_IN && + XFRM_POLICY_OUT == FLOW_DIR_OUT && + XFRM_POLICY_FWD == FLOW_DIR_FWD) + return dir; + switch (dir) { + default: + case XFRM_POLICY_IN: + return FLOW_DIR_IN; + case XFRM_POLICY_OUT: + return FLOW_DIR_OUT; + case XFRM_POLICY_FWD: + return FLOW_DIR_FWD; + }; +} + +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid) { struct xfrm_policy *pol; read_lock_bh(&xfrm_policy_lock); if ((pol = sk->sk_policy[dir]) != NULL) { - int match = xfrm_selector_match(&pol->selector, fl, + int match = xfrm_selector_match(&pol->selector, fl, sk->sk_family); + int err = 0; + if (match) + err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir)); + + if (match && !err) xfrm_pol_hold(pol); else pol = NULL; @@ -624,6 +652,10 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) if (newp) { newp->selector = old->selector; + if (security_xfrm_policy_clone(old, newp)) { + kfree(newp); + return NULL; /* ENOMEM */ + } newp->lft = old->lft; newp->curlft = old->curlft; newp->action = old->action; @@ -735,22 +767,6 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state 
**xfrm, int nx, return err; } -static inline int policy_to_flow_dir(int dir) -{ - if (XFRM_POLICY_IN == FLOW_DIR_IN && - XFRM_POLICY_OUT == FLOW_DIR_OUT && - XFRM_POLICY_FWD == FLOW_DIR_FWD) - return dir; - switch (dir) { - default: - case XFRM_POLICY_IN: - return FLOW_DIR_IN; - case XFRM_POLICY_OUT: - return FLOW_DIR_OUT; - case XFRM_POLICY_FWD: - return FLOW_DIR_FWD; - }; -} static int stale_bundle(struct dst_entry *dst); @@ -769,19 +785,20 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, int err; u32 genid; u16 family = dst_orig->ops->family; + u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); + u32 sk_sid = security_sk_sid(sk, fl, dir); restart: genid = atomic_read(&flow_cache_genid); policy = NULL; if (sk && sk->sk_policy[1]) - policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid); if (!policy) { /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) return 0; - policy = flow_cache_lookup(fl, family, - policy_to_flow_dir(XFRM_POLICY_OUT), + policy = flow_cache_lookup(fl, sk_sid, family, dir, xfrm_policy_lookup); } @@ -962,16 +979,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, { struct xfrm_policy *pol; struct flowi fl; + u8 fl_dir = policy_to_flow_dir(dir); + u32 sk_sid; if (_decode_session(skb, &fl, family) < 0) return 0; + sk_sid = security_sk_sid(sk, &fl, fl_dir); + /* First, check used SA against their selectors. 
*/ if (skb->sp) { int i; for (i=skb->sp->len-1; i>=0; i--) { - struct sec_decap_state *xvec = &(skb->sp->x[i]); + struct sec_decap_state *xvec = &(skb->sp->x[i]); if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family)) return 0; @@ -986,11 +1007,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = NULL; if (sk && sk->sk_policy[dir]) - pol = xfrm_sk_policy_lookup(sk, dir, &fl); + pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid); if (!pol) - pol = flow_cache_lookup(&fl, family, - policy_to_flow_dir(dir), + pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir, xfrm_policy_lookup); if (!pol) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 479effc97666..e12d0be5f976 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -10,7 +10,7 @@ * Split up af-specific functions * Derek Atkins * Add UDP Encapsulation - * + * */ #include @@ -70,6 +70,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) x->type->destructor(x); xfrm_put_type(x->type); } + security_xfrm_state_free(x); kfree(x); } @@ -343,7 +344,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, selector. 
*/ if (x->km.state == XFRM_STATE_VALID) { - if (!xfrm_selector_match(&x->sel, fl, family)) + if (!xfrm_selector_match(&x->sel, fl, family) || + !xfrm_sec_ctx_match(pol->security, x->security)) continue; if (!best || best->km.dying > x->km.dying || @@ -354,7 +356,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, acquire_in_progress = 1; } else if (x->km.state == XFRM_STATE_ERROR || x->km.state == XFRM_STATE_EXPIRED) { - if (xfrm_selector_match(&x->sel, fl, family)) + if (xfrm_selector_match(&x->sel, fl, family) && + xfrm_sec_ctx_match(pol->security, x->security)) error = -ESRCH; } } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0cdd9a07e043..92e2b804c606 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -7,7 +7,7 @@ * Kazunori MIYAZAWA @USAGI * Kunihiro Ishiguro * IPv6 support - * + * */ #include @@ -88,6 +88,34 @@ static int verify_encap_tmpl(struct rtattr **xfrma) return 0; } + +static inline int verify_sec_ctx_len(struct rtattr **xfrma) +{ + struct rtattr *rt = xfrma[XFRMA_SEC_CTX - 1]; + struct xfrm_user_sec_ctx *uctx; + int len = 0; + + if (!rt) + return 0; + + if (rt->rta_len < sizeof(*uctx)) + return -EINVAL; + + uctx = RTA_DATA(rt); + + if (uctx->ctx_len > PAGE_SIZE) + return -EINVAL; + + len += sizeof(struct xfrm_user_sec_ctx); + len += uctx->ctx_len; + + if (uctx->len != len) + return -EINVAL; + + return 0; +} + + static int verify_newsa_info(struct xfrm_usersa_info *p, struct rtattr **xfrma) { @@ -145,6 +173,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; if ((err = verify_encap_tmpl(xfrma))) goto out; + if ((err = verify_sec_ctx_len(xfrma))) + goto out; err = -EINVAL; switch (p->mode) { @@ -209,6 +239,30 @@ static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_a return 0; } + +static inline int xfrm_user_sec_ctx_size(struct xfrm_policy *xp) +{ + struct xfrm_sec_ctx *xfrm_ctx = xp->security; + int len = 0; + + if (xfrm_ctx) { + len += sizeof(struct 
xfrm_user_sec_ctx); + len += xfrm_ctx->ctx_len; + } + return len; +} + +static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg) +{ + struct xfrm_user_sec_ctx *uctx; + + if (!u_arg) + return 0; + + uctx = RTA_DATA(u_arg); + return security_xfrm_state_alloc(x, uctx); +} + static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { memcpy(&x->id, &p->id, sizeof(x->id)); @@ -253,6 +307,9 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, if (err) goto error; + if ((err = attach_sec_ctx(x, xfrma[XFRMA_SEC_CTX-1]))) + goto error; + x->km.seq = p->seq; return x; @@ -272,11 +329,11 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) int err; struct km_event c; - err = verify_newsa_info(p, (struct rtattr **) xfrma); + err = verify_newsa_info(p, (struct rtattr **)xfrma); if (err) return err; - x = xfrm_state_construct(p, (struct rtattr **) xfrma, &err); + x = xfrm_state_construct(p, (struct rtattr **)xfrma, &err); if (!x) return err; @@ -390,6 +447,19 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) if (x->encap) RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + if (x->security) { + int ctx_size = sizeof(struct xfrm_sec_ctx) + + x->security->ctx_len; + struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); + struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + + uctx->exttype = XFRMA_SEC_CTX; + uctx->len = ctx_size; + uctx->ctx_doi = x->security->ctx_doi; + uctx->ctx_alg = x->security->ctx_alg; + uctx->ctx_len = x->security->ctx_len; + memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len); + } nlh->nlmsg_len = skb->tail - b; out: sp->this_idx++; @@ -603,6 +673,18 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) return verify_policy_dir(p->dir); } +static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct rtattr **xfrma) +{ + struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; + struct xfrm_user_sec_ctx *uctx; + + if 
(!rt) + return 0; + + uctx = RTA_DATA(rt); + return security_xfrm_policy_alloc(pol, uctx); +} + static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, int nr) { @@ -681,7 +763,10 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, } copy_from_user_policy(xp, p); - err = copy_from_user_tmpl(xp, xfrma); + + if (!(err = copy_from_user_tmpl(xp, xfrma))) + err = copy_from_user_sec_ctx(xp, xfrma); + if (err) { *errp = err; kfree(xp); @@ -700,10 +785,13 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr int excl; err = verify_newpolicy_info(p); + if (err) + return err; + err = verify_sec_ctx_len((struct rtattr **)xfrma); if (err) return err; - xp = xfrm_policy_construct(p, (struct rtattr **) xfrma, &err); + xp = xfrm_policy_construct(p, (struct rtattr **)xfrma, &err); if (!xp) return err; @@ -761,6 +849,27 @@ rtattr_failure: return -1; } +static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) +{ + if (xp->security) { + int ctx_size = sizeof(struct xfrm_sec_ctx) + + xp->security->ctx_len; + struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); + struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + + uctx->exttype = XFRMA_SEC_CTX; + uctx->len = ctx_size; + uctx->ctx_doi = xp->security->ctx_doi; + uctx->ctx_alg = xp->security->ctx_alg; + uctx->ctx_len = xp->security->ctx_len; + memcpy(uctx + 1, xp->security->ctx_str, xp->security->ctx_len); + } + return 0; + + rtattr_failure: + return -1; +} + static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) { struct xfrm_dump_info *sp = ptr; @@ -782,6 +891,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr copy_to_user_policy(xp, p, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; + if (copy_to_user_sec_ctx(xp, skb)) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; out: @@ -852,8 +963,25 @@ static int xfrm_get_policy(struct sk_buff *skb, struct 
nlmsghdr *nlh, void **xfr if (p->index) xp = xfrm_policy_byid(p->dir, p->index, delete); - else - xp = xfrm_policy_bysel(p->dir, &p->sel, delete); + else { + struct rtattr **rtattrs = (struct rtattr **)xfrma; + struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; + struct xfrm_policy tmp; + + err = verify_sec_ctx_len(rtattrs); + if (err) + return err; + + memset(&tmp, 0, sizeof(struct xfrm_policy)); + if (rt) { + struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + + if ((err = security_xfrm_policy_alloc(&tmp, uctx))) + return err; + } + xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete); + security_xfrm_policy_free(&tmp); + } if (xp == NULL) return -ENOENT; @@ -1224,6 +1352,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; + if (copy_to_user_sec_ctx(xp, skb)) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; return skb->len; @@ -1241,6 +1371,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire)); + len += RTA_SPACE(xfrm_user_sec_ctx_size(xp)); skb = alloc_skb(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -1324,6 +1455,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, copy_to_user_policy(xp, &upe->pol, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; + if (copy_to_user_sec_ctx(xp, skb)) + goto nlmsg_failure; upe->hard = !!hard; nlh->nlmsg_len = skb->tail - b; @@ -1341,6 +1474,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire)); + len += RTA_SPACE(xfrm_user_sec_ctx_size(xp)); skb = alloc_skb(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; diff --git a/security/Kconfig b/security/Kconfig index 64d3f1e9ca85..34f593410d57 100644 --- 
a/security/Kconfig +++ b/security/Kconfig @@ -54,6 +54,19 @@ config SECURITY_NETWORK implement socket and networking access controls. If you are unsure how to answer this question, answer N. +config SECURITY_NETWORK_XFRM + bool "XFRM (IPSec) Networking Security Hooks" + depends on XFRM && SECURITY_NETWORK + help + This enables the XFRM (IPSec) networking security hooks. + If enabled, a security module can use these hooks to + implement per-packet access controls based on labels + derived from IPSec policy. Non-IPSec communications are + designated as unlabelled, and only sockets authorized + to communicate unlabelled data can send without using + IPSec. + If you are unsure how to answer this question, answer N. + config SECURITY_CAPABILITIES tristate "Default Linux Capabilities" depends on SECURITY diff --git a/security/dummy.c b/security/dummy.c index 3ca5f2b828a0..a15c54709fde 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -776,8 +776,42 @@ static inline int dummy_sk_alloc_security (struct sock *sk, int family, gfp_t pr static inline void dummy_sk_free_security (struct sock *sk) { } + +static unsigned int dummy_sk_getsid(struct sock *sk, struct flowi *fl, u8 dir) +{ + return 0; +} #endif /* CONFIG_SECURITY_NETWORK */ +#ifdef CONFIG_SECURITY_NETWORK_XFRM +static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) +{ + return 0; +} + +static inline int dummy_xfrm_policy_clone_security(struct xfrm_policy *old, struct xfrm_policy *new) +{ + return 0; +} + +static void dummy_xfrm_policy_free_security(struct xfrm_policy *xp) +{ +} + +static int dummy_xfrm_state_alloc_security(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +{ + return 0; +} + +static void dummy_xfrm_state_free_security(struct xfrm_state *x) +{ +} + +static int dummy_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +{ + return 0; +} +#endif /* CONFIG_SECURITY_NETWORK_XFRM */ static int dummy_register_security (const char *name, 
struct security_operations *ops) { return -EINVAL; @@ -970,7 +1004,16 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, socket_getpeersec); set_to_dummy_if_null(ops, sk_alloc_security); set_to_dummy_if_null(ops, sk_free_security); -#endif /* CONFIG_SECURITY_NETWORK */ + set_to_dummy_if_null(ops, sk_getsid); + #endif /* CONFIG_SECURITY_NETWORK */ +#ifdef CONFIG_SECURITY_NETWORK_XFRM + set_to_dummy_if_null(ops, xfrm_policy_alloc_security); + set_to_dummy_if_null(ops, xfrm_policy_clone_security); + set_to_dummy_if_null(ops, xfrm_policy_free_security); + set_to_dummy_if_null(ops, xfrm_state_alloc_security); + set_to_dummy_if_null(ops, xfrm_state_free_security); + set_to_dummy_if_null(ops, xfrm_policy_lookup); +#endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS set_to_dummy_if_null(ops, key_alloc); set_to_dummy_if_null(ops, key_free); -- cgit v1.2.3-71-gd317 From 89cee8b1cbb9dac40c92ef1968aea2b45f82fd18 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Dec 2005 23:14:27 -0800 Subject: [IPV4]: Safer reassembly Another spin of Herbert Xu's "safer ip reassembly" patch for 2.6.16. (The original patch is here: http://marc.theaimsgroup.com/?l=linux-netdev&m=112281936522415&w=2 and my only contribution is to have tested it.) This patch (optionally) does additional checks before accepting IP fragments, which can greatly reduce the possibility of reassembling fragments which originated from different IP datagrams. Signed-off-by: Herbert Xu Signed-off-by: Arthur Kepner Signed-off-by: David S. 
Miller --- Documentation/networking/ip-sysctl.txt | 23 ++++++++++++ include/linux/sysctl.h | 1 + include/net/inetpeer.h | 1 + include/net/ip.h | 2 + net/ipv4/inetpeer.c | 1 + net/ipv4/ip_fragment.c | 68 +++++++++++++++++++++++++++++++++- net/ipv4/ip_output.c | 1 + net/ipv4/sysctl_net_ipv4.c | 10 +++++ 8 files changed, 106 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ebc09a159f62..2b7cf19a06ad 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -46,6 +46,29 @@ ipfrag_secret_interval - INTEGER for the hash secret) for IP fragments. Default: 600 +ipfrag_max_dist - INTEGER + ipfrag_max_dist is a non-negative integer value which defines the + maximum "disorder" which is allowed among fragments which share a + common IP source address. Note that reordering of packets is + not unusual, but if a large number of fragments arrive from a source + IP address while a particular fragment queue remains incomplete, it + probably indicates that one or more fragments belonging to that queue + have been lost. When ipfrag_max_dist is positive, an additional check + is done on fragments before they are added to a reassembly queue - if + ipfrag_max_dist (or more) fragments have arrived from a particular IP + address between additions to any IP fragment queue using that source + address, it's presumed that one or more fragments in the queue are + lost. The existing fragment queue will be dropped, and a new one + started. An ipfrag_max_dist value of zero disables this check. + + Using a very small value, e.g. 1 or 2, for ipfrag_max_dist can + result in unnecessarily dropping fragment queues when normal + reordering of packets occurs, which could lead to poor application + performance. Using a very large value, e.g. 
50000, increases the + likelihood of incorrectly reassembling IP fragments that originate + from different IP datagrams, which could result in data corruption. + Default: 64 + INET peer storage: inet_peer_threshold - INTEGER diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 4be34ef8c2f7..93fa765e47d3 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -390,6 +390,7 @@ enum NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, NET_TCP_CONG_CONTROL=110, NET_TCP_ABC=111, + NET_IPV4_IPFRAG_MAX_DIST=112, }; enum { diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 7fda471002b6..0965515f40cf 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -25,6 +25,7 @@ struct inet_peer __u32 v4daddr; /* peer's address */ __u16 avl_height; __u16 ip_id_count; /* IP ID for the next packet */ + atomic_t rid; /* Frag reception counter */ __u32 tcp_ts; unsigned long tcp_ts_stamp; }; diff --git a/include/net/ip.h b/include/net/ip.h index e4563bbee6ea..4d6294ba038e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -45,6 +45,7 @@ struct inet_skb_parm #define IPSKB_TRANSLATED 2 #define IPSKB_FORWARDED 4 #define IPSKB_XFRM_TUNNEL_SIZE 8 +#define IPSKB_FRAG_COMPLETE 16 }; struct ipcm_cookie @@ -168,6 +169,7 @@ extern int sysctl_ipfrag_high_thresh; extern int sysctl_ipfrag_low_thresh; extern int sysctl_ipfrag_time; extern int sysctl_ipfrag_secret_interval; +extern int sysctl_ipfrag_max_dist; /* From inetpeer.c */ extern int inet_peer_threshold; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 2fc3fd38924f..ce5fe3f74a3d 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -401,6 +401,7 @@ struct inet_peer *inet_getpeer(__u32 daddr, int create) return NULL; n->v4daddr = daddr; atomic_set(&n->refcnt, 1); + atomic_set(&n->rid, 0); n->ip_id_count = secure_ip_id(daddr); n->tcp_ts_stamp = 0; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8ce0ce2ee48e..ce2b70ce4018 100644 --- a/net/ipv4/ip_fragment.c 
+++ b/net/ipv4/ip_fragment.c @@ -22,6 +22,7 @@ * Patrick McHardy : LRU queue of frag heads for evictor. */ +#include #include #include #include @@ -38,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -56,6 +58,8 @@ int sysctl_ipfrag_high_thresh = 256*1024; int sysctl_ipfrag_low_thresh = 192*1024; +int sysctl_ipfrag_max_dist = 64; + /* Important NOTE! Fragment queue must be destroyed before MSL expires. * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. */ @@ -89,8 +93,10 @@ struct ipq { spinlock_t lock; atomic_t refcnt; struct timer_list timer; /* when will this queue expire? */ - int iif; struct timeval stamp; + int iif; + unsigned int rid; + struct inet_peer *peer; }; /* Hash table. */ @@ -195,6 +201,9 @@ static void ip_frag_destroy(struct ipq *qp, int *work) BUG_TRAP(qp->last_in&COMPLETE); BUG_TRAP(del_timer(&qp->timer) == 0); + if (qp->peer) + inet_putpeer(qp->peer); + /* Release all fragment data. */ fp = qp->fragments; while (fp) { @@ -353,6 +362,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user) qp->meat = 0; qp->fragments = NULL; qp->iif = 0; + qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; /* Initialize a timer for this entry. */ init_timer(&qp->timer); @@ -398,6 +408,56 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) return ip_frag_create(hash, iph, user); } +/* Is the fragment too far ahead to be part of ipq? 
*/ +static inline int ip_frag_too_far(struct ipq *qp) +{ + struct inet_peer *peer = qp->peer; + unsigned int max = sysctl_ipfrag_max_dist; + unsigned int start, end; + + int rc; + + if (!peer || !max) + return 0; + + start = qp->rid; + end = atomic_inc_return(&peer->rid); + qp->rid = end; + + rc = qp->fragments && (end - start) > max; + + if (rc) { + IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + } + + return rc; +} + +static int ip_frag_reinit(struct ipq *qp) +{ + struct sk_buff *fp; + + if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) { + atomic_inc(&qp->refcnt); + return -ETIMEDOUT; + } + + fp = qp->fragments; + do { + struct sk_buff *xp = fp->next; + frag_kfree_skb(fp, NULL); + fp = xp; + } while (fp); + + qp->last_in = 0; + qp->len = 0; + qp->meat = 0; + qp->fragments = NULL; + qp->iif = 0; + + return 0; +} + /* Add new segment to existing queue. */ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { @@ -408,6 +468,12 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (qp->last_in & COMPLETE) goto err; + if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && + unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) { + ipq_kill(qp); + goto err; + } + offset = ntohs(skb->nh.iph->frag_off); flags = offset & ~IP_OFFSET; offset &= IP_OFFSET; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index eba64e2bd397..2a830de3a699 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -445,6 +445,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) hlen = iph->ihl * 4; mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ + IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; /* When frag_list is given, use it. 
First, check its validity: * some transformers could create wrong frag_list or break existing diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 01444a02b48b..dbf82955aabe 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -22,6 +22,7 @@ extern int sysctl_ip_nonlocal_bind; #ifdef CONFIG_SYSCTL +static int zero; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -613,6 +614,15 @@ ctl_table ipv4_table[] = { .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies }, + { + .ctl_name = NET_IPV4_IPFRAG_MAX_DIST, + .procname = "ipfrag_max_dist", + .data = &sysctl_ipfrag_max_dist, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = &zero + }, { .ctl_name = NET_TCP_NO_METRICS_SAVE, .procname = "tcp_no_metrics_save", -- cgit v1.2.3-71-gd317 From ca304b6104ffdd120bb6687a88a0625e58bc71cd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:15:40 -0800 Subject: [IPV6]: Introduce inet6_rsk() And inet6_rsk_offset in inet_request_sock, for the same reasons as inet_sock's pinfo6 member. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/ip.h | 4 ++++ include/linux/ipv6.h | 39 +++++++++++++++++++++++++++++++++------ net/ipv4/inet_diag.c | 8 ++++---- net/ipv6/inet6_connection_sock.c | 4 ++-- net/ipv6/tcp_ipv6.c | 19 +++++++++---------- 5 files changed, 52 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ip.h b/include/linux/ip.h index 33e8a19a1a0f..5a560daeade5 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -110,6 +110,10 @@ struct ip_options { struct inet_request_sock { struct request_sock req; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + u16 inet6_rsk_offset; + /* 2 bytes hole, try to pack */ +#endif u32 loc_addr; u32 rmt_addr; u16 rmt_port; diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index e0b922785d98..7d3e86d9576e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -199,18 +199,17 @@ static inline int inet6_iif(const struct sk_buff *skb) return IP6CB(skb)->iif; } -struct tcp6_request_sock { - struct tcp_request_sock req; +struct inet6_request_sock { struct in6_addr loc_addr; struct in6_addr rmt_addr; struct sk_buff *pktopts; int iif; }; -static inline struct tcp6_request_sock *tcp6_rsk(const struct request_sock *sk) -{ - return (struct tcp6_request_sock *)sk; -} +struct tcp6_request_sock { + struct tcp_request_sock tcp6rsk_tcp; + struct inet6_request_sock tcp6rsk_inet6; +}; /** * struct ipv6_pinfo - ipv6 private area @@ -304,6 +303,28 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) return inet_sk(__sk)->pinet6; } +static inline struct inet6_request_sock * + inet6_rsk(const struct request_sock *rsk) +{ + return (struct inet6_request_sock *)(((u8 *)rsk) + + inet_rsk(rsk)->inet6_rsk_offset); +} + +static inline u32 inet6_rsk_offset(struct request_sock *rsk) +{ + return rsk->rsk_ops->obj_size - sizeof(struct inet6_request_sock); +} + +static inline struct request_sock *inet6_reqsk_alloc(struct request_sock_ops *ops) +{ + struct request_sock *req = 
reqsk_alloc(ops); + + if (req != NULL) + inet_rsk(req)->inet6_rsk_offset = inet6_rsk_offset(req); + + return req; +} + static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return (struct raw6_sock *)sk; @@ -361,6 +382,12 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) return NULL; } +static inline struct inet6_request_sock * + inet6_rsk(const struct request_sock *rsk) +{ + return NULL; +} + static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return NULL; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 39061ed53cfd..3ce73b141d7e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -489,9 +489,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->idiag_family == AF_INET6) { ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &tcp6_rsk(req)->loc_addr); + &inet6_rsk(req)->loc_addr); ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &tcp6_rsk(req)->rmt_addr); + &inet6_rsk(req)->rmt_addr); } #endif nlh->nlmsg_len = skb->tail - b; @@ -553,13 +553,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, entry.saddr = #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? - tcp6_rsk(req)->loc_addr.s6_addr32 : + inet6_rsk(req)->loc_addr.s6_addr32 : #endif &ireq->loc_addr; entry.daddr = #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? 
- tcp6_rsk(req)->rmt_addr.s6_addr32 : + inet6_rsk(req)->rmt_addr.s6_addr32 : #endif &ireq->rmt_addr; entry.dport = ntohs(ireq->rmt_port); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 04ff44344f90..fe874eeaa40c 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -61,7 +61,7 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, lopt->nr_table_entries)]; (req = *prev) != NULL; prev = &req->dl_next) { - const struct tcp6_request_sock *treq = tcp6_rsk(req); + const struct inet6_request_sock *treq = inet6_rsk(req); if (inet_rsk(req)->rmt_port == rport && req->rsk_ops->family == AF_INET6 && @@ -85,7 +85,7 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk, { struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = inet6_synq_hash(&tcp6_rsk(req)->rmt_addr, + const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd, lopt->nr_table_entries); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5a10d30cec4a..c2472d771664 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -656,7 +656,7 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, struct dst_entry *dst) { - struct tcp6_request_sock *treq = tcp6_rsk(req); + struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff * skb; struct ipv6_txoptions *opt = NULL; @@ -722,8 +722,8 @@ done: static void tcp_v6_reqsk_destructor(struct request_sock *req) { - if (tcp6_rsk(req)->pktopts) - kfree_skb(tcp6_rsk(req)->pktopts); + if (inet6_rsk(req)->pktopts) + kfree_skb(inet6_rsk(req)->pktopts); } static struct request_sock_ops tcp6_request_sock_ops = { @@ -956,7 +956,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp6_request_sock 
*treq; + struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_options_received tmp_opt; struct tcp_sock *tp = tcp_sk(sk); @@ -981,7 +981,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = reqsk_alloc(&tcp6_request_sock_ops); + req = inet6_reqsk_alloc(&tcp6_request_sock_ops); if (req == NULL) goto drop; @@ -994,7 +994,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb); - treq = tcp6_rsk(req); + treq = inet6_rsk(req); ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr); ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr); TCP_ECN_create_request(req, skb->h.th); @@ -1035,7 +1035,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { - struct tcp6_request_sock *treq = tcp6_rsk(req); + struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; @@ -1723,14 +1723,13 @@ static int tcp_v6_destroy_sock(struct sock *sk) static void get_openreq6(struct seq_file *seq, struct sock *sk, struct request_sock *req, int i, int uid) { - struct in6_addr *dest, *src; int ttd = req->expires - jiffies; + struct in6_addr *src = &inet6_rsk(req)->loc_addr; + struct in6_addr *dest = &inet6_rsk(req)->rmt_addr; if (ttd < 0) ttd = 0; - src = &tcp6_rsk(req)->loc_addr; - dest = &tcp6_rsk(req)->rmt_addr; seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n", -- cgit v1.2.3-71-gd317 From 8292a17a399ffb7c5c8b083db4ad994e090055f7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:15:52 -0800 Subject: [ICSK]: Rename struct tcp_func to struct inet_connection_sock_af_ops And move it to struct 
inet_connection_sock. DCCP will use it in the upcoming changesets. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 -- include/net/inet_connection_sock.h | 26 ++++++++++++++++++++ include/net/tcp.h | 50 +------------------------------------- include/net/transp_v6.h | 2 +- net/ipv4/syncookies.c | 4 +-- net/ipv4/tcp.c | 8 +++--- net/ipv4/tcp_input.c | 11 +++++---- net/ipv4/tcp_ipv4.c | 11 ++++----- net/ipv4/tcp_minisocks.c | 8 +++--- net/ipv4/tcp_output.c | 17 ++++++------- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/tcp_ipv6.c | 28 ++++++++++----------- 12 files changed, 71 insertions(+), 98 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 0e1da6602e05..4e1434007f44 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -295,8 +295,6 @@ struct tcp_sock { struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ - struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */ - __u32 rcv_wnd; /* Current receiver window */ __u32 rcv_wup; /* rcv_nxt on last window update sent */ __u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ccc81a1c550c..9e20d201e951 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -15,6 +15,7 @@ #ifndef _INET_CONNECTION_SOCK_H #define _INET_CONNECTION_SOCK_H +#include #include #include #include @@ -29,6 +30,29 @@ struct inet_bind_bucket; struct inet_hashinfo; struct tcp_congestion_ops; +/* + * Pointers to address related TCP functions + * (i.e. 
things that depend on the address family) + */ +struct inet_connection_sock_af_ops { + int (*queue_xmit)(struct sk_buff *skb, int ipfragok); + void (*send_check)(struct sock *sk, int len, + struct sk_buff *skb); + int (*rebuild_header)(struct sock *sk); + int (*conn_request)(struct sock *sk, struct sk_buff *skb); + struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst); + int (*remember_stamp)(struct sock *sk); + __u16 net_header_len; + int (*setsockopt)(struct sock *sk, int level, int optname, + char __user *optval, int optlen); + int (*getsockopt)(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); + void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); + int sockaddr_len; +}; + /** inet_connection_sock - INET connection oriented sock * * @icsk_accept_queue: FIFO of established children @@ -37,6 +61,7 @@ struct tcp_congestion_ops; * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto: Retransmit timeout * @icsk_ca_ops Pluggable congestion control hook + * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event @@ -55,6 +80,7 @@ struct inet_connection_sock { struct timer_list icsk_delack_timer; __u32 icsk_rto; struct tcp_congestion_ops *icsk_ca_ops; + struct inet_connection_sock_af_ops *icsk_af_ops; __u8 icsk_ca_state; __u8 icsk_retransmits; __u8 icsk_pending; diff --git a/include/net/tcp.h b/include/net/tcp.h index d78025f9fbea..83b117a25c2a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -224,53 +224,6 @@ extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; extern int tcp_memory_pressure; -/* - * Pointers to address related TCP functions - * (i.e. 
things that depend on the address family) - */ - -struct tcp_func { - int (*queue_xmit) (struct sk_buff *skb, - int ipfragok); - - void (*send_check) (struct sock *sk, - struct tcphdr *th, - int len, - struct sk_buff *skb); - - int (*rebuild_header) (struct sock *sk); - - int (*conn_request) (struct sock *sk, - struct sk_buff *skb); - - struct sock * (*syn_recv_sock) (struct sock *sk, - struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst); - - int (*remember_stamp) (struct sock *sk); - - __u16 net_header_len; - - int (*setsockopt) (struct sock *sk, - int level, - int optname, - char __user *optval, - int optlen); - - int (*getsockopt) (struct sock *sk, - int level, - int optname, - char __user *optval, - int __user *optlen); - - - void (*addr2sockaddr) (struct sock *sk, - struct sockaddr *); - - int sockaddr_len; -}; - /* * The next routines deal with comparing 32 bit unsigned ints * and worry about wraparound (automatic with unsigned arithmetic). @@ -405,8 +358,7 @@ extern void tcp_parse_options(struct sk_buff *skb, * TCP v4 functions exported for the inet6 API */ -extern void tcp_v4_send_check(struct sock *sk, - struct tcphdr *th, int len, +extern void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); extern int tcp_v4_conn_request(struct sock *sk, diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 4e86f2de6638..61f724c1036f 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -44,7 +44,7 @@ extern int datagram_send_ctl(struct msghdr *msg, /* * address family specific functions */ -extern struct tcp_func ipv4_specific; +extern struct inet_connection_sock_af_ops ipv4_specific; extern int inet6_destroy_sock(struct sock *sk); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index a34e60ea48a1..e20be3331f67 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -173,10 +173,10 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct 
request_sock *req, struct dst_entry *dst) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; - child = tp->af_specific->syn_recv_sock(sk, skb, req, dst); + child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); if (child) inet_csk_reqsk_queue_add(sk, req, child); else diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ef98b14ac56d..eacfe6a3442c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1696,8 +1696,8 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int err = 0; if (level != SOL_TCP) - return tp->af_specific->setsockopt(sk, level, optname, - optval, optlen); + return icsk->icsk_af_ops->setsockopt(sk, level, optname, + optval, optlen); /* This is a string value all the others are int's */ if (optname == TCP_CONGESTION) { @@ -1939,8 +1939,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int val, len; if (level != SOL_TCP) - return tp->af_specific->getsockopt(sk, level, optname, - optval, optlen); + return icsk->icsk_af_ops->getsockopt(sk, level, optname, + optval, optlen); if (get_user(len, optlen)) return -EFAULT; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bf2e23086bce..7de6184d4bd8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4071,8 +4071,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, mb(); tcp_set_state(sk, TCP_ESTABLISHED); + icsk = inet_csk(sk); + /* Make sure socket is routed, for correct metrics. 
*/ - tp->af_specific->rebuild_header(sk); + icsk->icsk_af_ops->rebuild_header(sk); tcp_init_metrics(sk); @@ -4098,8 +4100,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, sk_wake_async(sk, 0, POLL_OUT); } - icsk = inet_csk(sk); - if (sk->sk_write_pending || icsk->icsk_accept_queue.rskq_defer_accept || icsk->icsk_ack.pingpong) { @@ -4220,6 +4220,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); int queued = 0; tp->rx_opt.saw_tstamp = 0; @@ -4236,7 +4237,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, goto discard; if(th->syn) { - if(tp->af_specific->conn_request(sk, skb) < 0) + if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) return 1; /* Now we have several options: In theory there is @@ -4349,7 +4350,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* Make sure socket is routed, for * correct metrics. */ - tp->af_specific->rebuild_header(sk); + icsk->icsk_af_ops->rebuild_header(sk); tcp_init_metrics(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2aa19c89a94a..704cf2105795 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -86,8 +86,7 @@ int sysctl_tcp_low_latency; /* Socket used for sending RSTs */ static struct socket *tcp_socket; -void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, - struct sk_buff *skb); +void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { .lhash_lock = RW_LOCK_UNLOCKED, @@ -645,10 +644,10 @@ out: } /* This routine computes an IPv4 TCP checksum. 
*/ -void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, - struct sk_buff *skb) +void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) { struct inet_sock *inet = inet_sk(sk); + struct tcphdr *th = skb->h.th; if (skb->ip_summed == CHECKSUM_HW) { th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); @@ -1383,7 +1382,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) return 0; } -struct tcp_func ipv4_specific = { +struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, @@ -1434,7 +1433,7 @@ static int tcp_v4_init_sock(struct sock *sk) sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); - tp->af_specific = &ipv4_specific; + icsk->icsk_af_ops = &ipv4_specific; sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 1b66a2ac4321..9c029683a626 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -274,18 +274,18 @@ kill: void tcp_time_wait(struct sock *sk, int state, int timeo) { struct inet_timewait_sock *tw = NULL; + const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); int recycle_ok = 0; if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) - recycle_ok = tp->af_specific->remember_stamp(sk); + recycle_ok = icsk->icsk_af_ops->remember_stamp(sk); if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) tw = inet_twsk_alloc(sk, state); if (tw != NULL) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); - const struct inet_connection_sock *icsk = inet_csk(sk); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; @@ -456,7 +456,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, struct request_sock **prev) { struct tcphdr *th = skb->h.th; 
- struct tcp_sock *tp = tcp_sk(sk); u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); int paws_reject = 0; struct tcp_options_received tmp_opt; @@ -613,7 +612,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, * ESTABLISHED STATE. If it will be dropped after * socket is created, wait for troubles. */ - child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL); + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, + req, NULL); if (child == NULL) goto listen_overflow; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b7325e0b406a..af1946c52c37 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -371,7 +371,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_ECN_send(sk, tp, skb, tcp_header_size); } - tp->af_specific->send_check(sk, th, skb->len, skb); + icsk->icsk_af_ops->send_check(sk, skb->len, skb); if (likely(tcb->flags & TCPCB_FLAG_ACK)) tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); @@ -381,7 +381,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_INC_STATS(TCP_MIB_OUTSEGS); - err = tp->af_specific->queue_xmit(skb, 0); + err = icsk->icsk_af_ops->queue_xmit(skb, 0); if (unlikely(err <= 0)) return err; @@ -638,12 +638,11 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) { struct tcp_sock *tp = tcp_sk(sk); - int mss_now; - /* Calculate base mss without TCP options: It is MMS_S - sizeof(tcphdr) of rfc1122 */ - mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr); + int mss_now = (pmtu - inet_csk(sk)->icsk_af_ops->net_header_len - + sizeof(struct tcphdr)); /* Clamp it (mss_clamp does not include tcp options) */ if (mss_now > tp->rx_opt.mss_clamp) @@ -705,9 +704,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) xmit_size_goal = mss_now; if (doing_tso) { - xmit_size_goal = 65535 - - 
tp->af_specific->net_header_len - - tp->ext_header_len - tp->tcp_header_len; + xmit_size_goal = (65535 - + inet_csk(sk)->icsk_af_ops->net_header_len - + tp->ext_header_len - tp->tcp_header_len); if (tp->max_window && (xmit_size_goal > (tp->max_window >> 1))) @@ -1422,7 +1421,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) (sysctl_tcp_retrans_collapse != 0)) tcp_retrans_try_collapse(sk, skb, cur_mss); - if(tp->af_specific->rebuild_header(sk)) + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) return -EHOSTUNREACH; /* Routing failure or similar. */ /* Some Solaris stacks overoptimize and ignore the FIN on a diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3620718defe6..b6b63fa8454c 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -170,7 +170,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inc_use(&tcp_prot); local_bh_enable(); sk->sk_prot = &tcp_prot; - tp->af_specific = &ipv4_specific; + inet_csk(sk)->icsk_af_ops = &ipv4_specific; sk->sk_socket->ops = &inet_stream_ops; sk->sk_family = PF_INET; tcp_sync_mss(sk, tp->pmtu_cookie); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c2472d771664..8ce8a1359d2b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -68,14 +68,14 @@ static void tcp_v6_send_reset(struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req); -static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, +static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok); -static struct tcp_func ipv6_mapped; -static struct tcp_func ipv6_specific; +static struct inet_connection_sock_af_ops ipv6_mapped; +static struct inet_connection_sock_af_ops ipv6_specific; int inet6_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) @@ 
-107,9 +107,7 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) static void tcp_v6_hash(struct sock *sk) { if (sk->sk_state != TCP_CLOSE) { - struct tcp_sock *tp = tcp_sk(sk); - - if (tp->af_specific == &ipv6_mapped) { + if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) { tcp_prot.hash(sk); return; } @@ -417,14 +415,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - tp->af_specific = &ipv6_mapped; + inet_csk(sk)->icsk_af_ops = &ipv6_mapped; sk->sk_backlog_rcv = tcp_v4_do_rcv; err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { tp->ext_header_len = exthdrlen; - tp->af_specific = &ipv6_specific; + inet_csk(sk)->icsk_af_ops = &ipv6_specific; sk->sk_backlog_rcv = tcp_v6_do_rcv; goto failure; } else { @@ -751,10 +749,10 @@ static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) } -static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, - struct sk_buff *skb) +static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); + struct tcphdr *th = skb->h.th; if (skb->ip_summed == CHECKSUM_HW) { th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); @@ -1070,7 +1068,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); - newtp->af_specific = &ipv6_mapped; + inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; newsk->sk_backlog_rcv = tcp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; @@ -1084,7 +1082,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ /* It is tricky place. Until this moment IPv4 tcp - worked with IPv6 af_tcp.af_specific. + worked with IPv6 icsk.icsk_af_ops. Sync it now. 
*/ tcp_sync_mss(newsk, newtp->pmtu_cookie); @@ -1631,7 +1629,7 @@ static int tcp_v6_remember_stamp(struct sock *sk) return 0; } -static struct tcp_func ipv6_specific = { +static struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = tcp_v6_xmit, .send_check = tcp_v6_send_check, .rebuild_header = tcp_v6_rebuild_header, @@ -1650,7 +1648,7 @@ static struct tcp_func ipv6_specific = { * TCP over IPv4 via INET6 API */ -static struct tcp_func ipv6_mapped = { +static struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, @@ -1700,7 +1698,7 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_state = TCP_CLOSE; - tp->af_specific = &ipv6_specific; + icsk->icsk_af_ops = &ipv6_specific; icsk->icsk_ca_ops = &tcp_init_congestion_ops; sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); -- cgit v1.2.3-71-gd317 From 3305b80c214c642b89cd5c21af83bc91ec13f8bd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Dec 2005 23:16:37 -0800 Subject: [IP]: Simplify and consolidate MSG_PEEK error handling When a packet is obtained from skb_recv_datagram with MSG_PEEK enabled it is left on the socket receive queue. This means that when we detect a checksum error we have to be careful when trying to free the packet as someone could have dequeued it in the meantime. Currently this delicate logic is duplicated three times between UDPv4, UDPv6 and RAWv6. This patch moves them into one place and simplifies the code somewhat. This is based on a suggestion by Eric Dumazet. Signed-off-by: Herbert Xu Signed-off-by: David S.
Miller --- include/linux/skbuff.h | 2 ++ net/core/datagram.c | 36 ++++++++++++++++++++++++++++++++++++ net/ipv4/udp.c | 15 +-------------- net/ipv6/raw.c | 16 +++------------- net/ipv6/udp.c | 16 ++-------------- 5 files changed, 44 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8c5d6001a923..97f6580ce039 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1239,6 +1239,8 @@ extern int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen, struct iovec *iov); extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb); +extern void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, + unsigned int flags); extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum); extern int skb_copy_bits(const struct sk_buff *skb, int offset, diff --git a/net/core/datagram.c b/net/core/datagram.c index 1bcfef51ac58..f8d322e1ea92 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -199,6 +200,41 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } +/** + * skb_kill_datagram - Free a datagram skbuff forcibly + * @sk: socket + * @skb: datagram skbuff + * @flags: MSG_ flags + * + * This function frees a datagram skbuff that was received by + * skb_recv_datagram. The flags argument must match the one + * used for skb_recv_datagram. + * + * If the MSG_PEEK flag is set, and the packet is still on the + * receive queue of the socket, it will be taken off the queue + * before it is freed. + * + * This function currently only disables BH when acquiring the + * sk_receive_queue lock. Therefore it must not be used in a + * context where that lock is acquired in an IRQ context. 
+ */ + +void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) +{ + if (flags & MSG_PEEK) { + spin_lock_bh(&sk->sk_receive_queue.lock); + if (skb == skb_peek(&sk->sk_receive_queue)) { + __skb_unlink(skb, &sk->sk_receive_queue); + atomic_dec(&skb->users); + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + } + + kfree_skb(skb); +} + +EXPORT_SYMBOL(skb_kill_datagram); + /** * skb_copy_datagram_iovec - Copy a datagram to an iovec. * @skb: buffer to copy diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2422a5f7195d..012c4621e40a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -846,20 +846,7 @@ out: csum_copy_err: UDP_INC_STATS_BH(UDP_MIB_INERRORS); - /* Clear queue. */ - if (flags&MSG_PEEK) { - int clear = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); - clear = 1; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - if (clear) - kfree_skb(skb); - } - - skb_free_datagram(sk, skb); + skb_kill_datagram(sk, skb, flags); if (noblock) return -EAGAIN; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a66900cda2af..66f1d12ea578 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -433,25 +434,14 @@ out: return err; csum_copy_err: - /* Clear queue. */ - if (flags&MSG_PEEK) { - int clear = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); - clear = 1; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - if (clear) - kfree_skb(skb); - } + skb_kill_datagram(sk, skb, flags); /* Error for blocking case is chosen to masquerade as some normal condition. */ err = (flags&MSG_DONTWAIT) ? 
-EAGAIN : -EHOSTUNREACH; /* FIXME: increment a raw6 drops counter here */ - goto out_free; + goto out; } static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5cc8731eb55b..d8538dcea813 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -300,20 +301,7 @@ out: return err; csum_copy_err: - /* Clear queue. */ - if (flags&MSG_PEEK) { - int clear = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (skb == skb_peek(&sk->sk_receive_queue)) { - __skb_unlink(skb, &sk->sk_receive_queue); - clear = 1; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - if (clear) - kfree_skb(skb); - } - - skb_free_datagram(sk, skb); + skb_kill_datagram(sk, skb, flags); if (flags & MSG_DONTWAIT) { UDP6_INC_STATS_USER(UDP_MIB_INERRORS); -- cgit v1.2.3-71-gd317 From b9750ce13c08aa8a71a9b138d741f3046aefd991 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:22:54 -0800 Subject: [IPV6]: Generalise some functions Using sk->sk_protocol instead of IPPROTO_TCP. Will be used by DCCPv6 in the next changesets. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 2 + include/net/inet6_connection_sock.h | 13 ++- net/ipv6/af_inet6.c | 52 ++++++++++++ net/ipv6/inet6_connection_sock.c | 103 ++++++++++++++++++++++++ net/ipv6/tcp_ipv6.c | 153 +----------------------------------- 5 files changed, 173 insertions(+), 150 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7d3e86d9576e..69a0decfbdf4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -297,6 +297,8 @@ struct tcp6_sock { struct ipv6_pinfo inet6; }; +extern int inet6_sk_rebuild_header(struct sock *sk); + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) { diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index aa30ebde70dc..b33b438bffcc 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -15,8 +15,15 @@ #include -struct sock; +struct in6_addr; +struct inet_bind_bucket; struct request_sock; +struct sk_buff; +struct sock; +struct sockaddr; + +extern int inet6_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb); extern struct request_sock *inet6_csk_search_req(const struct sock *sk, struct request_sock ***prevp, @@ -28,4 +35,8 @@ extern struct request_sock *inet6_csk_search_req(const struct sock *sk, extern void inet6_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, const unsigned long timeout); + +extern void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); + +extern int inet6_csk_xmit(struct sk_buff *skb, int ipfragok); #endif /* _INET6_CONNECTION_SOCK_H */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d9546380fa04..fd040e9a1f47 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -609,6 +609,58 @@ inet6_unregister_protosw(struct inet_protosw *p) } } +int inet6_sk_rebuild_header(struct sock *sk) +{ + int err; + struct dst_entry *dst; + 
struct ipv6_pinfo *np = inet6_sk(sk); + + dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst == NULL) { + struct inet_sock *inet = inet_sk(sk); + struct in6_addr *final_p = NULL, final; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + fl.proto = sk->sk_protocol; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.fl6_flowlabel = np->flow_label; + fl.oif = sk->sk_bound_dev_if; + fl.fl_ip_dport = inet->dport; + fl.fl_ip_sport = inet->sport; + + if (np->opt && np->opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } + + err = ip6_dst_lookup(sk, &dst, &fl); + if (err) { + sk->sk_route_caps = 0; + return err; + } + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + sk->sk_err_soft = -err; + return err; + } + + ip6_dst_store(sk, dst, NULL); + sk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + } + + return 0; +} + +EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header); + int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) { diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index fe874eeaa40c..792f90f0f9ec 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -21,8 +21,34 @@ #include #include +#include +#include +#include #include +int inet6_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb) +{ + const struct sock *sk2; + const struct hlist_node *node; + + /* We must walk the whole port owner list in this case. 
-DaveM */ + sk_for_each_bound(sk2, node, &tb->owners) { + if (sk != sk2 && + (!sk->sk_bound_dev_if || + !sk2->sk_bound_dev_if || + sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && + (!sk->sk_reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + ipv6_rcv_saddr_equal(sk, sk2)) + break; + } + + return node != NULL; +} + +EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); + /* * request_sock (formerly open request) hash tables. */ @@ -94,3 +120,80 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk, } EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); + +void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; + + sin6->sin6_family = AF_INET6; + ipv6_addr_copy(&sin6->sin6_addr, &np->daddr); + sin6->sin6_port = inet_sk(sk)->dport; + /* We do not store received flowlabel for TCP */ + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = 0; + if (sk->sk_bound_dev_if && + ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) + sin6->sin6_scope_id = sk->sk_bound_dev_if; +} + +EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); + +int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) +{ + struct sock *sk = skb->sk; + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi fl; + struct dst_entry *dst; + struct in6_addr *final_p = NULL, final; + + memset(&fl, 0, sizeof(fl)); + fl.proto = sk->sk_protocol; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.fl6_flowlabel = np->flow_label; + IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); + fl.oif = sk->sk_bound_dev_if; + fl.fl_ip_sport = inet->sport; + fl.fl_ip_dport = inet->dport; + + if (np->opt && np->opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } + + dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst == NULL) { + int 
err = ip6_dst_lookup(sk, &dst, &fl); + + if (err) { + sk->sk_err_soft = -err; + return err; + } + + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + sk->sk_route_caps = 0; + return err; + } + + ip6_dst_store(sk, dst, NULL); + sk->sk_route_caps = dst->dev->features & + ~(NETIF_F_IP_CSUM | NETIF_F_TSO); + } + + skb->dst = dst_clone(dst); + + /* Restore final destination back after routing done */ + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + + return ip6_xmit(sk, skb, &fl, np->opt, 0); +} + +EXPORT_SYMBOL_GPL(inet6_csk_xmit); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8ce8a1359d2b..2f932ce72610 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -72,32 +72,10 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); -static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok); static struct inet_connection_sock_af_ops ipv6_mapped; static struct inet_connection_sock_af_ops ipv6_specific; -int inet6_csk_bind_conflict(const struct sock *sk, - const struct inet_bind_bucket *tb) -{ - const struct sock *sk2; - const struct hlist_node *node; - - /* We must walk the whole port owner list in this case. 
-DaveM */ - sk_for_each_bound(sk2, node, &tb->owners) { - if (sk != sk2 && - (!sk->sk_bound_dev_if || - !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && - (!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; - } - - return node != NULL; -} - static int tcp_v6_get_port(struct sock *sk, unsigned short snum) { return inet_csk_get_port(&tcp_hashinfo, sk, snum, @@ -1500,129 +1478,6 @@ do_time_wait: goto discard_it; } -static int tcp_v6_rebuild_header(struct sock *sk) -{ - int err; - struct dst_entry *dst; - struct ipv6_pinfo *np = inet6_sk(sk); - - dst = __sk_dst_check(sk, np->dst_cookie); - - if (dst == NULL) { - struct inet_sock *inet = inet_sk(sk); - struct in6_addr *final_p = NULL, final; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl6_flowlabel = np->flow_label; - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet->dport; - fl.fl_ip_sport = inet->sport; - - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) { - sk->sk_route_caps = 0; - return err; - } - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - sk->sk_err_soft = -err; - return err; - } - - ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); - } - - return 0; -} - -static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok) -{ - struct sock *sk = skb->sk; - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi fl; - struct dst_entry *dst; - struct in6_addr *final_p = NULL, final; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; - 
ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl6_flowlabel = np->flow_label; - IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_sport = inet->sport; - fl.fl_ip_dport = inet->dport; - - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - - dst = __sk_dst_check(sk, np->dst_cookie); - - if (dst == NULL) { - int err = ip6_dst_lookup(sk, &dst, &fl); - - if (err) { - sk->sk_err_soft = -err; - return err; - } - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { - sk->sk_route_caps = 0; - return err; - } - - ip6_dst_store(sk, dst, NULL); - sk->sk_route_caps = dst->dev->features & - ~(NETIF_F_IP_CSUM | NETIF_F_TSO); - } - - skb->dst = dst_clone(dst); - - /* Restore final destination back after routing done */ - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - - return ip6_xmit(sk, skb, &fl, np->opt, 0); -} - -static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; - - sin6->sin6_family = AF_INET6; - ipv6_addr_copy(&sin6->sin6_addr, &np->daddr); - sin6->sin6_port = inet_sk(sk)->dport; - /* We do not store received flowlabel for TCP */ - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - if (sk->sk_bound_dev_if && - ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = sk->sk_bound_dev_if; -} - static int tcp_v6_remember_stamp(struct sock *sk) { /* Alas, not yet... 
*/ @@ -1630,9 +1485,9 @@ static int tcp_v6_remember_stamp(struct sock *sk) } static struct inet_connection_sock_af_ops ipv6_specific = { - .queue_xmit = tcp_v6_xmit, + .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, - .rebuild_header = tcp_v6_rebuild_header, + .rebuild_header = inet6_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .remember_stamp = tcp_v6_remember_stamp, @@ -1640,7 +1495,7 @@ static struct inet_connection_sock_af_ops ipv6_specific = { .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = v6_addr2sockaddr, + .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6) }; @@ -1659,7 +1514,7 @@ static struct inet_connection_sock_af_ops ipv6_mapped = { .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = v6_addr2sockaddr, + .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6) }; -- cgit v1.2.3-71-gd317 From 0fa1a53e1f055a6c790f40e7728f42a825b29248 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:23:09 -0800 Subject: [IPV6]: Introduce inet6_timewait_sock Out of tcp6_timewait_sock, that now is just an aggregation of inet_timewait_sock and inet6_timewait_sock, using tw_ipv6_offset in struct inet_timewait_sock, that is common to the IPv6 transport protocols that use timewait sockets, like DCCP and TCP. tw_ipv6_offset plays the struct inet_sock pinfo6 role, i.e. for the generic code to find the IPv6 area in a timewait sock. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 32 +++++++++++++++++++++----------- include/net/inet6_hashtables.h | 6 +++--- include/net/inet_timewait_sock.h | 3 ++- net/ipv4/inet_diag.c | 6 +++--- net/ipv4/tcp_minisocks.c | 8 +++++--- net/ipv6/addrconf.c | 2 +- net/ipv6/tcp_ipv6.c | 12 ++++++------ 7 files changed, 41 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 69a0decfbdf4..7d3908594fac 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -348,26 +348,36 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, #include +struct inet6_timewait_sock { + struct in6_addr tw_v6_daddr; + struct in6_addr tw_v6_rcv_saddr; +}; + struct tcp6_timewait_sock { - struct tcp_timewait_sock tw_v6_sk; - struct in6_addr tw_v6_daddr; - struct in6_addr tw_v6_rcv_saddr; + struct tcp_timewait_sock tcp6tw_tcp; + struct inet6_timewait_sock tcp6tw_inet6; }; -static inline struct tcp6_timewait_sock *tcp6_twsk(const struct sock *sk) +static inline u16 inet6_tw_offset(const struct proto *prot) +{ + return prot->twsk_obj_size - sizeof(struct inet6_timewait_sock); +} + +static inline struct inet6_timewait_sock *inet6_twsk(const struct sock *sk) { - return (struct tcp6_timewait_sock *)sk; + return (struct inet6_timewait_sock *)(((u8 *)sk) + + inet_twsk(sk)->tw_ipv6_offset); } -static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) +static inline struct in6_addr *__inet6_rcv_saddr(const struct sock *sk) { return likely(sk->sk_state != TCP_TIME_WAIT) ? - &inet6_sk(sk)->rcv_saddr : &tcp6_twsk(sk)->tw_v6_rcv_saddr; + &inet6_sk(sk)->rcv_saddr : &inet6_twsk(sk)->tw_v6_rcv_saddr; } -static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) +static inline struct in6_addr *inet6_rcv_saddr(const struct sock *sk) { - return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; + return sk->sk_family == AF_INET6 ? 
__inet6_rcv_saddr(sk) : NULL; } static inline int inet_v6_ipv6only(const struct sock *sk) @@ -395,8 +405,8 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) return NULL; } -#define __tcp_v6_rcv_saddr(__sk) NULL -#define tcp_v6_rcv_saddr(__sk) NULL +#define __inet6_rcv_saddr(__sk) NULL +#define inet6_rcv_saddr(__sk) NULL #define tcp_twsk_ipv6only(__sk) 0 #define inet_v6_ipv6only(__sk) 0 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index a4a204f99ea6..25f708ff020e 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -110,10 +110,10 @@ static inline struct sock * if(*((__u32 *)&(tw->tw_dport)) == ports && sk->sk_family == PF_INET6) { - const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); - if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && + if (ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) goto hit; } diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 28f7b2103505..ca240f856c46 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -127,7 +127,8 @@ struct inet_timewait_sock { __u16 tw_num; /* And these are ours. 
*/ __u8 tw_ipv6only:1; - /* 31 bits hole, try to pack */ + /* 15 bits hole, try to pack */ + __u16 tw_ipv6_offset; int tw_timeout; unsigned long tw_ttd; struct inet_bind_bucket *tw_tb; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 3ce73b141d7e..c49908192047 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -112,12 +112,12 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, r->idiag_inode = 0; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->idiag_family == AF_INET6) { - const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &tcp6tw->tw_v6_rcv_saddr); + &tw6->tw_v6_rcv_saddr); ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &tcp6tw->tw_v6_daddr); + &tw6->tw_v6_daddr); } #endif nlh->nlmsg_len = skb->tail - b; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 9c029683a626..2b9b7f6c7f7c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -298,10 +298,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); + struct inet6_timewait_sock *tw6; - ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr); - ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr); + tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); + tw6 = inet6_twsk((struct sock *)tw); + ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr); + ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr); tw->tw_ipv6only = np->ipv6only; } #endif diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a60585fd85ad..704fb73e6c5f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1195,7 +1195,7 @@ struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device * int 
ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; - const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); + const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2f932ce72610..cb880079daf3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -138,14 +138,14 @@ static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { - const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2); + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); tw = inet_twsk(sk2); if(*((__u32 *)&(tw->tw_dport)) == ports && sk2->sk_family == PF_INET6 && - ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && + ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); struct tcp_sock *tp = tcp_sk(sk); @@ -1663,14 +1663,14 @@ static void get_timewait6_sock(struct seq_file *seq, { struct in6_addr *dest, *src; __u16 destp, srcp; - struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); + struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); int ttd = tw->tw_ttd - jiffies; if (ttd < 0) ttd = 0; - dest = &tcp6tw->tw_v6_daddr; - src = &tcp6tw->tw_v6_rcv_saddr; + dest = &tw6->tw_v6_daddr; + src = &tw6->tw_v6_rcv_saddr; destp = ntohs(tw->tw_dport); srcp = ntohs(tw->tw_sport); -- cgit v1.2.3-71-gd317 From 6d6ee43e0b8b8d4847627fd43739b98ec2b9404f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:25:19 -0800 Subject: [TWSK]: Introduce struct timewait_sock_ops So 
that we can share several timewait sockets related functions and make the timewait mini sockets infrastructure closer to the request mini sockets one. Next changesets will take advantage of this, moving more code out of TCP and DCCP v4 and v6 to common infrastructure. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 +- include/net/inet_timewait_sock.h | 3 +- include/net/sock.h | 4 +-- include/net/tcp.h | 3 ++ include/net/timewait_sock.h | 31 ++++++++++++++++++ net/core/sock.c | 21 ++++++------ net/dccp/ipv4.c | 9 ++++- net/dccp/ipv6.c | 6 +++- net/ipv4/inet_timewait_sock.c | 5 +-- net/ipv4/tcp_ipv4.c | 71 ++++++++++++++++++++++++---------------- net/ipv6/tcp_ipv6.c | 25 +++++--------- 11 files changed, 118 insertions(+), 63 deletions(-) create mode 100644 include/net/timewait_sock.h (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7d3908594fac..a0d04891fe12 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -360,7 +360,8 @@ struct tcp6_timewait_sock { static inline u16 inet6_tw_offset(const struct proto *prot) { - return prot->twsk_obj_size - sizeof(struct inet6_timewait_sock); + return prot->twsk_prot->twsk_obj_size - + sizeof(struct inet6_timewait_sock); } static inline struct inet6_timewait_sock *inet6_twsk(const struct sock *sk) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index ca240f856c46..e396a65473d7 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -26,6 +26,7 @@ #include #include +#include #include @@ -200,7 +201,7 @@ static inline void inet_twsk_put(struct inet_timewait_sock *tw) printk(KERN_DEBUG "%s timewait_sock %p released\n", tw->tw_prot->name, tw); #endif - kmem_cache_free(tw->tw_prot->twsk_slab, tw); + kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); module_put(owner); } } diff --git a/include/net/sock.h b/include/net/sock.h index 0fbae85c6d55..91d28957dc10 
100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -493,6 +493,7 @@ extern void sk_stream_kill_queues(struct sock *sk); extern int sk_wait_data(struct sock *sk, long *timeo); struct request_sock_ops; +struct timewait_sock_ops; /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface @@ -557,11 +558,10 @@ struct proto { kmem_cache_t *slab; unsigned int obj_size; - kmem_cache_t *twsk_slab; - unsigned int twsk_obj_size; atomic_t *orphan_count; struct request_sock_ops *rsk_prot; + struct timewait_sock_ops *twsk_prot; struct module *owner; diff --git a/include/net/tcp.h b/include/net/tcp.h index 83b117a25c2a..176221cd0cce 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -287,6 +287,9 @@ extern int tcp_rcv_established(struct sock *sk, extern void tcp_rcv_space_adjust(struct sock *sk); +extern int tcp_twsk_unique(struct sock *sk, + struct sock *sktw, void *twp); + static inline void tcp_dec_quickack_mode(struct sock *sk, const unsigned int pkts) { diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h new file mode 100644 index 000000000000..2544281e1d5e --- /dev/null +++ b/include/net/timewait_sock.h @@ -0,0 +1,31 @@ +/* + * NET Generic infrastructure for Network protocols. + * + * Authors: Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#ifndef _TIMEWAIT_SOCK_H +#define _TIMEWAIT_SOCK_H + +#include +#include + +struct timewait_sock_ops { + kmem_cache_t *twsk_slab; + unsigned int twsk_obj_size; + int (*twsk_unique)(struct sock *sk, + struct sock *sktw, void *twp); +}; + +static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) +{ + if (sk->sk_prot->twsk_prot->twsk_unique != NULL) + return sk->sk_prot->twsk_prot->twsk_unique(sk, sktw, twp); + return 0; +} + +#endif /* _TIMEWAIT_SOCK_H */ diff --git a/net/core/sock.c b/net/core/sock.c index 13cc3be4f056..6465b0e4c8cb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1488,7 +1488,7 @@ int proto_register(struct proto *prot, int alloc_slab) } } - if (prot->twsk_obj_size) { + if (prot->twsk_prot != NULL) { static const char mask[] = "tw_sock_%s"; timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); @@ -1497,11 +1497,12 @@ int proto_register(struct proto *prot, int alloc_slab) goto out_free_request_sock_slab; sprintf(timewait_sock_slab_name, mask, prot->name); - prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name, - prot->twsk_obj_size, - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (prot->twsk_slab == NULL) + prot->twsk_prot->twsk_slab = + kmem_cache_create(timewait_sock_slab_name, + prot->twsk_prot->twsk_obj_size, + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (prot->twsk_prot->twsk_slab == NULL) goto out_free_timewait_sock_slab_name; } } @@ -1548,12 +1549,12 @@ void proto_unregister(struct proto *prot) prot->rsk_prot->slab = NULL; } - if (prot->twsk_slab != NULL) { - const char *name = kmem_cache_name(prot->twsk_slab); + if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { + const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab); - kmem_cache_destroy(prot->twsk_slab); + kmem_cache_destroy(prot->twsk_prot->twsk_slab); kfree(name); - prot->twsk_slab = NULL; + prot->twsk_prot->twsk_slab = NULL; } } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 
bc28d71905e2..e11cda0cb6b3 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -1309,6 +1310,10 @@ static struct request_sock_ops dccp_request_sock_ops = { .send_reset = dccp_v4_ctl_send_reset, }; +static struct timewait_sock_ops dccp_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct inet_timewait_sock), +}; + struct proto dccp_prot = { .name = "DCCP", .owner = THIS_MODULE, @@ -1332,5 +1337,7 @@ struct proto dccp_prot = { .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), .rsk_prot = &dccp_request_sock_ops, - .twsk_obj_size = sizeof(struct inet_timewait_sock), + .twsk_prot = &dccp_timewait_sock_ops, }; + +EXPORT_SYMBOL_GPL(dccp_prot); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index a7d2aee5b3af..4d078f5b911b 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -652,6 +652,10 @@ static struct request_sock_ops dccp6_request_sock_ops = { .send_reset = dccp_v6_ctl_send_reset, }; +static struct timewait_sock_ops dccp6_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct dccp6_timewait_sock), +}; + static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -1359,7 +1363,7 @@ static struct proto dccp_v6_prot = { .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), .rsk_prot = &dccp6_request_sock_ops, - .twsk_obj_size = sizeof(struct dccp6_timewait_sock), + .twsk_prot = &dccp6_timewait_sock_ops, }; static struct inet6_protocol dccp_v6_protocol = { diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index a010e9a68811..417f126c749e 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -90,8 +90,9 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) { - struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, - SLAB_ATOMIC); + struct 
inet_timewait_sock *tw = + kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, + SLAB_ATOMIC); if (tw != NULL) { const struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0b5ab04d3c5a..6728772a943a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -69,6 +69,7 @@ #include #include #include +#include #include #include @@ -118,6 +119,39 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) skb->h.th->source); } +int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) +{ + const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); + struct tcp_sock *tp = tcp_sk(sk); + + /* With PAWS, it is safe from the viewpoint + of data integrity. Even without PAWS it is safe provided sequence + spaces do not overlap i.e. at data rates <= 80Mbit/sec. + + Actually, the idea is close to VJ's one, only timestamp cache is + held not per host, but per port pair and TW bucket is used as state + holder. + + If TW bucket has been already destroyed we fall back to VJ's scheme + and use initial timestamp retrieved from peer table. 
+ */ + if (tcptw->tw_ts_recent_stamp && + (twp == NULL || (sysctl_tcp_tw_reuse && + xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { + tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; + if (tp->write_seq == 0) + tp->write_seq = 1; + tp->rx_opt.ts_recent = tcptw->tw_ts_recent; + tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; + sock_hold(sktw); + return 1; + } + + return 0; +} + +EXPORT_SYMBOL_GPL(tcp_twsk_unique); + /* called with local bh disabled */ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, struct inet_timewait_sock **twp) @@ -142,35 +176,9 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, tw = inet_twsk(sk2); if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { - const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); - struct tcp_sock *tp = tcp_sk(sk); - - /* With PAWS, it is safe from the viewpoint - of data integrity. Even without PAWS it - is safe provided sequence spaces do not - overlap i.e. at data rates <= 80Mbit/sec. - - Actually, the idea is close to VJ's one, - only timestamp cache is held not per host, - but per port pair and TW bucket is used - as state holder. - - If TW bucket has been already destroyed we - fall back to VJ's scheme and use initial - timestamp retrieved from peer table. 
- */ - if (tcptw->tw_ts_recent_stamp && - (!twp || (sysctl_tcp_tw_reuse && - xtime.tv_sec - - tcptw->tw_ts_recent_stamp > 1))) { - tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; - if (tp->write_seq == 0) - tp->write_seq = 1; - tp->rx_opt.ts_recent = tcptw->tw_ts_recent; - tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; - sock_hold(sk2); + if (twsk_unique(sk, sk2, twp)) goto unique; - } else + else goto not_unique; } } @@ -869,6 +877,11 @@ struct request_sock_ops tcp_request_sock_ops = { .send_reset = tcp_v4_send_reset, }; +static struct timewait_sock_ops tcp_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp_timewait_sock), + .twsk_unique = tcp_twsk_unique, +}; + int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct inet_request_sock *ireq; @@ -1979,7 +1992,7 @@ struct proto tcp_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), - .twsk_obj_size = sizeof(struct tcp_timewait_sock), + .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e5c8a669e84e..514b57bb80b7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -60,6 +60,7 @@ #include #include #include +#include #include @@ -147,22 +148,9 @@ static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { - const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); - struct tcp_sock *tp = tcp_sk(sk); - - if (tcptw->tw_ts_recent_stamp && - (!twp || - (sysctl_tcp_tw_reuse && - xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { - /* See comment in tcp_ipv4.c */ - tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; - if (!tp->write_seq) - tp->write_seq = 1; - tp->rx_opt.ts_recent = tcptw->tw_ts_recent; - tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; - sock_hold(sk2); + if (twsk_unique(sk, 
sk2, twp)) goto unique; - } else + else goto not_unique; } } @@ -711,6 +699,11 @@ static struct request_sock_ops tcp6_request_sock_ops = { .send_reset = tcp_v6_send_reset }; +static struct timewait_sock_ops tcp6_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp6_timewait_sock), + .twsk_unique = tcp_twsk_unique, +}; + static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -1752,7 +1745,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), - .twsk_obj_size = sizeof(struct tcp6_timewait_sock), + .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, }; -- cgit v1.2.3-71-gd317 From a7f5e7f164788a22eb5d3de8e2d3cee1bf58fdca Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:25:31 -0800 Subject: [INET]: Generalise tcp_v4_hash_connect Renaming it to inet_hash_connect, making it possible to ditch dccp_v4_hash_connect and share the same code with TCP instead. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- drivers/char/random.c | 6 +- include/linux/random.h | 2 +- include/net/inet_hashtables.h | 3 + net/dccp/ipv4.c | 160 +------------------------------------ net/ipv4/inet_hashtables.c | 178 ++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 173 +--------------------------------------- 6 files changed, 186 insertions(+), 336 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/random.c b/drivers/char/random.c index 7999da25fe40..79b59d986af4 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1554,10 +1554,8 @@ __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr, EXPORT_SYMBOL(secure_tcp_sequence_number); - - -/* Generate secure starting point for ephemeral TCP port search */ -u32 secure_tcp_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport) +/* Generate secure starting point for ephemeral IPV4 transport port search */ +u32 secure_ipv4_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport) { struct keydata *keyptr = get_keyptr(); u32 hash[4]; diff --git a/include/linux/random.h b/include/linux/random.h index 7b2adb3322d5..01424a8e621c 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -52,7 +52,7 @@ extern void get_random_bytes(void *buf, int nbytes); void generate_random_uuid(unsigned char uuid_out[16]); extern __u32 secure_ip_id(__u32 daddr); -extern u32 secure_tcp_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport); +extern u32 secure_ipv4_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport); extern u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dport); extern __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr, diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 07840baa9341..c83baa79f66e 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -434,4 +434,7 @@ static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, return sk; } + +extern int inet_hash_connect(struct 
inet_timewait_death_row *death_row, + struct sock *sk); #endif /* _INET_HASHTABLES_H */ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e11cda0cb6b3..671fbf3b2379 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -54,164 +54,6 @@ void dccp_unhash(struct sock *sk) EXPORT_SYMBOL_GPL(dccp_unhash); -/* called with local bh disabled */ -static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, - struct inet_timewait_sock **twp) -{ - struct inet_sock *inet = inet_sk(sk); - const u32 daddr = inet->rcv_saddr; - const u32 saddr = inet->daddr; - const int dif = sk->sk_bound_dev_if; - INET_ADDR_COOKIE(acookie, saddr, daddr) - const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); - struct inet_ehash_bucket *head = inet_ehash_bucket(&dccp_hashinfo, hash); - const struct sock *sk2; - const struct hlist_node *node; - struct inet_timewait_sock *tw; - - prefetch(head->chain.first); - write_lock(&head->lock); - - /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { - tw = inet_twsk(sk2); - - if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) - goto not_unique; - } - tw = NULL; - - /* And established part... */ - sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) - goto not_unique; - } - - /* Must record num and sport now. Otherwise we will see - * in hash table socket with a funny identity. */ - inet->num = lport; - inet->sport = htons(lport); - sk->sk_hash = hash; - BUG_TRAP(sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); - sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); - - if (twp != NULL) { - *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - } else if (tw != NULL) { - /* Silly. Should hash-dance instead... 
*/ - inet_twsk_deschedule(tw, &dccp_death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - - inet_twsk_put(tw); - } - - return 0; - -not_unique: - write_unlock(&head->lock); - return -EADDRNOTAVAIL; -} - -/* - * Bind a port for a connect operation and hash it. - */ -static int dccp_v4_hash_connect(struct sock *sk) -{ - const unsigned short snum = inet_sk(sk)->num; - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret; - - if (snum == 0) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover = net_random() % (high - low) + low; - struct hlist_node *node; - struct inet_timewait_sock *tw = NULL; - - local_bh_disable(); - do { - head = &dccp_hashinfo.bhash[inet_bhashfn(rover, - dccp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - - /* Does not bother with rcv_saddr checks, - * because the established check is already - * unique enough. - */ - inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == rover) { - BUG_TRAP(!hlist_empty(&tb->owners)); - if (tb->fastreuse >= 0) - goto next_port; - if (!__dccp_v4_check_established(sk, - rover, - &tw)) - goto ok; - goto next_port; - } - } - - tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, - head, rover); - if (tb == NULL) { - spin_unlock(&head->lock); - break; - } - tb->fastreuse = -1; - goto ok; - - next_port: - spin_unlock(&head->lock); - if (++rover > high) - rover = low; - } while (--remaining > 0); - - local_bh_enable(); - - return -EADDRNOTAVAIL; - -ok: - /* All locks still held and bhs disabled */ - inet_bind_hash(sk, tb, rover); - if (sk_unhashed(sk)) { - inet_sk(sk)->sport = htons(rover); - __inet_hash(&dccp_hashinfo, sk, 0); - } - spin_unlock(&head->lock); - - if (tw != NULL) { - inet_twsk_deschedule(tw, &dccp_death_row); - inet_twsk_put(tw); - } - - ret = 0; - goto out; - } - - head = &dccp_hashinfo.bhash[inet_bhashfn(snum, - dccp_hashinfo.bhash_size)]; - tb = 
inet_csk(sk)->icsk_bind_hash; - spin_lock_bh(&head->lock); - if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { - __inet_hash(&dccp_hashinfo, sk, 0); - spin_unlock_bh(&head->lock); - return 0; - } else { - spin_unlock(&head->lock); - /* No definite answer... Walk to established hash table */ - ret = __dccp_v4_check_established(sk, snum, NULL); -out: - local_bh_enable(); - return ret; - } -} - int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); @@ -272,7 +114,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * complete initialization after this. */ dccp_set_state(sk, DCCP_REQUESTING); - err = dccp_v4_hash_connect(sk); + err = inet_hash_connect(&dccp_death_row, sk); if (err != 0) goto failure; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e8d29fe736d2..33228115cda4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -15,12 +15,14 @@ #include #include +#include #include #include #include #include #include +#include /* * Allocate and initialize a new local port bind bucket. 
@@ -163,3 +165,179 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad } EXPORT_SYMBOL_GPL(__inet_lookup_listener); + +/* called with local bh disabled */ +static int __inet_check_established(struct inet_timewait_death_row *death_row, + struct sock *sk, __u16 lport, + struct inet_timewait_sock **twp) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + struct inet_sock *inet = inet_sk(sk); + u32 daddr = inet->rcv_saddr; + u32 saddr = inet->daddr; + int dif = sk->sk_bound_dev_if; + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); + struct sock *sk2; + const struct hlist_node *node; + struct inet_timewait_sock *tw; + + prefetch(head->chain.first); + write_lock(&head->lock); + + /* Check TIME-WAIT sockets first. */ + sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { + tw = inet_twsk(sk2); + + if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { + if (twsk_unique(sk, sk2, twp)) + goto unique; + else + goto not_unique; + } + } + tw = NULL; + + /* And established part... */ + sk_for_each(sk2, node, &head->chain) { + if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) + goto not_unique; + } + +unique: + /* Must record num and sport now. Otherwise we will see + * in hash table socket with a funny identity. */ + inet->num = lport; + inet->sport = htons(lport); + sk->sk_hash = hash; + BUG_TRAP(sk_unhashed(sk)); + __sk_add_node(sk, &head->chain); + sock_prot_inc_use(sk->sk_prot); + write_unlock(&head->lock); + + if (twp) { + *twp = tw; + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + } else if (tw) { + /* Silly. Should hash-dance instead... 
*/ + inet_twsk_deschedule(tw, death_row); + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + + inet_twsk_put(tw); + } + + return 0; + +not_unique: + write_unlock(&head->lock); + return -EADDRNOTAVAIL; +} + +static inline u32 inet_sk_port_offset(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr, + inet->dport); +} + +/* + * Bind a port for a connect operation and hash it. + */ +int inet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + const unsigned short snum = inet_sk(sk)->num; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; + int ret; + + if (!snum) { + int low = sysctl_local_port_range[0]; + int high = sysctl_local_port_range[1]; + int range = high - low; + int i; + int port; + static u32 hint; + u32 offset = hint + inet_sk_port_offset(sk); + struct hlist_node *node; + struct inet_timewait_sock *tw = NULL; + + local_bh_disable(); + for (i = 1; i <= range; i++) { + port = low + (i + offset) % range; + head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; + spin_lock(&head->lock); + + /* Does not bother with rcv_saddr checks, + * because the established check is already + * unique enough. 
+ */ + inet_bind_bucket_for_each(tb, node, &head->chain) { + if (tb->port == port) { + BUG_TRAP(!hlist_empty(&tb->owners)); + if (tb->fastreuse >= 0) + goto next_port; + if (!__inet_check_established(death_row, + sk, port, + &tw)) + goto ok; + goto next_port; + } + } + + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port); + if (!tb) { + spin_unlock(&head->lock); + break; + } + tb->fastreuse = -1; + goto ok; + + next_port: + spin_unlock(&head->lock); + } + local_bh_enable(); + + return -EADDRNOTAVAIL; + +ok: + hint += i; + + /* Head lock still held and bh's disabled */ + inet_bind_hash(sk, tb, port); + if (sk_unhashed(sk)) { + inet_sk(sk)->sport = htons(port); + __inet_hash(hinfo, sk, 0); + } + spin_unlock(&head->lock); + + if (tw) { + inet_twsk_deschedule(tw, death_row);; + inet_twsk_put(tw); + } + + ret = 0; + goto out; + } + + head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { + __inet_hash(hinfo, sk, 0); + spin_unlock_bh(&head->lock); + return 0; + } else { + spin_unlock(&head->lock); + /* No definite answer... 
Walk to established hash table */ + ret = __inet_check_established(death_row, sk, snum, NULL); +out: + local_bh_enable(); + return ret; + } +} + +EXPORT_SYMBOL_GPL(inet_hash_connect); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6728772a943a..c2fe61becd61 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -152,177 +152,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) EXPORT_SYMBOL_GPL(tcp_twsk_unique); -/* called with local bh disabled */ -static int __tcp_v4_check_established(struct sock *sk, __u16 lport, - struct inet_timewait_sock **twp) -{ - struct inet_sock *inet = inet_sk(sk); - u32 daddr = inet->rcv_saddr; - u32 saddr = inet->daddr; - int dif = sk->sk_bound_dev_if; - INET_ADDR_COOKIE(acookie, saddr, daddr) - const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); - struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); - struct sock *sk2; - const struct hlist_node *node; - struct inet_timewait_sock *tw; - - prefetch(head->chain.first); - write_lock(&head->lock); - - /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { - tw = inet_twsk(sk2); - - if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { - if (twsk_unique(sk, sk2, twp)) - goto unique; - else - goto not_unique; - } - } - tw = NULL; - - /* And established part... */ - sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) - goto not_unique; - } - -unique: - /* Must record num and sport now. Otherwise we will see - * in hash table socket with a funny identity. 
*/ - inet->num = lport; - inet->sport = htons(lport); - sk->sk_hash = hash; - BUG_TRAP(sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); - sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); - - if (twp) { - *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - } else if (tw) { - /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, &tcp_death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - - inet_twsk_put(tw); - } - - return 0; - -not_unique: - write_unlock(&head->lock); - return -EADDRNOTAVAIL; -} - -static inline u32 connect_port_offset(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - - return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr, - inet->dport); -} - -/* - * Bind a port for a connect operation and hash it. - */ -static inline int tcp_v4_hash_connect(struct sock *sk) -{ - const unsigned short snum = inet_sk(sk)->num; - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret; - - if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int range = high - low; - int i; - int port; - static u32 hint; - u32 offset = hint + connect_port_offset(sk); - struct hlist_node *node; - struct inet_timewait_sock *tw = NULL; - - local_bh_disable(); - for (i = 1; i <= range; i++) { - port = low + (i + offset) % range; - head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - - /* Does not bother with rcv_saddr checks, - * because the established check is already - * unique enough. 
- */ - inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { - BUG_TRAP(!hlist_empty(&tb->owners)); - if (tb->fastreuse >= 0) - goto next_port; - if (!__tcp_v4_check_established(sk, - port, - &tw)) - goto ok; - goto next_port; - } - } - - tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); - if (!tb) { - spin_unlock(&head->lock); - break; - } - tb->fastreuse = -1; - goto ok; - - next_port: - spin_unlock(&head->lock); - } - local_bh_enable(); - - return -EADDRNOTAVAIL; - -ok: - hint += i; - - /* Head lock still held and bh's disabled */ - inet_bind_hash(sk, tb, port); - if (sk_unhashed(sk)) { - inet_sk(sk)->sport = htons(port); - __inet_hash(&tcp_hashinfo, sk, 0); - } - spin_unlock(&head->lock); - - if (tw) { - inet_twsk_deschedule(tw, &tcp_death_row);; - inet_twsk_put(tw); - } - - ret = 0; - goto out; - } - - head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - tb = inet_csk(sk)->icsk_bind_hash; - spin_lock_bh(&head->lock); - if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __inet_hash(&tcp_hashinfo, sk, 0); - spin_unlock_bh(&head->lock); - return 0; - } else { - spin_unlock(&head->lock); - /* No definite answer... Walk to established hash table */ - ret = __tcp_v4_check_established(sk, snum, NULL); -out: - local_bh_enable(); - return ret; - } -} - /* This will initiate an outgoing connection. */ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -403,7 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * complete initialization after this. 
*/ tcp_set_state(sk, TCP_SYN_SENT); - err = tcp_v4_hash_connect(sk); + err = inet_hash_connect(&tcp_death_row, sk); if (err) goto failure; -- cgit v1.2.3-71-gd317 From d8313f5ca2b1f86b7df6c99fc4b3fffa1f84e92b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:25:44 -0800 Subject: [INET6]: Generalise tcp_v6_hash_connect Renaming it to inet6_hash_connect, making it possible to ditch dccp_v6_hash_connect and share the same code with TCP instead. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- drivers/char/random.c | 4 +- include/linux/random.h | 4 +- include/net/ipv6.h | 3 + net/dccp/ipv6.c | 171 +---------------------------------------- net/ipv6/inet6_hashtables.c | 183 +++++++++++++++++++++++++++++++++++++++++++- net/ipv6/tcp_ipv6.c | 173 +---------------------------------------- 6 files changed, 190 insertions(+), 348 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/random.c b/drivers/char/random.c index 79b59d986af4..bdfdfd28594d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1573,7 +1573,7 @@ u32 secure_ipv4_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport) } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dport) +u32 secure_ipv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dport) { struct keydata *keyptr = get_keyptr(); u32 hash[12]; @@ -1584,7 +1584,7 @@ u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dp return twothirdsMD4Transform(daddr, hash); } -EXPORT_SYMBOL(secure_tcpv6_port_ephemeral); +EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) diff --git a/include/linux/random.h b/include/linux/random.h index 01424a8e621c..5d6456bcdeba 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -53,8 +53,8 @@ void generate_random_uuid(unsigned char uuid_out[16]); 
extern __u32 secure_ip_id(__u32 daddr); extern u32 secure_ipv4_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport); -extern u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, - __u16 dport); +extern u32 secure_ipv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, + __u16 dport); extern __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr, __u16 sport, __u16 dport); extern __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 851376108ac2..e3d5d7bc8837 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -527,6 +527,9 @@ extern int inet6_getname(struct socket *sock, struct sockaddr *uaddr, extern int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); +extern int inet6_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk); + /* * reassembly.c */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4d078f5b911b..71bf04eb21e1 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -84,175 +84,6 @@ static __u32 dccp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) dh->dccph_sport); } -static int __dccp_v6_check_established(struct sock *sk, const __u16 lport, - struct inet_timewait_sock **twp) -{ - struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - const struct in6_addr *daddr = &np->rcv_saddr; - const struct in6_addr *saddr = &np->daddr; - const int dif = sk->sk_bound_dev_if; - const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const unsigned int hash = inet6_ehashfn(daddr, inet->num, - saddr, inet->dport); - struct inet_ehash_bucket *head = inet_ehash_bucket(&dccp_hashinfo, hash); - struct sock *sk2; - const struct hlist_node *node; - struct inet_timewait_sock *tw; - - prefetch(head->chain.first); - write_lock(&head->lock); - - /* Check TIME-WAIT sockets first. 
*/ - sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { - const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); - - tw = inet_twsk(sk2); - - if(*((__u32 *)&(tw->tw_dport)) == ports && - sk2->sk_family == PF_INET6 && - ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) - goto not_unique; - } - tw = NULL; - - /* And established part... */ - sk_for_each(sk2, node, &head->chain) { - if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) - goto not_unique; - } - - BUG_TRAP(sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); - sk->sk_hash = hash; - sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); - - if (twp) { - *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - } else if (tw) { - /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, &dccp_death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - - inet_twsk_put(tw); - } - return 0; - -not_unique: - write_unlock(&head->lock); - return -EADDRNOTAVAIL; -} - -static inline u32 dccp_v6_port_offset(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - - return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32, - np->daddr.s6_addr32, - inet->dport); -} - -static int dccp_v6_hash_connect(struct sock *sk) -{ - const unsigned short snum = inet_sk(sk)->num; - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret; - - if (snum == 0) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int range = high - low; - int i; - int port; - static u32 hint; - u32 offset = hint + dccp_v6_port_offset(sk); - struct hlist_node *node; - struct inet_timewait_sock *tw = NULL; - - local_bh_disable(); - for (i = 1; i <= range; i++) { - port = low + (i + offset) % range; - head = &dccp_hashinfo.bhash[inet_bhashfn(port, - dccp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - - /* 
Does not bother with rcv_saddr checks, - * because the established check is already - * unique enough. - */ - inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { - BUG_TRAP(!hlist_empty(&tb->owners)); - if (tb->fastreuse >= 0) - goto next_port; - if (!__dccp_v6_check_established(sk, - port, - &tw)) - goto ok; - goto next_port; - } - } - - tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, - head, port); - if (!tb) { - spin_unlock(&head->lock); - break; - } - tb->fastreuse = -1; - goto ok; - - next_port: - spin_unlock(&head->lock); - } - local_bh_enable(); - - return -EADDRNOTAVAIL; -ok: - hint += i; - - /* Head lock still held and bh's disabled */ - inet_bind_hash(sk, tb, port); - if (sk_unhashed(sk)) { - inet_sk(sk)->sport = htons(port); - __inet6_hash(&dccp_hashinfo, sk); - } - spin_unlock(&head->lock); - - if (tw) { - inet_twsk_deschedule(tw, &dccp_death_row); - inet_twsk_put(tw); - } - - ret = 0; - goto out; - } - - head = &dccp_hashinfo.bhash[inet_bhashfn(snum, - dccp_hashinfo.bhash_size)]; - tb = inet_csk(sk)->icsk_bind_hash; - spin_lock_bh(&head->lock); - - if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __inet6_hash(&dccp_hashinfo, sk); - spin_unlock_bh(&head->lock); - return 0; - } else { - spin_unlock(&head->lock); - /* No definite answer... 
Walk to established hash table */ - ret = __dccp_v6_check_established(sk, snum, NULL); -out: - local_bh_enable(); - return ret; - } -} - static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -403,7 +234,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->dport = usin->sin6_port; dccp_set_state(sk, DCCP_REQUESTING); - err = dccp_v6_hash_connect(sk); + err = inet6_hash_connect(&dccp_death_row, sk); if (err) goto late_failure; /* FIXME */ diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 01d5f46d4e40..4154f3a8b6cf 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -5,7 +5,8 @@ * * Generic INET6 transport hashtables * - * Authors: Lotsa people, from code originally in tcp + * Authors: Lotsa people, from code originally in tcp, generalised here + * by Arnaldo Carvalho de Melo * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -14,12 +15,13 @@ */ #include - #include +#include #include #include #include +#include struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, @@ -79,3 +81,180 @@ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, } EXPORT_SYMBOL_GPL(inet6_lookup); + +static int __inet6_check_established(struct inet_timewait_death_row *death_row, + struct sock *sk, const __u16 lport, + struct inet_timewait_sock **twp) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct in6_addr *daddr = &np->rcv_saddr; + const struct in6_addr *saddr = &np->daddr; + const int dif = sk->sk_bound_dev_if; + const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, + inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); + struct sock *sk2; + const 
struct hlist_node *node; + struct inet_timewait_sock *tw; + + prefetch(head->chain.first); + write_lock(&head->lock); + + /* Check TIME-WAIT sockets first. */ + sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); + + tw = inet_twsk(sk2); + + if(*((__u32 *)&(tw->tw_dport)) == ports && + sk2->sk_family == PF_INET6 && + ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && + sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { + if (twsk_unique(sk, sk2, twp)) + goto unique; + else + goto not_unique; + } + } + tw = NULL; + + /* And established part... */ + sk_for_each(sk2, node, &head->chain) { + if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) + goto not_unique; + } + +unique: + BUG_TRAP(sk_unhashed(sk)); + __sk_add_node(sk, &head->chain); + sk->sk_hash = hash; + sock_prot_inc_use(sk->sk_prot); + write_unlock(&head->lock); + + if (twp != NULL) { + *twp = tw; + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + } else if (tw != NULL) { + /* Silly. Should hash-dance instead... 
*/ + inet_twsk_deschedule(tw, death_row); + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + + inet_twsk_put(tw); + } + return 0; + +not_unique: + write_unlock(&head->lock); + return -EADDRNOTAVAIL; +} + +static inline u32 inet6_sk_port_offset(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); + return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32, + np->daddr.s6_addr32, + inet->dport); +} + +int inet6_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + const unsigned short snum = inet_sk(sk)->num; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; + int ret; + + if (snum == 0) { + const int low = sysctl_local_port_range[0]; + const int high = sysctl_local_port_range[1]; + const int range = high - low; + int i, port; + static u32 hint; + const u32 offset = hint + inet6_sk_port_offset(sk); + struct hlist_node *node; + struct inet_timewait_sock *tw = NULL; + + local_bh_disable(); + for (i = 1; i <= range; i++) { + port = low + (i + offset) % range; + head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; + spin_lock(&head->lock); + + /* Does not bother with rcv_saddr checks, + * because the established check is already + * unique enough. 
+ */ + inet_bind_bucket_for_each(tb, node, &head->chain) { + if (tb->port == port) { + BUG_TRAP(!hlist_empty(&tb->owners)); + if (tb->fastreuse >= 0) + goto next_port; + if (!__inet6_check_established(death_row, + sk, port, + &tw)) + goto ok; + goto next_port; + } + } + + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, + head, port); + if (!tb) { + spin_unlock(&head->lock); + break; + } + tb->fastreuse = -1; + goto ok; + + next_port: + spin_unlock(&head->lock); + } + local_bh_enable(); + + return -EADDRNOTAVAIL; + +ok: + hint += i; + + /* Head lock still held and bh's disabled */ + inet_bind_hash(sk, tb, port); + if (sk_unhashed(sk)) { + inet_sk(sk)->sport = htons(port); + __inet6_hash(hinfo, sk); + } + spin_unlock(&head->lock); + + if (tw) { + inet_twsk_deschedule(tw, death_row); + inet_twsk_put(tw); + } + + ret = 0; + goto out; + } + + head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + + if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { + __inet6_hash(hinfo, sk); + spin_unlock_bh(&head->lock); + return 0; + } else { + spin_unlock(&head->lock); + /* No definite answer... 
Walk to established hash table */ + ret = __inet6_check_established(death_row, sk, snum, NULL); +out: + local_bh_enable(); + return ret; + } +} + +EXPORT_SYMBOL_GPL(inet6_hash_connect); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 514b57bb80b7..a682eb9093e1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -119,177 +119,6 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) } } -static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, - struct inet_timewait_sock **twp) -{ - struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - const struct in6_addr *daddr = &np->rcv_saddr; - const struct in6_addr *saddr = &np->daddr; - const int dif = sk->sk_bound_dev_if; - const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport); - struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash); - struct sock *sk2; - const struct hlist_node *node; - struct inet_timewait_sock *tw; - - prefetch(head->chain.first); - write_lock(&head->lock); - - /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { - const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2); - - tw = inet_twsk(sk2); - - if(*((__u32 *)&(tw->tw_dport)) == ports && - sk2->sk_family == PF_INET6 && - ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { - if (twsk_unique(sk, sk2, twp)) - goto unique; - else - goto not_unique; - } - } - tw = NULL; - - /* And established part... 
*/ - sk_for_each(sk2, node, &head->chain) { - if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif)) - goto not_unique; - } - -unique: - BUG_TRAP(sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); - sk->sk_hash = hash; - sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); - - if (twp) { - *twp = tw; - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - } else if (tw) { - /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, &tcp_death_row); - NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - - inet_twsk_put(tw); - } - return 0; - -not_unique: - write_unlock(&head->lock); - return -EADDRNOTAVAIL; -} - -static inline u32 tcpv6_port_offset(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - const struct ipv6_pinfo *np = inet6_sk(sk); - - return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32, - np->daddr.s6_addr32, - inet->dport); -} - -static int tcp_v6_hash_connect(struct sock *sk) -{ - unsigned short snum = inet_sk(sk)->num; - struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret; - - if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int range = high - low; - int i; - int port; - static u32 hint; - u32 offset = hint + tcpv6_port_offset(sk); - struct hlist_node *node; - struct inet_timewait_sock *tw = NULL; - - local_bh_disable(); - for (i = 1; i <= range; i++) { - port = low + (i + offset) % range; - head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)]; - spin_lock(&head->lock); - - /* Does not bother with rcv_saddr checks, - * because the established check is already - * unique enough. 
- */ - inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { - BUG_TRAP(!hlist_empty(&tb->owners)); - if (tb->fastreuse >= 0) - goto next_port; - if (!__tcp_v6_check_established(sk, - port, - &tw)) - goto ok; - goto next_port; - } - } - - tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port); - if (!tb) { - spin_unlock(&head->lock); - break; - } - tb->fastreuse = -1; - goto ok; - - next_port: - spin_unlock(&head->lock); - } - local_bh_enable(); - - return -EADDRNOTAVAIL; - -ok: - hint += i; - - /* Head lock still held and bh's disabled */ - inet_bind_hash(sk, tb, port); - if (sk_unhashed(sk)) { - inet_sk(sk)->sport = htons(port); - __inet6_hash(&tcp_hashinfo, sk); - } - spin_unlock(&head->lock); - - if (tw) { - inet_twsk_deschedule(tw, &tcp_death_row); - inet_twsk_put(tw); - } - - ret = 0; - goto out; - } - - head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - tb = inet_csk(sk)->icsk_bind_hash; - spin_lock_bh(&head->lock); - - if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __inet6_hash(&tcp_hashinfo, sk); - spin_unlock_bh(&head->lock); - return 0; - } else { - spin_unlock(&head->lock); - /* No definite answer... 
Walk to established hash table */ - ret = __tcp_v6_check_established(sk, snum, NULL); -out: - local_bh_enable(); - return ret; - } -} - static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -450,7 +279,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); - err = tcp_v6_hash_connect(sk); + err = inet6_hash_connect(&tcp_death_row, sk); if (err) goto late_failure; -- cgit v1.2.3-71-gd317 From 22712813620fa8e682dbfb253a60ca0131da1e07 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:25:56 -0800 Subject: [TCP]: Move the TCPF_ enum to tcp_states.h Upcoming patches will make, for instance, ip_sockglue.c need just this enum and not all of tcp.h. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp.h | 16 ---------------- include/net/tcp_states.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4e1434007f44..da38eea1994b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -55,22 +55,6 @@ struct tcphdr { __u16 urg_ptr; }; -#define TCP_ACTION_FIN (1 << 7) - -enum { - TCPF_ESTABLISHED = (1 << 1), - TCPF_SYN_SENT = (1 << 2), - TCPF_SYN_RECV = (1 << 3), - TCPF_FIN_WAIT1 = (1 << 4), - TCPF_FIN_WAIT2 = (1 << 5), - TCPF_TIME_WAIT = (1 << 6), - TCPF_CLOSE = (1 << 7), - TCPF_CLOSE_WAIT = (1 << 8), - TCPF_LAST_ACK = (1 << 9), - TCPF_LISTEN = (1 << 10), - TCPF_CLOSING = (1 << 11) -}; - /* * The union cast uses a gcc extension to avoid aliasing problems * (union is compatible to any of its members) diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h index b9d4176b2d15..b0b645988bd8 100644 --- a/include/net/tcp_states.h +++ b/include/net/tcp_states.h @@ -31,4 +31,20 @@ enum { #define TCP_STATE_MASK 0xF +#define TCP_ACTION_FIN (1 << 7) + +enum { + TCPF_ESTABLISHED = (1 
<< 1), + TCPF_SYN_SENT = (1 << 2), + TCPF_SYN_RECV = (1 << 3), + TCPF_FIN_WAIT1 = (1 << 4), + TCPF_FIN_WAIT2 = (1 << 5), + TCPF_TIME_WAIT = (1 << 6), + TCPF_CLOSE = (1 << 7), + TCPF_CLOSE_WAIT = (1 << 8), + TCPF_LAST_ACK = (1 << 9), + TCPF_LISTEN = (1 << 10), + TCPF_CLOSING = (1 << 11) +}; + #endif /* _LINUX_TCP_STATES_H */ -- cgit v1.2.3-71-gd317 From d83d8461f902c672bc1bd8fbc6a94e19f092da97 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Dec 2005 23:26:10 -0800 Subject: [IP_SOCKGLUE]: Remove most of the tcp specific calls As DCCP needs to be called in the same spots. Now we have a member in inet_sock (is_icsk), set at sock creation time from struct inet_protosw->flags (if INET_PROTOSW_ICSK is set, like for TCP and DCCP) to see if a struct sock instance is an inet_connection_sock for places like the ones in ip_sockglue.c (v4 and v6) where we previously were looking if sk_type was SOCK_STREAM, which is insufficient because we now use the same code for DCCP, which has sk_type SOCK_DCCP. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 4 ---- include/linux/ip.h | 1 + include/linux/tcp.h | 3 +-- include/net/inet_connection_sock.h | 6 +++++- include/net/protocol.h | 1 + net/dccp/diag.c | 2 +- net/dccp/input.c | 2 +- net/dccp/ipv4.c | 12 +++++++----- net/dccp/ipv6.c | 26 ++++++++++++++------------ net/dccp/output.c | 7 ++++--- net/dccp/proto.c | 2 +- net/ipv4/af_inet.c | 4 +++- net/ipv4/ip_sockglue.c | 13 ++++++------- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 10 ++++------ net/ipv4/tcp_ipv4.c | 12 ++++++------ net/ipv4/tcp_output.c | 18 ++++++++++-------- net/ipv6/af_inet6.c | 1 + net/ipv6/ipv6_sockglue.c | 24 +++++++++++++----------- net/ipv6/tcp_ipv6.c | 30 +++++++++++++++++------------- 20 files changed, 97 insertions(+), 83 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 71fab4311e92..d0bdb499cf8d 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -408,8 +408,6 @@ struct dccp_ackvec; * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss * @dccps_timestamp_time - time of latest TIMESTAMP option * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option - * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options) - * @dccps_pmtu_cookie - Last pmtu seen by socket * @dccps_packet_size - Set thru setsockopt * @dccps_role - Role of this sock, one of %dccp_role * @dccps_ndp_count - number of Non Data Packets since last data packet @@ -434,8 +432,6 @@ struct dccp_sock { __u32 dccps_timestamp_echo; __u32 dccps_packet_size; unsigned long dccps_ndp_count; - __u16 dccps_ext_header_len; - __u32 dccps_pmtu_cookie; __u32 dccps_mss_cache; struct dccp_options dccps_options; struct dccp_ackvec *dccps_hc_rx_ackvec; diff --git a/include/linux/ip.h b/include/linux/ip.h index 5a560daeade5..6ccc596c19c8 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -155,6 +155,7 @@ struct inet_sock { __u8 mc_ttl; /* Multicasting TTL */ __u8 pmtudisc; 
unsigned recverr : 1, + is_icsk : 1, /* inet_connection_sock? */ freebind : 1, hdrincl : 1, mc_loop : 1; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index da38eea1994b..f2bb2396853f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -238,10 +238,9 @@ struct tcp_sock { __u32 snd_wl1; /* Sequence for window update */ __u32 snd_wnd; /* The window we expect to receive */ __u32 max_window; /* Maximal window ever seen from peer */ - __u32 pmtu_cookie; /* Last pmtu seen by socket */ __u32 mss_cache; /* Cached effective mss, not including SACKS */ __u16 xmit_size_goal; /* Goal for segmenting output packets */ - __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ + /* XXX Two bytes hole, try to pack */ __u32 window_clamp; /* Maximal window to advertise */ __u32 rcv_ssthresh; /* Current window clamp */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index e50e2b890c6d..91888967d3e3 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -60,6 +60,7 @@ struct inet_connection_sock_af_ops { * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto: Retransmit timeout + * @icsk_pmtu_cookie Last pmtu seen by socket * @icsk_ca_ops Pluggable congestion control hook * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ca_state: Congestion control state @@ -68,6 +69,7 @@ struct inet_connection_sock_af_ops { * @icsk_backoff: Backoff * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries * @icsk_probes_out: unanswered 0 window probes + * @icsk_ext_hdr_len: Network protocol overhead (IP/IPv6 options) * @icsk_ack: Delayed ACK control data */ struct inet_connection_sock { @@ -79,15 +81,17 @@ struct inet_connection_sock { struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; __u32 icsk_rto; + __u32 icsk_pmtu_cookie; struct tcp_congestion_ops *icsk_ca_ops; struct inet_connection_sock_af_ops *icsk_af_ops; + 
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state; __u8 icsk_retransmits; __u8 icsk_pending; __u8 icsk_backoff; __u8 icsk_syn_retries; __u8 icsk_probes_out; - /* 2 BYTES HOLE, TRY TO PACK! */ + __u16 icsk_ext_hdr_len; struct { __u8 pending; /* ACK is pending */ __u8 quick; /* Scheduled number of quick acks */ diff --git a/include/net/protocol.h b/include/net/protocol.h index 357691f6a45f..a29cb29647d0 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -76,6 +76,7 @@ struct inet_protosw { }; #define INET_PROTOSW_REUSE 0x01 /* Are ports automatically reusable? */ #define INET_PROTOSW_PERMANENT 0x02 /* Permanent protocols are unremovable. */ +#define INET_PROTOSW_ICSK 0x04 /* Is this an inet_connection_sock? */ extern struct net_protocol *inet_protocol_base; extern struct net_protocol *inet_protos[MAX_INET_PROTOS]; diff --git a/net/dccp/diag.c b/net/dccp/diag.c index f675d8e642d3..3f78c00e3822 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -28,7 +28,7 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_retransmits = icsk->icsk_retransmits; info->tcpi_probes = icsk->icsk_probes_out; info->tcpi_backoff = icsk->icsk_backoff; - info->tcpi_pmtu = dp->dccps_pmtu_cookie; + info->tcpi_pmtu = icsk->icsk_pmtu_cookie; if (dp->dccps_options.dccpo_send_ack_vector) info->tcpi_options |= TCPI_OPT_SACK; diff --git a/net/dccp/input.c b/net/dccp/input.c index 9a724ff2a622..55e921bdd131 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -311,7 +311,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, goto out_invalid_packet; } - dccp_sync_mss(sk, dp->dccps_pmtu_cookie); + dccp_sync_mss(sk, icsk->icsk_pmtu_cookie); /* * Step 10: Process REQUEST state (second part) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 671fbf3b2379..c363051a7f16 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -104,9 +104,9 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 
inet->dport = usin->sin_port; inet->daddr = daddr; - dp->dccps_ext_header_len = 0; + inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt != NULL) - dp->dccps_ext_header_len = inet->opt->optlen; + inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; /* * Socket identity is still unknown (sport may be zero). * However we set state to DCCP_REQUESTING and not releasing socket @@ -191,7 +191,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, mtu = dst_mtu(dst); if (inet->pmtudisc != IP_PMTUDISC_DONT && - dp->dccps_pmtu_cookie > mtu) { + inet_csk(sk)->icsk_pmtu_cookie > mtu) { dccp_sync_mss(sk, mtu); /* @@ -1051,6 +1051,7 @@ struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { int dccp_v4_init_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); static int dccp_ctl_socket_init = 1; dccp_options_init(&dp->dccps_options); @@ -1090,10 +1091,11 @@ int dccp_v4_init_sock(struct sock *sk) dccp_ctl_socket_init = 0; dccp_init_xmit_timers(sk); - inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT; + icsk->icsk_rto = DCCP_TIMEOUT_INIT; sk->sk_state = DCCP_CLOSED; sk->sk_write_space = dccp_write_space; - inet_csk(sk)->icsk_af_ops = &dccp_ipv4_af_ops; + icsk->icsk_af_ops = &dccp_ipv4_af_ops; + icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 71bf04eb21e1..599b0be21515 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -88,6 +88,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; + struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); @@ -158,7 +159,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, */ if (addr_type == IPV6_ADDR_MAPPED) { - u32 
exthdrlen = dp->dccps_ext_header_len; + u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); @@ -170,14 +171,14 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - inet_csk(sk)->icsk_af_ops = &dccp_ipv6_mapped; + icsk->icsk_af_ops = &dccp_ipv6_mapped; sk->sk_backlog_rcv = dccp_v4_do_rcv; err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { - dp->dccps_ext_header_len = exthdrlen; - inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops; + icsk->icsk_ext_hdr_len = exthdrlen; + icsk->icsk_af_ops = &dccp_ipv6_af_ops; sk->sk_backlog_rcv = dccp_v6_do_rcv; goto failure; } else { @@ -227,9 +228,10 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ip6_dst_store(sk, dst, NULL); - dp->dccps_ext_header_len = 0; + icsk->icsk_ext_hdr_len = 0; if (np->opt) - dp->dccps_ext_header_len = np->opt->opt_flen + np->opt->opt_nflen; + icsk->icsk_ext_hdr_len = (np->opt->opt_flen + + np->opt->opt_nflen); inet->dport = usin->sin6_port; @@ -292,7 +294,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { - struct dccp_sock *dp = dccp_sk(sk); struct dst_entry *dst = NULL; if (sock_owned_by_user(sk)) @@ -332,7 +333,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); - if (dp->dccps_pmtu_cookie > dst_mtu(dst)) { + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { dccp_sync_mss(sk, dst_mtu(dst)); } /* else let the usual retransmit timer handle it */ dst_release(dst); @@ -808,7 +809,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, worked with IPv6 icsk.icsk_af_ops. Sync it now. 
*/ - dccp_sync_mss(newsk, newdp->dccps_pmtu_cookie); + dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); return newsk; } @@ -916,10 +917,10 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, sock_kfree_s(sk, opt, opt->tot_len); } - newdp->dccps_ext_header_len = 0; + inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newnp->opt) - newdp->dccps_ext_header_len = newnp->opt->opt_nflen + - newnp->opt->opt_flen; + inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + + newnp->opt->opt_flen); dccp_sync_mss(newsk, dst_mtu(dst)); @@ -1230,6 +1231,7 @@ static struct inet_protosw dccp_v6_protosw = { .prot = &dccp_v6_prot, .ops = &inet6_dccp_ops, .capability = -1, + .flags = INET_PROTOSW_ICSK, }; static int __init dccp_v6_init(void) diff --git a/net/dccp/output.c b/net/dccp/output.c index c40f7f8a328b..95a3c2c6a3ce 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -134,12 +134,13 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) { + struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); - int mss_now = (pmtu - inet_csk(sk)->icsk_af_ops->net_header_len - + int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext)); /* Now subtract optional transport overhead */ - mss_now -= dp->dccps_ext_header_len; + mss_now -= icsk->icsk_ext_hdr_len; /* * FIXME: this should come from the CCID infrastructure, where, say, @@ -152,7 +153,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; /* And store cached results */ - dp->dccps_pmtu_cookie = pmtu; + icsk->icsk_pmtu_cookie = pmtu; dp->dccps_mss_cache = mss_now; return mss_now; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 51dfacd22a6e..40a4c6899051 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -712,7 +712,7 @@ static struct inet_protosw dccp_v4_protosw = { .ops = &inet_dccp_ops, 
.capability = -1, .no_check = 0, - .flags = 0, + .flags = INET_PROTOSW_ICSK, }; /* diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d368cf249000..617e858beff1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -302,6 +302,7 @@ lookup_protocol: sk->sk_reuse = 1; inet = inet_sk(sk); + inet->is_icsk = INET_PROTOSW_ICSK & answer_flags; if (SOCK_RAW == sock->type) { inet->num = protocol; @@ -869,7 +870,8 @@ static struct inet_protosw inetsw_array[] = .ops = &inet_stream_ops, .capability = -1, .no_check = 0, - .flags = INET_PROTOSW_PERMANENT, + .flags = INET_PROTOSW_PERMANENT | + INET_PROTOSW_ICSK, }, { diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 4f2d87257309..add019c746f8 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -29,8 +29,7 @@ #include #include #include -#include -#include +#include #include #include #include @@ -427,8 +426,8 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, err = ip_options_get_from_user(&opt, optval, optlen); if (err) break; - if (sk->sk_type == SOCK_STREAM) { - struct tcp_sock *tp = tcp_sk(sk); + if (inet->is_icsk) { + struct inet_connection_sock *icsk = inet_csk(sk); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (sk->sk_family == PF_INET || (!((1 << sk->sk_state) & @@ -436,10 +435,10 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, inet->daddr != LOOPBACK4_IPV6)) { #endif if (inet->opt) - tp->ext_header_len -= inet->opt->optlen; + icsk->icsk_ext_hdr_len -= inet->opt->optlen; if (opt) - tp->ext_header_len += opt->optlen; - tcp_sync_mss(sk, tp->pmtu_cookie); + icsk->icsk_ext_hdr_len += opt->optlen; + icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) } #endif diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eacfe6a3442c..00aa80e93243 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1914,7 +1914,7 @@ void tcp_get_info(struct sock *sk, struct 
tcp_info *info) info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); - info->tcpi_pmtu = tp->pmtu_cookie; + info->tcpi_pmtu = icsk->icsk_pmtu_cookie; info->tcpi_rcv_ssthresh = tp->rcv_ssthresh; info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3; info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7de6184d4bd8..981d1203b152 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2342,7 +2342,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp, if (nwin > tp->max_window) { tp->max_window = nwin; - tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie); } } } @@ -3967,12 +3967,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); int saved_clamp = tp->rx_opt.mss_clamp; tcp_parse_options(skb, &tp->rx_opt, 0); if (th->ack) { - struct inet_connection_sock *icsk; /* rfc793: * "If the state is SYN-SENT then * first check the ACK bit @@ -4061,7 +4061,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (tp->rx_opt.sack_ok && sysctl_tcp_fack) tp->rx_opt.sack_ok |= 2; - tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); /* Remember, tcp_poll() does not lock socket! @@ -4071,8 +4071,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, mb(); tcp_set_state(sk, TCP_ESTABLISHED); - icsk = inet_csk(sk); - /* Make sure socket is routed, for correct metrics. 
*/ icsk->icsk_af_ops->rebuild_header(sk); @@ -4173,7 +4171,7 @@ discard: if (tp->ecn_flags&TCP_ECN_OK) sock_set_flag(sk, SOCK_NO_LARGESEND); - tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c2fe61becd61..9b62d80bb20f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -220,9 +220,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->dport = usin->sin_port; inet->daddr = daddr; - tp->ext_header_len = 0; + inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt) - tp->ext_header_len = inet->opt->optlen; + inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; tp->rx_opt.mss_clamp = 536; @@ -275,7 +275,6 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, { struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs * send out by Linux are always <576bytes so they should go through @@ -304,7 +303,7 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, mtu = dst_mtu(dst); if (inet->pmtudisc != IP_PMTUDISC_DONT && - tp->pmtu_cookie > mtu) { + inet_csk(sk)->icsk_pmtu_cookie > mtu) { tcp_sync_mss(sk, mtu); /* Resend the TCP packet because it's @@ -895,9 +894,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ireq->opt = NULL; newinet->mc_index = inet_iif(skb); newinet->mc_ttl = skb->nh.iph->ttl; - newtp->ext_header_len = 0; + inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newinet->opt) - newtp->ext_header_len = newinet->opt->optlen; + inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; newinet->id = newtp->write_seq ^ jiffies; tcp_sync_mss(newsk, dst_mtu(dst)); @@ -1266,6 +1265,7 @@ static int tcp_v4_init_sock(struct sock *sk) sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); icsk->icsk_af_ops = &ipv4_specific; + icsk->icsk_sync_mss = tcp_sync_mss; 
sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index af1946c52c37..3a0a914de917 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -621,7 +621,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) It is minimum of user_mss and mss received with SYN. It also does not include TCP options. - tp->pmtu_cookie is last pmtu, seen by this function. + inet_csk(sk)->icsk_pmtu_cookie is last pmtu, seen by this function. tp->mss_cache is current effective sending mss, including all tcp options except for SACKs. It is evaluated, @@ -631,17 +631,18 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) NOTE1. rfc1122 clearly states that advertised MSS DOES NOT include either tcp or ip options. - NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside - this function. --ANK (980731) + NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache + are READ ONLY outside this function. 
--ANK (980731) */ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); /* Calculate base mss without TCP options: It is MMS_S - sizeof(tcphdr) of rfc1122 */ - int mss_now = (pmtu - inet_csk(sk)->icsk_af_ops->net_header_len - + int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr)); /* Clamp it (mss_clamp does not include tcp options) */ @@ -649,7 +650,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) mss_now = tp->rx_opt.mss_clamp; /* Now subtract optional transport overhead */ - mss_now -= tp->ext_header_len; + mss_now -= icsk->icsk_ext_hdr_len; /* Then reserve room for full set of TCP options and 8 bytes of data */ if (mss_now < 48) @@ -663,7 +664,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len); /* And store cached results */ - tp->pmtu_cookie = pmtu; + icsk->icsk_pmtu_cookie = pmtu; tp->mss_cache = mss_now; return mss_now; @@ -693,7 +694,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) if (dst) { u32 mtu = dst_mtu(dst); - if (mtu != tp->pmtu_cookie) + if (mtu != inet_csk(sk)->icsk_pmtu_cookie) mss_now = tcp_sync_mss(sk, mtu); } @@ -706,7 +707,8 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) if (doing_tso) { xmit_size_goal = (65535 - inet_csk(sk)->icsk_af_ops->net_header_len - - tp->ext_header_len - tp->tcp_header_len); + inet_csk(sk)->icsk_ext_hdr_len - + tp->tcp_header_len); if (tp->max_window && (xmit_size_goal > (tp->max_window >> 1))) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index bf17aab9b776..70a510ff31ee 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -167,6 +167,7 @@ lookup_protocol: sk->sk_reuse = 1; inet = inet_sk(sk); + inet->is_icsk = INET_PROTOSW_ICSK & answer_flags; if (SOCK_RAW == sock->type) { inet->num = protocol; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 
b6b63fa8454c..c63868dd2ca2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -163,17 +163,17 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, sk_refcnt_debug_dec(sk); if (sk->sk_protocol == IPPROTO_TCP) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); local_bh_disable(); sock_prot_dec_use(sk->sk_prot); sock_prot_inc_use(&tcp_prot); local_bh_enable(); sk->sk_prot = &tcp_prot; - inet_csk(sk)->icsk_af_ops = &ipv4_specific; + icsk->icsk_af_ops = &ipv4_specific; sk->sk_socket->ops = &inet_stream_ops; sk->sk_family = PF_INET; - tcp_sync_mss(sk, tp->pmtu_cookie); + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { local_bh_disable(); sock_prot_dec_use(sk->sk_prot); @@ -317,14 +317,15 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, } retv = 0; - if (sk->sk_type == SOCK_STREAM) { + if (inet_sk(sk)->is_icsk) { if (opt) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { - tp->ext_header_len = opt->opt_flen + opt->opt_nflen; - tcp_sync_mss(sk, tp->pmtu_cookie); + icsk->icsk_ext_hdr_len = + opt->opt_flen + opt->opt_nflen; + icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } opt = xchg(&np->opt, opt); @@ -380,14 +381,15 @@ sticky_done: goto done; update: retv = 0; - if (sk->sk_type == SOCK_STREAM) { + if (inet_sk(sk)->is_icsk) { if (opt) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { - tp->ext_header_len = opt->opt_flen + opt->opt_nflen; - tcp_sync_mss(sk, tp->pmtu_cookie); + icsk->icsk_ext_hdr_len = + opt->opt_flen + opt->opt_nflen; + icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } opt = xchg(&np->opt, opt); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a682eb9093e1..2947bc56d8a0 100644 
--- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -123,7 +123,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; - struct inet_sock *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p = NULL, final; @@ -198,7 +199,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, */ if (addr_type == IPV6_ADDR_MAPPED) { - u32 exthdrlen = tp->ext_header_len; + u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; SOCK_DEBUG(sk, "connect: ipv4 mapped\n"); @@ -210,14 +211,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - inet_csk(sk)->icsk_af_ops = &ipv6_mapped; + icsk->icsk_af_ops = &ipv6_mapped; sk->sk_backlog_rcv = tcp_v4_do_rcv; err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { - tp->ext_header_len = exthdrlen; - inet_csk(sk)->icsk_af_ops = &ipv6_specific; + icsk->icsk_ext_hdr_len = exthdrlen; + icsk->icsk_af_ops = &ipv6_specific; sk->sk_backlog_rcv = tcp_v6_do_rcv; goto failure; } else { @@ -270,9 +271,10 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); - tp->ext_header_len = 0; + icsk->icsk_ext_hdr_len = 0; if (np->opt) - tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen; + icsk->icsk_ext_hdr_len = (np->opt->opt_flen + + np->opt->opt_nflen); tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); @@ -385,7 +387,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); - if (tp->pmtu_cookie > dst_mtu(dst)) { + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { 
tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); } /* else let the usual retransmit timer handle it */ @@ -869,7 +871,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, worked with IPv6 icsk.icsk_af_ops. Sync it now. */ - tcp_sync_mss(newsk, newtp->pmtu_cookie); + tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); return newsk; } @@ -976,10 +978,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, sock_kfree_s(sk, opt, opt->tot_len); } - newtp->ext_header_len = 0; + inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newnp->opt) - newtp->ext_header_len = newnp->opt->opt_nflen + - newnp->opt->opt_flen; + inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + + newnp->opt->opt_flen); tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric(dst, RTAX_ADVMSS); @@ -1361,6 +1363,7 @@ static int tcp_v6_init_sock(struct sock *sk) icsk->icsk_af_ops = &ipv6_specific; icsk->icsk_ca_ops = &tcp_init_congestion_ops; + icsk->icsk_sync_mss = tcp_sync_mss; sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); @@ -1591,7 +1594,8 @@ static struct inet_protosw tcpv6_protosw = { .ops = &inet6_stream_ops, .capability = -1, .no_check = 0, - .flags = INET_PROTOSW_PERMANENT, + .flags = INET_PROTOSW_PERMANENT | + INET_PROTOSW_ICSK, }; void __init tcpv6_init(void) -- cgit v1.2.3-71-gd317 From c865e5d99e25a171e8262fc0f7ba608568633c64 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 21 Dec 2005 19:03:44 -0800 Subject: [PKT_SCHED] netem: packet corruption option Here is a new feature for netem in 2.6.16. It adds the ability to randomly corrupt packets with netem. A version was done by Hagen Paul Pfeifer, but I redid it to handle the cases of backwards compatibility with netlink interface and presence of hardware checksum offload. It is useful for testing hardware offload in devices. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/pkt_sched.h | 7 +++++++ net/sched/sch_netem.c | 49 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index e87b233615b3..d10f35338507 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -429,6 +429,7 @@ enum TCA_NETEM_CORR, TCA_NETEM_DELAY_DIST, TCA_NETEM_REORDER, + TCA_NETEM_CORRUPT, __TCA_NETEM_MAX, }; @@ -457,6 +458,12 @@ struct tc_netem_reorder __u32 correlation; }; +struct tc_netem_corrupt +{ + __u32 probability; + __u32 correlation; +}; + #define NETEM_DIST_SCALE 8192 #endif diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 82fb07aa06a5..ba5283204837 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -25,7 +25,7 @@ #include -#define VERSION "1.1" +#define VERSION "1.2" /* Network Emulation Queuing algorithm. ==================================== @@ -65,11 +65,12 @@ struct netem_sched_data { u32 jitter; u32 duplicate; u32 reorder; + u32 corrupt; struct crndstate { unsigned long last; unsigned long rho; - } delay_cor, loss_cor, dup_cor, reorder_cor; + } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor; struct disttable { u32 size; @@ -183,6 +184,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) q->duplicate = dupsave; } + /* + * Randomized packet corruption. + * Make copy if needed since we are modifying + * If packet is going to be hardware checksummed, then + * do it now in software before we mangle it. 
+ */ + if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { + if (!(skb = skb_unshare(skb, GFP_ATOMIC)) + || (skb->ip_summed == CHECKSUM_HW + && skb_checksum_help(skb, 0))) { + sch->qstats.drops++; + return NET_XMIT_DROP; + } + + skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); + } + if (q->gap == 0 /* not doing reordering */ || q->counter < q->gap /* inside last reordering gap */ || q->reorder < get_crandom(&q->reorder_cor)) { @@ -382,6 +400,20 @@ static int get_reorder(struct Qdisc *sch, const struct rtattr *attr) return 0; } +static int get_corrupt(struct Qdisc *sch, const struct rtattr *attr) +{ + struct netem_sched_data *q = qdisc_priv(sch); + const struct tc_netem_corrupt *r = RTA_DATA(attr); + + if (RTA_PAYLOAD(attr) != sizeof(*r)) + return -EINVAL; + + q->corrupt = r->probability; + init_crandom(&q->corrupt_cor, r->correlation); + return 0; +} + +/* Parse netlink message to set options */ static int netem_change(struct Qdisc *sch, struct rtattr *opt) { struct netem_sched_data *q = qdisc_priv(sch); @@ -432,13 +464,19 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) if (ret) return ret; } + if (tb[TCA_NETEM_REORDER-1]) { ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]); if (ret) return ret; } - } + if (tb[TCA_NETEM_CORRUPT-1]) { + ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT-1]); + if (ret) + return ret; + } + } return 0; } @@ -564,6 +602,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_netem_qopt qopt; struct tc_netem_corr cor; struct tc_netem_reorder reorder; + struct tc_netem_corrupt corrupt; qopt.latency = q->latency; qopt.jitter = q->jitter; @@ -582,6 +621,10 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) reorder.correlation = q->reorder_cor.rho; RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder); + corrupt.probability = q->corrupt; + corrupt.correlation = q->corrupt_cor.rho; + RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); + rta->rta_len = 
skb->tail - b; return skb->len; -- cgit v1.2.3-71-gd317 From 3821af2fe13700cab6fd67367128fa180e43f8b8 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 21 Dec 2005 19:30:53 -0800 Subject: [FLS64]: generic version Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/asm-alpha/bitops.h | 1 + include/asm-arm/bitops.h | 2 ++ include/asm-arm26/bitops.h | 1 + include/asm-cris/bitops.h | 1 + include/asm-frv/bitops.h | 1 + include/asm-generic/bitops.h | 1 + include/asm-h8300/bitops.h | 1 + include/asm-i386/bitops.h | 1 + include/asm-ia64/bitops.h | 1 + include/asm-m32r/bitops.h | 1 + include/asm-m68k/bitops.h | 1 + include/asm-m68knommu/bitops.h | 1 + include/asm-mips/bitops.h | 2 +- include/asm-parisc/bitops.h | 1 + include/asm-powerpc/bitops.h | 1 + include/asm-s390/bitops.h | 1 + include/asm-sh/bitops.h | 1 + include/asm-sh64/bitops.h | 1 + include/asm-sparc/bitops.h | 1 + include/asm-sparc64/bitops.h | 1 + include/asm-v850/bitops.h | 1 + include/asm-x86_64/bitops.h | 1 + include/asm-xtensa/bitops.h | 1 + include/linux/bitops.h | 9 +++++++++ 24 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/asm-alpha/bitops.h b/include/asm-alpha/bitops.h index 578ed3f1a607..302201f1a097 100644 --- a/include/asm-alpha/bitops.h +++ b/include/asm-alpha/bitops.h @@ -321,6 +321,7 @@ static inline int fls(int word) #else #define fls generic_fls #endif +#define fls64 generic_fls64 /* Compute powers of two for the given integer. */ static inline long floor_log2(unsigned long word) diff --git a/include/asm-arm/bitops.h b/include/asm-arm/bitops.h index 7399d431edfe..d02de721ecc1 100644 --- a/include/asm-arm/bitops.h +++ b/include/asm-arm/bitops.h @@ -332,6 +332,7 @@ static inline unsigned long __ffs(unsigned long word) */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) /* * ffs: find first bit set. 
This is defined the same way as @@ -351,6 +352,7 @@ static inline unsigned long __ffs(unsigned long word) #define fls(x) \ ( __builtin_constant_p(x) ? generic_fls(x) : \ ({ int __r; asm("clz\t%0, %1" : "=r"(__r) : "r"(x) : "cc"); 32-__r; }) ) +#define fls64(x) generic_fls64(x) #define ffs(x) ({ unsigned long __t = (x); fls(__t & -__t); }) #define __ffs(x) (ffs(x) - 1) #define ffz(x) __ffs( ~(x) ) diff --git a/include/asm-arm26/bitops.h b/include/asm-arm26/bitops.h index 7d062fb2e343..15cc6f2da792 100644 --- a/include/asm-arm26/bitops.h +++ b/include/asm-arm26/bitops.h @@ -259,6 +259,7 @@ static inline unsigned long __ffs(unsigned long word) */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) /* * ffs: find first bit set. This is defined the same way as diff --git a/include/asm-cris/bitops.h b/include/asm-cris/bitops.h index 1bddb3f3a289..d3eb0f1e4208 100644 --- a/include/asm-cris/bitops.h +++ b/include/asm-cris/bitops.h @@ -240,6 +240,7 @@ static inline int test_bit(int nr, const volatile unsigned long *addr) */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) /* * hweightN - returns the hamming weight of a N-bit word diff --git a/include/asm-frv/bitops.h b/include/asm-frv/bitops.h index b664bd5b6663..02be7b3a8a83 100644 --- a/include/asm-frv/bitops.h +++ b/include/asm-frv/bitops.h @@ -228,6 +228,7 @@ found_middle: \ bit ? 33 - bit : bit; \ }) +#define fls64(x) generic_fls64(x) /* * Every architecture must define this function. 
It's the fastest diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h index ce31b739fd80..0e6d9852008c 100644 --- a/include/asm-generic/bitops.h +++ b/include/asm-generic/bitops.h @@ -56,6 +56,7 @@ extern __inline__ int test_bit(int nr, const unsigned long * addr) */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #ifdef __KERNEL__ diff --git a/include/asm-h8300/bitops.h b/include/asm-h8300/bitops.h index 5036f595f8c9..c0411ec9d651 100644 --- a/include/asm-h8300/bitops.h +++ b/include/asm-h8300/bitops.h @@ -406,5 +406,6 @@ found_middle: #endif /* __KERNEL__ */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #endif /* _H8300_BITOPS_H */ diff --git a/include/asm-i386/bitops.h b/include/asm-i386/bitops.h index ddf1739dc7fd..4807aa1d2e3d 100644 --- a/include/asm-i386/bitops.h +++ b/include/asm-i386/bitops.h @@ -372,6 +372,7 @@ static inline unsigned long ffz(unsigned long word) */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #ifdef __KERNEL__ diff --git a/include/asm-ia64/bitops.h b/include/asm-ia64/bitops.h index 7232528e2d0c..36d0fb95ea89 100644 --- a/include/asm-ia64/bitops.h +++ b/include/asm-ia64/bitops.h @@ -345,6 +345,7 @@ fls (int t) x |= x >> 16; return ia64_popcnt(x); } +#define fls64(x) generic_fls64(x) /* * ffs: find first bit set. This is defined the same way as the libc and compiler builtin diff --git a/include/asm-m32r/bitops.h b/include/asm-m32r/bitops.h index e78443981349..abea2fdd8689 100644 --- a/include/asm-m32r/bitops.h +++ b/include/asm-m32r/bitops.h @@ -465,6 +465,7 @@ static __inline__ unsigned long __ffs(unsigned long word) * fls: find last bit set. 
*/ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #ifdef __KERNEL__ diff --git a/include/asm-m68k/bitops.h b/include/asm-m68k/bitops.h index b1bcf7c66516..13f4c0048463 100644 --- a/include/asm-m68k/bitops.h +++ b/include/asm-m68k/bitops.h @@ -310,6 +310,7 @@ static inline int fls(int x) return 32 - cnt; } +#define fls64(x) generic_fls64(x) /* * Every architecture must define this function. It's the fastest diff --git a/include/asm-m68knommu/bitops.h b/include/asm-m68knommu/bitops.h index c42f88a9b9f9..4058dd086a02 100644 --- a/include/asm-m68knommu/bitops.h +++ b/include/asm-m68knommu/bitops.h @@ -499,5 +499,6 @@ found_middle: * fls: find last bit set. */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #endif /* _M68KNOMMU_BITOPS_H */ diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h index 5496f9064a6a..3b0c8aaf6e8b 100644 --- a/include/asm-mips/bitops.h +++ b/include/asm-mips/bitops.h @@ -695,7 +695,7 @@ static inline unsigned long fls(unsigned long word) return flz(~word) + 1; } - +#define fls64(x) generic_fls64(x) /* * find_next_zero_bit - find the first zero bit in a memory region diff --git a/include/asm-parisc/bitops.h b/include/asm-parisc/bitops.h index 55b98c67fd82..15d8c2b51584 100644 --- a/include/asm-parisc/bitops.h +++ b/include/asm-parisc/bitops.h @@ -263,6 +263,7 @@ static __inline__ int fls(int x) return ret; } +#define fls64(x) generic_fls64(x) /* * hweightN: returns the hamming weight (i.e. the number diff --git a/include/asm-powerpc/bitops.h b/include/asm-powerpc/bitops.h index 5727229b0444..1996eaa8aeae 100644 --- a/include/asm-powerpc/bitops.h +++ b/include/asm-powerpc/bitops.h @@ -310,6 +310,7 @@ static __inline__ int fls(unsigned int x) asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x)); return 32 - lz; } +#define fls64(x) generic_fls64(x) /* * hweightN: returns the hamming weight (i.e. 
the number diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h index b07c578b22ea..61232760cc3b 100644 --- a/include/asm-s390/bitops.h +++ b/include/asm-s390/bitops.h @@ -839,6 +839,7 @@ static inline int sched_find_first_bit(unsigned long *b) * fls: find last bit set. */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) /* * hweightN: returns the hamming weight (i.e. the number diff --git a/include/asm-sh/bitops.h b/include/asm-sh/bitops.h index 5163d1ff2f1b..1c5260860045 100644 --- a/include/asm-sh/bitops.h +++ b/include/asm-sh/bitops.h @@ -470,6 +470,7 @@ found_middle: */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #endif /* __KERNEL__ */ diff --git a/include/asm-sh64/bitops.h b/include/asm-sh64/bitops.h index e1ff63e09227..ce9c3ad45fe0 100644 --- a/include/asm-sh64/bitops.h +++ b/include/asm-sh64/bitops.h @@ -510,6 +510,7 @@ found_middle: #define ffs(x) generic_ffs(x) #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #endif /* __KERNEL__ */ diff --git a/include/asm-sparc/bitops.h b/include/asm-sparc/bitops.h index bfbd795a0a80..41722b5e45ef 100644 --- a/include/asm-sparc/bitops.h +++ b/include/asm-sparc/bitops.h @@ -298,6 +298,7 @@ static inline int ffs(int x) * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) /* * hweightN: returns the hamming weight (i.e. 
the number diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h index 6388b8376c50..6efc0162fb09 100644 --- a/include/asm-sparc64/bitops.h +++ b/include/asm-sparc64/bitops.h @@ -119,6 +119,7 @@ static inline unsigned long __ffs(unsigned long word) */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #ifdef __KERNEL__ diff --git a/include/asm-v850/bitops.h b/include/asm-v850/bitops.h index b91e799763fd..8955d2376ac8 100644 --- a/include/asm-v850/bitops.h +++ b/include/asm-v850/bitops.h @@ -276,6 +276,7 @@ found_middle: #define ffs(x) generic_ffs (x) #define fls(x) generic_fls (x) +#define fls64(x) generic_fls64(x) #define __ffs(x) ffs(x) diff --git a/include/asm-x86_64/bitops.h b/include/asm-x86_64/bitops.h index 05a0d374404b..94b52c8ce97f 100644 --- a/include/asm-x86_64/bitops.h +++ b/include/asm-x86_64/bitops.h @@ -409,6 +409,7 @@ static __inline__ int ffs(int x) /* find last set bit */ #define fls(x) generic_fls(x) +#define fls64(x) generic_fls64(x) #endif /* __KERNEL__ */ diff --git a/include/asm-xtensa/bitops.h b/include/asm-xtensa/bitops.h index e76ee889e21d..0a2065f1a372 100644 --- a/include/asm-xtensa/bitops.h +++ b/include/asm-xtensa/bitops.h @@ -245,6 +245,7 @@ static __inline__ int fls (unsigned int x) { return __cntlz(x); } +#define fls64(x) generic_fls64(x) static __inline__ int find_next_bit(const unsigned long *addr, int size, int offset) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 38c2fb7ebe09..6a2a19f14bb2 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -76,6 +76,15 @@ static __inline__ int generic_fls(int x) */ #include + +static inline int generic_fls64(__u64 x) +{ + __u32 h = x >> 32; + if (h) + return fls(h) + 32; + return fls(x); +} + static __inline__ int get_bitmask_order(unsigned int count) { int order; -- cgit v1.2.3-71-gd317 From 77d76ea310b50a9c8ff15bd290fcb4ed4961adf2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 22 Dec 2005 12:43:42 -0800 Subject: 
[NET]: Small cleanup to socket initialization sock_init can be done as a core_initcall instead of calling it directly in init/main.c Also I removed an out of date #ifdef. Signed-off-by: Andi Kleen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - include/linux/socket.h | 1 - init/main.c | 4 ---- net/socket.c | 10 +++++----- 4 files changed, 5 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 97f6580ce039..971677178e0c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -32,7 +32,6 @@ #define HAVE_ALLOC_SKB /* For the drivers to know */ #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ -#define SLAB_SKB /* Slabified skbuffs */ #define CHECKSUM_NONE 0 #define CHECKSUM_HW 1 diff --git a/include/linux/socket.h b/include/linux/socket.h index 1739c2d5b95b..9f4019156fd8 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -27,7 +27,6 @@ struct __kernel_sockaddr_storage { #include /* __user */ extern int sysctl_somaxconn; -extern void sock_init(void); #ifdef CONFIG_PROC_FS struct seq_file; extern void socket_seq_show(struct seq_file *seq); diff --git a/init/main.c b/init/main.c index 27f97f9b4636..54aaf561cf66 100644 --- a/init/main.c +++ b/init/main.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include @@ -614,9 +613,6 @@ static void __init do_basic_setup(void) sysctl_init(); #endif - /* Networking initialization needs a process context */ - sock_init(); - do_initcalls(); } diff --git a/net/socket.c b/net/socket.c index 3145103cdf54..98be7ef3c086 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2036,7 +2036,7 @@ int sock_unregister(int family) return 0; } -void __init sock_init(void) +static int __init sock_init(void) { /* * Initialize sock SLAB cache. @@ -2044,12 +2044,10 @@ void __init sock_init(void) sk_init(); -#ifdef SLAB_SKB /* * Initialize skbuff SLAB cache */ skb_init(); -#endif /* * Initialize the protocols module. 
@@ -2058,8 +2056,8 @@ void __init sock_init(void) init_inodecache(); register_filesystem(&sock_fs_type); sock_mnt = kern_mount(&sock_fs_type); - /* The real protocol initialization is performed when - * do_initcalls is run. + + /* The real protocol initialization is performed in later initcalls. */ #ifdef CONFIG_NETFILTER @@ -2067,6 +2065,8 @@ void __init sock_init(void) #endif } +core_initcall(sock_init); /* early initcall */ + #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { -- cgit v1.2.3-71-gd317 From 90ddc4f0470427df306f308ad03db6b6b21644b8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Dec 2005 12:49:22 -0800 Subject: [NET]: move struct proto_ops to const I noticed that some of 'struct proto_ops' used in the kernel may share a cache line used by locks or other heavily modified data. (default linker alignment is 32 bytes, and L1_CACHE_LINE is 64 or 128 at least) This patch makes sure a 'struct proto_ops' can be declared as const, so that all cpus can share all parts of it without false sharing. This is not mandatory: a driver can still use a read/write structure if it needs to (and eventually a __read_mostly) I made a global substitution to change all existing occurrences to make them const. This should reduce the possibility of false sharing on SMP, and speed up some socket system calls. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/net.h | 4 ++-- include/net/inet_common.h | 4 ++-- include/net/ipv6.h | 4 ++-- include/net/protocol.h | 2 +- net/appletalk/ddp.c | 4 ++-- net/atm/pvc.c | 2 +- net/atm/svc.c | 2 +- net/ax25/af_ax25.c | 4 ++-- net/bluetooth/bnep/sock.c | 2 +- net/bluetooth/cmtp/sock.c | 2 +- net/bluetooth/hci_sock.c | 2 +- net/bluetooth/hidp/sock.c | 2 +- net/bluetooth/l2cap.c | 4 ++-- net/bluetooth/rfcomm/sock.c | 4 ++-- net/bluetooth/sco.c | 4 ++-- net/dccp/proto.c | 2 +- net/decnet/af_decnet.c | 4 ++-- net/econet/af_econet.c | 4 ++-- net/ipv4/af_inet.c | 6 +++--- net/ipv6/af_inet6.c | 6 +++--- net/ipx/af_ipx.c | 4 ++-- net/irda/af_irda.c | 16 ++++++++-------- net/key/af_key.c | 4 ++-- net/llc/af_llc.c | 4 ++-- net/netlink/af_netlink.c | 4 ++-- net/netrom/af_netrom.c | 4 ++-- net/packet/af_packet.c | 8 ++++---- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 2 +- net/sunrpc/svcsock.c | 2 +- net/unix/af_unix.c | 6 +++--- net/wanrouter/af_wanpipe.c | 4 ++-- net/x25/af_x25.c | 4 ++-- 33 files changed, 66 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index d6a41e6577f6..28195a2d8ff0 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -107,7 +107,7 @@ enum sock_type { struct socket { socket_state state; unsigned long flags; - struct proto_ops *ops; + const struct proto_ops *ops; struct fasync_struct *fasync_list; struct file *file; struct sock *sk; @@ -260,7 +260,7 @@ SOCKCALL_WRAP(name, recvmsg, (struct kiocb *iocb, struct socket *sock, struct ms SOCKCALL_WRAP(name, mmap, (struct file *file, struct socket *sock, struct vm_area_struct *vma), \ (file, sock, vma)) \ \ -static struct proto_ops name##_ops = { \ +static const struct proto_ops name##_ops = { \ .family = fam, \ .owner = THIS_MODULE, \ .release = __lock_##name##_release, \ diff --git a/include/net/inet_common.h b/include/net/inet_common.h index f943306ce5ff..227adcbdfec8 100644 --- a/include/net/inet_common.h +++ 
b/include/net/inet_common.h @@ -1,8 +1,8 @@ #ifndef _INET_COMMON_H #define _INET_COMMON_H -extern struct proto_ops inet_stream_ops; -extern struct proto_ops inet_dgram_ops; +extern const struct proto_ops inet_stream_ops; +extern const struct proto_ops inet_dgram_ops; /* * INET4 prototypes used by INET6 diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e3d5d7bc8837..11a725662c36 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -538,8 +538,8 @@ extern int sysctl_ip6frag_low_thresh; extern int sysctl_ip6frag_time; extern int sysctl_ip6frag_secret_interval; -extern struct proto_ops inet6_stream_ops; -extern struct proto_ops inet6_dgram_ops; +extern const struct proto_ops inet6_stream_ops; +extern const struct proto_ops inet6_dgram_ops; extern int ip6_mc_source(int add, int omode, struct sock *sk, struct group_source_req *pgsr); diff --git a/include/net/protocol.h b/include/net/protocol.h index a29cb29647d0..63f7db99c2a6 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -65,7 +65,7 @@ struct inet_protosw { int protocol; /* This is the L4 protocol number. 
*/ struct proto *prot; - struct proto_ops *ops; + const struct proto_ops *ops; int capability; /* Which (if any) capability do * we need to use this socket diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 7982656b9c83..296f186802ff 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -63,7 +63,7 @@ #include struct datalink_proto *ddp_dl, *aarp_dl; -static struct proto_ops atalk_dgram_ops; +static const struct proto_ops atalk_dgram_ops; /**************************************************************************\ * * @@ -1841,7 +1841,7 @@ static struct net_proto_family atalk_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = { .family = PF_APPLETALK, .owner = THIS_MODULE, .release = atalk_release, diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 2684a92da22b..f2c541774dcd 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -102,7 +102,7 @@ static int pvc_getname(struct socket *sock,struct sockaddr *sockaddr, } -static struct proto_ops pvc_proto_ops = { +static const struct proto_ops pvc_proto_ops = { .family = PF_ATMPVC, .owner = THIS_MODULE, diff --git a/net/atm/svc.c b/net/atm/svc.c index d7b266136bf6..3a180cfd7b48 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -613,7 +613,7 @@ static int svc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return error; } -static struct proto_ops svc_proto_ops = { +static const struct proto_ops svc_proto_ops = { .family = PF_ATMSVC, .owner = THIS_MODULE, diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 1b683f302657..8b5d10aaba05 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -54,7 +54,7 @@ HLIST_HEAD(ax25_list); DEFINE_SPINLOCK(ax25_list_lock); -static struct proto_ops ax25_proto_ops; +static const struct proto_ops ax25_proto_ops; static void ax25_free_sock(struct sock *sk) { @@ -1944,7 +1944,7 @@ static struct net_proto_family ax25_family_ops = { .owner = 
THIS_MODULE, }; -static struct proto_ops ax25_proto_ops = { +static const struct proto_ops ax25_proto_ops = { .family = PF_AX25, .owner = THIS_MODULE, .release = ax25_release, diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 9778c6acd53b..ccbaf69afc5b 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -146,7 +146,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long return 0; } -static struct proto_ops bnep_sock_ops = { +static const struct proto_ops bnep_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = bnep_sock_release, diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index beb045bf5714..5e22343b6090 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -137,7 +137,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long return -EINVAL; } -static struct proto_ops cmtp_sock_ops = { +static const struct proto_ops cmtp_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = cmtp_sock_release, diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 1d6d0a15c099..84e6c93a044a 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -575,7 +575,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, char return 0; } -static struct proto_ops hci_sock_ops = { +static const struct proto_ops hci_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = hci_sock_release, diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index f8986f881431..8f8dd931b294 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -143,7 +143,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long return -EINVAL; } -static struct proto_ops hidp_sock_ops = { +static const struct proto_ops hidp_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = hidp_sock_release, diff --git 
a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 95f33cc7a24e..7f0781e4326f 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -57,7 +57,7 @@ #define VERSION "2.8" -static struct proto_ops l2cap_sock_ops; +static const struct proto_ops l2cap_sock_ops; static struct bt_sock_list l2cap_sk_list = { .lock = RW_LOCK_UNLOCKED @@ -2161,7 +2161,7 @@ static ssize_t l2cap_sysfs_show(struct class *dev, char *buf) static CLASS_ATTR(l2cap, S_IRUGO, l2cap_sysfs_show, NULL); -static struct proto_ops l2cap_sock_ops = { +static const struct proto_ops l2cap_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = l2cap_sock_release, diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 6c34261b232e..757d2dd3b02f 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -58,7 +58,7 @@ #define BT_DBG(D...) #endif -static struct proto_ops rfcomm_sock_ops; +static const struct proto_ops rfcomm_sock_ops; static struct bt_sock_list rfcomm_sk_list = { .lock = RW_LOCK_UNLOCKED @@ -907,7 +907,7 @@ static ssize_t rfcomm_sock_sysfs_show(struct class *dev, char *buf) static CLASS_ATTR(rfcomm, S_IRUGO, rfcomm_sock_sysfs_show, NULL); -static struct proto_ops rfcomm_sock_ops = { +static const struct proto_ops rfcomm_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = rfcomm_sock_release, diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 648181430699..6b61323ce23c 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -56,7 +56,7 @@ #define VERSION "0.5" -static struct proto_ops sco_sock_ops; +static const struct proto_ops sco_sock_ops; static struct bt_sock_list sco_sk_list = { .lock = RW_LOCK_UNLOCKED @@ -914,7 +914,7 @@ static ssize_t sco_sysfs_show(struct class *dev, char *buf) static CLASS_ATTR(sco, S_IRUGO, sco_sysfs_show, NULL); -static struct proto_ops sco_sock_ops = { +static const struct proto_ops sco_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = 
sco_sock_release, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 40a4c6899051..e4e629ed9bf7 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -680,7 +680,7 @@ void dccp_shutdown(struct sock *sk, int how) EXPORT_SYMBOL_GPL(dccp_shutdown); -static struct proto_ops inet_dccp_ops = { +static const struct proto_ops inet_dccp_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index d402e9020c68..65e3baed0251 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -149,7 +149,7 @@ static void dn_keepalive(struct sock *sk); #define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1) -static struct proto_ops dn_proto_ops; +static const struct proto_ops dn_proto_ops; static DEFINE_RWLOCK(dn_hash_lock); static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; static struct hlist_head dn_wild_sk; @@ -2342,7 +2342,7 @@ static struct net_proto_family dn_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops dn_proto_ops = { +static const struct proto_ops dn_proto_ops = { .family = AF_DECnet, .owner = THIS_MODULE, .release = dn_release, diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 34fdac51df96..ff58f49c8b4a 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -45,7 +45,7 @@ #include #include -static struct proto_ops econet_ops; +static const struct proto_ops econet_ops; static struct hlist_head econet_sklist; static DEFINE_RWLOCK(econet_lock); @@ -698,7 +698,7 @@ static struct net_proto_family econet_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops SOCKOPS_WRAPPED(econet_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(econet_ops) = { .family = PF_ECONET, .owner = THIS_MODULE, .release = econet_release, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 617e858beff1..4ed8a814c6cb 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -785,7 +785,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, 
unsigned long arg) return err; } -struct proto_ops inet_stream_ops = { +const struct proto_ops inet_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, @@ -806,7 +806,7 @@ struct proto_ops inet_stream_ops = { .sendpage = tcp_sendpage }; -struct proto_ops inet_dgram_ops = { +const struct proto_ops inet_dgram_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, @@ -831,7 +831,7 @@ struct proto_ops inet_dgram_ops = { * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without * udp_poll */ -static struct proto_ops inet_sockraw_ops = { +static const struct proto_ops inet_sockraw_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 70a510ff31ee..7c9f19269f21 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -462,7 +462,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return(0); } -struct proto_ops inet6_stream_ops = { +const struct proto_ops inet6_stream_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, @@ -483,7 +483,7 @@ struct proto_ops inet6_stream_ops = { .sendpage = tcp_sendpage }; -struct proto_ops inet6_dgram_ops = { +const struct proto_ops inet6_dgram_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, @@ -511,7 +511,7 @@ static struct net_proto_family inet6_family_ops = { }; /* Same as inet6_dgram_ops, sans udp_poll. 
*/ -static struct proto_ops inet6_sockraw_ops = { +static const struct proto_ops inet6_sockraw_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 34b3bb868409..6c464c11bb09 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -75,7 +75,7 @@ static struct datalink_proto *pEII_datalink; static struct datalink_proto *p8023_datalink; static struct datalink_proto *pSNAP_datalink; -static struct proto_ops ipx_dgram_ops; +static const struct proto_ops ipx_dgram_ops; LIST_HEAD(ipx_interfaces); DEFINE_SPINLOCK(ipx_interfaces_lock); @@ -1901,7 +1901,7 @@ static struct net_proto_family ipx_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { .family = PF_IPX, .owner = THIS_MODULE, .release = ipx_release, diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index f121f7de2032..e57683d424f7 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -62,12 +62,12 @@ static int irda_create(struct socket *sock, int protocol); -static struct proto_ops irda_stream_ops; -static struct proto_ops irda_seqpacket_ops; -static struct proto_ops irda_dgram_ops; +static const struct proto_ops irda_stream_ops; +static const struct proto_ops irda_seqpacket_ops; +static const struct proto_ops irda_dgram_ops; #ifdef CONFIG_IRDA_ULTRA -static struct proto_ops irda_ultra_ops; +static const struct proto_ops irda_ultra_ops; #define ULTRA_MAX_DATA 382 #endif /* CONFIG_IRDA_ULTRA */ @@ -2464,7 +2464,7 @@ static struct net_proto_family irda_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, @@ -2485,7 +2485,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { .sendpage = sock_no_sendpage, }; -static struct 
proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, @@ -2506,7 +2506,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { .sendpage = sock_no_sendpage, }; -static struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, @@ -2528,7 +2528,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { }; #ifdef CONFIG_IRDA_ULTRA -static struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, diff --git a/net/key/af_key.c b/net/key/af_key.c index d32f7791f1e4..52efd04cbedb 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -113,7 +113,7 @@ static __inline__ void pfkey_unlock_table(void) } -static struct proto_ops pfkey_ops; +static const struct proto_ops pfkey_ops; static void pfkey_insert(struct sock *sk) { @@ -3127,7 +3127,7 @@ out: return err; } -static struct proto_ops pfkey_ops = { +static const struct proto_ops pfkey_ops = { .family = PF_KEY, .owner = THIS_MODULE, /* Operations that make no sense on pfkey sockets. 
*/ diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index b6d3df5c911c..9cf65f9d8902 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -36,7 +36,7 @@ static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START; static u16 llc_ui_sap_link_no_max[256]; static struct sockaddr_llc llc_ui_addrnull; -static struct proto_ops llc_ui_ops; +static const struct proto_ops llc_ui_ops; static int llc_ui_wait_for_conn(struct sock *sk, long timeout); static int llc_ui_wait_for_disc(struct sock *sk, long timeout); @@ -1098,7 +1098,7 @@ static struct net_proto_family llc_ui_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops llc_ui_ops = { +static const struct proto_ops llc_ui_ops = { .family = PF_LLC, .owner = THIS_MODULE, .release = llc_ui_release, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 96020d7087e8..7849cac14d3a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -293,7 +293,7 @@ static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len) return 0; } -static struct proto_ops netlink_ops; +static const struct proto_ops netlink_ops; static int netlink_insert(struct sock *sk, u32 pid) { @@ -1656,7 +1656,7 @@ int netlink_unregister_notifier(struct notifier_block *nb) return notifier_chain_unregister(&netlink_chain, nb); } -static struct proto_ops netlink_ops = { +static const struct proto_ops netlink_ops = { .family = PF_NETLINK, .owner = THIS_MODULE, .release = netlink_release, diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index e5d82d711cae..05b653c05971 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -63,7 +63,7 @@ static unsigned short circuit = 0x101; static HLIST_HEAD(nr_list); static DEFINE_SPINLOCK(nr_list_lock); -static struct proto_ops nr_proto_ops; +static const struct proto_ops nr_proto_ops; /* * Socket removal during an interrupt is now safe. 
@@ -1337,7 +1337,7 @@ static struct net_proto_family nr_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops nr_proto_ops = { +static const struct proto_ops nr_proto_ops = { .family = PF_NETROM, .owner = THIS_MODULE, .release = nr_release, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3e2462760413..deda6fdb1e53 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -251,10 +251,10 @@ static void packet_sock_destruct(struct sock *sk) } -static struct proto_ops packet_ops; +static const struct proto_ops packet_ops; #ifdef CONFIG_SOCK_PACKET -static struct proto_ops packet_ops_spkt; +static const struct proto_ops packet_ops_spkt; static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { @@ -1784,7 +1784,7 @@ out: #ifdef CONFIG_SOCK_PACKET -static struct proto_ops packet_ops_spkt = { +static const struct proto_ops packet_ops_spkt = { .family = PF_PACKET, .owner = THIS_MODULE, .release = packet_release, @@ -1806,7 +1806,7 @@ static struct proto_ops packet_ops_spkt = { }; #endif -static struct proto_ops packet_ops = { +static const struct proto_ops packet_ops = { .family = PF_PACKET, .owner = THIS_MODULE, .release = packet_release, diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index fa3be2b8fb5f..15c05165c905 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -866,7 +866,7 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt, return 2; } -static struct proto_ops inet6_seqpacket_ops = { +static const struct proto_ops inet6_seqpacket_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f775d78aa59d..d1b0747a5b9d 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -829,7 +829,7 @@ static struct notifier_block sctp_inetaddr_notifier = { }; /* Socket operations. 
*/ -static struct proto_ops inet_seqpacket_ops = { +static const struct proto_ops inet_seqpacket_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, /* Needs to be wrapped... */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c6a51911e71e..d68eba481291 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -758,7 +758,7 @@ svc_tcp_accept(struct svc_sock *svsk) struct svc_serv *serv = svsk->sk_server; struct socket *sock = svsk->sk_sock; struct socket *newsock; - struct proto_ops *ops; + const struct proto_ops *ops; struct svc_sock *newsvsk; int err, slen; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 04e850e04e3d..7d3fe6aebcdb 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -473,7 +473,7 @@ static int unix_dgram_connect(struct socket *, struct sockaddr *, static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t); -static struct proto_ops unix_stream_ops = { +static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, @@ -494,7 +494,7 @@ static struct proto_ops unix_stream_ops = { .sendpage = sock_no_sendpage, }; -static struct proto_ops unix_dgram_ops = { +static const struct proto_ops unix_dgram_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, @@ -515,7 +515,7 @@ static struct proto_ops unix_dgram_ops = { .sendpage = sock_no_sendpage, }; -static struct proto_ops unix_seqpacket_ops = { +static const struct proto_ops unix_seqpacket_ops = { .family = PF_UNIX, .owner = THIS_MODULE, .release = unix_release, diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c index 59fec59b2132..67948bf22dc4 100644 --- a/net/wanrouter/af_wanpipe.c +++ b/net/wanrouter/af_wanpipe.c @@ -181,7 +181,7 @@ struct wanpipe_opt #endif static int sk_count; -extern struct proto_ops wanpipe_ops; +extern const struct proto_ops wanpipe_ops; static unsigned long find_free_critical; static void 
wanpipe_unlink_driver(struct sock *sk); @@ -2546,7 +2546,7 @@ static int wanpipe_connect(struct socket *sock, struct sockaddr *uaddr, int addr return 0; } -struct proto_ops wanpipe_ops = { +const struct proto_ops wanpipe_ops = { .family = PF_WANPIPE, .owner = THIS_MODULE, .release = wanpipe_release, diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 020d73cc8414..ca8b3b0b920d 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -64,7 +64,7 @@ int sysctl_x25_ack_holdback_timeout = X25_DEFAULT_T2; HLIST_HEAD(x25_list); DEFINE_RWLOCK(x25_list_lock); -static struct proto_ops x25_proto_ops; +static const struct proto_ops x25_proto_ops; static struct x25_address null_x25_address = {" "}; @@ -1391,7 +1391,7 @@ static struct net_proto_family x25_family_ops = { .owner = THIS_MODULE, }; -static struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { +static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { .family = AF_X25, .owner = THIS_MODULE, .release = x25_release, -- cgit v1.2.3-71-gd317 From 14c850212ed8f8cbb5972ad6b8812e08a0bc901c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 27 Dec 2005 02:43:12 -0200 Subject: [INET_SOCK]: Move struct inet_sock & helper functions to net/inet_sock.h To help in reducing the number of include dependencies, several files were touched as they were getting needed headers indirectly for stuff they use. Thanks also to Alan Menegotto for pointing out that net/dccp/proto.c had linux/dccp.h include twice. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 + drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 + drivers/net/ns83820.c | 1 + drivers/net/sk98lin/skge.c | 1 + drivers/net/skge.c | 1 + drivers/net/tg3.c | 1 + fs/9p/trans_sock.c | 1 + fs/nfs/callback.c | 3 + include/linux/dccp.h | 3 +- include/linux/ip.h | 126 +--------------- include/linux/ipv6.h | 7 +- include/linux/udp.h | 6 +- include/net/atmclip.h | 2 +- include/net/dst.h | 1 + include/net/icmp.h | 9 +- include/net/ieee80211_crypt.h | 9 +- include/net/inet_connection_sock.h | 3 +- include/net/inet_ecn.h | 2 + include/net/inet_hashtables.h | 21 +-- include/net/inet_sock.h | 193 +++++++++++++++++++++++++ include/net/inet_timewait_sock.h | 2 +- include/net/ip.h | 17 ++- include/net/ip_fib.h | 2 + include/net/ip_vs.h | 12 +- include/net/ipv6.h | 3 + include/net/ndisc.h | 17 ++- include/net/neighbour.h | 2 +- include/net/pkt_act.h | 1 - include/net/raw.h | 2 + include/net/udp.h | 4 +- include/net/xfrm.h | 3 +- net/bridge/br_netfilter.c | 4 + net/bridge/netfilter/ebt_log.c | 1 + net/core/netpoll.c | 1 + net/dccp/ccid.h | 2 + net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 1 + net/dccp/output.c | 1 + net/dccp/proto.c | 3 +- net/econet/af_econet.c | 1 + net/ipv4/af_inet.c | 1 + net/ipv4/ah4.c | 1 + net/ipv4/arp.c | 1 + net/ipv4/devinet.c | 1 + net/ipv4/esp4.c | 1 + net/ipv4/fib_frontend.c | 1 + net/ipv4/fib_hash.c | 1 + net/ipv4/fib_rules.c | 1 + net/ipv4/fib_semantics.c | 2 + net/ipv4/icmp.c | 1 + net/ipv4/igmp.c | 2 + net/ipv4/ip_input.c | 1 + net/ipv4/ip_options.c | 1 + net/ipv4/ip_sockglue.c | 1 + net/ipv4/ipcomp.c | 1 + net/ipv4/ipconfig.c | 2 + net/ipv4/ipmr.c | 1 + net/ipv4/ipvs/ip_vs_conn.c | 2 + net/ipv4/ipvs/ip_vs_ctl.c | 1 + net/ipv4/ipvs/ip_vs_dh.c | 2 + net/ipv4/ipvs/ip_vs_est.c | 3 + net/ipv4/ipvs/ip_vs_lblc.c | 2 + net/ipv4/ipvs/ip_vs_lblcr.c | 2 + net/ipv4/ipvs/ip_vs_proto_ah.c | 2 + net/ipv4/ipvs/ip_vs_proto_esp.c | 2 + net/ipv4/ipvs/ip_vs_proto_udp.c | 3 + net/ipv4/ipvs/ip_vs_sh.c | 2 + 
net/ipv4/ipvs/ip_vs_sync.c | 2 + net/ipv4/netfilter/ip_conntrack_amanda.c | 2 + net/ipv4/netfilter/ip_conntrack_proto_gre.c | 1 + net/ipv4/netfilter/ip_conntrack_proto_udp.c | 1 + net/ipv4/netfilter/ip_conntrack_standalone.c | 1 + net/ipv4/netfilter/ip_nat_snmp_basic.c | 2 + net/ipv4/netfilter/ipt_MASQUERADE.c | 2 + net/ipv4/netfilter/ipt_physdev.c | 1 + net/ipv4/proc.c | 1 + net/ipv4/sysctl_net_ipv4.c | 1 + net/ipv4/udp.c | 1 + net/ipv6/ah6.c | 1 + net/ipv6/esp6.c | 1 + net/ipv6/ipcomp6.c | 1 + net/ipv6/netfilter/ip6_tables.c | 1 + net/ipv6/netfilter/ip6t_LOG.c | 1 + net/ipv6/netfilter/ip6t_ah.c | 1 + net/ipv6/netfilter/ip6t_esp.c | 1 + net/sched/sch_teql.c | 1 + net/sctp/protocol.c | 1 + 87 files changed, 355 insertions(+), 184 deletions(-) create mode 100644 include/net/inet_sock.h (limited to 'include/linux') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 475d98fa9e26..780009c7eaa6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -47,6 +47,8 @@ #include #include +#include + MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index ef3ee035bbc8..ed0c2ead8bc1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -43,6 +43,8 @@ #include #include +#include + #include "ipoib.h" #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index f857ae94d261..b0c3b6ab6263 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -115,6 +115,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c index ae7343934758..e1a2d52cc1fe 100644 --- a/drivers/net/sk98lin/skge.c +++ b/drivers/net/sk98lin/skge.c @@ -107,6 
+107,7 @@ #include "h/skversion.h" +#include #include #include #include diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 00d683063c01..d8cc3aea032a 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -25,6 +25,7 @@ */ #include +#include #include #include #include diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 2fc9893d69e1..59d916ccc810 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c index a93c2bf94c33..6a9a75d40f73 100644 --- a/fs/9p/trans_sock.c +++ b/fs/9p/trans_sock.c @@ -26,6 +26,7 @@ */ #include +#include #include #include #include diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index f2ca782aba33..30cae3602867 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -14,6 +14,9 @@ #include #include #include + +#include + #include "nfs4_fs.h" #include "callback.h" diff --git a/include/linux/dccp.h b/include/linux/dccp.h index d0bdb499cf8d..088529f54965 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -192,10 +192,9 @@ enum { #include #include +#include #include -#include #include -#include enum dccp_state { DCCP_OPEN = TCP_ESTABLISHED, diff --git a/include/linux/ip.h b/include/linux/ip.h index 6ccc596c19c8..9e2eb9a602eb 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -16,6 +16,7 @@ */ #ifndef _LINUX_IP_H #define _LINUX_IP_H +#include #include #define IPTOS_TOS_MASK 0x1E @@ -78,131 +79,6 @@ #define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ #define IPOPT_TS_PRESPEC 3 /* specified modules only */ -#ifdef __KERNEL__ -#include -#include -#include -#include -#include -#include - -struct ip_options { - __u32 faddr; /* Saved first hop address */ - unsigned char optlen; - unsigned char srr; - unsigned char rr; - unsigned char ts; - unsigned char is_setbyuser:1, /* Set by setsockopt? 
*/ - is_data:1, /* Options in __data, rather than skb */ - is_strictroute:1, /* Strict source route */ - srr_is_hit:1, /* Packet destination addr was our one */ - is_changed:1, /* IP checksum more not valid */ - rr_needaddr:1, /* Need to record addr of outgoing dev */ - ts_needtime:1, /* Need to record timestamp */ - ts_needaddr:1; /* Need to record addr of outgoing dev */ - unsigned char router_alert; - unsigned char __pad1; - unsigned char __pad2; - unsigned char __data[0]; -}; - -#define optlength(opt) (sizeof(struct ip_options) + opt->optlen) - -struct inet_request_sock { - struct request_sock req; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - u16 inet6_rsk_offset; - /* 2 bytes hole, try to pack */ -#endif - u32 loc_addr; - u32 rmt_addr; - u16 rmt_port; - u16 snd_wscale : 4, - rcv_wscale : 4, - tstamp_ok : 1, - sack_ok : 1, - wscale_ok : 1, - ecn_ok : 1, - acked : 1; - struct ip_options *opt; -}; - -static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) -{ - return (struct inet_request_sock *)sk; -} - -struct ipv6_pinfo; - -struct inet_sock { - /* sk and pinet6 has to be the first two members of inet_sock */ - struct sock sk; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct ipv6_pinfo *pinet6; -#endif - /* Socket demultiplex comparisons on incoming packets. */ - __u32 daddr; /* Foreign IPv4 addr */ - __u32 rcv_saddr; /* Bound local IPv4 addr */ - __u16 dport; /* Destination port */ - __u16 num; /* Local port */ - __u32 saddr; /* Sending source */ - __s16 uc_ttl; /* Unicast TTL */ - __u16 cmsg_flags; - struct ip_options *opt; - __u16 sport; /* Source port */ - __u16 id; /* ID counter for DF pkts */ - __u8 tos; /* TOS */ - __u8 mc_ttl; /* Multicasting TTL */ - __u8 pmtudisc; - unsigned recverr : 1, - is_icsk : 1, /* inet_connection_sock? 
*/ - freebind : 1, - hdrincl : 1, - mc_loop : 1; - int mc_index; /* Multicast device index */ - __u32 mc_addr; - struct ip_mc_socklist *mc_list; /* Group array */ - /* - * Following members are used to retain the infomation to build - * an ip header on each ip fragmentation while the socket is corked. - */ - struct { - unsigned int flags; - unsigned int fragsize; - struct ip_options *opt; - struct rtable *rt; - int length; /* Total length of all frames */ - u32 addr; - struct flowi fl; - } cork; -}; - -#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ -#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */ - -static inline struct inet_sock *inet_sk(const struct sock *sk) -{ - return (struct inet_sock *)sk; -} - -static inline void __inet_sk_copy_descendant(struct sock *sk_to, - const struct sock *sk_from, - const int ancestor_size) -{ - memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1, - sk_from->sk_prot->obj_size - ancestor_size); -} -#if !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) -static inline void inet_sk_copy_descendant(struct sock *sk_to, - const struct sock *sk_from) -{ - __inet_sk_copy_descendant(sk_to, sk_from, sizeof(struct inet_sock)); -} -#endif -#endif - -extern int inet_sk_rebuild_header(struct sock *sk); - struct iphdr { #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 ihl:4, diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a0d04891fe12..93bbed5c6cf4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -171,12 +171,13 @@ enum { }; #ifdef __KERNEL__ -#include /* struct sockaddr_in6 */ #include -#include /* struct ipv6_mc_socklist */ #include #include +#include /* struct ipv6_mc_socklist */ +#include + /* This structure contains results of exthdrs parsing as offsets from skb->nh. 
@@ -346,8 +347,6 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, #define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) #define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk)) -#include - struct inet6_timewait_sock { struct in6_addr tw_v6_daddr; struct in6_addr tw_v6_rcv_saddr; diff --git a/include/linux/udp.h b/include/linux/udp.h index b60e0b4a25c4..85a55658831c 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -35,10 +35,10 @@ struct udphdr { #define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-06 */ #ifdef __KERNEL__ - #include -#include -#include +#include + +#include struct udp_sock { /* inet_sock has to be the first member */ diff --git a/include/net/atmclip.h b/include/net/atmclip.h index 47048b1d179a..90fcc98e676f 100644 --- a/include/net/atmclip.h +++ b/include/net/atmclip.h @@ -7,7 +7,6 @@ #define _ATMCLIP_H #include -#include #include #include #include @@ -18,6 +17,7 @@ #define CLIP_VCC(vcc) ((struct clip_vcc *) ((vcc)->user_back)) #define NEIGH2ENTRY(neigh) ((struct atmarp_entry *) (neigh)->primary_key) +struct sk_buff; struct clip_vcc { struct atm_vcc *vcc; /* VCC descriptor */ diff --git a/include/net/dst.h b/include/net/dst.h index 6c196a5baf24..bee8b84d329d 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -9,6 +9,7 @@ #define _NET_DST_H #include +#include #include #include #include diff --git a/include/net/icmp.h b/include/net/icmp.h index 6cdebeee5f96..e7c3f20fbafc 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -20,12 +20,9 @@ #include #include -#include -#include -#include +#include #include -#include struct icmp_err { int errno; @@ -38,6 +35,10 @@ DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics); #define ICMP_INC_STATS_BH(field) SNMP_INC_STATS_BH(icmp_statistics, field) #define ICMP_INC_STATS_USER(field) SNMP_INC_STATS_USER(icmp_statistics, field) +struct dst_entry; +struct net_proto_family; +struct sk_buff; + extern void icmp_send(struct sk_buff *skb_in, int 
type, int code, u32 info); extern int icmp_rcv(struct sk_buff *skb); extern int icmp_ioctl(struct sock *sk, int cmd, unsigned long arg); diff --git a/include/net/ieee80211_crypt.h b/include/net/ieee80211_crypt.h index 225fc751d464..03b766afdc39 100644 --- a/include/net/ieee80211_crypt.h +++ b/include/net/ieee80211_crypt.h @@ -23,12 +23,17 @@ #ifndef IEEE80211_CRYPT_H #define IEEE80211_CRYPT_H -#include +#include +#include +#include enum { IEEE80211_CRYPTO_TKIP_COUNTERMEASURES = (1 << 0), }; +struct sk_buff; +struct module; + struct ieee80211_crypto_ops { const char *name; struct list_head list; @@ -87,6 +92,8 @@ struct ieee80211_crypt_data { atomic_t refcnt; }; +struct ieee80211_device; + int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops); int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops); struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 91888967d3e3..50234fa56a68 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -16,9 +16,10 @@ #define _INET_CONNECTION_SOCK_H #include -#include #include #include + +#include #include #define INET_CSK_DEBUG 1 diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index b0c47e2eccf1..d599c6bfbb86 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -3,6 +3,8 @@ #include #include + +#include #include enum { diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index c83baa79f66e..135d80fd658e 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -128,26 +129,6 @@ struct inet_hashinfo { kmem_cache_t *bind_bucket_cachep; }; -static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, - const __u32 faddr, const __u16 fport) -{ - unsigned int h = (laddr ^ lport) ^ (faddr ^ fport); - h ^= h 
>> 16; - h ^= h >> 8; - return h; -} - -static inline int inet_sk_ehashfn(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - const __u32 laddr = inet->rcv_saddr; - const __u16 lport = inet->num; - const __u32 faddr = inet->daddr; - const __u16 fport = inet->dport; - - return inet_ehashfn(laddr, lport, faddr, fport); -} - static inline struct inet_ehash_bucket *inet_ehash_bucket( struct inet_hashinfo *hashinfo, unsigned int hash) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h new file mode 100644 index 000000000000..883eb529ef8e --- /dev/null +++ b/include/net/inet_sock.h @@ -0,0 +1,193 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for inet_sock + * + * Authors: Many, reorganised here by + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _INET_SOCK_H +#define _INET_SOCK_H + +#include + +#include +#include + +#include +#include +#include + +/** struct ip_options - IP Options + * + * @faddr - Saved first hop address + * @is_setbyuser - Set by setsockopt? 
+ * @is_data - Options in __data, rather than skb + * @is_strictroute - Strict source route + * @srr_is_hit - Packet destination addr was our one + * @is_changed - IP checksum more not valid + * @rr_needaddr - Need to record addr of outgoing dev + * @ts_needtime - Need to record timestamp + * @ts_needaddr - Need to record addr of outgoing dev + */ +struct ip_options { + __u32 faddr; + unsigned char optlen; + unsigned char srr; + unsigned char rr; + unsigned char ts; + unsigned char is_setbyuser:1, + is_data:1, + is_strictroute:1, + srr_is_hit:1, + is_changed:1, + rr_needaddr:1, + ts_needtime:1, + ts_needaddr:1; + unsigned char router_alert; + unsigned char __pad1; + unsigned char __pad2; + unsigned char __data[0]; +}; + +#define optlength(opt) (sizeof(struct ip_options) + opt->optlen) + +struct inet_request_sock { + struct request_sock req; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + u16 inet6_rsk_offset; + /* 2 bytes hole, try to pack */ +#endif + u32 loc_addr; + u32 rmt_addr; + u16 rmt_port; + u16 snd_wscale : 4, + rcv_wscale : 4, + tstamp_ok : 1, + sack_ok : 1, + wscale_ok : 1, + ecn_ok : 1, + acked : 1; + struct ip_options *opt; +}; + +static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) +{ + return (struct inet_request_sock *)sk; +} + +struct ip_mc_socklist; +struct ipv6_pinfo; +struct rtable; + +/** struct inet_sock - representation of INET sockets + * + * @sk - ancestor class + * @pinet6 - pointer to IPv6 control block + * @daddr - Foreign IPv4 addr + * @rcv_saddr - Bound local IPv4 addr + * @dport - Destination port + * @num - Local port + * @saddr - Sending source + * @uc_ttl - Unicast TTL + * @sport - Source port + * @id - ID counter for DF pkts + * @tos - TOS + * @mc_ttl - Multicasting TTL + * @is_icsk - is this an inet_connection_sock? 
+ * @mc_index - Multicast device index + * @mc_list - Group array + * @cork - info to build ip hdr on each ip frag while socket is corked + */ +struct inet_sock { + /* sk and pinet6 has to be the first two members of inet_sock */ + struct sock sk; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct ipv6_pinfo *pinet6; +#endif + /* Socket demultiplex comparisons on incoming packets. */ + __u32 daddr; + __u32 rcv_saddr; + __u16 dport; + __u16 num; + __u32 saddr; + __s16 uc_ttl; + __u16 cmsg_flags; + struct ip_options *opt; + __u16 sport; + __u16 id; + __u8 tos; + __u8 mc_ttl; + __u8 pmtudisc; + __u8 recverr:1, + is_icsk:1, + freebind:1, + hdrincl:1, + mc_loop:1; + int mc_index; + __u32 mc_addr; + struct ip_mc_socklist *mc_list; + struct { + unsigned int flags; + unsigned int fragsize; + struct ip_options *opt; + struct rtable *rt; + int length; /* Total length of all frames */ + u32 addr; + struct flowi fl; + } cork; +}; + +#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ +#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */ + +static inline struct inet_sock *inet_sk(const struct sock *sk) +{ + return (struct inet_sock *)sk; +} + +static inline void __inet_sk_copy_descendant(struct sock *sk_to, + const struct sock *sk_from, + const int ancestor_size) +{ + memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1, + sk_from->sk_prot->obj_size - ancestor_size); +} +#if !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) +static inline void inet_sk_copy_descendant(struct sock *sk_to, + const struct sock *sk_from) +{ + __inet_sk_copy_descendant(sk_to, sk_from, sizeof(struct inet_sock)); +} +#endif + +extern int inet_sk_rebuild_header(struct sock *sk); + +static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, + const __u32 faddr, const __u16 fport) +{ + unsigned int h = (laddr ^ lport) ^ (faddr ^ fport); + h ^= h >> 16; + h ^= h >> 8; + return h; +} + +static inline int inet_sk_ehashfn(const struct sock *sk) +{ + 
const struct inet_sock *inet = inet_sk(sk); + const __u32 laddr = inet->rcv_saddr; + const __u16 lport = inet->num; + const __u32 faddr = inet->daddr; + const __u16 fport = inet->dport; + + return inet_ehashfn(laddr, lport, faddr, fport); +} + +#endif /* _INET_SOCK_H */ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index e396a65473d7..1da294c47522 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -17,13 +17,13 @@ #include -#include #include #include #include #include #include +#include #include #include #include diff --git a/include/net/ip.h b/include/net/ip.h index 4d6294ba038e..f7e7fd728b67 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -24,14 +24,10 @@ #include #include -#include #include #include -#include -#include -#include -#include -#include + +#include #include struct sock; @@ -75,6 +71,13 @@ extern rwlock_t ip_ra_lock; #define IP_FRAG_TIME (30 * HZ) /* fragment lifetime */ +struct msghdr; +struct net_device; +struct packet_type; +struct rtable; +struct sk_buff; +struct sockaddr; + extern void ip_mc_dropsocket(struct sock *); extern void ip_mc_dropdevice(struct net_device *dev); extern int igmp_mc_proc_init(void); @@ -184,6 +187,8 @@ extern int sysctl_ip_dynaddr; extern void ipfrag_init(void); #ifdef CONFIG_INET +#include + /* The function in 2.2 was invalid, producing wrong result for * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. 
--ANK(000625) */ static inline diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 14de4ebd1211..e000fa2cd5f6 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -238,6 +238,8 @@ extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, struct net_device *dev, u32 *spec_dst, u32 *itag); extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res); +struct rtentry; + /* Exported by fib_semantics.c */ extern int ip_fib_check_default(u32 gw, struct net_device *dev); extern int fib_sync_down(u32 local, struct net_device *dev, int force); diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3b5559a023a4..7d2674fde19a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -251,16 +251,15 @@ struct ip_vs_daemon_user { #include #include /* for struct list_head */ #include /* for struct rwlock_t */ -#include /* for struct sk_buff */ -#include /* for struct iphdr */ #include /* for struct atomic_t */ -#include /* for struct neighbour */ -#include /* for struct dst_entry */ -#include #include +#include +#include #ifdef CONFIG_IP_VS_DEBUG +#include + extern int ip_vs_get_debug_level(void); #define IP_VS_DBG(level, msg...) 
\ do { \ @@ -429,8 +428,11 @@ struct ip_vs_stats spinlock_t lock; /* spin lock */ }; +struct dst_entry; +struct iphdr; struct ip_vs_conn; struct ip_vs_app; +struct sk_buff; struct ip_vs_protocol { struct ip_vs_protocol *next; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 11a725662c36..860bbac4c4ee 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -541,6 +541,9 @@ extern int sysctl_ip6frag_secret_interval; extern const struct proto_ops inet6_stream_ops; extern const struct proto_ops inet6_dgram_ops; +struct group_source_req; +struct group_filter; + extern int ip6_mc_source(int add, int omode, struct sock *sk, struct group_source_req *pgsr); extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); diff --git a/include/net/ndisc.h b/include/net/ndisc.h index f85d6e4b7442..bbac87eeb422 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -35,11 +35,20 @@ enum { #ifdef __KERNEL__ -#include -#include +#include +#include #include +#include +#include + #include -#include + +struct ctl_table; +struct file; +struct inet6_dev; +struct net_device; +struct net_proto_family; +struct sk_buff; extern struct neigh_table nd_tbl; @@ -108,7 +117,7 @@ extern int igmp6_event_report(struct sk_buff *skb); extern void igmp6_cleanup(void); #ifdef CONFIG_SYSCTL -extern int ndisc_ifinfo_sysctl_change(ctl_table *ctl, +extern int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 34c07731933d..6fa9ae190741 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -49,8 +49,8 @@ #ifdef __KERNEL__ #include -#include #include +#include #include #include diff --git a/include/net/pkt_act.h b/include/net/pkt_act.h index bd08964b72c0..b225d8472b7e 100644 --- a/include/net/pkt_act.h +++ b/include/net/pkt_act.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/include/net/raw.h 
b/include/net/raw.h index f47917469b12..e67b28a0248c 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -19,6 +19,8 @@ #include +#include + extern struct proto raw_prot; extern void raw_err(struct sock *, struct sk_buff *, u32 info); diff --git a/include/net/udp.h b/include/net/udp.h index 107b9d791a1f..766fba1369ce 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -22,9 +22,8 @@ #ifndef _UDP_H #define _UDP_H -#include -#include #include +#include #include #include #include @@ -62,6 +61,7 @@ static inline int udp_lport_inuse(u16 num) extern struct proto udp_prot; +struct sk_buff; extern void udp_err(struct sk_buff *, u32); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 487abca3ca6f..07d7b50cdd76 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -2,11 +2,12 @@ #define _NET_XFRM_H #include +#include #include #include #include #include -#include +#include #include #include #include diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 23422bd53a5e..223f8270daee 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -33,8 +34,11 @@ #include #include #include + #include #include +#include + #include #include #include "br_private.h" diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index c436e6c6242b..9f6e0193ae10 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -9,6 +9,7 @@ * */ +#include #include #include #include diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 49424a42a2c0..281a632fa6a6 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index c37eeeaf5c6e..de681c6ad081 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -21,6 +21,8 @@ #define CCID_MAX 255 +struct tcp_info; + struct ccid { unsigned char 
ccid_id; const char *ccid_name; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 99e8afa7ba1e..3f244670764a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 2e194c8f9953..c609dc78f487 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/net/dccp/output.c b/net/dccp/output.c index 95a3c2c6a3ce..efd7ffb903a1 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -15,6 +15,7 @@ #include #include +#include #include #include "ackvec.h" diff --git a/net/dccp/proto.c b/net/dccp/proto.c index e4e629ed9bf7..65b11ea90d85 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include @@ -34,7 +34,6 @@ #include #include #include -#include #include "ccid.h" #include "dccp.h" diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index ff58f49c8b4a..70fb2b88da65 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4ed8a814c6cb..36a6306ca5a3 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -93,6 +93,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 035ad2c9e1ba..aed537fa2c88 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -6,6 +6,7 @@ #include #include #include +#include #include diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index b425748f02d7..37432088fe6d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -86,6 +86,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 04a6fe3e95a2..7b9bb28e2ee9 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -58,6 +58,7 @@ 
#endif #include +#include #include #include #include diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1b18ce66e7b7..73bfcae8af9c 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -9,6 +9,7 @@ #include #include #include +#include #include /* decapsulation data for use when post-processing */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 19b1b984d687..18f5e509281a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 7ea0209cb169..e2890ec8159e 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 0b298bbc1518..0dd4d06e456d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 6d2a6ac070e3..ef4724de7350 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 92e23b2ad4d2..be5a519cd2f8 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 4a195c724f01..34758118c10c 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -91,6 +91,8 @@ #include #include #include + +#include #include #include #include diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 473d0f2b2e0d..e45846ae570b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -128,6 +128,7 @@ #include #include #include +#include #include #include diff --git 
a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index dbe12da8d8b3..d3f6c468faf4 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -22,6 +22,7 @@ #include #include #include +#include /* * Write options to IP header, record destination address to diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index add019c746f8..6986e11d65cc 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index fc718df17b40..d64e2ec8da7b 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -28,6 +28,7 @@ #include #include #include +#include struct ipcomp_tfms { struct list_head list; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index e8674baaa8d9..bb3613ec448c 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 302b7eb507c9..caa3b7d2e48a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 2a3a8c59c655..d35cea31cb55 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -24,7 +24,9 @@ * */ +#include #include +#include #include #include /* for proc_net_* */ #include diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 9bdcf31b760e..fe2c39d2a002 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -35,6 +35,7 @@ #include #include +#include #include #include diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c index f3bc320dce93..9fee19c4c617 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c @@ -37,8 +37,10 @@ * */ +#include #include #include +#include 
#include diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c index 67b3e2fc1fa1..e7004741ac73 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/ipv4/ipvs/ip_vs_est.c @@ -13,7 +13,10 @@ * Changes: * */ +#include #include +#include +#include #include #include diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index b6dad7e3710c..6e5cb92a5c83 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -41,8 +41,10 @@ * me to write this module. */ +#include #include #include +#include /* for sysctl */ #include diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 8c78ef76c121..32ba37ba72d8 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -39,8 +39,10 @@ * */ +#include #include #include +#include /* for sysctl */ #include diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c index 453e94a0bbd7..8b0505b09317 100644 --- a/net/ipv4/ipvs/ip_vs_proto_ah.c +++ b/net/ipv4/ipvs/ip_vs_proto_ah.c @@ -12,6 +12,8 @@ * */ +#include +#include #include #include #include diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c index 478e5c7c7e8e..c36ccf057a19 100644 --- a/net/ipv4/ipvs/ip_vs_proto_esp.c +++ b/net/ipv4/ipvs/ip_vs_proto_esp.c @@ -12,6 +12,8 @@ * */ +#include +#include #include #include #include diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 8ae5f2e0aefa..89d9175d8f28 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -15,8 +15,11 @@ * */ +#include +#include #include #include +#include #include diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index 6f7c50e44a39..7775e6cc68be 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c @@ -34,8 +34,10 @@ * */ +#include #include #include +#include #include diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 2e5ced3d8062..1bca714bda3d 100644 --- 
a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -21,12 +21,14 @@ #include #include +#include #include #include #include #include #include #include /* for ip_mc_join_group */ +#include #include #include diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index e52847fa10f5..0366eedb4d70 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -18,11 +18,13 @@ * */ +#include #include #include #include #include #include +#include #include #include diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c index 744abb9d377a..57956dee60c8 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -31,6 +31,7 @@ #include #include #include +#include static DEFINE_RWLOCK(ip_ct_gre_lock); #define ASSERT_READ_LOCK(x) diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index f2dcac7c7660..46becbe4fe58 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index dd476b191f4b..a88bcc551244 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -27,6 +27,7 @@ #endif #include #include +#include #define ASSERT_READ_LOCK(x) #define ASSERT_WRITE_LOCK(x) diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index 8acb7ed40b47..4f95d477805c 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c @@ -44,6 +44,7 @@ * */ #include +#include #include #include #include @@ -53,6 +54,7 @@ #include #include #include +#include #include #include #include diff --git 
a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 275a174c6fe6..27860510ca6d 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -18,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/netfilter/ipt_physdev.c b/net/ipv4/netfilter/ipt_physdev.c index 1a53924041fc..03f554857a4d 100644 --- a/net/ipv4/netfilter/ipt_physdev.c +++ b/net/ipv4/netfilter/ipt_physdev.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 0d7dc668db46..39d49dc333a7 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index dbf82955aabe..16984d4a8a06 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 67c036384e77..223abaa72bc5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -86,6 +86,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index f3629730eb15..13cc7f895583 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 8bfbe9970793..6de8ee1a5ad9 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -36,6 +36,7 @@ #include #include #include +#include #include static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 55917fb17094..626dd39685f2 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include diff --git 
a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index dd80020d8740..ea43ef1d94a7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -15,6 +15,7 @@ * - new extension header parser code */ #include +#include #include #include #include diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 0cd1d1bd9033..ae4653bfd654 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index dde37793d20b..268918d5deea 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c index 24bc0cde43a1..65937de1b58c 100644 --- a/net/ipv6/netfilter/ip6t_esp.c +++ b/net/ipv6/netfilter/ip6t_esp.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 6cf0342706b5..c4a2a8c4c339 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index d1b0747a5b9d..de693b43c8ea 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-71-gd317 From 17ba15fb6264f27374bc87f4c3f8519b80289d85 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Dec 2005 20:57:40 -0800 Subject: [PPPOX]: Fix assignment into const proto_ops. And actually, with this, the whole pppox layer can basically be removed and subsumed into pppoe.c, no other pppox sub-protocol implementation exists and we've had this thing for at least 4 years. Signed-off-by: David S. 
Miller --- drivers/net/pppoe.c | 9 ++++----- drivers/net/pppox.c | 10 +++------- include/linux/if_pppox.h | 3 +-- 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index 71e303b28646..9369f811075d 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -85,7 +85,7 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb); static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb); -static struct proto_ops pppoe_ops; +static const struct proto_ops pppoe_ops; static DEFINE_RWLOCK(pppoe_hash_lock); static struct ppp_channel_ops pppoe_chan_ops; @@ -1063,9 +1063,7 @@ static int __init pppoe_proc_init(void) static inline int pppoe_proc_init(void) { return 0; } #endif /* CONFIG_PROC_FS */ -/* ->ioctl are set at pppox_create */ - -static struct proto_ops pppoe_ops = { +static const struct proto_ops pppoe_ops = { .family = AF_PPPOX, .owner = THIS_MODULE, .release = pppoe_release, @@ -1081,7 +1079,8 @@ static struct proto_ops pppoe_ops = { .getsockopt = sock_no_getsockopt, .sendmsg = pppoe_sendmsg, .recvmsg = pppoe_recvmsg, - .mmap = sock_no_mmap + .mmap = sock_no_mmap, + .ioctl = pppox_ioctl, }; static struct pppox_proto pppoe_proto = { diff --git a/drivers/net/pppox.c b/drivers/net/pppox.c index 0c1e114527fb..9315046b3f55 100644 --- a/drivers/net/pppox.c +++ b/drivers/net/pppox.c @@ -68,8 +68,7 @@ EXPORT_SYMBOL(register_pppox_proto); EXPORT_SYMBOL(unregister_pppox_proto); EXPORT_SYMBOL(pppox_unbind_sock); -static int pppox_ioctl(struct socket* sock, unsigned int cmd, - unsigned long arg) +int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; struct pppox_sock *po = pppox_sk(sk); @@ -105,6 +104,7 @@ static int pppox_ioctl(struct socket* sock, unsigned int cmd, return rc; } +EXPORT_SYMBOL(pppox_ioctl); static int pppox_create(struct socket 
*sock, int protocol) { @@ -119,11 +119,7 @@ static int pppox_create(struct socket *sock, int protocol) goto out; rc = pppox_protos[protocol]->create(sock); - if (!rc) { - /* We get to set the ioctl handler. */ - /* For everything else, pppox is just a shell. */ - sock->ops->ioctl = pppox_ioctl; - } + module_put(pppox_protos[protocol]->owner); out: return rc; diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index e677f73f13dd..4fab3d0a4bce 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -157,8 +157,7 @@ struct pppox_proto { extern int register_pppox_proto(int proto_num, struct pppox_proto *pp); extern void unregister_pppox_proto(int proto_num); extern void pppox_unbind_sock(struct sock *sk);/* delete ppp-channel binding */ -extern int pppox_channel_ioctl(struct ppp_channel *pc, unsigned int cmd, - unsigned long arg); +extern int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); /* PPPoX socket states */ enum { -- cgit v1.2.3-71-gd317 From 4947d3ef8de7b4f42aed6ea9ba689dc8fb45b5a5 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Tue, 3 Jan 2006 14:06:50 -0800 Subject: [NET]: Speed up __alloc_skb() From: Benjamin LaHaise In __alloc_skb(), the use of skb_shinfo() which casts a u8 * to the shared info structure results in gcc being forced to do a reload of the pointer since it has no information on possible aliasing. Fix this by using a pointer to refer to skb_shared_info. By initializing skb_shared_info sequentially, the write combining buffers can reduce the number of memory transactions to a single write. Reorder the initialization in __alloc_skb() to match the structure definition. There is also an alignment issue on 64 bit systems with skb_shared_info by converting nr_frags to a short everything packs up nicely. Also, pass the slab cache pointer according to the fclone flag instead of using two almost identical function calls. 
This raises bw_unix performance up to a peak of 707KB/s when combined with the spinlock patch. It should help other networking protocols, too. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- net/core/skbuff.c | 27 +++++++++++++-------------- 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 971677178e0c..483cfc47ec34 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -133,7 +133,7 @@ struct skb_frag_struct { */ struct skb_shared_info { atomic_t dataref; - unsigned int nr_frags; + unsigned short nr_frags; unsigned short tso_size; unsigned short tso_segs; unsigned short ufo_size; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 83fee37de38e..070f91cfde59 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -135,17 +135,13 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, int fclone) { + struct skb_shared_info *shinfo; struct sk_buff *skb; u8 *data; /* Get the HEAD */ - if (fclone) - skb = kmem_cache_alloc(skbuff_fclone_cache, - gfp_mask & ~__GFP_DMA); - else - skb = kmem_cache_alloc(skbuff_head_cache, - gfp_mask & ~__GFP_DMA); - + skb = kmem_cache_alloc(fclone ? 
skbuff_fclone_cache : skbuff_head_cache, + gfp_mask & ~__GFP_DMA); if (!skb) goto out; @@ -162,6 +158,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->data = data; skb->tail = data; skb->end = data + size; + /* make sure we initialize shinfo sequentially */ + shinfo = skb_shinfo(skb); + atomic_set(&shinfo->dataref, 1); + shinfo->nr_frags = 0; + shinfo->tso_size = 0; + shinfo->tso_segs = 0; + shinfo->ufo_size = 0; + shinfo->ip6_frag_id = 0; + shinfo->frag_list = NULL; + if (fclone) { struct sk_buff *child = skb + 1; atomic_t *fclone_ref = (atomic_t *) (child + 1); @@ -171,13 +177,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, child->fclone = SKB_FCLONE_UNAVAILABLE; } - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->tso_size = 0; - skb_shinfo(skb)->tso_segs = 0; - skb_shinfo(skb)->frag_list = NULL; - skb_shinfo(skb)->ufo_size = 0; - skb_shinfo(skb)->ip6_frag_id = 0; out: return skb; nodata: -- cgit v1.2.3-71-gd317 From a6f6c96b65d7f65a7a7bf5cbe874eda182a6b2cc Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 3 Jan 2006 22:38:44 +0000 Subject: [MMC] Improve MMC card block size selection Select a block size for IO based on the read and write block size combinations, and whether the card supports partial block reads and/or partial block writes. If we are able to satisfy block reads but not block writes, mark the device read only. 
Signed-off-by: Russell King --- drivers/mmc/mmc.c | 10 +++ drivers/mmc/mmc_block.c | 175 ++++++++++++++++++++++++++++++----------------- include/linux/mmc/card.h | 5 ++ 3 files changed, 128 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index eb41391e06e9..6696f71363b9 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -550,6 +550,11 @@ static void mmc_decode_csd(struct mmc_card *card) csd->capacity = (1 + m) << (e + 2); csd->read_blkbits = UNSTUFF_BITS(resp, 80, 4); + csd->read_partial = UNSTUFF_BITS(resp, 79, 1); + csd->write_misalign = UNSTUFF_BITS(resp, 78, 1); + csd->read_misalign = UNSTUFF_BITS(resp, 77, 1); + csd->write_blkbits = UNSTUFF_BITS(resp, 22, 4); + csd->write_partial = UNSTUFF_BITS(resp, 21, 1); } else { /* * We only understand CSD structure v1.1 and v1.2. @@ -579,6 +584,11 @@ static void mmc_decode_csd(struct mmc_card *card) csd->capacity = (1 + m) << (e + 2); csd->read_blkbits = UNSTUFF_BITS(resp, 80, 4); + csd->read_partial = UNSTUFF_BITS(resp, 79, 1); + csd->write_misalign = UNSTUFF_BITS(resp, 78, 1); + csd->read_misalign = UNSTUFF_BITS(resp, 77, 1); + csd->write_blkbits = UNSTUFF_BITS(resp, 22, 4); + csd->write_partial = UNSTUFF_BITS(resp, 21, 1); } } diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c index abcf19116d70..b9837cc5b9ac 100644 --- a/drivers/mmc/mmc_block.c +++ b/drivers/mmc/mmc_block.c @@ -54,6 +54,7 @@ struct mmc_blk_data { unsigned int usage; unsigned int block_bits; + unsigned int read_only; }; static DECLARE_MUTEX(open_lock); @@ -85,12 +86,6 @@ static void mmc_blk_put(struct mmc_blk_data *md) up(&open_lock); } -static inline int mmc_blk_readonly(struct mmc_card *card) -{ - return mmc_card_readonly(card) || - !(card->csd.cmdclass & CCC_BLOCK_WRITE); -} - static int mmc_blk_open(struct inode *inode, struct file *filp) { struct mmc_blk_data *md; @@ -102,8 +97,7 @@ static int mmc_blk_open(struct inode *inode, struct file *filp) 
check_disk_change(inode->i_bdev); ret = 0; - if ((filp->f_mode & FMODE_WRITE) && - mmc_blk_readonly(md->queue.card)) + if ((filp->f_mode & FMODE_WRITE) && md->read_only) ret = -EROFS; } @@ -299,6 +293,12 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) static unsigned long dev_use[MMC_NUM_MINORS/(8*sizeof(unsigned long))]; +static inline int mmc_blk_readonly(struct mmc_card *card) +{ + return mmc_card_readonly(card) || + !(card->csd.cmdclass & CCC_BLOCK_WRITE); +} + static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) { struct mmc_blk_data *md; @@ -310,64 +310,121 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card) __set_bit(devidx, dev_use); md = kmalloc(sizeof(struct mmc_blk_data), GFP_KERNEL); - if (md) { - memset(md, 0, sizeof(struct mmc_blk_data)); + if (!md) { + ret = -ENOMEM; + goto out; + } - md->disk = alloc_disk(1 << MMC_SHIFT); - if (md->disk == NULL) { - kfree(md); - md = ERR_PTR(-ENOMEM); - goto out; - } + memset(md, 0, sizeof(struct mmc_blk_data)); - spin_lock_init(&md->lock); - md->usage = 1; + /* + * Set the read-only status based on the supported commands + * and the write protect switch. + */ + md->read_only = mmc_blk_readonly(card); - ret = mmc_init_queue(&md->queue, card, &md->lock); - if (ret) { - put_disk(md->disk); - kfree(md); - md = ERR_PTR(ret); - goto out; + /* + * Figure out a workable block size. MMC cards have: + * - two block sizes, one for read and one for write. + * - may support partial reads and/or writes + * (allows block sizes smaller than specified) + */ + md->block_bits = card->csd.read_blkbits; + if (card->csd.write_blkbits != card->csd.read_blkbits) { + if (card->csd.write_blkbits < card->csd.read_blkbits && + card->csd.read_partial) { + /* + * write block size is smaller than read block + * size, but we support partial reads, so choose + * the smaller write block size. 
+ */ + md->block_bits = card->csd.write_blkbits; + } else if (card->csd.write_blkbits > card->csd.read_blkbits && + card->csd.write_partial) { + /* + * read block size is smaller than write block + * size, but we support partial writes. Use read + * block size. + */ + } else { + /* + * We don't support this configuration for writes. + */ + printk(KERN_ERR "%s: unable to select block size for " + "writing (rb%u wb%u rp%u wp%u)\n", + md->disk->disk_name, + 1 << card->csd.read_blkbits, + 1 << card->csd.write_blkbits, + card->csd.read_partial, + card->csd.write_partial); + md->read_only = 1; } - md->queue.prep_fn = mmc_blk_prep_rq; - md->queue.issue_fn = mmc_blk_issue_rq; - md->queue.data = md; + } - md->disk->major = major; - md->disk->first_minor = devidx << MMC_SHIFT; - md->disk->fops = &mmc_bdops; - md->disk->private_data = md; - md->disk->queue = md->queue.queue; - md->disk->driverfs_dev = &card->dev; + /* + * Refuse to allow block sizes smaller than 512 bytes. + */ + if (md->block_bits < 9) { + printk(KERN_ERR "%s: unable to support block size %u\n", + mmc_card_id(card), 1 << md->block_bits); + ret = -EINVAL; + goto err_kfree; + } - /* - * As discussed on lkml, GENHD_FL_REMOVABLE should: - * - * - be set for removable media with permanent block devices - * - be unset for removable block devices with permanent media - * - * Since MMC block devices clearly fall under the second - * case, we do not set GENHD_FL_REMOVABLE. Userspace - * should use the block device creation/destruction hotplug - * messages to tell when the card is present. 
- */ + md->disk = alloc_disk(1 << MMC_SHIFT); + if (md->disk == NULL) { + ret = -ENOMEM; + goto err_kfree; + } - sprintf(md->disk->disk_name, "mmcblk%d", devidx); - sprintf(md->disk->devfs_name, "mmc/blk%d", devidx); + spin_lock_init(&md->lock); + md->usage = 1; - md->block_bits = card->csd.read_blkbits; + ret = mmc_init_queue(&md->queue, card, &md->lock); + if (ret) + goto err_putdisk; - blk_queue_hardsect_size(md->queue.queue, 1 << md->block_bits); + md->queue.prep_fn = mmc_blk_prep_rq; + md->queue.issue_fn = mmc_blk_issue_rq; + md->queue.data = md; - /* - * The CSD capacity field is in units of read_blkbits. - * set_capacity takes units of 512 bytes. - */ - set_capacity(md->disk, card->csd.capacity << (card->csd.read_blkbits - 9)); - } - out: + md->disk->major = major; + md->disk->first_minor = devidx << MMC_SHIFT; + md->disk->fops = &mmc_bdops; + md->disk->private_data = md; + md->disk->queue = md->queue.queue; + md->disk->driverfs_dev = &card->dev; + + /* + * As discussed on lkml, GENHD_FL_REMOVABLE should: + * + * - be set for removable media with permanent block devices + * - be unset for removable block devices with permanent media + * + * Since MMC block devices clearly fall under the second + * case, we do not set GENHD_FL_REMOVABLE. Userspace + * should use the block device creation/destruction hotplug + * messages to tell when the card is present. + */ + + sprintf(md->disk->disk_name, "mmcblk%d", devidx); + sprintf(md->disk->devfs_name, "mmc/blk%d", devidx); + + blk_queue_hardsect_size(md->queue.queue, 1 << md->block_bits); + + /* + * The CSD capacity field is in units of read_blkbits. + * set_capacity takes units of 512 bytes. 
+ */ + set_capacity(md->disk, card->csd.capacity << (card->csd.read_blkbits - 9)); return md; + + err_putdisk: + put_disk(md->disk); + err_kfree: + kfree(md); + out: + return ERR_PTR(ret); } static int @@ -403,12 +460,6 @@ static int mmc_blk_probe(struct mmc_card *card) if (!(card->csd.cmdclass & CCC_BLOCK_READ)) return -ENODEV; - if (card->csd.read_blkbits < 9) { - printk(KERN_WARNING "%s: read blocksize too small (%u)\n", - mmc_card_id(card), 1 << card->csd.read_blkbits); - return -ENODEV; - } - md = mmc_blk_alloc(card); if (IS_ERR(md)) return PTR_ERR(md); @@ -419,7 +470,7 @@ static int mmc_blk_probe(struct mmc_card *card) printk(KERN_INFO "%s: %s %s %luKiB %s\n", md->disk->disk_name, mmc_card_id(card), mmc_card_name(card), - get_capacity(md->disk) >> 1, mmc_blk_readonly(card)?"(ro)":""); + get_capacity(md->disk) >> 1, md->read_only ? "(ro)" : ""); mmc_set_drvdata(card, md); add_disk(md->disk); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 18fc77f682de..7f7d40684288 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -30,7 +30,12 @@ struct mmc_csd { unsigned int tacc_ns; unsigned int max_dtr; unsigned int read_blkbits; + unsigned int write_blkbits; unsigned int capacity; + unsigned int read_partial:1, + read_misalign:1, + write_partial:1 + write_misalign:1; }; struct sd_scr { -- cgit v1.2.3-71-gd317 From 88df8ef59a3eb54b1e2412765ff2736d2376d1ca Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 3 Jan 2006 15:25:45 -0800 Subject: [NET]: Don't exclude broadcast addresses from is_multicast_ether_addr() The check for multicast shouldn't exclude broadcast type addresses. This reverts the incorrect change done in 2.6.13. The broadcast address is a multicast address and should be excluded from being a valid_ether_address for use in bridging or device address. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/etherdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 5f49a30eb6f2..745c988359c0 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -63,10 +63,11 @@ static inline int is_zero_ether_addr(const u8 *addr) * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is a multicast address. + * By definition the broadcast address is also a multicast address. */ static inline int is_multicast_ether_addr(const u8 *addr) { - return ((addr[0] != 0xff) && (0x01 & addr[0])); + return (0x01 & addr[0]); } /** -- cgit v1.2.3-71-gd317 From ce11a161c11868f268964274edc7a26a3e063e08 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 4 Jan 2006 12:40:39 +0000 Subject: [MMC] Fix missing ',' Signed-off-by: Russell King --- include/linux/mmc/card.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 7f7d40684288..30dd978c1ec8 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -34,7 +34,7 @@ struct mmc_csd { unsigned int capacity; unsigned int read_partial:1, read_misalign:1, - write_partial:1 + write_partial:1, write_misalign:1; }; -- cgit v1.2.3-71-gd317 From 26e92861be9c0da3be30718de693976b3f6a8026 Mon Sep 17 00:00:00 2001 From: Gareth Howlett Date: Wed, 4 Jan 2006 17:00:42 +0000 Subject: [SERIAL] Add support for more Connect Tech PCI serial boards I've also fixed the sort-ordering comments on this naming convention. 
Signed-off-by: Stuart MacDonald Signed-off-by: Russell King --- drivers/serial/8250_pci.c | 120 +++++++++++++++++++++++++++++++++++++++++++--- include/linux/pci_ids.h | 17 +++++++ 2 files changed, 131 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 8adca0ce267f..4a589a9456f5 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -853,14 +853,15 @@ get_pci_irq(struct pci_dev *dev, struct pciserial_board *board) * driver_data member. * * The makeup of these names are: - * pbn_bn{_bt}_n_baud + * pbn_bn{_bt}_n_baud{_offsetinhex} * - * bn = PCI BAR number - * bt = Index using PCI BARs - * n = number of serial ports - * baud = baud rate + * bn = PCI BAR number + * bt = Index using PCI BARs + * n = number of serial ports + * baud = baud rate + * offsetinhex = offset for each sequential port (in hex) * - * This table is sorted by (in order): baud, bt, bn, n. + * This table is sorted by (in order): bn, bt, baud, offsetindex, n. * * Please note: in theory if n = 1, _bt infix should make no difference. 
* ie, pbn_b0_1_115200 is the same as pbn_b0_bt_1_115200 @@ -881,6 +882,13 @@ enum pci_board_num_t { pbn_b0_4_1152000, + pbn_b0_2_1843200, + pbn_b0_4_1843200, + + pbn_b0_2_1843200_200, + pbn_b0_4_1843200_200, + pbn_b0_8_1843200_200, + pbn_b0_bt_1_115200, pbn_b0_bt_2_115200, pbn_b0_bt_8_115200, @@ -904,6 +912,8 @@ enum pci_board_num_t { pbn_b1_4_921600, pbn_b1_8_921600, + pbn_b1_2_1250000, + pbn_b1_bt_2_921600, pbn_b1_1_1382400, @@ -1029,6 +1039,38 @@ static struct pciserial_board pci_boards[] __devinitdata = { .uart_offset = 8, }, + [pbn_b0_2_1843200] = { + .flags = FL_BASE0, + .num_ports = 2, + .base_baud = 1843200, + .uart_offset = 8, + }, + [pbn_b0_4_1843200] = { + .flags = FL_BASE0, + .num_ports = 4, + .base_baud = 1843200, + .uart_offset = 8, + }, + + [pbn_b0_2_1843200_200] = { + .flags = FL_BASE0, + .num_ports = 2, + .base_baud = 1843200, + .uart_offset = 0x200, + }, + [pbn_b0_4_1843200_200] = { + .flags = FL_BASE0, + .num_ports = 4, + .base_baud = 1843200, + .uart_offset = 0x200, + }, + [pbn_b0_8_1843200_200] = { + .flags = FL_BASE0, + .num_ports = 8, + .base_baud = 1843200, + .uart_offset = 0x200, + }, + [pbn_b0_bt_1_115200] = { .flags = FL_BASE0|FL_BASE_BARS, .num_ports = 1, @@ -1141,6 +1183,12 @@ static struct pciserial_board pci_boards[] __devinitdata = { .base_baud = 921600, .uart_offset = 8, }, + [pbn_b1_2_1250000] = { + .flags = FL_BASE1, + .num_ports = 2, + .base_baud = 1250000, + .uart_offset = 8, + }, [pbn_b1_bt_2_921600] = { .flags = FL_BASE1|FL_BASE_BARS, @@ -1801,6 +1849,66 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_SUBVENDOR_ID_CONNECT_TECH, PCI_SUBDEVICE_ID_CONNECT_TECH_BH041101V1, 0, 0, pbn_b1_4_921600 }, + { PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V351, + PCI_SUBVENDOR_ID_CONNECT_TECH, + PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_20MHZ, 0, 0, + pbn_b1_2_1250000 }, + { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954, + PCI_SUBVENDOR_ID_CONNECT_TECH, + PCI_SUBDEVICE_ID_CONNECT_TECH_TITAN_2, 0, 0, + pbn_b0_2_1843200 }, + { 
PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954, + PCI_SUBVENDOR_ID_CONNECT_TECH, + PCI_SUBDEVICE_ID_CONNECT_TECH_TITAN_4, 0, 0, + pbn_b0_4_1843200 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17C152, + PCI_SUBVENDOR_ID_CONNECT_TECH, + PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_2_232, 0, 0, + pbn_b0_2_1843200_200 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17C154, + PCI_SUBVENDOR_ID_CONNECT_TECH, + PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_4_232, 0, 0, + pbn_b0_4_1843200_200 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17C158, + PCI_SUBVENDOR_ID_CONNECT_TECH, + PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_8_232, 0, 0, + pbn_b0_8_1843200_200 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17C152, + PCI_SUBVENDOR_ID_CONNECT_TECH, + PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_1_1, 0, 0, + pbn_b0_2_1843200_200 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17C154, + PCI_SUBVENDOR_ID_CONNECT_TECH, + PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_2_2, 0, 0, + pbn_b0_4_1843200_200 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17C158, + PCI_SUBVENDOR_ID_CONNECT_TECH, + PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_4_4, 0, 0, + pbn_b0_8_1843200_200 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17C152, + PCI_SUBVENDOR_ID_CONNECT_TECH, + PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_2, 0, 0, + pbn_b0_2_1843200_200 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17C154, + PCI_SUBVENDOR_ID_CONNECT_TECH, + PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_4, 0, 0, + pbn_b0_4_1843200_200 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17C158, + PCI_SUBVENDOR_ID_CONNECT_TECH, + PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_8, 0, 0, + pbn_b0_8_1843200_200 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17C152, + PCI_SUBVENDOR_ID_CONNECT_TECH, + PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_2_485, 0, 0, + pbn_b0_2_1843200_200 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17C154, + PCI_SUBVENDOR_ID_CONNECT_TECH, + PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_4_485, 0, 0, + pbn_b0_4_1843200_200 }, + { PCI_VENDOR_ID_EXAR, 
PCI_DEVICE_ID_EXAR_XR17C158, + PCI_SUBVENDOR_ID_CONNECT_TECH, + PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_8_485, 0, 0, + pbn_b0_8_1843200_200 }, { PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_U530, PCI_ANY_ID, PCI_ANY_ID, 0, 0, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4db67b3b05cc..efb5d22f8c09 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1567,6 +1567,23 @@ #define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485_2_6 0x0009 #define PCI_SUBDEVICE_ID_CONNECT_TECH_BH081101V1 0x000A #define PCI_SUBDEVICE_ID_CONNECT_TECH_BH041101V1 0x000B +#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_20MHZ 0x000C +#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_PTM 0x000D +#define PCI_SUBDEVICE_ID_CONNECT_TECH_NT960PCI 0x0100 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_TITAN_2 0x0201 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_TITAN_4 0x0202 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_2_232 0x0300 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_4_232 0x0301 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_8_232 0x0302 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_1_1 0x0310 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_2_2 0x0311 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_4_4 0x0312 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_2 0x0320 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_4 0x0321 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_8 0x0322 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_2_485 0x0330 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_4_485 0x0331 +#define PCI_SUBDEVICE_ID_CONNECT_TECH_PCI_UART_8_485 0x0332 #define PCI_VENDOR_ID_NVIDIA_SGS 0x12d2 -- cgit v1.2.3-71-gd317 From a00828e9ac62caed7b830d631914d7748817ccd1 Mon Sep 17 00:00:00 2001 From: Pete Zaitcev Date: Sat, 22 Oct 2005 20:15:09 -0700 Subject: [PATCH] USB: drivers/usb/storage/libusual This patch adds a shim driver libusual, which routes devices between usb-storage and ub according to the common table, based on unusual_devs.h. The help and example syntax is in Kconfig. 
Signed-off-by: Pete Zaitcev Signed-off-by: Greg Kroah-Hartman --- drivers/block/Kconfig | 3 +- drivers/block/ub.c | 23 ++-- drivers/usb/Makefile | 1 + drivers/usb/storage/Kconfig | 14 ++ drivers/usb/storage/Makefile | 4 + drivers/usb/storage/libusual.c | 266 +++++++++++++++++++++++++++++++++++++ drivers/usb/storage/protocol.h | 14 -- drivers/usb/storage/transport.h | 31 ----- drivers/usb/storage/unusual_devs.h | 24 ++++ drivers/usb/storage/usb.c | 119 ++++++----------- drivers/usb/storage/usb.h | 31 +---- include/linux/usb_usual.h | 123 +++++++++++++++++ 12 files changed, 486 insertions(+), 167 deletions(-) create mode 100644 drivers/usb/storage/libusual.c create mode 100644 include/linux/usb_usual.h (limited to 'include/linux') diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 7b1cd93892be..c4b9d2adfc08 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -358,7 +358,8 @@ config BLK_DEV_UB This driver supports certain USB attached storage devices such as flash keys. - Warning: Enabling this cripples the usb-storage driver. + If you enable this driver, it is recommended to avoid conflicts + with usb-storage by enabling USB_LIBUSUAL. If unsure, say N. diff --git a/drivers/block/ub.c b/drivers/block/ub.c index bfb23d543ff7..06d741d58a68 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -106,16 +107,6 @@ * +--------+ */ -/* - * Definitions which have to be scattered once we understand the layout better. - */ - -/* Transport (despite PR in the name) */ -#define US_PR_BULK 0x50 /* bulk only */ - -/* Protocol */ -#define US_SC_SCSI 0x06 /* Transparent */ - /* * This many LUNs per USB device. * Every one of them takes a host, see UB_MAX_HOSTS. 
@@ -422,13 +413,18 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum); /* */ +#ifdef CONFIG_USB_LIBUSUAL + +#define ub_usb_ids storage_usb_ids +#else + static struct usb_device_id ub_usb_ids[] = { - // { USB_DEVICE_VER(0x0781, 0x0002, 0x0009, 0x0009) }, /* SDDR-31 */ { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_SCSI, US_PR_BULK) }, { } }; MODULE_DEVICE_TABLE(usb, ub_usb_ids); +#endif /* CONFIG_USB_LIBUSUAL */ /* * Find me a way to identify "next free minor" for add_disk(), @@ -2172,6 +2168,9 @@ static int ub_probe(struct usb_interface *intf, int rc; int i; + if (usb_usual_check_type(dev_id, USB_US_TYPE_UB)) + return -ENXIO; + rc = -ENOMEM; if ((sc = kmalloc(sizeof(struct ub_dev), GFP_KERNEL)) == NULL) goto err_core; @@ -2479,6 +2478,7 @@ static int __init ub_init(void) if ((rc = usb_register(&ub_driver)) != 0) goto err_register; + usb_usual_set_present(USB_US_TYPE_UB); return 0; err_register: @@ -2494,6 +2494,7 @@ static void __exit ub_exit(void) devfs_remove(DEVFS_NAME); unregister_blkdev(UB_MAJOR, DRV_NAME); + usb_usual_clear_present(USB_US_TYPE_UB); } module_init(ub_init); diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile index a50c2bc506f2..3639c3f8d357 100644 --- a/drivers/usb/Makefile +++ b/drivers/usb/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_USB_MIDI) += class/ obj-$(CONFIG_USB_PRINTER) += class/ obj-$(CONFIG_USB_STORAGE) += storage/ +obj-$(CONFIG_USB) += storage/ obj-$(CONFIG_USB_AIPTEK) += input/ obj-$(CONFIG_USB_ATI_REMOTE) += input/ diff --git a/drivers/usb/storage/Kconfig b/drivers/usb/storage/Kconfig index c41d64dbb0f0..bdfcb95d9c12 100644 --- a/drivers/usb/storage/Kconfig +++ b/drivers/usb/storage/Kconfig @@ -124,3 +124,17 @@ config USB_STORAGE_ONETOUCH hard drive's as an input device. An action can be associated with this input in any keybinding software. (e.g. 
gnome's keyboard short- cuts) + +config USB_LIBUSUAL + bool "The shared table of common (or usual) storage devices" + depends on USB + help + This module contains a table of common (or usual) devices + for usb-storage and ub drivers, and allows to switch binding + of these devices without rebuilding modules. + + Typical syntax of /etc/modprobe.conf is: + + options libusual bias="ub" + + If unsure, say N. diff --git a/drivers/usb/storage/Makefile b/drivers/usb/storage/Makefile index 44ab8f9978fe..2d416e9028bb 100644 --- a/drivers/usb/storage/Makefile +++ b/drivers/usb/storage/Makefile @@ -22,3 +22,7 @@ usb-storage-obj-$(CONFIG_USB_STORAGE_ONETOUCH) += onetouch.o usb-storage-objs := scsiglue.o protocol.o transport.o usb.o \ initializers.o $(usb-storage-obj-y) + +ifneq ($(CONFIG_USB_LIBUSUAL),) + obj-$(CONFIG_USB) += libusual.o +endif diff --git a/drivers/usb/storage/libusual.c b/drivers/usb/storage/libusual.c new file mode 100644 index 000000000000..61f73d8a2c0f --- /dev/null +++ b/drivers/usb/storage/libusual.c @@ -0,0 +1,266 @@ +/* + * libusual + * + * The libusual contains the table of devices common for ub and usb-storage. + */ +#include +#include +#include +#include +#include + +/* + */ +#define USU_MOD_FL_THREAD 1 /* Thread is running */ +#define USU_MOD_FL_PRESENT 2 /* The module is loaded */ + +struct mod_status { + unsigned long fls; +}; + +static struct mod_status stat[3]; +static DEFINE_SPINLOCK(usu_lock); + +/* + */ +#define USB_US_DEFAULT_BIAS USB_US_TYPE_STOR + +#define BIAS_NAME_SIZE (sizeof("usb-storage")) +static char bias[BIAS_NAME_SIZE]; +static int usb_usual_bias; +static const char *bias_names[3] = { "none", "usb-storage", "ub" }; + +static DECLARE_MUTEX_LOCKED(usu_init_notify); +static DECLARE_COMPLETION(usu_end_notify); +static atomic_t total_threads = ATOMIC_INIT(0); + +static int usu_probe_thread(void *arg); +static int parse_bias(const char *bias_s); + +/* + * The table. 
+ */ +#define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ + vendorName, productName,useProtocol, useTransport, \ + initFunction, flags) \ +{ USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin,bcdDeviceMax), \ + .driver_info = (flags)|(USB_US_TYPE_STOR<<24) } + +#define USUAL_DEV(useProto, useTrans, useType) \ +{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, useProto, useTrans), \ + .driver_info = ((useType)<<24) } + +struct usb_device_id storage_usb_ids [] = { /* driver_info: flags in the low 24 bits, driver type shifted up by 24 */ +# include "unusual_devs.h" + { } /* Terminating entry */ +}; + +#undef USUAL_DEV +#undef UNUSUAL_DEV + +MODULE_DEVICE_TABLE(usb, storage_usb_ids); +EXPORT_SYMBOL_GPL(storage_usb_ids); + +/* + * Mark the driver module of the given type as loaded: sets + * USU_MOD_FL_PRESENT in its stat[] slot while holding usu_lock. + * usb_usual_clear_present() below clears the same flag again. + * @type: the module type as an integer; values outside 1..2 are ignored + */ +void usb_usual_set_present(int type) +{ + struct mod_status *st; + unsigned long flags; + + if (type <= 0 || type >= 3) + return; + st = &stat[type]; + spin_lock_irqsave(&usu_lock, flags); + st->fls |= USU_MOD_FL_PRESENT; + spin_unlock_irqrestore(&usu_lock, flags); +} +EXPORT_SYMBOL_GPL(usb_usual_set_present); + +void usb_usual_clear_present(int type) +{ + struct mod_status *st; + unsigned long flags; + + if (type <= 0 || type >= 3) + return; + st = &stat[type]; + spin_lock_irqsave(&usu_lock, flags); + st->fls &= ~USU_MOD_FL_PRESENT; + spin_unlock_irqrestore(&usu_lock, flags); +} +EXPORT_SYMBOL_GPL(usb_usual_clear_present); + +/* + * Match the calling driver type against the table. + * Returns: 0 if the device matches.
+ */ +int usb_usual_check_type(const struct usb_device_id *id, int caller_type) +{ + int id_type = USB_US_TYPE(id->driver_info); + + /* Only driver types 1 and 2 may ask; anything else gets -EINVAL */ + if (caller_type <= 0 || caller_type >= 3) + return -EINVAL; + + /* Drivers grab fixed assignment devices */ + if (id_type == caller_type) + return 0; + /* Drivers grab devices biased to them */ + if (id_type == USB_US_TYPE_NONE && caller_type == usb_usual_bias) + return 0; + /* Fixed to, or biased toward, the other driver */ + return -ENODEV; +} +EXPORT_SYMBOL_GPL(usb_usual_check_type); + +/* + * Probe routine of the stub driver. It never binds: every path returns a + * non-zero value. If the driver that should own the device (the fixed type + * from the table, or else the current bias) is neither loaded nor already + * being loaded, a thread is started to request that module. + */ +static int usu_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ + int type; + int rc; + unsigned long flags; + + type = USB_US_TYPE(id->driver_info); + if (type == 0) + type = usb_usual_bias; + + spin_lock_irqsave(&usu_lock, flags); + if ((stat[type].fls & (USU_MOD_FL_THREAD|USU_MOD_FL_PRESENT)) != 0) { + spin_unlock_irqrestore(&usu_lock, flags); + return -ENXIO; + } + stat[type].fls |= USU_MOD_FL_THREAD; + spin_unlock_irqrestore(&usu_lock, flags); + + /* + * Widen the int through unsigned long before the pointer cast; + * usu_probe_thread() converts the argument back the same way. + */ + rc = kernel_thread(usu_probe_thread, (void*)(unsigned long)type, CLONE_VM); + if (rc < 0) { + printk(KERN_WARNING "libusual: " + "Unable to start the thread for %s: %d\n", + bias_names[type], rc); + spin_lock_irqsave(&usu_lock, flags); + stat[type].fls &= ~USU_MOD_FL_THREAD; + spin_unlock_irqrestore(&usu_lock, flags); + return rc; /* Not being -ENXIO causes a message printed */ + } + atomic_inc(&total_threads); + + return -ENXIO; +} + +static void usu_disconnect(struct usb_interface *intf) +{ + ; /* We should not be here. */ +} + +static struct usb_driver usu_driver = { + .owner = THIS_MODULE, + .name = "libusual", + .probe = usu_probe, + .disconnect = usu_disconnect, + .id_table = storage_usb_ids, +}; + +/* + * A whole new thread for a purpose of request_module seems quite stupid. + * The request_module forks once inside again. However, if we attempt + * to load a storage module from our own modprobe thread, that module + * references our symbols, which cannot be resolved until our module is + * initialized.
I wish there was a way to wait for the end of initialization. + * The module notifier reports MODULE_STATE_COMING only. + * So, we wait until module->init ends as the next best thing. + */ +static int usu_probe_thread(void *arg) +{ + int type = (unsigned long) arg; + struct mod_status *st = &stat[type]; + int rc; + unsigned long flags; + + daemonize("libusual_%d", type); /* "usb-storage" is kinda too long */ + + /* + * Block until usb_usual_init() has released usu_init_notify, then + * release it again right away for the next waiter. + * A completion does not work here because it's counted. + */ + down(&usu_init_notify); + up(&usu_init_notify); + + rc = request_module(bias_names[type]); + spin_lock_irqsave(&usu_lock, flags); + if (rc == 0 && (st->fls & USU_MOD_FL_PRESENT) == 0) { + /* + * This should not happen, but let us keep tabs on it. + */ + printk(KERN_NOTICE "libusual: " + "modprobe for %s succeeded, but module is not present\n", + bias_names[type]); + } + st->fls &= ~USU_MOD_FL_THREAD; + spin_unlock_irqrestore(&usu_lock, flags); + + complete_and_exit(&usu_end_notify, 0); +} + +/* + * Module init: terminate and parse the bias parameter, register the stub + * driver, then release usu_init_notify so that threads waiting in + * usu_probe_thread() can proceed. Returns the usb_register() result. + */ +static int __init usb_usual_init(void) +{ + int rc; + + bias[BIAS_NAME_SIZE-1] = 0; + usb_usual_bias = parse_bias(bias); + + rc = usb_register(&usu_driver); + up(&usu_init_notify); + return rc; +} + +static void __exit usb_usual_exit(void) +{ + /* + * We do not check for any drivers present, because + * they keep us pinned with symbol references. + */ + + usb_deregister(&usu_driver); + + while (atomic_read(&total_threads) > 0) { /* one completion is consumed per counted thread */ + wait_for_completion(&usu_end_notify); + atomic_dec(&total_threads); + } +} + +/* + * Validate and accept the bias parameter. + * Maybe make an sysfs method later.
XXX + */ +static int parse_bias(const char *bias_s) +{ + int i; + int bias_n = 0; /* stays 0 until a name from bias_names[] matches */ + + if (bias_s[0] == 0 || bias_s[0] == ' ') { /* empty or blank: use the default */ + bias_n = USB_US_DEFAULT_BIAS; + } else { + for (i = 1; i < 3; i++) { /* index 0 is skipped, so only entries 1 and 2 can match */ + if (strcmp(bias_s, bias_names[i]) == 0) { + bias_n = i; + break; + } + } + if (bias_n == 0) { /* unknown name: fall back to the default and say so */ + bias_n = USB_US_DEFAULT_BIAS; + printk(KERN_INFO + "libusual: unknown bias \"%s\", using \"%s\"\n", + bias_s, bias_names[bias_n]); + } + } + return bias_n; +} + +module_init(usb_usual_init); +module_exit(usb_usual_exit); + +module_param_string(bias, bias, BIAS_NAME_SIZE, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(bias, "Bias to usb-storage or ub"); + +MODULE_LICENSE("GPL"); diff --git a/drivers/usb/storage/protocol.h b/drivers/usb/storage/protocol.h index 02bff01ab09c..845bed4b8031 100644 --- a/drivers/usb/storage/protocol.h +++ b/drivers/usb/storage/protocol.h @@ -41,20 +41,6 @@ #ifndef _PROTOCOL_H_ #define _PROTOCOL_H_ -/* Sub Classes */ - -#define US_SC_RBC 0x01 /* Typically, flash devices */ -#define US_SC_8020 0x02 /* CD-ROM */ -#define US_SC_QIC 0x03 /* QIC-157 Tapes */ -#define US_SC_UFI 0x04 /* Floppy */ -#define US_SC_8070 0x05 /* Removable media */ -#define US_SC_SCSI 0x06 /* Transparent */ -#define US_SC_ISD200 0x07 /* ISD200 ATA */ -#define US_SC_MIN US_SC_RBC -#define US_SC_MAX US_SC_ISD200 - -#define US_SC_DEVICE 0xff /* Use device's value */ - /* Protocol handling routines */ extern void usb_stor_ATAPI_command(struct scsi_cmnd*, struct us_data*); extern void usb_stor_qic157_command(struct scsi_cmnd*, struct us_data*); diff --git a/drivers/usb/storage/transport.h b/drivers/usb/storage/transport.h index 0a362cc781ad..633a715850a4 100644 --- a/drivers/usb/storage/transport.h +++ b/drivers/usb/storage/transport.h @@ -41,39 +41,8 @@ #ifndef _TRANSPORT_H_ #define _TRANSPORT_H_ -#include #include -/* Protocols */ - -#define US_PR_CBI 0x00 /* Control/Bulk/Interrupt */ -#define US_PR_CB 0x01 /* Control/Bulk w/o interrupt */ -#define US_PR_BULK 0x50 /* bulk only */ -#ifdef
CONFIG_USB_STORAGE_USBAT -#define US_PR_USBAT 0x80 /* SCM-ATAPI bridge */ -#endif -#ifdef CONFIG_USB_STORAGE_SDDR09 -#define US_PR_EUSB_SDDR09 0x81 /* SCM-SCSI bridge for SDDR-09 */ -#endif -#ifdef CONFIG_USB_STORAGE_SDDR55 -#define US_PR_SDDR55 0x82 /* SDDR-55 (made up) */ -#endif -#define US_PR_DPCM_USB 0xf0 /* Combination CB/SDDR09 */ - -#ifdef CONFIG_USB_STORAGE_FREECOM -#define US_PR_FREECOM 0xf1 /* Freecom */ -#endif - -#ifdef CONFIG_USB_STORAGE_DATAFAB -#define US_PR_DATAFAB 0xf2 /* Datafab chipsets */ -#endif - -#ifdef CONFIG_USB_STORAGE_JUMPSHOT -#define US_PR_JUMPSHOT 0xf3 /* Lexar Jumpshot */ -#endif - -#define US_PR_DEVICE 0xff /* Use device's value */ - /* * Bulk only data structures */ diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index f5f47a34b168..76904ad11241 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1134,3 +1134,27 @@ UNUSUAL_DEV( 0x55aa, 0xa103, 0x0000, 0x9999, US_SC_SCSI, US_PR_SDDR55, NULL, US_FL_SINGLE_LUN), #endif + +/* Control/Bulk transport for all SubClass values */ +USUAL_DEV(US_SC_RBC, US_PR_CB, USB_US_TYPE_STOR), +USUAL_DEV(US_SC_8020, US_PR_CB, USB_US_TYPE_STOR), +USUAL_DEV(US_SC_QIC, US_PR_CB, USB_US_TYPE_STOR), +USUAL_DEV(US_SC_UFI, US_PR_CB, USB_US_TYPE_STOR), +USUAL_DEV(US_SC_8070, US_PR_CB, USB_US_TYPE_STOR), +USUAL_DEV(US_SC_SCSI, US_PR_CB, USB_US_TYPE_STOR), + +/* Control/Bulk/Interrupt transport for all SubClass values */ +USUAL_DEV(US_SC_RBC, US_PR_CBI, USB_US_TYPE_STOR), +USUAL_DEV(US_SC_8020, US_PR_CBI, USB_US_TYPE_STOR), +USUAL_DEV(US_SC_QIC, US_PR_CBI, USB_US_TYPE_STOR), +USUAL_DEV(US_SC_UFI, US_PR_CBI, USB_US_TYPE_STOR), +USUAL_DEV(US_SC_8070, US_PR_CBI, USB_US_TYPE_STOR), +USUAL_DEV(US_SC_SCSI, US_PR_CBI, USB_US_TYPE_STOR), + +/* Bulk-only transport for all SubClass values */ +USUAL_DEV(US_SC_RBC, US_PR_BULK, USB_US_TYPE_STOR), +USUAL_DEV(US_SC_8020, US_PR_BULK, USB_US_TYPE_STOR), +USUAL_DEV(US_SC_QIC, US_PR_BULK, 
USB_US_TYPE_STOR), +USUAL_DEV(US_SC_UFI, US_PR_BULK, USB_US_TYPE_STOR), +USUAL_DEV(US_SC_8070, US_PR_BULK, USB_US_TYPE_STOR), +USUAL_DEV(US_SC_SCSI, US_PR_BULK, 0), diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 3847ebed2aa4..c8375aa62723 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -112,49 +112,33 @@ static atomic_t total_threads = ATOMIC_INIT(0); static DECLARE_COMPLETION(threads_gone); -/* The entries in this table, except for final ones here - * (USB_MASS_STORAGE_CLASS and the empty entry), correspond, - * line for line with the entries of us_unsuaul_dev_list[]. +/* + * The entries in this table correspond, line for line, + * with the entries of us_unusual_dev_list[]. */ +#ifndef CONFIG_USB_LIBUSUAL #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ vendorName, productName,useProtocol, useTransport, \ initFunction, flags) \ -{ USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin,bcdDeviceMax) } +{ USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin,bcdDeviceMax), \ + .driver_info = (flags)|(USB_US_TYPE_STOR<<24) } + +#define USUAL_DEV(useProto, useTrans, useType) \ +{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, useProto, useTrans), \ + .driver_info = (USB_US_TYPE_STOR<<24) } static struct usb_device_id storage_usb_ids [] = { # include "unusual_devs.h" #undef UNUSUAL_DEV - /* Control/Bulk transport for all SubClass values */ - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_RBC, US_PR_CB) }, - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_8020, US_PR_CB) }, - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_QIC, US_PR_CB) }, - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_UFI, US_PR_CB) }, - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_8070, US_PR_CB) }, - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_SCSI, US_PR_CB) }, - - /* Control/Bulk/Interrupt transport for all SubClass values */ - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_RBC, US_PR_CBI) }, - { 
USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_8020, US_PR_CBI) }, - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_QIC, US_PR_CBI) }, - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_UFI, US_PR_CBI) }, - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_8070, US_PR_CBI) }, - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_SCSI, US_PR_CBI) }, - - /* Bulk-only transport for all SubClass values */ - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_RBC, US_PR_BULK) }, - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_8020, US_PR_BULK) }, - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_QIC, US_PR_BULK) }, - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_UFI, US_PR_BULK) }, - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_8070, US_PR_BULK) }, - { USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_SCSI, US_PR_BULK) }, - +#undef USUAL_DEV /* Terminating entry */ { } }; MODULE_DEVICE_TABLE (usb, storage_usb_ids); +#endif /* CONFIG_USB_LIBUSUAL */ /* This is the list of devices we recognize, along with their flag data */ @@ -167,7 +151,6 @@ MODULE_DEVICE_TABLE (usb, storage_usb_ids); * are free to use as many characters as you like. 
*/ -#undef UNUSUAL_DEV #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ @@ -177,53 +160,18 @@ MODULE_DEVICE_TABLE (usb, storage_usb_ids); .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ - .flags = Flags, \ +} + +#define USUAL_DEV(use_protocol, use_transport, use_type) \ +{ \ + .useProtocol = use_protocol, \ + .useTransport = use_transport, \ } static struct us_unusual_dev us_unusual_dev_list[] = { # include "unusual_devs.h" # undef UNUSUAL_DEV - /* Control/Bulk transport for all SubClass values */ - { .useProtocol = US_SC_RBC, - .useTransport = US_PR_CB}, - { .useProtocol = US_SC_8020, - .useTransport = US_PR_CB}, - { .useProtocol = US_SC_QIC, - .useTransport = US_PR_CB}, - { .useProtocol = US_SC_UFI, - .useTransport = US_PR_CB}, - { .useProtocol = US_SC_8070, - .useTransport = US_PR_CB}, - { .useProtocol = US_SC_SCSI, - .useTransport = US_PR_CB}, - - /* Control/Bulk/Interrupt transport for all SubClass values */ - { .useProtocol = US_SC_RBC, - .useTransport = US_PR_CBI}, - { .useProtocol = US_SC_8020, - .useTransport = US_PR_CBI}, - { .useProtocol = US_SC_QIC, - .useTransport = US_PR_CBI}, - { .useProtocol = US_SC_UFI, - .useTransport = US_PR_CBI}, - { .useProtocol = US_SC_8070, - .useTransport = US_PR_CBI}, - { .useProtocol = US_SC_SCSI, - .useTransport = US_PR_CBI}, - - /* Bulk-only transport for all SubClass values */ - { .useProtocol = US_SC_RBC, - .useTransport = US_PR_BULK}, - { .useProtocol = US_SC_8020, - .useTransport = US_PR_BULK}, - { .useProtocol = US_SC_QIC, - .useTransport = US_PR_BULK}, - { .useProtocol = US_SC_UFI, - .useTransport = US_PR_BULK}, - { .useProtocol = US_SC_8070, - .useTransport = US_PR_BULK}, - { .useProtocol = US_SC_SCSI, - .useTransport = US_PR_BULK}, +# undef USUAL_DEV /* Terminating entry */ { NULL } @@ -484,14 +432,20 @@ static int associate_dev(struct us_data *us, struct 
usb_interface *intf) return 0; } +/* Find an unusual_dev descriptor (always succeeds in the current code) */ +static struct us_unusual_dev *find_unusual(const struct usb_device_id *id) +{ + const int id_index = id - storage_usb_ids; + return &us_unusual_dev_list[id_index]; +} + /* Get the unusual_devs entries and the string descriptors */ -static void get_device_info(struct us_data *us, int id_index) +static void get_device_info(struct us_data *us, const struct usb_device_id *id) { struct usb_device *dev = us->pusb_dev; struct usb_interface_descriptor *idesc = &us->pusb_intf->cur_altsetting->desc; - struct us_unusual_dev *unusual_dev = &us_unusual_dev_list[id_index]; - struct usb_device_id *id = &storage_usb_ids[id_index]; + struct us_unusual_dev *unusual_dev = find_unusual(id); /* Store the entries */ us->unusual_dev = unusual_dev; @@ -501,7 +455,7 @@ static void get_device_info(struct us_data *us, int id_index) us->protocol = (unusual_dev->useTransport == US_PR_DEVICE) ? idesc->bInterfaceProtocol : unusual_dev->useTransport; - us->flags = unusual_dev->flags; + us->flags = USB_US_ORIG_FLAGS(id->driver_info); /* * This flag is only needed when we're in high-speed, so let's @@ -529,7 +483,7 @@ static void get_device_info(struct us_data *us, int id_index) if (unusual_dev->useTransport != US_PR_DEVICE && us->protocol == idesc->bInterfaceProtocol) msg += 2; - if (msg >= 0 && !(unusual_dev->flags & US_FL_NEED_OVERRIDE)) + if (msg >= 0 && !(us->flags & US_FL_NEED_OVERRIDE)) printk(KERN_NOTICE USB_STORAGE "This device " "(%04x,%04x,%04x S %02x P %02x)" " has %s in unusual_devs.h\n" @@ -921,10 +875,12 @@ static int storage_probe(struct usb_interface *intf, { struct Scsi_Host *host; struct us_data *us; - const int id_index = id - storage_usb_ids; int result; struct task_struct *th; + if (usb_usual_check_type(id, USB_US_TYPE_STOR)) + return -ENXIO; + US_DEBUGP("USB Mass Storage device detected\n"); /* @@ -957,7 +913,7 @@ static int storage_probe(struct usb_interface *intf, * 
of the match from the usb_device_id table, so we can find the * corresponding entry in the private table. */ - get_device_info(us, id_index); + get_device_info(us, id); #ifdef CONFIG_USB_STORAGE_SDDR09 if (us->protocol == US_PR_EUSB_SDDR09 || @@ -1062,9 +1018,10 @@ static int __init usb_stor_init(void) /* register the driver, return usb_register return code if error */ retval = usb_register(&usb_storage_driver); - if (retval == 0) + if (retval == 0) { printk(KERN_INFO "USB Mass Storage support registered.\n"); - + usb_usual_set_present(USB_US_TYPE_STOR); + } return retval; } @@ -1088,6 +1045,8 @@ static void __exit usb_stor_exit(void) wait_for_completion(&threads_gone); atomic_dec(&total_threads); } + + usb_usual_clear_present(USB_US_TYPE_STOR); } module_init(usb_stor_init); diff --git a/drivers/usb/storage/usb.h b/drivers/usb/storage/usb.h index 98b09711a739..0cd1eebc4497 100644 --- a/drivers/usb/storage/usb.h +++ b/drivers/usb/storage/usb.h @@ -45,6 +45,7 @@ #define _USB_H_ #include +#include #include #include #include @@ -63,38 +64,8 @@ struct us_unusual_dev { __u8 useProtocol; __u8 useTransport; int (*initFunction)(struct us_data *); - unsigned int flags; }; -/* - * Static flag definitions. We use this roundabout technique so that the - * proc_info() routine can automatically display a message for each flag. 
- */ -#define US_DO_ALL_FLAGS \ - US_FLAG(SINGLE_LUN, 0x00000001) \ - /* allow access to only LUN 0 */ \ - US_FLAG(NEED_OVERRIDE, 0x00000002) \ - /* unusual_devs entry is necessary */ \ - US_FLAG(SCM_MULT_TARG, 0x00000004) \ - /* supports multiple targets */ \ - US_FLAG(FIX_INQUIRY, 0x00000008) \ - /* INQUIRY response needs faking */ \ - US_FLAG(FIX_CAPACITY, 0x00000010) \ - /* READ CAPACITY response too big */ \ - US_FLAG(IGNORE_RESIDUE, 0x00000020) \ - /* reported residue is wrong */ \ - US_FLAG(BULK32, 0x00000040) \ - /* Uses 32-byte CBW length */ \ - US_FLAG(NOT_LOCKABLE, 0x00000080) \ - /* PREVENT/ALLOW not supported */ \ - US_FLAG(GO_SLOW, 0x00000100) \ - /* Need delay after Command phase */ \ - US_FLAG(NO_WP_DETECT, 0x00000200) \ - /* Don't check for write-protect */ \ - -#define US_FLAG(name, value) US_FL_##name = value , -enum { US_DO_ALL_FLAGS }; -#undef US_FLAG /* Dynamic flag definitions: used in set_bit() etc. */ #define US_FLIDX_URB_ACTIVE 18 /* 0x00040000 current_urb is in use */ diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h new file mode 100644 index 000000000000..f9c058f33712 --- /dev/null +++ b/include/linux/usb_usual.h @@ -0,0 +1,123 @@ +/* + * Interface to the libusual. + * + * Copyright (c) 2005 Pete Zaitcev + * Copyright (c) 1999-2002 Matthew Dharm (mdharm-usb@one-eyed-alien.net) + * Copyright (c) 1999 Michael Gee (michael@linuxspecific.com) + */ + +#ifndef __LINUX_USB_USUAL_H +#define __LINUX_USB_USUAL_H + +#include + +/* We should do this for cleanliness... But other usb_foo.h do not do this. */ +/* #include */ + +/* + * The flags field, which we store in usb_device_id.driver_info. + * It is compatible with the old usb-storage flags in lower 24 bits. + */ + +/* + * Static flag definitions. We use this roundabout technique so that the + * proc_info() routine can automatically display a message for each flag. 
+ */ +#define US_DO_ALL_FLAGS \ + US_FLAG(SINGLE_LUN, 0x00000001) \ + /* allow access to only LUN 0 */ \ + US_FLAG(NEED_OVERRIDE, 0x00000002) \ + /* unusual_devs entry is necessary */ \ + US_FLAG(SCM_MULT_TARG, 0x00000004) \ + /* supports multiple targets */ \ + US_FLAG(FIX_INQUIRY, 0x00000008) \ + /* INQUIRY response needs faking */ \ + US_FLAG(FIX_CAPACITY, 0x00000010) \ + /* READ CAPACITY response too big */ \ + US_FLAG(IGNORE_RESIDUE, 0x00000020) \ + /* reported residue is wrong */ \ + US_FLAG(BULK32, 0x00000040) \ + /* Uses 32-byte CBW length */ \ + US_FLAG(NOT_LOCKABLE, 0x00000080) \ + /* PREVENT/ALLOW not supported */ \ + US_FLAG(GO_SLOW, 0x00000100) \ + /* Need delay after Command phase */ \ + US_FLAG(NO_WP_DETECT, 0x00000200) \ + /* Don't check for write-protect */ \ + +#define US_FLAG(name, value) US_FL_##name = value , +enum { US_DO_ALL_FLAGS }; +#undef US_FLAG + +/* + * The bias field for libusual and friends. + */ +#define USB_US_TYPE_NONE 0 +#define USB_US_TYPE_STOR 1 /* usb-storage */ +#define USB_US_TYPE_UB 2 /* ub */ + +#define USB_US_TYPE(flags) (((flags) >> 24) & 0xFF) +#define USB_US_ORIG_FLAGS(flags) ((flags) & 0x00FFFFFF) + +/* + * This is probably not the best place to keep these constants, conceptually. + * But it's the only header included into all places which need them. 
+ */ + +/* Sub Classes */ + +#define US_SC_RBC 0x01 /* Typically, flash devices */ +#define US_SC_8020 0x02 /* CD-ROM */ +#define US_SC_QIC 0x03 /* QIC-157 Tapes */ +#define US_SC_UFI 0x04 /* Floppy */ +#define US_SC_8070 0x05 /* Removable media */ +#define US_SC_SCSI 0x06 /* Transparent */ +#define US_SC_ISD200 0x07 /* ISD200 ATA */ +#define US_SC_MIN US_SC_RBC +#define US_SC_MAX US_SC_ISD200 + +#define US_SC_DEVICE 0xff /* Use device's value */ + +/* Protocols */ + +#define US_PR_CBI 0x00 /* Control/Bulk/Interrupt */ +#define US_PR_CB 0x01 /* Control/Bulk w/o interrupt */ +#define US_PR_BULK 0x50 /* bulk only */ +#ifdef CONFIG_USB_STORAGE_USBAT +#define US_PR_USBAT 0x80 /* SCM-ATAPI bridge */ +#endif +#ifdef CONFIG_USB_STORAGE_SDDR09 +#define US_PR_EUSB_SDDR09 0x81 /* SCM-SCSI bridge for SDDR-09 */ +#endif +#ifdef CONFIG_USB_STORAGE_SDDR55 +#define US_PR_SDDR55 0x82 /* SDDR-55 (made up) */ +#endif +#define US_PR_DPCM_USB 0xf0 /* Combination CB/SDDR09 */ +#ifdef CONFIG_USB_STORAGE_FREECOM +#define US_PR_FREECOM 0xf1 /* Freecom */ +#endif +#ifdef CONFIG_USB_STORAGE_DATAFAB +#define US_PR_DATAFAB 0xf2 /* Datafab chipsets */ +#endif +#ifdef CONFIG_USB_STORAGE_JUMPSHOT +#define US_PR_JUMPSHOT 0xf3 /* Lexar Jumpshot */ +#endif + +#define US_PR_DEVICE 0xff /* Use device's value */ + +/* + */ +#ifdef CONFIG_USB_LIBUSUAL + +extern struct usb_device_id storage_usb_ids[]; +extern void usb_usual_set_present(int type); +extern void usb_usual_clear_present(int type); +extern int usb_usual_check_type(const struct usb_device_id *, int type); +#else + +#define usb_usual_set_present(t) do { } while(0) +#define usb_usual_clear_present(t) do { } while(0) +#define usb_usual_check_type(id, t) (0) +#endif /* CONFIG_USB_LIBUSUAL */ + +#endif /* __LINUX_USB_USUAL_H */ -- cgit v1.2.3-71-gd317 From 733260ff9c45bd4db60f45d17e8560a4a68dff4d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 16 Nov 2005 13:41:28 -0800 Subject: [PATCH] USB: add dynamic id functionality to USB core 
Echo the usb vendor and product id to the "new_id" file in the driver's sysfs directory, and then that driver will be able to bind to a device with those ids if it is present. Example: echo 0557 2008 > /sys/bus/usb/drivers/foo_driver/new_id adds the hex values 0557 and 2008 to the device id table for the foo_driver. Note that usb-serial drivers do not work with this capability yet. usb-storage also might have some oddities. Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 218 +++++++++++++++++++++++++++++++++++----------- include/linux/usb.h | 8 ++ 2 files changed, 176 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 921a21be651d..1c0611045379 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -27,6 +27,15 @@ #include "hcd.h" #include "usb.h" +static int usb_match_one_id(struct usb_interface *interface, + const struct usb_device_id *id); + +struct usb_dynid { + struct list_head node; + struct usb_device_id id; +}; + + static int generic_probe(struct device *dev) { return 0; @@ -58,6 +67,96 @@ struct device_driver usb_generic_driver = { * usb device or a usb interface. */ int usb_generic_driver_data; +#ifdef CONFIG_HOTPLUG + +/* + * Adds a new dynamic USB device ID to this driver, + * and causes the driver to probe for all devices again.
+ */ +static ssize_t store_new_id(struct device_driver *driver, + const char *buf, size_t count) +{ + struct usb_driver *usb_drv = to_usb_driver(driver); + struct usb_dynid *dynid; + u32 idVendor = 0; + u32 idProduct = 0; + int fields = 0; + + fields = sscanf(buf, "%x %x", &idVendor, &idProduct); + if (fields < 2) + return -EINVAL; + + dynid = kzalloc(sizeof(*dynid), GFP_KERNEL); + if (!dynid) + return -ENOMEM; + + INIT_LIST_HEAD(&dynid->node); + dynid->id.idVendor = idVendor; + dynid->id.idProduct = idProduct; + dynid->id.match_flags = USB_DEVICE_ID_MATCH_DEVICE; + + spin_lock(&usb_drv->dynids.lock); + list_add_tail(&usb_drv->dynids.list, &dynid->node); + spin_unlock(&usb_drv->dynids.lock); + + if (get_driver(driver)) { + driver_attach(driver); + put_driver(driver); + } + + return count; +} +static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id); + +static int usb_create_newid_file(struct usb_driver *usb_drv) +{ + int error = 0; + + if (usb_drv->probe != NULL) + error = sysfs_create_file(&usb_drv->driver.kobj, + &driver_attr_new_id.attr); + return error; +} + +static void usb_free_dynids(struct usb_driver *usb_drv) +{ + struct usb_dynid *dynid, *n; + + spin_lock(&usb_drv->dynids.lock); + list_for_each_entry_safe(dynid, n, &usb_drv->dynids.list, node) { + list_del(&dynid->node); + kfree(dynid); + } + spin_unlock(&usb_drv->dynids.lock); +} +#else +static inline int usb_create_newid_file(struct usb_driver *usb_drv) +{ + return 0; +} + +static inline void usb_free_dynids(struct usb_driver *usb_drv) +{ +} +#endif + +static const struct usb_device_id *usb_match_dynamic_id(struct usb_interface *intf, + struct usb_driver *drv) +{ + struct usb_dynid *dynid; + + spin_lock(&drv->dynids.lock); + list_for_each_entry(dynid, &drv->dynids.list, node) { + if (usb_match_one_id(intf, &dynid->id)) { + spin_unlock(&drv->dynids.lock); + return &dynid->id; + } + } + spin_unlock(&drv->dynids.lock); + return NULL; +} + + /* called from driver core with usb_bus_type.subsys writelock */ 
static int usb_probe_interface(struct device *dev) { @@ -75,6 +174,8 @@ static int usb_probe_interface(struct device *dev) return -EHOSTUNREACH; id = usb_match_id(intf, driver->id_table); + if (!id) + id = usb_match_dynamic_id(intf, driver); if (id) { dev_dbg(dev, "%s - got id\n", __FUNCTION__); @@ -120,6 +221,64 @@ static int usb_unbind_interface(struct device *dev) return 0; } +/* returns 0 if no match, 1 if match */ +static int usb_match_one_id(struct usb_interface *interface, + const struct usb_device_id *id) +{ + struct usb_host_interface *intf; + struct usb_device *dev; + + /* proc_connectinfo in devio.c may call us with id == NULL. */ + if (id == NULL) + return 0; + + intf = interface->cur_altsetting; + dev = interface_to_usbdev(interface); + + if ((id->match_flags & USB_DEVICE_ID_MATCH_VENDOR) && + id->idVendor != le16_to_cpu(dev->descriptor.idVendor)) + return 0; + + if ((id->match_flags & USB_DEVICE_ID_MATCH_PRODUCT) && + id->idProduct != le16_to_cpu(dev->descriptor.idProduct)) + return 0; + + /* No need to test id->bcdDevice_lo != 0, since 0 is never + greater than any unsigned number. 
*/ + if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_LO) && + (id->bcdDevice_lo > le16_to_cpu(dev->descriptor.bcdDevice))) + return 0; + + if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_HI) && + (id->bcdDevice_hi < le16_to_cpu(dev->descriptor.bcdDevice))) + return 0; + + if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_CLASS) && + (id->bDeviceClass != dev->descriptor.bDeviceClass)) + return 0; + + if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_SUBCLASS) && + (id->bDeviceSubClass!= dev->descriptor.bDeviceSubClass)) + return 0; + + if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_PROTOCOL) && + (id->bDeviceProtocol != dev->descriptor.bDeviceProtocol)) + return 0; + + if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_CLASS) && + (id->bInterfaceClass != intf->desc.bInterfaceClass)) + return 0; + + if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_SUBCLASS) && + (id->bInterfaceSubClass != intf->desc.bInterfaceSubClass)) + return 0; + + if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_PROTOCOL) && + (id->bInterfaceProtocol != intf->desc.bInterfaceProtocol)) + return 0; + + return 1; +} /** * usb_match_id - find first usb_device_id matching device or interface * @interface: the interface of interest @@ -184,16 +343,10 @@ static int usb_unbind_interface(struct device *dev) const struct usb_device_id *usb_match_id(struct usb_interface *interface, const struct usb_device_id *id) { - struct usb_host_interface *intf; - struct usb_device *dev; - /* proc_connectinfo in devio.c may call us with id == NULL. */ if (id == NULL) return NULL; - intf = interface->cur_altsetting; - dev = interface_to_usbdev(interface); - /* It is important to check that id->driver_info is nonzero, since an entry that is all zeroes except for a nonzero id->driver_info is the way to create an entry that @@ -201,50 +354,8 @@ const struct usb_device_id *usb_match_id(struct usb_interface *interface, device and interface. 
*/ for (; id->idVendor || id->bDeviceClass || id->bInterfaceClass || id->driver_info; id++) { - - if ((id->match_flags & USB_DEVICE_ID_MATCH_VENDOR) && - id->idVendor != le16_to_cpu(dev->descriptor.idVendor)) - continue; - - if ((id->match_flags & USB_DEVICE_ID_MATCH_PRODUCT) && - id->idProduct != le16_to_cpu(dev->descriptor.idProduct)) - continue; - - /* No need to test id->bcdDevice_lo != 0, since 0 is never - greater than any unsigned number. */ - if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_LO) && - (id->bcdDevice_lo > le16_to_cpu(dev->descriptor.bcdDevice))) - continue; - - if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_HI) && - (id->bcdDevice_hi < le16_to_cpu(dev->descriptor.bcdDevice))) - continue; - - if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_CLASS) && - (id->bDeviceClass != dev->descriptor.bDeviceClass)) - continue; - - if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_SUBCLASS) && - (id->bDeviceSubClass!= dev->descriptor.bDeviceSubClass)) - continue; - - if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_PROTOCOL) && - (id->bDeviceProtocol != dev->descriptor.bDeviceProtocol)) - continue; - - if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_CLASS) && - (id->bInterfaceClass != intf->desc.bInterfaceClass)) - continue; - - if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_SUBCLASS) && - (id->bInterfaceSubClass != intf->desc.bInterfaceSubClass)) - continue; - - if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_PROTOCOL) && - (id->bInterfaceProtocol != intf->desc.bInterfaceProtocol)) - continue; - - return id; + if (usb_match_one_id(interface, id)) + return id; } return NULL; @@ -268,6 +379,9 @@ int usb_device_match(struct device *dev, struct device_driver *drv) if (id) return 1; + id = usb_match_dynamic_id(intf, usb_drv); + if (id) + return 1; return 0; } @@ -296,6 +410,8 @@ int usb_register(struct usb_driver *new_driver) new_driver->driver.probe = usb_probe_interface; new_driver->driver.remove = usb_unbind_interface; new_driver->driver.owner = new_driver->owner; + 
spin_lock_init(&new_driver->dynids.lock); + INIT_LIST_HEAD(&new_driver->dynids.list); usb_lock_all_devices(); retval = driver_register(&new_driver->driver); @@ -305,6 +421,7 @@ int usb_register(struct usb_driver *new_driver) pr_info("%s: registered new driver %s\n", usbcore_name, new_driver->name); usbfs_update_special(); + usb_create_newid_file(new_driver); } else { printk(KERN_ERR "%s: error %d registering driver %s\n", usbcore_name, retval, new_driver->name); @@ -330,6 +447,7 @@ void usb_deregister(struct usb_driver *driver) pr_info("%s: deregistering driver %s\n", usbcore_name, driver->name); usb_lock_all_devices(); + usb_free_dynids(driver); driver_unregister(&driver->driver); usb_unlock_all_devices(); diff --git a/include/linux/usb.h b/include/linux/usb.h index d81b050e5955..0dd96ef78c13 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -529,6 +529,11 @@ static inline int usb_make_path (struct usb_device *dev, char *buf, /* ----------------------------------------------------------------------- */ +struct usb_dynids { + spinlock_t lock; + struct list_head list; +}; + /** * struct usb_driver - identifies USB driver to usbcore * @owner: Pointer to the module owner of this driver; initialize @@ -553,6 +558,8 @@ static inline int usb_make_path (struct usb_device *dev, char *buf, * @id_table: USB drivers use ID table to support hotplugging. * Export this with MODULE_DEVICE_TABLE(usb,...). This must be set * or your driver's probe function will never get called. + * @dynids: used internally to hold the list of dynamically added device + * ids for this driver. * @driver: the driver model core driver structure. 
* * USB drivers must provide a name, probe() and disconnect() methods, @@ -588,6 +595,7 @@ struct usb_driver { const struct usb_device_id *id_table; + struct usb_dynids dynids; struct device_driver driver; }; #define to_usb_driver(d) container_of(d, struct usb_driver, driver) -- cgit v1.2.3-71-gd317 From ba9dc657af86d05d2971633e57d1f6f94ed60472 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 16 Nov 2005 13:41:28 -0800 Subject: [PATCH] USB: allow usb drivers to disable dynamic ids This lets drivers, like the usb-serial ones, disable the ability to add ids from sysfs. The usb-serial drivers are "odd" in that they are really usb-serial bus drivers, not usb bus drivers, so the dynamic id logic will have to go into the usb-serial bus core for those drivers to get that ability. Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 19 +++++++++++++++++++ drivers/usb/serial/airprime.c | 1 + drivers/usb/serial/anydata.c | 1 + drivers/usb/serial/belkin_sa.c | 1 + drivers/usb/serial/cp2101.c | 1 + drivers/usb/serial/cyberjack.c | 1 + drivers/usb/serial/cypress_m8.c | 1 + drivers/usb/serial/digi_acceleport.c | 1 + drivers/usb/serial/empeg.c | 1 + drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/garmin_gps.c | 1 + drivers/usb/serial/generic.c | 1 + drivers/usb/serial/hp4x.c | 1 + drivers/usb/serial/io_edgeport.c | 1 + drivers/usb/serial/io_ti.c | 1 + drivers/usb/serial/ipaq.c | 1 + drivers/usb/serial/ipw.c | 1 + drivers/usb/serial/ir-usb.c | 1 + drivers/usb/serial/keyspan.h | 1 + drivers/usb/serial/keyspan_pda.c | 1 + drivers/usb/serial/kl5kusb105.c | 1 + drivers/usb/serial/kobil_sct.c | 1 + drivers/usb/serial/mct_u232.c | 1 + drivers/usb/serial/omninet.c | 1 + drivers/usb/serial/option.c | 1 + drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/safe_serial.c | 1 + drivers/usb/serial/ti_usb_3410_5052.c | 1 + drivers/usb/serial/usb-serial.c | 1 + drivers/usb/serial/visor.c | 1 + drivers/usb/serial/whiteheat.c | 1 + include/linux/usb.h | 3 +++ 32 
files changed, 52 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 1c0611045379..5e65bc258e1b 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -112,12 +112,26 @@ static int usb_create_newid_file(struct usb_driver *usb_drv) { int error = 0; + if (usb_drv->no_dynamic_id) + goto exit; + if (usb_drv->probe != NULL) error = sysfs_create_file(&usb_drv->driver.kobj, &driver_attr_new_id.attr); +exit: return error; } +static void usb_remove_newid_file(struct usb_driver *usb_drv) +{ + if (usb_drv->no_dynamic_id) + return; + + if (usb_drv->probe != NULL) + sysfs_remove_file(&usb_drv->driver.kobj, + &driver_attr_new_id.attr); +} + static void usb_free_dynids(struct usb_driver *usb_drv) { struct usb_dynid *dynid, *n; @@ -135,6 +149,10 @@ static inline int usb_create_newid_file(struct usb_driver *usb_drv) return 0; } +static void usb_remove_newid_file(struct usb_driver *usb_drv) +{ +} + static inline void usb_free_dynids(struct usb_driver *usb_drv) { } @@ -447,6 +465,7 @@ void usb_deregister(struct usb_driver *driver) pr_info("%s: deregistering driver %s\n", usbcore_name, driver->name); usb_lock_all_devices(); + usb_remove_newid_file(driver); usb_free_dynids(driver); driver_unregister(&driver->driver); usb_unlock_all_devices(); diff --git a/drivers/usb/serial/airprime.c b/drivers/usb/serial/airprime.c index 1f29d8837327..2ef9945a6c07 100644 --- a/drivers/usb/serial/airprime.c +++ b/drivers/usb/serial/airprime.c @@ -28,6 +28,7 @@ static struct usb_driver airprime_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table, + .no_dynamic_id = 1, }; static struct usb_serial_driver airprime_device = { diff --git a/drivers/usb/serial/anydata.c b/drivers/usb/serial/anydata.c index 18022a74a3dc..7a171e034b59 100644 --- a/drivers/usb/serial/anydata.c +++ b/drivers/usb/serial/anydata.c @@ -32,6 +32,7 @@ static struct usb_driver anydata_driver = { .probe = 
usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table, + .no_dynamic_id = 1, }; static int anydata_open(struct usb_serial_port *port, struct file *filp) diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c index 84bc0ee4f061..69039bd9fc5e 100644 --- a/drivers/usb/serial/belkin_sa.c +++ b/drivers/usb/serial/belkin_sa.c @@ -118,6 +118,7 @@ static struct usb_driver belkin_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table_combined, + .no_dynamic_id = 1, }; /* All of the device info needed for the serial converters */ diff --git a/drivers/usb/serial/cp2101.c b/drivers/usb/serial/cp2101.c index c9787001cf2a..813bab37e076 100644 --- a/drivers/usb/serial/cp2101.c +++ b/drivers/usb/serial/cp2101.c @@ -72,6 +72,7 @@ static struct usb_driver cp2101_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table, + .no_dynamic_id = 1, }; static struct usb_serial_driver cp2101_device = { diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c index e581e4ae8483..8c10e4004905 100644 --- a/drivers/usb/serial/cyberjack.c +++ b/drivers/usb/serial/cyberjack.c @@ -81,6 +81,7 @@ static struct usb_driver cyberjack_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table, + .no_dynamic_id = 1, }; static struct usb_serial_driver cyberjack_device = { diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c index af9290ed257b..af18355e94cc 100644 --- a/drivers/usb/serial/cypress_m8.c +++ b/drivers/usb/serial/cypress_m8.c @@ -112,6 +112,7 @@ static struct usb_driver cypress_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table_combined, + .no_dynamic_id = 1, }; struct cypress_private { diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index dc74644a603d..c50cec95f49b 100644 --- 
a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -498,6 +498,7 @@ static struct usb_driver digi_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table_combined, + .no_dynamic_id = 1, }; diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c index 0b0546dcc7b9..e5e40064caf2 100644 --- a/drivers/usb/serial/empeg.c +++ b/drivers/usb/serial/empeg.c @@ -110,6 +110,7 @@ static struct usb_driver empeg_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table, + .no_dynamic_id = 1, }; static struct usb_serial_driver empeg_device = { diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 06e04b442ff1..857fe791d702 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -488,6 +488,7 @@ static struct usb_driver ftdi_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table_combined, + .no_dynamic_id = 1, }; static char *ftdi_chip_name[] = { diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 35820bda7ae1..198a322286f9 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -227,6 +227,7 @@ static struct usb_driver garmin_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table, + .no_dynamic_id = 1, }; diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 53a47c31cd0e..c00a440dc421 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -73,6 +73,7 @@ static struct usb_driver generic_driver = { .probe = generic_probe, .disconnect = usb_serial_disconnect, .id_table = generic_serial_ids, + .no_dynamic_id = 1, }; #endif diff --git a/drivers/usb/serial/hp4x.c b/drivers/usb/serial/hp4x.c index 8eadfb705601..e588c3fe632d 100644 --- a/drivers/usb/serial/hp4x.c +++ b/drivers/usb/serial/hp4x.c @@ -42,6 +42,7 @@ static 
struct usb_driver hp49gp_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table, + .no_dynamic_id = 1, }; static struct usb_serial_driver hp49gp_device = { diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index dc4c498bd1ed..276bd425a474 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -247,6 +247,7 @@ static struct usb_driver io_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table_combined, + .no_dynamic_id = 1, }; /* function prototypes for all of our local functions */ diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index 832b6d6734c0..8b2e4c78abcd 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -221,6 +221,7 @@ static struct usb_driver io_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table_combined, + .no_dynamic_id = 1, }; diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c index d5d066488100..efb568be7015 100644 --- a/drivers/usb/serial/ipaq.c +++ b/drivers/usb/serial/ipaq.c @@ -547,6 +547,7 @@ static struct usb_driver ipaq_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = ipaq_id_table, + .no_dynamic_id = 1, }; diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c index 7744b8148bc5..64e2cda2a84a 100644 --- a/drivers/usb/serial/ipw.c +++ b/drivers/usb/serial/ipw.c @@ -157,6 +157,7 @@ static struct usb_driver usb_ipw_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = usb_ipw_ids, + .no_dynamic_id = 1, }; static int debug; diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c index 19f329e9bdcf..647431c1ccb1 100644 --- a/drivers/usb/serial/ir-usb.c +++ b/drivers/usb/serial/ir-usb.c @@ -130,6 +130,7 @@ static struct usb_driver ir_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, 
.id_table = id_table, + .no_dynamic_id = 1, }; diff --git a/drivers/usb/serial/keyspan.h b/drivers/usb/serial/keyspan.h index 5cfc13b5e56f..4e6f626f6062 100644 --- a/drivers/usb/serial/keyspan.h +++ b/drivers/usb/serial/keyspan.h @@ -525,6 +525,7 @@ static struct usb_driver keyspan_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = keyspan_ids_combined, + .no_dynamic_id = 1, }; /* usb_device_id table for the pre-firmware download keyspan devices */ diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index cd4f48bd83b6..0d1f15268549 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -155,6 +155,7 @@ static struct usb_driver keyspan_pda_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table_combined, + .no_dynamic_id = 1, }; static struct usb_device_id id_table_std [] = { diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index a8951c0fd020..bd68638b7c35 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -121,6 +121,7 @@ static struct usb_driver kl5kusb105d_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table, + .no_dynamic_id = 1, }; static struct usb_serial_driver kl5kusb105d_device = { diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c index 9456dd9dd136..4c853afea385 100644 --- a/drivers/usb/serial/kobil_sct.c +++ b/drivers/usb/serial/kobil_sct.c @@ -102,6 +102,7 @@ static struct usb_driver kobil_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table, + .no_dynamic_id = 1, }; diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index ca5dbadb9b7e..b0415e7542c4 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -130,6 +130,7 @@ static struct usb_driver mct_u232_driver = { .probe = usb_serial_probe, 
.disconnect = usb_serial_disconnect, .id_table = id_table_combined, + .no_dynamic_id = 1, }; static struct usb_serial_driver mct_u232_device = { diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index 3caf97072ac0..b595befb24cf 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -85,6 +85,7 @@ static struct usb_driver omninet_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table, + .no_dynamic_id = 1, }; diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7716000045b7..4ee657eaaa0b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -100,6 +100,7 @@ static struct usb_driver option_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = option_ids, + .no_dynamic_id = 1, }; /* The card has three separate interfaces, wich the serial driver diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 41a45a5025b2..e302a320444c 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -87,6 +87,7 @@ static struct usb_driver pl2303_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table, + .no_dynamic_id = 1, }; #define SET_LINE_REQUEST_TYPE 0x21 diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c index c22bdc0c4dfd..f8241c152043 100644 --- a/drivers/usb/serial/safe_serial.c +++ b/drivers/usb/serial/safe_serial.c @@ -165,6 +165,7 @@ static struct usb_driver safe_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table, + .no_dynamic_id = 1, }; static __u16 crc10_table[256] = { diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 205dbf7201da..17a1f09483bd 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -253,6 +253,7 @@ static struct usb_driver ti_usb_driver = { 
.probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = ti_id_table_combined, + .no_dynamic_id = 1, }; static struct usb_serial_driver ti_1port_device = { diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 0c4881d18cd5..2ac37b52485a 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -46,6 +46,7 @@ static struct usb_driver usb_serial_driver = { .name = "usbserial", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, + .no_dynamic_id = 1, }; /* There is no MODULE_DEVICE_TABLE for usbserial.c. Instead diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c index a473c1c34559..2973f5564c06 100644 --- a/drivers/usb/serial/visor.c +++ b/drivers/usb/serial/visor.c @@ -178,6 +178,7 @@ static struct usb_driver visor_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table_combined, + .no_dynamic_id = 1, }; /* All of the device info needed for the Handspring Visor, and Palm 4.0 devices */ diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 18c3183be769..19c6386bb692 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -132,6 +132,7 @@ static struct usb_driver whiteheat_driver = { .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table_combined, + .no_dynamic_id = 1, }; /* function prototypes for the Connect Tech WhiteHEAT prerenumeration device */ diff --git a/include/linux/usb.h b/include/linux/usb.h index 0dd96ef78c13..8d5829936bc4 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -561,6 +561,8 @@ struct usb_dynids { * @dynids: used internally to hold the list of dynamically added device * ids for this driver. * @driver: the driver model core driver structure. + * @no_dynamic_id: if set to 1, the USB core will not allow dynamic ids to be + * added to this driver by preventing the sysfs file from being created. 
* * USB drivers must provide a name, probe() and disconnect() methods, * and an id_table. Other driver fields are optional. @@ -597,6 +599,7 @@ struct usb_driver { struct usb_dynids dynids; struct device_driver driver; + unsigned int no_dynamic_id:1; }; #define to_usb_driver(d) container_of(d, struct usb_driver, driver) -- cgit v1.2.3-71-gd317 From 2143acc6dc79bdbff812f02a7dc5ab9d4fc81fc8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 21 Nov 2005 14:53:03 -0800 Subject: [PATCH] USB: make registering a usb driver automatically set the module owner This fixes the driver that forgot to set the module owner up. Now we can remove the unneeded pointer from the usb driver structure. The idea for how to do this was from Al Viro, who did this for the PCI drivers. Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 9 +++++---- include/linux/usb.h | 6 +++++- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 5e65bc258e1b..bb139f06bcd6 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -404,8 +404,9 @@ int usb_device_match(struct device *dev, struct device_driver *drv) } /** - * usb_register - register a USB driver + * usb_register_driver - register a USB driver * @new_driver: USB operations for the driver + * @owner: module owner of this driver. * * Registers a USB driver with the USB core. The list of unattached * interfaces will be rescanned whenever a new driver is added, allowing @@ -416,7 +417,7 @@ int usb_device_match(struct device *dev, struct device_driver *drv) * usb_register_dev() to enable that functionality. This function no longer * takes care of that. 
*/ -int usb_register(struct usb_driver *new_driver) +int usb_register_driver(struct usb_driver *new_driver, struct module *owner) { int retval = 0; @@ -427,7 +428,7 @@ int usb_register(struct usb_driver *new_driver) new_driver->driver.bus = &usb_bus_type; new_driver->driver.probe = usb_probe_interface; new_driver->driver.remove = usb_unbind_interface; - new_driver->driver.owner = new_driver->owner; + new_driver->driver.owner = owner; spin_lock_init(&new_driver->dynids.lock); INIT_LIST_HEAD(&new_driver->dynids.list); @@ -447,7 +448,7 @@ int usb_register(struct usb_driver *new_driver) return retval; } -EXPORT_SYMBOL_GPL(usb_register); +EXPORT_SYMBOL_GPL(usb_register_driver); /** * usb_deregister - unregister a USB driver diff --git a/include/linux/usb.h b/include/linux/usb.h index 8d5829936bc4..3d05c63451a8 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -625,7 +625,11 @@ struct usb_class_driver { * use these in module_init()/module_exit() * and don't forget MODULE_DEVICE_TABLE(usb, ...) */ -extern int usb_register(struct usb_driver *); +int usb_register_driver(struct usb_driver *, struct module *); +static inline int usb_register(struct usb_driver *driver) +{ + return usb_register_driver(driver, THIS_MODULE); +} extern void usb_deregister(struct usb_driver *); extern int usb_register_dev(struct usb_interface *intf, -- cgit v1.2.3-71-gd317 From 75318d2d7cab77b14c5d3dbd5e69f2680a769e16 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 21 Nov 2005 14:53:03 -0800 Subject: [PATCH] USB: remove .owner field from struct usb_driver It is no longer needed, so let's remove it, saving a bit of memory. 
Signed-off-by: Greg Kroah-Hartman --- drivers/block/ub.c | 1 - drivers/bluetooth/bcm203x.c | 1 - drivers/bluetooth/bfusb.c | 1 - drivers/bluetooth/bpa10x.c | 1 - drivers/bluetooth/hci_usb.c | 1 - drivers/char/watchdog/pcwd_usb.c | 1 - drivers/input/joystick/iforce/iforce-usb.c | 1 - drivers/isdn/hisax/hfc_usb.c | 1 - drivers/isdn/hisax/st5481_init.c | 1 - drivers/media/dvb/b2c2/flexcop-usb.c | 1 - drivers/media/dvb/cinergyT2/cinergyT2.c | 1 - drivers/media/dvb/dvb-usb/a800.c | 1 - drivers/media/dvb/dvb-usb/cxusb.c | 1 - drivers/media/dvb/dvb-usb/dibusb-mb.c | 1 - drivers/media/dvb/dvb-usb/dibusb-mc.c | 1 - drivers/media/dvb/dvb-usb/digitv.c | 1 - drivers/media/dvb/dvb-usb/dtt200u.c | 1 - drivers/media/dvb/dvb-usb/nova-t-usb2.c | 1 - drivers/media/dvb/dvb-usb/umt-010.c | 1 - drivers/media/dvb/dvb-usb/vp702x.c | 1 - drivers/media/dvb/dvb-usb/vp7045.c | 1 - drivers/media/video/cpia_usb.c | 1 - drivers/media/video/em28xx/em28xx-video.c | 1 - drivers/net/irda/irda-usb.c | 1 - drivers/net/irda/stir4200.c | 1 - drivers/usb/atm/cxacru.c | 1 - drivers/usb/atm/speedtch.c | 1 - drivers/usb/atm/ueagle-atm.c | 1 - drivers/usb/atm/xusbatm.c | 1 - drivers/usb/class/audio.c | 1 - drivers/usb/class/cdc-acm.c | 1 - drivers/usb/class/usb-midi.c | 1 - drivers/usb/class/usblp.c | 1 - drivers/usb/core/devio.c | 1 - drivers/usb/core/hub.c | 1 - drivers/usb/image/mdc800.c | 1 - drivers/usb/image/microtek.c | 1 - drivers/usb/input/acecad.c | 1 - drivers/usb/input/aiptek.c | 1 - drivers/usb/input/appletouch.c | 1 - drivers/usb/input/ati_remote.c | 1 - drivers/usb/input/hid-core.c | 1 - drivers/usb/input/hiddev.c | 1 - drivers/usb/input/itmtouch.c | 1 - drivers/usb/input/kbtab.c | 1 - drivers/usb/input/keyspan_remote.c | 1 - drivers/usb/input/mtouchusb.c | 1 - drivers/usb/input/powermate.c | 1 - drivers/usb/input/touchkitusb.c | 1 - drivers/usb/input/usbkbd.c | 1 - drivers/usb/input/usbmouse.c | 1 - drivers/usb/input/wacom.c | 1 - drivers/usb/input/xpad.c | 1 - drivers/usb/input/yealink.c | 
1 - drivers/usb/media/dabusb.c | 1 - drivers/usb/media/dsbr100.c | 1 - drivers/usb/media/ov511.c | 1 - drivers/usb/media/pwc/pwc-if.c | 1 - drivers/usb/media/se401.c | 1 - drivers/usb/media/sn9c102_core.c | 1 - drivers/usb/media/stv680.c | 1 - drivers/usb/media/vicam.c | 1 - drivers/usb/media/w9968cf.c | 1 - drivers/usb/misc/auerswald.c | 1 - drivers/usb/misc/cytherm.c | 1 - drivers/usb/misc/emi26.c | 1 - drivers/usb/misc/emi62.c | 1 - drivers/usb/misc/idmouse.c | 1 - drivers/usb/misc/ldusb.c | 1 - drivers/usb/misc/legousbtower.c | 1 - drivers/usb/misc/phidgetkit.c | 1 - drivers/usb/misc/phidgetservo.c | 1 - drivers/usb/misc/rio500.c | 1 - drivers/usb/misc/sisusbvga/sisusb.c | 1 - drivers/usb/misc/usblcd.c | 1 - drivers/usb/misc/usbled.c | 1 - drivers/usb/misc/usbtest.c | 1 - drivers/usb/misc/uss720.c | 1 - drivers/usb/net/asix.c | 1 - drivers/usb/net/catc.c | 1 - drivers/usb/net/cdc_ether.c | 1 - drivers/usb/net/cdc_subset.c | 1 - drivers/usb/net/gl620a.c | 1 - drivers/usb/net/kaweth.c | 1 - drivers/usb/net/net1080.c | 1 - drivers/usb/net/plusb.c | 1 - drivers/usb/net/rndis_host.c | 1 - drivers/usb/net/rtl8150.c | 1 - drivers/usb/net/zaurus.c | 1 - drivers/usb/net/zd1201.c | 1 - drivers/usb/serial/airprime.c | 1 - drivers/usb/serial/anydata.c | 1 - drivers/usb/serial/belkin_sa.c | 1 - drivers/usb/serial/cp2101.c | 1 - drivers/usb/serial/cyberjack.c | 1 - drivers/usb/serial/digi_acceleport.c | 1 - drivers/usb/serial/empeg.c | 1 - drivers/usb/serial/garmin_gps.c | 1 - drivers/usb/serial/generic.c | 1 - drivers/usb/serial/hp4x.c | 1 - drivers/usb/serial/io_edgeport.c | 1 - drivers/usb/serial/io_ti.c | 1 - drivers/usb/serial/ipaq.c | 1 - drivers/usb/serial/ipw.c | 1 - drivers/usb/serial/ir-usb.c | 1 - drivers/usb/serial/keyspan.h | 1 - drivers/usb/serial/keyspan_pda.c | 1 - drivers/usb/serial/kl5kusb105.c | 1 - drivers/usb/serial/kobil_sct.c | 1 - drivers/usb/serial/mct_u232.c | 1 - drivers/usb/serial/omninet.c | 1 - drivers/usb/serial/option.c | 1 - 
drivers/usb/serial/pl2303.c | 1 - drivers/usb/serial/safe_serial.c | 1 - drivers/usb/serial/ti_usb_3410_5052.c | 1 - drivers/usb/serial/usb-serial.c | 1 - drivers/usb/serial/visor.c | 1 - drivers/usb/serial/whiteheat.c | 1 - drivers/usb/storage/libusual.c | 1 - drivers/usb/storage/usb.c | 1 - drivers/usb/usb-skeleton.c | 1 - drivers/w1/dscore.c | 1 - include/linux/usb.h | 4 ---- sound/usb/usbaudio.c | 1 - sound/usb/usx2y/usbusx2y.c | 1 - 125 files changed, 128 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/ub.c b/drivers/block/ub.c index 06d741d58a68..c7a28f5be42f 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -2460,7 +2460,6 @@ static void ub_disconnect(struct usb_interface *intf) } static struct usb_driver ub_driver = { - .owner = THIS_MODULE, .name = "ub", .probe = ub_probe, .disconnect = ub_disconnect, diff --git a/drivers/bluetooth/bcm203x.c b/drivers/bluetooth/bcm203x.c index 8e7fb3551775..3e7a067cc087 100644 --- a/drivers/bluetooth/bcm203x.c +++ b/drivers/bluetooth/bcm203x.c @@ -275,7 +275,6 @@ static void bcm203x_disconnect(struct usb_interface *intf) } static struct usb_driver bcm203x_driver = { - .owner = THIS_MODULE, .name = "bcm203x", .probe = bcm203x_probe, .disconnect = bcm203x_disconnect, diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c index 067e27893e4a..8947c8837dac 100644 --- a/drivers/bluetooth/bfusb.c +++ b/drivers/bluetooth/bfusb.c @@ -768,7 +768,6 @@ static void bfusb_disconnect(struct usb_interface *intf) } static struct usb_driver bfusb_driver = { - .owner = THIS_MODULE, .name = "bfusb", .probe = bfusb_probe, .disconnect = bfusb_disconnect, diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index 394796315adc..9446960ac742 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -619,7 +619,6 @@ static void bpa10x_disconnect(struct usb_interface *intf) } static struct usb_driver bpa10x_driver = { - .owner = THIS_MODULE, .name = "bpa10x", .probe = 
bpa10x_probe, .disconnect = bpa10x_disconnect, diff --git a/drivers/bluetooth/hci_usb.c b/drivers/bluetooth/hci_usb.c index 057cb2b6e6d1..92382e823285 100644 --- a/drivers/bluetooth/hci_usb.c +++ b/drivers/bluetooth/hci_usb.c @@ -1044,7 +1044,6 @@ static void hci_usb_disconnect(struct usb_interface *intf) } static struct usb_driver hci_usb_driver = { - .owner = THIS_MODULE, .name = "hci_usb", .probe = hci_usb_probe, .disconnect = hci_usb_disconnect, diff --git a/drivers/char/watchdog/pcwd_usb.c b/drivers/char/watchdog/pcwd_usb.c index 092e9b133750..1533f56baa42 100644 --- a/drivers/char/watchdog/pcwd_usb.c +++ b/drivers/char/watchdog/pcwd_usb.c @@ -151,7 +151,6 @@ static void usb_pcwd_disconnect (struct usb_interface *interface); /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver usb_pcwd_driver = { - .owner = THIS_MODULE, .name = DRIVER_NAME, .probe = usb_pcwd_probe, .disconnect = usb_pcwd_disconnect, diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c index 64b4a3080985..bc2fce60f9f8 100644 --- a/drivers/input/joystick/iforce/iforce-usb.c +++ b/drivers/input/joystick/iforce/iforce-usb.c @@ -235,7 +235,6 @@ static struct usb_device_id iforce_usb_ids [] = { MODULE_DEVICE_TABLE (usb, iforce_usb_ids); struct usb_driver iforce_usb_driver = { - .owner = THIS_MODULE, .name = "iforce", .probe = iforce_usb_probe, .disconnect = iforce_usb_disconnect, diff --git a/drivers/isdn/hisax/hfc_usb.c b/drivers/isdn/hisax/hfc_usb.c index f8457ef48826..ca5b4a3b683e 100644 --- a/drivers/isdn/hisax/hfc_usb.c +++ b/drivers/isdn/hisax/hfc_usb.c @@ -1715,7 +1715,6 @@ hfc_usb_disconnect(struct usb_interface /* our driver information structure */ /************************************/ static struct usb_driver hfc_drv = { - .owner = THIS_MODULE, .name = "hfc_usb", .id_table = hfcusb_idtab, .probe = hfc_usb_probe, diff --git a/drivers/isdn/hisax/st5481_init.c 
b/drivers/isdn/hisax/st5481_init.c index 8e192a3a3490..99cb0f3d59a1 100644 --- a/drivers/isdn/hisax/st5481_init.c +++ b/drivers/isdn/hisax/st5481_init.c @@ -180,7 +180,6 @@ static struct usb_device_id st5481_ids[] = { MODULE_DEVICE_TABLE (usb, st5481_ids); static struct usb_driver st5481_usb_driver = { - .owner = THIS_MODULE, .name = "st5481_usb", .probe = probe_st5481, .disconnect = disconnect_st5481, diff --git a/drivers/media/dvb/b2c2/flexcop-usb.c b/drivers/media/dvb/b2c2/flexcop-usb.c index 0a78ba3737a5..a6c91db40ad6 100644 --- a/drivers/media/dvb/b2c2/flexcop-usb.c +++ b/drivers/media/dvb/b2c2/flexcop-usb.c @@ -544,7 +544,6 @@ static struct usb_device_id flexcop_usb_table [] = { /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver flexcop_usb_driver = { - .owner = THIS_MODULE, .name = "b2c2_flexcop_usb", .probe = flexcop_usb_probe, .disconnect = flexcop_usb_disconnect, diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c index 336fc284fa52..b996fb59b7e4 100644 --- a/drivers/media/dvb/cinergyT2/cinergyT2.c +++ b/drivers/media/dvb/cinergyT2/cinergyT2.c @@ -986,7 +986,6 @@ static const struct usb_device_id cinergyt2_table [] __devinitdata = { MODULE_DEVICE_TABLE(usb, cinergyt2_table); static struct usb_driver cinergyt2_driver = { - .owner = THIS_MODULE, .name = "cinergyT2", .probe = cinergyt2_probe, .disconnect = cinergyt2_disconnect, diff --git a/drivers/media/dvb/dvb-usb/a800.c b/drivers/media/dvb/dvb-usb/a800.c index 8c7beffb045f..ce44aa6bbb83 100644 --- a/drivers/media/dvb/dvb-usb/a800.c +++ b/drivers/media/dvb/dvb-usb/a800.c @@ -144,7 +144,6 @@ static struct dvb_usb_properties a800_properties = { }; static struct usb_driver a800_driver = { - .owner = THIS_MODULE, .name = "dvb_usb_a800", .probe = a800_probe, .disconnect = dvb_usb_device_exit, diff --git a/drivers/media/dvb/dvb-usb/cxusb.c b/drivers/media/dvb/dvb-usb/cxusb.c index 3fe383f4bb4c..d05fab01cccd 100644 
--- a/drivers/media/dvb/dvb-usb/cxusb.c +++ b/drivers/media/dvb/dvb-usb/cxusb.c @@ -241,7 +241,6 @@ static struct dvb_usb_properties cxusb_properties = { }; static struct usb_driver cxusb_driver = { - .owner = THIS_MODULE, .name = "dvb_usb_cxusb", .probe = cxusb_probe, .disconnect = dvb_usb_device_exit, diff --git a/drivers/media/dvb/dvb-usb/dibusb-mb.c b/drivers/media/dvb/dvb-usb/dibusb-mb.c index aa271a2496d5..52ac3e5adf5d 100644 --- a/drivers/media/dvb/dvb-usb/dibusb-mb.c +++ b/drivers/media/dvb/dvb-usb/dibusb-mb.c @@ -373,7 +373,6 @@ static struct dvb_usb_properties artec_t1_usb2_properties = { }; static struct usb_driver dibusb_driver = { - .owner = THIS_MODULE, .name = "dvb_usb_dibusb_mb", .probe = dibusb_probe, .disconnect = dvb_usb_device_exit, diff --git a/drivers/media/dvb/dvb-usb/dibusb-mc.c b/drivers/media/dvb/dvb-usb/dibusb-mc.c index 6a0912eab396..55802fba3c29 100644 --- a/drivers/media/dvb/dvb-usb/dibusb-mc.c +++ b/drivers/media/dvb/dvb-usb/dibusb-mc.c @@ -82,7 +82,6 @@ static struct dvb_usb_properties dibusb_mc_properties = { }; static struct usb_driver dibusb_mc_driver = { - .owner = THIS_MODULE, .name = "dvb_usb_dibusb_mc", .probe = dibusb_mc_probe, .disconnect = dvb_usb_device_exit, diff --git a/drivers/media/dvb/dvb-usb/digitv.c b/drivers/media/dvb/dvb-usb/digitv.c index f98e306a5759..450417a9e64b 100644 --- a/drivers/media/dvb/dvb-usb/digitv.c +++ b/drivers/media/dvb/dvb-usb/digitv.c @@ -233,7 +233,6 @@ static struct dvb_usb_properties digitv_properties = { }; static struct usb_driver digitv_driver = { - .owner = THIS_MODULE, .name = "dvb_usb_digitv", .probe = digitv_probe, .disconnect = dvb_usb_device_exit, diff --git a/drivers/media/dvb/dvb-usb/dtt200u.c b/drivers/media/dvb/dvb-usb/dtt200u.c index b595476332cd..6e2bac873445 100644 --- a/drivers/media/dvb/dvb-usb/dtt200u.c +++ b/drivers/media/dvb/dvb-usb/dtt200u.c @@ -198,7 +198,6 @@ static struct dvb_usb_properties wt220u_properties = { /* usb specific object needed to register this driver 
with the usb subsystem */ static struct usb_driver dtt200u_usb_driver = { - .owner = THIS_MODULE, .name = "dvb_usb_dtt200u", .probe = dtt200u_usb_probe, .disconnect = dvb_usb_device_exit, diff --git a/drivers/media/dvb/dvb-usb/nova-t-usb2.c b/drivers/media/dvb/dvb-usb/nova-t-usb2.c index 1841a66427bf..fac48fc7a4ac 100644 --- a/drivers/media/dvb/dvb-usb/nova-t-usb2.c +++ b/drivers/media/dvb/dvb-usb/nova-t-usb2.c @@ -202,7 +202,6 @@ static struct dvb_usb_properties nova_t_properties = { }; static struct usb_driver nova_t_driver = { - .owner = THIS_MODULE, .name = "dvb_usb_nova_t_usb2", .probe = nova_t_probe, .disconnect = dvb_usb_device_exit, diff --git a/drivers/media/dvb/dvb-usb/umt-010.c b/drivers/media/dvb/dvb-usb/umt-010.c index 6fd67657c269..14f1911c79bb 100644 --- a/drivers/media/dvb/dvb-usb/umt-010.c +++ b/drivers/media/dvb/dvb-usb/umt-010.c @@ -128,7 +128,6 @@ static struct dvb_usb_properties umt_properties = { }; static struct usb_driver umt_driver = { - .owner = THIS_MODULE, .name = "dvb_usb_umt_010", .probe = umt_probe, .disconnect = dvb_usb_device_exit, diff --git a/drivers/media/dvb/dvb-usb/vp702x.c b/drivers/media/dvb/dvb-usb/vp702x.c index de13c04e8e64..afa00fdb5ec0 100644 --- a/drivers/media/dvb/dvb-usb/vp702x.c +++ b/drivers/media/dvb/dvb-usb/vp702x.c @@ -256,7 +256,6 @@ static struct dvb_usb_properties vp702x_properties = { /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver vp702x_usb_driver = { - .owner = THIS_MODULE, .name = "dvb-usb-vp702x", .probe = vp702x_usb_probe, .disconnect = dvb_usb_device_exit, diff --git a/drivers/media/dvb/dvb-usb/vp7045.c b/drivers/media/dvb/dvb-usb/vp7045.c index 75765e3a569c..3835235b68df 100644 --- a/drivers/media/dvb/dvb-usb/vp7045.c +++ b/drivers/media/dvb/dvb-usb/vp7045.c @@ -253,7 +253,6 @@ static struct dvb_usb_properties vp7045_properties = { /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver 
vp7045_usb_driver = { - .owner = THIS_MODULE, .name = "dvb_usb_vp7045", .probe = vp7045_usb_probe, .disconnect = dvb_usb_device_exit, diff --git a/drivers/media/video/cpia_usb.c b/drivers/media/video/cpia_usb.c index 9774e94d1e7d..1439cb752874 100644 --- a/drivers/media/video/cpia_usb.c +++ b/drivers/media/video/cpia_usb.c @@ -582,7 +582,6 @@ MODULE_LICENSE("GPL"); static struct usb_driver cpia_driver = { - .owner = THIS_MODULE, .name = "cpia", .probe = cpia_probe, .disconnect = cpia_disconnect, diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c index 06d76879bde2..3a56120397ae 100644 --- a/drivers/media/video/em28xx/em28xx-video.c +++ b/drivers/media/video/em28xx/em28xx-video.c @@ -1884,7 +1884,6 @@ static void em28xx_usb_disconnect(struct usb_interface *interface) } static struct usb_driver em28xx_usb_driver = { - .owner = THIS_MODULE, .name = "em28xx", .probe = em28xx_usb_probe, .disconnect = em28xx_usb_disconnect, diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c index c22c0517883c..fa176ffb4ad5 100644 --- a/drivers/net/irda/irda-usb.c +++ b/drivers/net/irda/irda-usb.c @@ -1539,7 +1539,6 @@ static void irda_usb_disconnect(struct usb_interface *intf) * USB device callbacks */ static struct usb_driver irda_driver = { - .owner = THIS_MODULE, .name = "irda-usb", .probe = irda_usb_probe, .disconnect = irda_usb_disconnect, diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c index 3961a754e920..31867e4b891b 100644 --- a/drivers/net/irda/stir4200.c +++ b/drivers/net/irda/stir4200.c @@ -1152,7 +1152,6 @@ static int stir_resume(struct usb_interface *intf) * USB device callbacks */ static struct usb_driver irda_driver = { - .owner = THIS_MODULE, .name = "stir4200", .probe = stir_probe, .disconnect = stir_disconnect, diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c index 9d59dc62e6d2..af0a41e7870e 100644 --- a/drivers/usb/atm/cxacru.c +++ b/drivers/usb/atm/cxacru.c @@ 
-853,7 +853,6 @@ static int cxacru_usb_probe(struct usb_interface *intf, const struct usb_device_ } static struct usb_driver cxacru_usb_driver = { - .owner = THIS_MODULE, .name = cxacru_driver_name, .probe = cxacru_usb_probe, .disconnect = usbatm_usb_disconnect, diff --git a/drivers/usb/atm/speedtch.c b/drivers/usb/atm/speedtch.c index d0cbbb7f0385..b28336148658 100644 --- a/drivers/usb/atm/speedtch.c +++ b/drivers/usb/atm/speedtch.c @@ -659,7 +659,6 @@ MODULE_DEVICE_TABLE(usb, speedtch_usb_ids); static int speedtch_usb_probe(struct usb_interface *, const struct usb_device_id *); static struct usb_driver speedtch_usb_driver = { - .owner = THIS_MODULE, .name = speedtch_driver_name, .probe = speedtch_usb_probe, .disconnect = usbatm_usb_disconnect, diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index be08e16df09f..7d2a679989ed 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -1776,7 +1776,6 @@ static const struct usb_device_id uea_ids[] = { * USB driver descriptor */ static struct usb_driver uea_driver = { - .owner = THIS_MODULE, .name = "ueagle-atm", .id_table = uea_ids, .probe = uea_probe, diff --git a/drivers/usb/atm/xusbatm.c b/drivers/usb/atm/xusbatm.c index 7fe7fb484d10..5c76e3aaaa5e 100644 --- a/drivers/usb/atm/xusbatm.c +++ b/drivers/usb/atm/xusbatm.c @@ -140,7 +140,6 @@ static int xusbatm_usb_probe(struct usb_interface *intf, } static struct usb_driver xusbatm_usb_driver = { - .owner = THIS_MODULE, .name = xusbatm_driver_name, .probe = xusbatm_usb_probe, .disconnect = usbatm_usb_disconnect, diff --git a/drivers/usb/class/audio.c b/drivers/usb/class/audio.c index 50858273f8d3..3ad9ee8b84a9 100644 --- a/drivers/usb/class/audio.c +++ b/drivers/usb/class/audio.c @@ -2732,7 +2732,6 @@ static struct usb_device_id usb_audio_ids [] = { MODULE_DEVICE_TABLE (usb, usb_audio_ids); static struct usb_driver usb_audio_driver = { - .owner = THIS_MODULE, .name = "audio", .probe = usb_audio_probe, .disconnect = 
usb_audio_disconnect, diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 72936dc15ec9..93de121f52a8 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1088,7 +1088,6 @@ static struct usb_device_id acm_ids[] = { MODULE_DEVICE_TABLE (usb, acm_ids); static struct usb_driver acm_driver = { - .owner = THIS_MODULE, .name = "cdc_acm", .probe = acm_probe, .disconnect = acm_disconnect, diff --git a/drivers/usb/class/usb-midi.c b/drivers/usb/class/usb-midi.c index 5f8af35e7633..f13f004d311f 100644 --- a/drivers/usb/class/usb-midi.c +++ b/drivers/usb/class/usb-midi.c @@ -2027,7 +2027,6 @@ static struct usb_device_id id_table[] = { }; static struct usb_driver usb_midi_driver = { - .owner = THIS_MODULE, .name = "midi", .probe = usb_midi_probe, .disconnect = usb_midi_disconnect, diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 357e75335f17..10406b857ac7 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -1186,7 +1186,6 @@ static struct usb_device_id usblp_ids [] = { MODULE_DEVICE_TABLE (usb, usblp_ids); static struct usb_driver usblp_driver = { - .owner = THIS_MODULE, .name = "usblp", .probe = usblp_probe, .disconnect = usblp_disconnect, diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index b1d6e9af732d..3a73170e95dd 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -402,7 +402,6 @@ static void driver_disconnect(struct usb_interface *intf) } struct usb_driver usbfs_driver = { - .owner = THIS_MODULE, .name = "usbfs", .probe = driver_probe, .disconnect = driver_disconnect, diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 5faf7edd73cb..40c6c50c6bd9 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2865,7 +2865,6 @@ static struct usb_device_id hub_id_table [] = { MODULE_DEVICE_TABLE (usb, hub_id_table); static struct usb_driver hub_driver = { - .owner = THIS_MODULE, .name = "hub", .probe = hub_probe, 
.disconnect = hub_disconnect, diff --git a/drivers/usb/image/mdc800.c b/drivers/usb/image/mdc800.c index 1d973bcf56aa..049871145d63 100644 --- a/drivers/usb/image/mdc800.c +++ b/drivers/usb/image/mdc800.c @@ -962,7 +962,6 @@ MODULE_DEVICE_TABLE (usb, mdc800_table); */ static struct usb_driver mdc800_usb_driver = { - .owner = THIS_MODULE, .name = "mdc800", .probe = mdc800_usb_probe, .disconnect = mdc800_usb_disconnect, diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c index 950543aa5ac7..458f2acdeb0a 100644 --- a/drivers/usb/image/microtek.c +++ b/drivers/usb/image/microtek.c @@ -160,7 +160,6 @@ static void mts_usb_disconnect(struct usb_interface *intf); static struct usb_device_id mts_usb_ids []; static struct usb_driver mts_usb_driver = { - .owner = THIS_MODULE, .name = "microtekX6", .probe = mts_usb_probe, .disconnect = mts_usb_disconnect, diff --git a/drivers/usb/input/acecad.c b/drivers/usb/input/acecad.c index a32558b4048e..df29b8078b54 100644 --- a/drivers/usb/input/acecad.c +++ b/drivers/usb/input/acecad.c @@ -261,7 +261,6 @@ static struct usb_device_id usb_acecad_id_table [] = { MODULE_DEVICE_TABLE(usb, usb_acecad_id_table); static struct usb_driver usb_acecad_driver = { - .owner = THIS_MODULE, .name = "usb_acecad", .probe = usb_acecad_probe, .disconnect = usb_acecad_disconnect, diff --git a/drivers/usb/input/aiptek.c b/drivers/usb/input/aiptek.c index 0e2505c073db..356284c746a0 100644 --- a/drivers/usb/input/aiptek.c +++ b/drivers/usb/input/aiptek.c @@ -2190,7 +2190,6 @@ fail1: input_free_device(inputdev); static void aiptek_disconnect(struct usb_interface *intf); static struct usb_driver aiptek_driver = { - .owner = THIS_MODULE, .name = "aiptek", .probe = aiptek_probe, .disconnect = aiptek_disconnect, diff --git a/drivers/usb/input/appletouch.c b/drivers/usb/input/appletouch.c index 15840db092a5..1949b54f41f2 100644 --- a/drivers/usb/input/appletouch.c +++ b/drivers/usb/input/appletouch.c @@ -452,7 +452,6 @@ static int 
atp_resume(struct usb_interface *iface) } static struct usb_driver atp_driver = { - .owner = THIS_MODULE, .name = "appletouch", .probe = atp_probe, .disconnect = atp_disconnect, diff --git a/drivers/usb/input/ati_remote.c b/drivers/usb/input/ati_remote.c index 9a2a47db9494..8948e5c3941f 100644 --- a/drivers/usb/input/ati_remote.c +++ b/drivers/usb/input/ati_remote.c @@ -295,7 +295,6 @@ static void ati_remote_disconnect (struct usb_interface *interface); /* usb specific object to register with the usb subsystem */ static struct usb_driver ati_remote_driver = { - .owner = THIS_MODULE, .name = "ati_remote", .probe = ati_remote_probe, .disconnect = ati_remote_disconnect, diff --git a/drivers/usb/input/hid-core.c b/drivers/usb/input/hid-core.c index a3e44ef1df43..256d7325d4a5 100644 --- a/drivers/usb/input/hid-core.c +++ b/drivers/usb/input/hid-core.c @@ -1930,7 +1930,6 @@ static struct usb_device_id hid_usb_ids [] = { MODULE_DEVICE_TABLE (usb, hid_usb_ids); static struct usb_driver hid_driver = { - .owner = THIS_MODULE, .name = "usbhid", .probe = hid_probe, .disconnect = hid_disconnect, diff --git a/drivers/usb/input/hiddev.c b/drivers/usb/input/hiddev.c index 440377c7a0da..4dff8473553d 100644 --- a/drivers/usb/input/hiddev.c +++ b/drivers/usb/input/hiddev.c @@ -826,7 +826,6 @@ static int hiddev_usbd_probe(struct usb_interface *intf, static /* const */ struct usb_driver hiddev_driver = { - .owner = THIS_MODULE, .name = "hiddev", .probe = hiddev_usbd_probe, }; diff --git a/drivers/usb/input/itmtouch.c b/drivers/usb/input/itmtouch.c index 4a50acb39d29..7618ae5c104f 100644 --- a/drivers/usb/input/itmtouch.c +++ b/drivers/usb/input/itmtouch.c @@ -250,7 +250,6 @@ static void itmtouch_disconnect(struct usb_interface *intf) MODULE_DEVICE_TABLE(usb, itmtouch_ids); static struct usb_driver itmtouch_driver = { - .owner = THIS_MODULE, .name = "itmtouch", .probe = itmtouch_probe, .disconnect = itmtouch_disconnect, diff --git a/drivers/usb/input/kbtab.c b/drivers/usb/input/kbtab.c 
index fd48e74e78ed..f6d5cead542b 100644 --- a/drivers/usb/input/kbtab.c +++ b/drivers/usb/input/kbtab.c @@ -197,7 +197,6 @@ static void kbtab_disconnect(struct usb_interface *intf) } static struct usb_driver kbtab_driver = { - .owner = THIS_MODULE, .name = "kbtab", .probe = kbtab_probe, .disconnect = kbtab_disconnect, diff --git a/drivers/usb/input/keyspan_remote.c b/drivers/usb/input/keyspan_remote.c index a32cfe51b77d..5ae5201dbf5a 100644 --- a/drivers/usb/input/keyspan_remote.c +++ b/drivers/usb/input/keyspan_remote.c @@ -559,7 +559,6 @@ static void keyspan_disconnect(struct usb_interface *interface) */ static struct usb_driver keyspan_driver = { - .owner = THIS_MODULE, .name = "keyspan_remote", .probe = keyspan_probe, .disconnect = keyspan_disconnect, diff --git a/drivers/usb/input/mtouchusb.c b/drivers/usb/input/mtouchusb.c index 52cc18cd247d..f018953a5485 100644 --- a/drivers/usb/input/mtouchusb.c +++ b/drivers/usb/input/mtouchusb.c @@ -310,7 +310,6 @@ static void mtouchusb_disconnect(struct usb_interface *intf) MODULE_DEVICE_TABLE(usb, mtouchusb_devices); static struct usb_driver mtouchusb_driver = { - .owner = THIS_MODULE, .name = "mtouchusb", .probe = mtouchusb_probe, .disconnect = mtouchusb_disconnect, diff --git a/drivers/usb/input/powermate.c b/drivers/usb/input/powermate.c index b7476233ef5d..fdf0f788062c 100644 --- a/drivers/usb/input/powermate.c +++ b/drivers/usb/input/powermate.c @@ -441,7 +441,6 @@ static struct usb_device_id powermate_devices [] = { MODULE_DEVICE_TABLE (usb, powermate_devices); static struct usb_driver powermate_driver = { - .owner = THIS_MODULE, .name = "powermate", .probe = powermate_probe, .disconnect = powermate_disconnect, diff --git a/drivers/usb/input/touchkitusb.c b/drivers/usb/input/touchkitusb.c index 7420c6b84284..75e7c12e7189 100644 --- a/drivers/usb/input/touchkitusb.c +++ b/drivers/usb/input/touchkitusb.c @@ -267,7 +267,6 @@ static void touchkit_disconnect(struct usb_interface *intf) MODULE_DEVICE_TABLE(usb, 
touchkit_devices); static struct usb_driver touchkit_driver = { - .owner = THIS_MODULE, .name = "touchkitusb", .probe = touchkit_probe, .disconnect = touchkit_disconnect, diff --git a/drivers/usb/input/usbkbd.c b/drivers/usb/input/usbkbd.c index 226b6f90a907..2f3edc26cb50 100644 --- a/drivers/usb/input/usbkbd.c +++ b/drivers/usb/input/usbkbd.c @@ -345,7 +345,6 @@ static struct usb_device_id usb_kbd_id_table [] = { MODULE_DEVICE_TABLE (usb, usb_kbd_id_table); static struct usb_driver usb_kbd_driver = { - .owner = THIS_MODULE, .name = "usbkbd", .probe = usb_kbd_probe, .disconnect = usb_kbd_disconnect, diff --git a/drivers/usb/input/usbmouse.c b/drivers/usb/input/usbmouse.c index 230f6b1b314a..af526135d210 100644 --- a/drivers/usb/input/usbmouse.c +++ b/drivers/usb/input/usbmouse.c @@ -226,7 +226,6 @@ static struct usb_device_id usb_mouse_id_table [] = { MODULE_DEVICE_TABLE (usb, usb_mouse_id_table); static struct usb_driver usb_mouse_driver = { - .owner = THIS_MODULE, .name = "usbmouse", .probe = usb_mouse_probe, .disconnect = usb_mouse_disconnect, diff --git a/drivers/usb/input/wacom.c b/drivers/usb/input/wacom.c index dc099bbe12bf..48df4cfd5a42 100644 --- a/drivers/usb/input/wacom.c +++ b/drivers/usb/input/wacom.c @@ -945,7 +945,6 @@ static void wacom_disconnect(struct usb_interface *intf) } static struct usb_driver wacom_driver = { - .owner = THIS_MODULE, .name = "wacom", .probe = wacom_probe, .disconnect = wacom_disconnect, diff --git a/drivers/usb/input/xpad.c b/drivers/usb/input/xpad.c index 43112f040b6d..e421328615fb 100644 --- a/drivers/usb/input/xpad.c +++ b/drivers/usb/input/xpad.c @@ -316,7 +316,6 @@ static void xpad_disconnect(struct usb_interface *intf) } static struct usb_driver xpad_driver = { - .owner = THIS_MODULE, .name = "xpad", .probe = xpad_probe, .disconnect = xpad_disconnect, diff --git a/drivers/usb/input/yealink.c b/drivers/usb/input/yealink.c index f526aebea502..1bfc105ad4d6 100644 --- a/drivers/usb/input/yealink.c +++ 
b/drivers/usb/input/yealink.c @@ -987,7 +987,6 @@ static int usb_probe(struct usb_interface *intf, const struct usb_device_id *id) } static struct usb_driver yealink_driver = { - .owner = THIS_MODULE, .name = "yealink", .probe = usb_probe, .disconnect = usb_disconnect, diff --git a/drivers/usb/media/dabusb.c b/drivers/usb/media/dabusb.c index 27b23c55bbc7..18d8eaf408d5 100644 --- a/drivers/usb/media/dabusb.c +++ b/drivers/usb/media/dabusb.c @@ -812,7 +812,6 @@ static struct usb_device_id dabusb_ids [] = { MODULE_DEVICE_TABLE (usb, dabusb_ids); static struct usb_driver dabusb_driver = { - .owner = THIS_MODULE, .name = "dabusb", .probe = dabusb_probe, .disconnect = dabusb_disconnect, diff --git a/drivers/usb/media/dsbr100.c b/drivers/usb/media/dsbr100.c index 7503f5b96f59..6a5700e9d428 100644 --- a/drivers/usb/media/dsbr100.c +++ b/drivers/usb/media/dsbr100.c @@ -150,7 +150,6 @@ MODULE_DEVICE_TABLE (usb, usb_dsbr100_device_table); /* USB subsystem interface */ static struct usb_driver usb_dsbr100_driver = { - .owner = THIS_MODULE, .name = "dsbr100", .probe = usb_dsbr100_probe, .disconnect = usb_dsbr100_disconnect, diff --git a/drivers/usb/media/ov511.c b/drivers/usb/media/ov511.c index 036c485d1d1e..8df4f9de5ee5 100644 --- a/drivers/usb/media/ov511.c +++ b/drivers/usb/media/ov511.c @@ -6008,7 +6008,6 @@ ov51x_disconnect(struct usb_interface *intf) } static struct usb_driver ov511_driver = { - .owner = THIS_MODULE, .name = "ov511", .id_table = device_table, .probe = ov51x_probe, diff --git a/drivers/usb/media/pwc/pwc-if.c b/drivers/usb/media/pwc/pwc-if.c index 5524fd70210b..09ca6128ac20 100644 --- a/drivers/usb/media/pwc/pwc-if.c +++ b/drivers/usb/media/pwc/pwc-if.c @@ -111,7 +111,6 @@ static int usb_pwc_probe(struct usb_interface *intf, const struct usb_device_id static void usb_pwc_disconnect(struct usb_interface *intf); static struct usb_driver pwc_driver = { - .owner = THIS_MODULE, .name = "Philips webcam", /* name */ .id_table = pwc_device_table, .probe = 
usb_pwc_probe, /* probe() */ diff --git a/drivers/usb/media/se401.c b/drivers/usb/media/se401.c index f69e443cd1bc..b2ae29af5940 100644 --- a/drivers/usb/media/se401.c +++ b/drivers/usb/media/se401.c @@ -1401,7 +1401,6 @@ static void se401_disconnect(struct usb_interface *intf) } static struct usb_driver se401_driver = { - .owner = THIS_MODULE, .name = "se401", .id_table = device_table, .probe = se401_probe, diff --git a/drivers/usb/media/sn9c102_core.c b/drivers/usb/media/sn9c102_core.c index b2e66e3b90aa..08723459da86 100644 --- a/drivers/usb/media/sn9c102_core.c +++ b/drivers/usb/media/sn9c102_core.c @@ -2711,7 +2711,6 @@ static void sn9c102_usb_disconnect(struct usb_interface* intf) static struct usb_driver sn9c102_usb_driver = { - .owner = THIS_MODULE, .name = "sn9c102", .id_table = sn9c102_id_table, .probe = sn9c102_usb_probe, diff --git a/drivers/usb/media/stv680.c b/drivers/usb/media/stv680.c index 0fd0fa9fec21..774038b352cd 100644 --- a/drivers/usb/media/stv680.c +++ b/drivers/usb/media/stv680.c @@ -1477,7 +1477,6 @@ static void stv680_disconnect (struct usb_interface *intf) } static struct usb_driver stv680_driver = { - .owner = THIS_MODULE, .name = "stv680", .probe = stv680_probe, .disconnect = stv680_disconnect, diff --git a/drivers/usb/media/vicam.c b/drivers/usb/media/vicam.c index 0bc0b1247a6b..1c73155c8d77 100644 --- a/drivers/usb/media/vicam.c +++ b/drivers/usb/media/vicam.c @@ -1257,7 +1257,6 @@ static struct usb_device_id vicam_table[] = { MODULE_DEVICE_TABLE(usb, vicam_table); static struct usb_driver vicam_driver = { - .owner = THIS_MODULE, .name = "vicam", .probe = vicam_probe, .disconnect = vicam_disconnect, diff --git a/drivers/usb/media/w9968cf.c b/drivers/usb/media/w9968cf.c index 67612c81cb9f..52b90d50febb 100644 --- a/drivers/usb/media/w9968cf.c +++ b/drivers/usb/media/w9968cf.c @@ -3668,7 +3668,6 @@ static void w9968cf_usb_disconnect(struct usb_interface* intf) static struct usb_driver w9968cf_usb_driver = { - .owner = THIS_MODULE, 
.name = "w9968cf", .id_table = winbond_id_table, .probe = w9968cf_usb_probe, diff --git a/drivers/usb/misc/auerswald.c b/drivers/usb/misc/auerswald.c index b293db3c28c3..fad387f21891 100644 --- a/drivers/usb/misc/auerswald.c +++ b/drivers/usb/misc/auerswald.c @@ -2103,7 +2103,6 @@ MODULE_DEVICE_TABLE (usb, auerswald_ids); /* Standard usb driver struct */ static struct usb_driver auerswald_driver = { - .owner = THIS_MODULE, .name = "auerswald", .probe = auerswald_probe, .disconnect = auerswald_disconnect, diff --git a/drivers/usb/misc/cytherm.c b/drivers/usb/misc/cytherm.c index b33044d56a1e..6671317b495f 100644 --- a/drivers/usb/misc/cytherm.c +++ b/drivers/usb/misc/cytherm.c @@ -50,7 +50,6 @@ static void cytherm_disconnect(struct usb_interface *interface); /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver cytherm_driver = { - .owner = THIS_MODULE, .name = "cytherm", .probe = cytherm_probe, .disconnect = cytherm_disconnect, diff --git a/drivers/usb/misc/emi26.c b/drivers/usb/misc/emi26.c index c8155209bf4b..3824df33094e 100644 --- a/drivers/usb/misc/emi26.c +++ b/drivers/usb/misc/emi26.c @@ -227,7 +227,6 @@ static void emi26_disconnect(struct usb_interface *intf) } static struct usb_driver emi26_driver = { - .owner = THIS_MODULE, .name = "emi26 - firmware loader", .probe = emi26_probe, .disconnect = emi26_disconnect, diff --git a/drivers/usb/misc/emi62.c b/drivers/usb/misc/emi62.c index 189986af2ac7..52fea2e08db8 100644 --- a/drivers/usb/misc/emi62.c +++ b/drivers/usb/misc/emi62.c @@ -266,7 +266,6 @@ static void emi62_disconnect(struct usb_interface *intf) } static struct usb_driver emi62_driver = { - .owner = THIS_MODULE, .name = "emi62 - firmware loader", .probe = emi62_probe, .disconnect = emi62_disconnect, diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c index 1dc3e0f73014..d8cde1017985 100644 --- a/drivers/usb/misc/idmouse.c +++ b/drivers/usb/misc/idmouse.c @@ -114,7 +114,6 @@ static 
struct usb_class_driver idmouse_class = { /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver idmouse_driver = { - .owner = THIS_MODULE, .name = DRIVER_SHORT, .probe = idmouse_probe, .disconnect = idmouse_disconnect, diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index 7e93ac96490f..981d8a5fbfd9 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -763,7 +763,6 @@ static void ld_usb_disconnect(struct usb_interface *intf) /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver ld_usb_driver = { - .owner = THIS_MODULE, .name = "ldusb", .probe = ld_usb_probe, .disconnect = ld_usb_disconnect, diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index 2703e205bc8f..1336745b8f55 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -282,7 +282,6 @@ static struct usb_class_driver tower_class = { /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver tower_driver = { - .owner = THIS_MODULE, .name = "legousbtower", .probe = tower_probe, .disconnect = tower_disconnect, diff --git a/drivers/usb/misc/phidgetkit.c b/drivers/usb/misc/phidgetkit.c index 067a81486921..605a3c87e05c 100644 --- a/drivers/usb/misc/phidgetkit.c +++ b/drivers/usb/misc/phidgetkit.c @@ -555,7 +555,6 @@ static void interfacekit_disconnect(struct usb_interface *interface) } static struct usb_driver interfacekit_driver = { - .owner = THIS_MODULE, .name = "phidgetkit", .probe = interfacekit_probe, .disconnect = interfacekit_disconnect, diff --git a/drivers/usb/misc/phidgetservo.c b/drivers/usb/misc/phidgetservo.c index a30d4a6ee824..b3418d2bcc69 100644 --- a/drivers/usb/misc/phidgetservo.c +++ b/drivers/usb/misc/phidgetservo.c @@ -306,7 +306,6 @@ servo_disconnect(struct usb_interface *interface) } static struct usb_driver servo_driver = { - .owner = THIS_MODULE, 
.name = "phidgetservo", .probe = servo_probe, .disconnect = servo_disconnect, diff --git a/drivers/usb/misc/rio500.c b/drivers/usb/misc/rio500.c index 9590dbac5d9a..b9d66074b80c 100644 --- a/drivers/usb/misc/rio500.c +++ b/drivers/usb/misc/rio500.c @@ -522,7 +522,6 @@ static struct usb_device_id rio_table [] = { MODULE_DEVICE_TABLE (usb, rio_table); static struct usb_driver rio_driver = { - .owner = THIS_MODULE, .name = "rio500", .probe = probe_rio, .disconnect = disconnect_rio, diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c index 41ef2b606751..44350d49ad0a 100644 --- a/drivers/usb/misc/sisusbvga/sisusb.c +++ b/drivers/usb/misc/sisusbvga/sisusb.c @@ -3489,7 +3489,6 @@ static struct usb_device_id sisusb_table [] = { MODULE_DEVICE_TABLE (usb, sisusb_table); static struct usb_driver sisusb_driver = { - .owner = THIS_MODULE, .name = "sisusb", .probe = sisusb_probe, .disconnect = sisusb_disconnect, diff --git a/drivers/usb/misc/usblcd.c b/drivers/usb/misc/usblcd.c index 85f3725334b0..cc3dae3f34e0 100644 --- a/drivers/usb/misc/usblcd.c +++ b/drivers/usb/misc/usblcd.c @@ -371,7 +371,6 @@ static void lcd_disconnect(struct usb_interface *interface) } static struct usb_driver lcd_driver = { - .owner = THIS_MODULE, .name = "usblcd", .probe = lcd_probe, .disconnect = lcd_disconnect, diff --git a/drivers/usb/misc/usbled.c b/drivers/usb/misc/usbled.c index 3c93921cb6b3..877b081a3a6e 100644 --- a/drivers/usb/misc/usbled.c +++ b/drivers/usb/misc/usbled.c @@ -148,7 +148,6 @@ static void led_disconnect(struct usb_interface *interface) } static struct usb_driver led_driver = { - .owner = THIS_MODULE, .name = "usbled", .probe = led_probe, .disconnect = led_disconnect, diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 605a2afe34ed..84fa1728f052 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -2134,7 +2134,6 @@ static struct usb_device_id id_table [] = { MODULE_DEVICE_TABLE (usb, id_table); 
static struct usb_driver usbtest_driver = { - .owner = THIS_MODULE, .name = "usbtest", .id_table = id_table, .probe = usbtest_probe, diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index 1cabe7ed91f5..4081990b7d1a 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -780,7 +780,6 @@ MODULE_DEVICE_TABLE (usb, uss720_table); static struct usb_driver uss720_driver = { - .owner = THIS_MODULE, .name = "uss720", .probe = uss720_probe, .disconnect = uss720_disconnect, diff --git a/drivers/usb/net/asix.c b/drivers/usb/net/asix.c index 542120ef1fd2..2faf2f2bdcdd 100644 --- a/drivers/usb/net/asix.c +++ b/drivers/usb/net/asix.c @@ -918,7 +918,6 @@ static const struct usb_device_id products [] = { MODULE_DEVICE_TABLE(usb, products); static struct usb_driver asix_driver = { - .owner = THIS_MODULE, .name = "asix", .id_table = products, .probe = usbnet_probe, diff --git a/drivers/usb/net/catc.c b/drivers/usb/net/catc.c index 37ef365a2472..be5f5e142dd0 100644 --- a/drivers/usb/net/catc.c +++ b/drivers/usb/net/catc.c @@ -934,7 +934,6 @@ static struct usb_device_id catc_id_table [] = { MODULE_DEVICE_TABLE(usb, catc_id_table); static struct usb_driver catc_driver = { - .owner = THIS_MODULE, .name = driver_name, .probe = catc_probe, .disconnect = catc_disconnect, diff --git a/drivers/usb/net/cdc_ether.c b/drivers/usb/net/cdc_ether.c index c008c981862b..63f1f3ba8e0b 100644 --- a/drivers/usb/net/cdc_ether.c +++ b/drivers/usb/net/cdc_ether.c @@ -476,7 +476,6 @@ static const struct usb_device_id products [] = { MODULE_DEVICE_TABLE(usb, products); static struct usb_driver cdc_driver = { - .owner = THIS_MODULE, .name = "cdc_ether", .id_table = products, .probe = usbnet_probe, diff --git a/drivers/usb/net/cdc_subset.c b/drivers/usb/net/cdc_subset.c index f05cfb83c82d..ec801e8bb1bb 100644 --- a/drivers/usb/net/cdc_subset.c +++ b/drivers/usb/net/cdc_subset.c @@ -306,7 +306,6 @@ MODULE_DEVICE_TABLE(usb, products); 
/*-------------------------------------------------------------------------*/ static struct usb_driver cdc_subset_driver = { - .owner = THIS_MODULE, .name = "cdc_subset", .probe = usbnet_probe, .suspend = usbnet_suspend, diff --git a/drivers/usb/net/gl620a.c b/drivers/usb/net/gl620a.c index 2455e9a85674..faf1e86be687 100644 --- a/drivers/usb/net/gl620a.c +++ b/drivers/usb/net/gl620a.c @@ -377,7 +377,6 @@ static const struct usb_device_id products [] = { MODULE_DEVICE_TABLE(usb, products); static struct usb_driver gl620a_driver = { - .owner = THIS_MODULE, .name = "gl620a", .id_table = products, .probe = usbnet_probe, diff --git a/drivers/usb/net/kaweth.c b/drivers/usb/net/kaweth.c index b5776518020f..def3bb8e2290 100644 --- a/drivers/usb/net/kaweth.c +++ b/drivers/usb/net/kaweth.c @@ -175,7 +175,6 @@ MODULE_DEVICE_TABLE (usb, usb_klsi_table); * kaweth_driver ****************************************************************/ static struct usb_driver kaweth_driver = { - .owner = THIS_MODULE, .name = driver_name, .probe = kaweth_probe, .disconnect = kaweth_disconnect, diff --git a/drivers/usb/net/net1080.c b/drivers/usb/net/net1080.c index b3799b1a2b0d..78e6a43b1087 100644 --- a/drivers/usb/net/net1080.c +++ b/drivers/usb/net/net1080.c @@ -593,7 +593,6 @@ static const struct usb_device_id products [] = { MODULE_DEVICE_TABLE(usb, products); static struct usb_driver net1080_driver = { - .owner = THIS_MODULE, .name = "net1080", .id_table = products, .probe = usbnet_probe, diff --git a/drivers/usb/net/plusb.c b/drivers/usb/net/plusb.c index 89856aa0e3b8..4fe863389cb7 100644 --- a/drivers/usb/net/plusb.c +++ b/drivers/usb/net/plusb.c @@ -127,7 +127,6 @@ static const struct usb_device_id products [] = { MODULE_DEVICE_TABLE(usb, products); static struct usb_driver plusb_driver = { - .owner = THIS_MODULE, .name = "plusb", .id_table = products, .probe = usbnet_probe, diff --git a/drivers/usb/net/rndis_host.c b/drivers/usb/net/rndis_host.c index c0ecbab6f6ba..49991ac1bf3b 100644 
--- a/drivers/usb/net/rndis_host.c +++ b/drivers/usb/net/rndis_host.c @@ -586,7 +586,6 @@ static const struct usb_device_id products [] = { MODULE_DEVICE_TABLE(usb, products); static struct usb_driver rndis_driver = { - .owner = THIS_MODULE, .name = "rndis_host", .id_table = products, .probe = usbnet_probe, diff --git a/drivers/usb/net/rtl8150.c b/drivers/usb/net/rtl8150.c index 787dd3591d6a..8ca52be23976 100644 --- a/drivers/usb/net/rtl8150.c +++ b/drivers/usb/net/rtl8150.c @@ -177,7 +177,6 @@ static int rtl8150_probe(struct usb_interface *intf, static const char driver_name [] = "rtl8150"; static struct usb_driver rtl8150_driver = { - .owner = THIS_MODULE, .name = driver_name, .probe = rtl8150_probe, .disconnect = rtl8150_disconnect, diff --git a/drivers/usb/net/zaurus.c b/drivers/usb/net/zaurus.c index 680d13957af4..9c5ab251370c 100644 --- a/drivers/usb/net/zaurus.c +++ b/drivers/usb/net/zaurus.c @@ -357,7 +357,6 @@ static const struct usb_device_id products [] = { MODULE_DEVICE_TABLE(usb, products); static struct usb_driver zaurus_driver = { - .owner = THIS_MODULE, .name = "zaurus", .id_table = products, .probe = usbnet_probe, diff --git a/drivers/usb/net/zd1201.c b/drivers/usb/net/zd1201.c index 2f52261c7cc1..4d6673adc8fc 100644 --- a/drivers/usb/net/zd1201.c +++ b/drivers/usb/net/zd1201.c @@ -1923,7 +1923,6 @@ static int zd1201_resume(struct usb_interface *interface) #endif static struct usb_driver zd1201_usb = { - .owner = THIS_MODULE, .name = "zd1201", .probe = zd1201_probe, .disconnect = zd1201_disconnect, diff --git a/drivers/usb/serial/airprime.c b/drivers/usb/serial/airprime.c index 2ef9945a6c07..dbf1f063098c 100644 --- a/drivers/usb/serial/airprime.c +++ b/drivers/usb/serial/airprime.c @@ -23,7 +23,6 @@ static struct usb_device_id id_table [] = { MODULE_DEVICE_TABLE(usb, id_table); static struct usb_driver airprime_driver = { - .owner = THIS_MODULE, .name = "airprime", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git 
a/drivers/usb/serial/anydata.c b/drivers/usb/serial/anydata.c index 7a171e034b59..343f6f228220 100644 --- a/drivers/usb/serial/anydata.c +++ b/drivers/usb/serial/anydata.c @@ -27,7 +27,6 @@ static int buffer_size; static int debug; static struct usb_driver anydata_driver = { - .owner = THIS_MODULE, .name = "anydata", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c index 69039bd9fc5e..4144777ea18b 100644 --- a/drivers/usb/serial/belkin_sa.c +++ b/drivers/usb/serial/belkin_sa.c @@ -113,7 +113,6 @@ static struct usb_device_id id_table_combined [] = { MODULE_DEVICE_TABLE (usb, id_table_combined); static struct usb_driver belkin_driver = { - .owner = THIS_MODULE, .name = "belkin", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/cp2101.c b/drivers/usb/serial/cp2101.c index 813bab37e076..da46b351e188 100644 --- a/drivers/usb/serial/cp2101.c +++ b/drivers/usb/serial/cp2101.c @@ -67,7 +67,6 @@ static struct usb_device_id id_table [] = { MODULE_DEVICE_TABLE (usb, id_table); static struct usb_driver cp2101_driver = { - .owner = THIS_MODULE, .name = "cp2101", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c index 8c10e4004905..6d18d4eaba35 100644 --- a/drivers/usb/serial/cyberjack.c +++ b/drivers/usb/serial/cyberjack.c @@ -76,7 +76,6 @@ static struct usb_device_id id_table [] = { MODULE_DEVICE_TABLE (usb, id_table); static struct usb_driver cyberjack_driver = { - .owner = THIS_MODULE, .name = "cyberjack", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index c50cec95f49b..8fc414bd5b24 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -493,7 +493,6 @@ static struct usb_device_id id_table_4 [] = { 
MODULE_DEVICE_TABLE (usb, id_table_combined); static struct usb_driver digi_driver = { - .owner = THIS_MODULE, .name = "digi_acceleport", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c index e5e40064caf2..79a766e9ca23 100644 --- a/drivers/usb/serial/empeg.c +++ b/drivers/usb/serial/empeg.c @@ -105,7 +105,6 @@ static struct usb_device_id id_table [] = { MODULE_DEVICE_TABLE (usb, id_table); static struct usb_driver empeg_driver = { - .owner = THIS_MODULE, .name = "empeg", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 198a322286f9..452efce72714 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -222,7 +222,6 @@ static struct usb_device_id id_table [] = { MODULE_DEVICE_TABLE (usb, id_table); static struct usb_driver garmin_driver = { - .owner = THIS_MODULE, .name = "garmin_gps", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index c00a440dc421..4ddac620fc0c 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -68,7 +68,6 @@ static int generic_probe(struct usb_interface *interface, } static struct usb_driver generic_driver = { - .owner = THIS_MODULE, .name = "usbserial_generic", .probe = generic_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/hp4x.c b/drivers/usb/serial/hp4x.c index e588c3fe632d..e9719da2aca1 100644 --- a/drivers/usb/serial/hp4x.c +++ b/drivers/usb/serial/hp4x.c @@ -37,7 +37,6 @@ static struct usb_device_id id_table [] = { MODULE_DEVICE_TABLE(usb, id_table); static struct usb_driver hp49gp_driver = { - .owner = THIS_MODULE, .name = "hp4X", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index 
276bd425a474..4e2b599d85a6 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -242,7 +242,6 @@ static void edge_shutdown (struct usb_serial *serial); #include "io_tables.h" /* all of the devices that this driver supports */ static struct usb_driver io_driver = { - .owner = THIS_MODULE, .name = "io_edgeport", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index 8b2e4c78abcd..22ad1a5a8f9e 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -216,7 +216,6 @@ static struct usb_device_id id_table_combined [] = { MODULE_DEVICE_TABLE (usb, id_table_combined); static struct usb_driver io_driver = { - .owner = THIS_MODULE, .name = "io_ti", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c index efb568be7015..06d07cea0b70 100644 --- a/drivers/usb/serial/ipaq.c +++ b/drivers/usb/serial/ipaq.c @@ -542,7 +542,6 @@ static struct usb_device_id ipaq_id_table [] = { MODULE_DEVICE_TABLE (usb, ipaq_id_table); static struct usb_driver ipaq_driver = { - .owner = THIS_MODULE, .name = "ipaq", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c index 64e2cda2a84a..2dd191f5fe76 100644 --- a/drivers/usb/serial/ipw.c +++ b/drivers/usb/serial/ipw.c @@ -152,7 +152,6 @@ static struct usb_device_id usb_ipw_ids[] = { MODULE_DEVICE_TABLE(usb, usb_ipw_ids); static struct usb_driver usb_ipw_driver = { - .owner = THIS_MODULE, .name = "ipwtty", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c index 647431c1ccb1..a59010421444 100644 --- a/drivers/usb/serial/ir-usb.c +++ b/drivers/usb/serial/ir-usb.c @@ -125,7 +125,6 @@ static struct usb_device_id id_table [] = { MODULE_DEVICE_TABLE (usb, id_table); static struct 
usb_driver ir_driver = { - .owner = THIS_MODULE, .name = "ir-usb", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/keyspan.h b/drivers/usb/serial/keyspan.h index 4e6f626f6062..7472ed6bf626 100644 --- a/drivers/usb/serial/keyspan.h +++ b/drivers/usb/serial/keyspan.h @@ -520,7 +520,6 @@ static struct usb_device_id keyspan_ids_combined[] = { MODULE_DEVICE_TABLE(usb, keyspan_ids_combined); static struct usb_driver keyspan_driver = { - .owner = THIS_MODULE, .name = "keyspan", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index 0d1f15268549..b0441c35f98f 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -150,7 +150,6 @@ static struct usb_device_id id_table_combined [] = { MODULE_DEVICE_TABLE (usb, id_table_combined); static struct usb_driver keyspan_pda_driver = { - .owner = THIS_MODULE, .name = "keyspan_pda", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index bd68638b7c35..4e2f7dfb58b2 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -116,7 +116,6 @@ static struct usb_device_id id_table [] = { MODULE_DEVICE_TABLE (usb, id_table); static struct usb_driver kl5kusb105d_driver = { - .owner = THIS_MODULE, .name = "kl5kusb105d", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c index 4c853afea385..d9c21e275130 100644 --- a/drivers/usb/serial/kobil_sct.c +++ b/drivers/usb/serial/kobil_sct.c @@ -97,7 +97,6 @@ static struct usb_device_id id_table [] = { MODULE_DEVICE_TABLE (usb, id_table); static struct usb_driver kobil_driver = { - .owner = THIS_MODULE, .name = "kobil", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/mct_u232.c 
b/drivers/usb/serial/mct_u232.c index b0415e7542c4..b6d6cab9c859 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -125,7 +125,6 @@ static struct usb_device_id id_table_combined [] = { MODULE_DEVICE_TABLE (usb, id_table_combined); static struct usb_driver mct_u232_driver = { - .owner = THIS_MODULE, .name = "mct_u232", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index b595befb24cf..762d8ff9a1e4 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -80,7 +80,6 @@ static struct usb_device_id id_table [] = { MODULE_DEVICE_TABLE (usb, id_table); static struct usb_driver omninet_driver = { - .owner = THIS_MODULE, .name = "omninet", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 4ee657eaaa0b..3fd2405304fd 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -95,7 +95,6 @@ static struct usb_device_id option_ids[] = { MODULE_DEVICE_TABLE(usb, option_ids); static struct usb_driver option_driver = { - .owner = THIS_MODULE, .name = "option", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index e302a320444c..c96ba9fc19e0 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -82,7 +82,6 @@ static struct usb_device_id id_table [] = { MODULE_DEVICE_TABLE (usb, id_table); static struct usb_driver pl2303_driver = { - .owner = THIS_MODULE, .name = "pl2303", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c index f8241c152043..3ea284ce7b8b 100644 --- a/drivers/usb/serial/safe_serial.c +++ b/drivers/usb/serial/safe_serial.c @@ -160,7 +160,6 @@ static struct usb_device_id id_table[] = { MODULE_DEVICE_TABLE (usb, 
id_table); static struct usb_driver safe_driver = { - .owner = THIS_MODULE, .name = "safe_serial", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 17a1f09483bd..9e53ec75bcfd 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -248,7 +248,6 @@ static struct usb_device_id ti_id_table_combined[] = { }; static struct usb_driver ti_usb_driver = { - .owner = THIS_MODULE, .name = "ti_usb_3410_5052", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 2ac37b52485a..12aaf18ff9ea 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -42,7 +42,6 @@ /* Driver structure we register with the USB core */ static struct usb_driver usb_serial_driver = { - .owner = THIS_MODULE, .name = "usbserial", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c index 2973f5564c06..49b1fbe61f25 100644 --- a/drivers/usb/serial/visor.c +++ b/drivers/usb/serial/visor.c @@ -173,7 +173,6 @@ static struct usb_device_id id_table_combined [] = { MODULE_DEVICE_TABLE (usb, id_table_combined); static struct usb_driver visor_driver = { - .owner = THIS_MODULE, .name = "visor", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 19c6386bb692..a7c3c4734d83 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -127,7 +127,6 @@ static struct usb_device_id id_table_combined [] = { MODULE_DEVICE_TABLE (usb, id_table_combined); static struct usb_driver whiteheat_driver = { - .owner = THIS_MODULE, .name = "whiteheat", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, diff --git a/drivers/usb/storage/libusual.c 
b/drivers/usb/storage/libusual.c index 2680c69a2417..b28151d1b609 100644 --- a/drivers/usb/storage/libusual.c +++ b/drivers/usb/storage/libusual.c @@ -153,7 +153,6 @@ static void usu_disconnect(struct usb_interface *intf) } static struct usb_driver usu_driver = { - .owner = THIS_MODULE, .name = "libusual", .probe = usu_probe, .disconnect = usu_disconnect, diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index c8375aa62723..484ed297bed0 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -1000,7 +1000,6 @@ static void storage_disconnect(struct usb_interface *intf) ***********************************************************************/ static struct usb_driver usb_storage_driver = { - .owner = THIS_MODULE, .name = "usb-storage", .probe = storage_probe, .disconnect = storage_disconnect, diff --git a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c index 6c3a53f8f26c..60c458ebaa2d 100644 --- a/drivers/usb/usb-skeleton.c +++ b/drivers/usb/usb-skeleton.c @@ -330,7 +330,6 @@ static void skel_disconnect(struct usb_interface *interface) } static struct usb_driver skel_driver = { - .owner = THIS_MODULE, .name = "skeleton", .probe = skel_probe, .disconnect = skel_disconnect, diff --git a/drivers/w1/dscore.c b/drivers/w1/dscore.c index 15fb250451e5..b9146306df49 100644 --- a/drivers/w1/dscore.c +++ b/drivers/w1/dscore.c @@ -52,7 +52,6 @@ static int ds_send_control_cmd(struct ds_device *, u16, u16); static struct usb_driver ds_driver = { - .owner = THIS_MODULE, .name = "DS9490R", .probe = ds_probe, .disconnect = ds_disconnect, diff --git a/include/linux/usb.h b/include/linux/usb.h index 3d05c63451a8..2714814ab66c 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -536,8 +536,6 @@ struct usb_dynids { /** * struct usb_driver - identifies USB driver to usbcore - * @owner: Pointer to the module owner of this driver; initialize - * it using THIS_MODULE. 
* @name: The driver name should be unique among USB drivers, * and should normally be the same as the module name. * @probe: Called to see if the driver is willing to manage a particular @@ -580,8 +578,6 @@ struct usb_dynids { * them as necessary, and blocking until the unlinks complete). */ struct usb_driver { - struct module *owner; - const char *name; int (*probe) (struct usb_interface *intf, diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 99dae024b640..22f8bb612bff 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -1996,7 +1996,6 @@ static struct usb_device_id usb_audio_ids [] = { MODULE_DEVICE_TABLE (usb, usb_audio_ids); static struct usb_driver usb_audio_driver = { - .owner = THIS_MODULE, .name = "snd-usb-audio", .probe = usb_audio_probe, .disconnect = usb_audio_disconnect, diff --git a/sound/usb/usx2y/usbusx2y.c b/sound/usb/usx2y/usbusx2y.c index cf77313c609d..a3967f72ab4e 100644 --- a/sound/usb/usx2y/usbusx2y.c +++ b/sound/usb/usx2y/usbusx2y.c @@ -409,7 +409,6 @@ static void snd_usX2Y_disconnect(struct usb_interface *intf) MODULE_DEVICE_TABLE(usb, snd_usX2Y_usb_id_table); static struct usb_driver snd_usX2Y_usb_driver = { - .owner = THIS_MODULE, .name = "snd-usb-usx2y", .probe = snd_usX2Y_probe, .disconnect = snd_usX2Y_disconnect, -- cgit v1.2.3-71-gd317 From 9ad3d6ccf5eee285e233dbaf186369b8d477a666 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 17 Nov 2005 17:10:32 -0500 Subject: [PATCH] USB: Remove USB private semaphore This patch (as605) removes the private udev->serialize semaphore, relying instead on the locking provided by the embedded struct device's semaphore. The changes are confined to the core, except that the usb_trylock_device routine now uses the return convention of down_trylock rather than down_read_trylock (they return opposite values for no good reason). 
A couple of other associated changes are included as well: Now that we aren't concerned about HCDs that avoid using the hcd glue layer, usb_disconnect no longer needs to acquire the usb_bus_lock -- that can be done by usb_remove_hcd where it belongs. Devices aren't locked over the same scope of code in usb_new_device and hub_port_connect_change as they used to be. This shouldn't cause any trouble. Along with the preceding driver core patch, this needs a lot of testing. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devices.c | 4 +- drivers/usb/core/devio.c | 2 - drivers/usb/core/driver.c | 4 -- drivers/usb/core/hcd.c | 5 +- drivers/usb/core/hub.c | 48 +++++++------------ drivers/usb/core/usb.c | 114 ++++---------------------------------------- drivers/usb/core/usb.h | 3 -- drivers/usb/host/ohci-hub.c | 2 +- include/linux/usb.h | 9 ++-- 9 files changed, 37 insertions(+), 154 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c index 83e815d3cd52..55bc563a3256 100644 --- a/drivers/usb/core/devices.c +++ b/drivers/usb/core/devices.c @@ -545,10 +545,10 @@ static ssize_t usb_device_dump(char __user **buffer, size_t *nbytes, loff_t *ski struct usb_device *childdev = usbdev->children[chix]; if (childdev) { - down(&childdev->serialize); + usb_lock_device(childdev); ret = usb_device_dump(buffer, nbytes, skip_bytes, file_offset, childdev, bus, level + 1, chix, ++cnt); - up(&childdev->serialize); + usb_unlock_device(childdev); if (ret == -EFAULT) return total_written; total_written += ret; diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 3a73170e95dd..2b68998fe4b3 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1349,9 +1349,7 @@ static int proc_ioctl(struct dev_state *ps, struct usbdevfs_ioctl *ctl) /* let kernel drivers try to (re)bind to the interface */ case USBDEVFS_CONNECT: usb_unlock_device(ps->dev); - usb_lock_all_devices(); 
bus_rescan_devices(intf->dev.bus); - usb_unlock_all_devices(); usb_lock_device(ps->dev); break; diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index bb139f06bcd6..076462c8ba2a 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -432,9 +432,7 @@ int usb_register_driver(struct usb_driver *new_driver, struct module *owner) spin_lock_init(&new_driver->dynids.lock); INIT_LIST_HEAD(&new_driver->dynids.list); - usb_lock_all_devices(); retval = driver_register(&new_driver->driver); - usb_unlock_all_devices(); if (!retval) { pr_info("%s: registered new driver %s\n", @@ -465,11 +463,9 @@ void usb_deregister(struct usb_driver *driver) { pr_info("%s: deregistering driver %s\n", usbcore_name, driver->name); - usb_lock_all_devices(); usb_remove_newid_file(driver); usb_free_dynids(driver); driver_unregister(&driver->driver); - usb_unlock_all_devices(); usbfs_update_special(); } diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index da24c31ee00d..d16a0e8a7d72 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -857,9 +857,7 @@ static int register_root_hub (struct usb_device *usb_dev, return (retval < 0) ? 
retval : -EMSGSIZE; } - usb_lock_device (usb_dev); retval = usb_new_device (usb_dev); - usb_unlock_device (usb_dev); if (retval) { usb_dev->bus->root_hub = NULL; dev_err (parent_dev, "can't register root hub for %s, %d\n", @@ -1891,7 +1889,10 @@ void usb_remove_hcd(struct usb_hcd *hcd) spin_lock_irq (&hcd_root_hub_lock); hcd->rh_registered = 0; spin_unlock_irq (&hcd_root_hub_lock); + + down(&usb_bus_list_lock); usb_disconnect(&hcd->self.root_hub); + up(&usb_bus_list_lock); hcd->poll_rh = 0; del_timer_sync(&hcd->rh_timer); diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 40c6c50c6bd9..dd3bcfb2bcb6 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -32,7 +32,7 @@ #include "hub.h" /* Protect struct usb_device->state and ->children members - * Note: Both are also protected by ->serialize, except that ->state can + * Note: Both are also protected by ->dev.sem, except that ->state can * change to USB_STATE_NOTATTACHED even when the semaphore isn't held. */ static DEFINE_SPINLOCK(device_state_lock); @@ -975,8 +975,8 @@ static int locktree(struct usb_device *udev) /* when everyone grabs locks top->bottom, * non-overlapping work may be concurrent */ - down(&udev->serialize); - up(&hdev->serialize); + usb_lock_device(udev); + usb_unlock_device(hdev); return t + 1; } } @@ -1132,16 +1132,10 @@ void usb_disconnect(struct usb_device **pdev) * this quiesces everyting except pending urbs. 
*/ usb_set_device_state(udev, USB_STATE_NOTATTACHED); - - /* lock the bus list on behalf of HCDs unregistering their root hubs */ - if (!udev->parent) { - down(&usb_bus_list_lock); - usb_lock_device(udev); - } else - down(&udev->serialize); - dev_info (&udev->dev, "USB disconnect, address %d\n", udev->devnum); + usb_lock_device(udev); + /* Free up all the children before we remove this device */ for (i = 0; i < USB_MAXCHILDREN; i++) { if (udev->children[i]) @@ -1169,11 +1163,7 @@ void usb_disconnect(struct usb_device **pdev) *pdev = NULL; spin_unlock_irq(&device_state_lock); - if (!udev->parent) { - usb_unlock_device(udev); - up(&usb_bus_list_lock); - } else - up(&udev->serialize); + usb_unlock_device(udev); device_unregister(&udev->dev); } @@ -1243,8 +1233,8 @@ static inline void show_string(struct usb_device *udev, char *id, char *string) * * This is called with devices which have been enumerated, but not yet * configured. The device descriptor is available, but not descriptors - * for any device configuration. The caller must have locked udev and - * either the parent hub (if udev is a normal device) or else the + * for any device configuration. The caller must have locked either + * the parent hub (if udev is a normal device) or else the * usb_bus_list_lock (if udev is a root hub). The parent's pointer to * udev has already been installed, but udev is not yet visible through * sysfs or other filesystem code. @@ -1254,8 +1244,7 @@ static inline void show_string(struct usb_device *udev, char *id, char *string) * * This call is synchronous, and may not be used in an interrupt context. * - * Only the hub driver should ever call this; root hub registration - * uses it indirectly. + * Only the hub driver or root-hub registrar should ever call this. */ int usb_new_device(struct usb_device *udev) { @@ -1364,6 +1353,8 @@ int usb_new_device(struct usb_device *udev) } usb_create_sysfs_dev_files (udev); + usb_lock_device(udev); + /* choose and set the configuration. 
that registers the interfaces * with the driver core, and lets usb device drivers bind to them. */ @@ -1385,6 +1376,8 @@ int usb_new_device(struct usb_device *udev) /* USB device state == configured ... usable */ usb_notify_add_device(udev); + usb_unlock_device(udev); + return 0; fail: @@ -1872,11 +1865,8 @@ int usb_resume_device(struct usb_device *udev) usb_unlock_device(udev); /* rebind drivers that had no suspend() */ - if (status == 0) { - usb_lock_all_devices(); + if (status == 0) bus_rescan_devices(&usb_bus_type); - usb_unlock_all_devices(); - } return status; } @@ -1889,14 +1879,14 @@ static int remote_wakeup(struct usb_device *udev) /* don't repeat RESUME sequence if this device * was already woken up by some other task */ - down(&udev->serialize); + usb_lock_device(udev); if (udev->state == USB_STATE_SUSPENDED) { dev_dbg(&udev->dev, "RESUME (wakeup)\n"); /* TRSMRCY = 10 msec */ msleep(10); status = finish_device_resume(udev); } - up(&udev->serialize); + usb_unlock_device(udev); #endif return status; } @@ -1997,7 +1987,7 @@ static int hub_resume(struct usb_interface *intf) if (!udev || status < 0) continue; - down (&udev->serialize); + usb_lock_device(udev); if (portstat & USB_PORT_STAT_SUSPEND) status = hub_port_resume(hub, port1, udev); else { @@ -2008,7 +1998,7 @@ static int hub_resume(struct usb_interface *intf) hub_port_logical_disconnect(hub, port1); } } - up(&udev->serialize); + usb_unlock_device(udev); } } #endif @@ -2573,7 +2563,6 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1, * udev becomes globally accessible, although presumably * no one will look at it until hdev is unlocked. 
*/ - down (&udev->serialize); status = 0; /* We mustn't add new devices if the parent hub has @@ -2597,7 +2586,6 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1, } } - up (&udev->serialize); if (status) goto loop_disable; diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 294e9f127477..fcfda21be499 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include @@ -49,8 +48,6 @@ const char *usbcore_name = "usbcore"; static int nousb; /* Disable USB when built into kernel image */ /* Not honored on modular build */ -static DECLARE_RWSEM(usb_all_devices_rwsem); - /** * usb_ifnum_to_if - get the interface object with a given interface number @@ -446,8 +443,6 @@ usb_alloc_dev(struct usb_device *parent, struct usb_bus *bus, unsigned port1) dev->parent = parent; INIT_LIST_HEAD(&dev->filelist); - init_MUTEX(&dev->serialize); - return dev; } @@ -520,75 +515,20 @@ void usb_put_intf(struct usb_interface *intf) /* USB device locking * - * Although locking USB devices should be straightforward, it is - * complicated by the way the driver-model core works. When a new USB - * driver is registered or unregistered, the core will automatically - * probe or disconnect all matching interfaces on all USB devices while - * holding the USB subsystem writelock. There's no good way for us to - * tell which devices will be used or to lock them beforehand; our only - * option is to effectively lock all the USB devices. - * - * We do that by using a private rw-semaphore, usb_all_devices_rwsem. - * When locking an individual device you must first acquire the rwsem's - * readlock. When a driver is registered or unregistered the writelock - * must be held. These actions are encapsulated in the subroutines - * below, so all a driver needs to do is call usb_lock_device() and - * usb_unlock_device(). 
+ * USB devices and interfaces are locked using the semaphore in their + * embedded struct device. The hub driver guarantees that whenever a + * device is connected or disconnected, drivers are called with the + * USB device locked as well as their particular interface. * * Complications arise when several devices are to be locked at the same * time. Only hub-aware drivers that are part of usbcore ever have to - * do this; nobody else needs to worry about it. The problem is that - * usb_lock_device() must not be called to lock a second device since it - * would acquire the rwsem's readlock reentrantly, leading to deadlock if - * another thread was waiting for the writelock. The solution is simple: - * - * When locking more than one device, call usb_lock_device() - * to lock the first one. Lock the others by calling - * down(&udev->serialize) directly. - * - * When unlocking multiple devices, use up(&udev->serialize) - * to unlock all but the last one. Unlock the last one by - * calling usb_unlock_device(). + * do this; nobody else needs to worry about it. The rule for locking + * is simple: * * When locking both a device and its parent, always lock the * the parent first. */ -/** - * usb_lock_device - acquire the lock for a usb device structure - * @udev: device that's being locked - * - * Use this routine when you don't hold any other device locks; - * to acquire nested inner locks call down(&udev->serialize) directly. - * This is necessary for proper interaction with usb_lock_all_devices(). - */ -void usb_lock_device(struct usb_device *udev) -{ - down_read(&usb_all_devices_rwsem); - down(&udev->serialize); -} - -/** - * usb_trylock_device - attempt to acquire the lock for a usb device structure - * @udev: device that's being locked - * - * Don't use this routine if you already hold a device lock; - * use down_trylock(&udev->serialize) instead. - * This is necessary for proper interaction with usb_lock_all_devices(). 
- * - * Returns 1 if successful, 0 if contention. - */ -int usb_trylock_device(struct usb_device *udev) -{ - if (!down_read_trylock(&usb_all_devices_rwsem)) - return 0; - if (down_trylock(&udev->serialize)) { - up_read(&usb_all_devices_rwsem); - return 0; - } - return 1; -} - /** * usb_lock_device_for_reset - cautiously acquire the lock for a * usb device structure @@ -627,7 +567,7 @@ int usb_lock_device_for_reset(struct usb_device *udev, } } - while (!usb_trylock_device(udev)) { + while (usb_trylock_device(udev) != 0) { /* If we can't acquire the lock after waiting one second, * we're probably deadlocked */ @@ -645,39 +585,6 @@ int usb_lock_device_for_reset(struct usb_device *udev, return 1; } -/** - * usb_unlock_device - release the lock for a usb device structure - * @udev: device that's being unlocked - * - * Use this routine when releasing the only device lock you hold; - * to release inner nested locks call up(&udev->serialize) directly. - * This is necessary for proper interaction with usb_lock_all_devices(). - */ -void usb_unlock_device(struct usb_device *udev) -{ - up(&udev->serialize); - up_read(&usb_all_devices_rwsem); -} - -/** - * usb_lock_all_devices - acquire the lock for all usb device structures - * - * This is necessary when registering a new driver or probing a bus, - * since the driver-model core may try to use any usb_device. 
- */ -void usb_lock_all_devices(void) -{ - down_write(&usb_all_devices_rwsem); -} - -/** - * usb_unlock_all_devices - release the lock for all usb device structures - */ -void usb_unlock_all_devices(void) -{ - up_write(&usb_all_devices_rwsem); -} - static struct usb_device *match_device(struct usb_device *dev, u16 vendor_id, u16 product_id) @@ -700,10 +607,10 @@ static struct usb_device *match_device(struct usb_device *dev, /* look through all of the children of this device */ for (child = 0; child < dev->maxchild; ++child) { if (dev->children[child]) { - down(&dev->children[child]->serialize); + usb_lock_device(dev->children[child]); ret_dev = match_device(dev->children[child], vendor_id, product_id); - up(&dev->children[child]->serialize); + usb_unlock_device(dev->children[child]); if (ret_dev) goto exit; } @@ -1300,10 +1207,7 @@ EXPORT_SYMBOL(usb_put_dev); EXPORT_SYMBOL(usb_get_dev); EXPORT_SYMBOL(usb_hub_tt_clear_buffer); -EXPORT_SYMBOL(usb_lock_device); -EXPORT_SYMBOL(usb_trylock_device); EXPORT_SYMBOL(usb_lock_device_for_reset); -EXPORT_SYMBOL(usb_unlock_device); EXPORT_SYMBOL(usb_driver_claim_interface); EXPORT_SYMBOL(usb_driver_release_interface); diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index 98e85fb4d3b7..4647e1ebc68d 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -16,9 +16,6 @@ extern int usb_get_device_descriptor(struct usb_device *dev, extern char *usb_cache_string(struct usb_device *udev, int index); extern int usb_set_configuration(struct usb_device *dev, int configuration); -extern void usb_lock_all_devices(void); -extern void usb_unlock_all_devices(void); - extern void usb_kick_khubd(struct usb_device *dev); extern void usb_suspend_root_hub(struct usb_device *hdev); extern void usb_resume_root_hub(struct usb_device *dev); diff --git a/drivers/usb/host/ohci-hub.c b/drivers/usb/host/ohci-hub.c index 72e3b12a1926..4b2226d77b34 100644 --- a/drivers/usb/host/ohci-hub.c +++ b/drivers/usb/host/ohci-hub.c @@ -372,7 
+372,7 @@ done: & ohci->hc_control) == OHCI_USB_OPER && time_after (jiffies, ohci->next_statechange) - && usb_trylock_device (hcd->self.root_hub) + && usb_trylock_device (hcd->self.root_hub) == 0 ) { ohci_vdbg (ohci, "autosuspend\n"); (void) ohci_bus_suspend (hcd); diff --git a/include/linux/usb.h b/include/linux/usb.h index 2714814ab66c..46dc0421d19e 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -329,8 +329,6 @@ struct usb_device { struct usb_tt *tt; /* low/full speed dev, highspeed hub */ int ttport; /* device port on that tt hub */ - struct semaphore serialize; - unsigned int toggle[2]; /* one bit for each endpoint * ([0] = IN, [1] = OUT) */ @@ -377,11 +375,12 @@ struct usb_device { extern struct usb_device *usb_get_dev(struct usb_device *dev); extern void usb_put_dev(struct usb_device *dev); -extern void usb_lock_device(struct usb_device *udev); -extern int usb_trylock_device(struct usb_device *udev); +/* USB device locking */ +#define usb_lock_device(udev) down(&(udev)->dev.sem) +#define usb_unlock_device(udev) up(&(udev)->dev.sem) +#define usb_trylock_device(udev) down_trylock(&(udev)->dev.sem) extern int usb_lock_device_for_reset(struct usb_device *udev, struct usb_interface *iface); -extern void usb_unlock_device(struct usb_device *udev); /* USB port reset for device reinitialization */ extern int usb_reset_device(struct usb_device *dev); -- cgit v1.2.3-71-gd317 From 55c527187c9d78f840b284d596a0b298bc1493af Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 23 Nov 2005 12:03:12 -0500 Subject: [PATCH] USB: Consider power budget when choosing configuration This patch (as609) changes the way we keep track of power budgeting for USB hubs and devices, and it updates the choose_configuration routine to take this information into account. (This is something we should have been doing all along.) 
A new field in struct usb_device holds the amount of bus current available from the upstream port, and the usb_hub structure keeps track of the current available for each downstream port. Two new rules for configuration selection are added: Don't select a self-powered configuration when only bus power is available. Don't select a configuration requiring more bus power than is available. However the first rule is #if-ed out, because I found that the internal hub in my HP USB keyboard claims that its only configuration is self-powered. The rule would prevent the configuration from being chosen, leaving the hub & keyboard unconfigured. Since similar descriptor errors may turn out to be fairly common, it seemed wise not to include a rule that would break automatic configuration unnecessarily for such devices. The second rule may also trigger unnecessarily, although this should be less common. More likely it will annoy people by sometimes failing to accept configurations that should never have been chosen in the first place. The patch also changes usbcore's reaction when no configuration is suitable. Instead of raising an error and rejecting the device, now the core will simply leave the device unconfigured. People can always work around such problems by installing configurations manually through sysfs. Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 5 +- drivers/usb/core/hub.c | 229 ++++++++++++++++++++++++++++++--------------- drivers/usb/core/hub.h | 3 +- drivers/usb/core/message.c | 6 ++ include/linux/usb.h | 2 + 5 files changed, 164 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index d16a0e8a7d72..0018bbc4de34 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1825,8 +1825,6 @@ int usb_add_hcd(struct usb_hcd *hcd, retval = -ENOMEM; goto err_allocate_root_hub; } - rhdev->speed = (hcd->driver->flags & HCD_USB2) ? 
USB_SPEED_HIGH : - USB_SPEED_FULL; /* Although in principle hcd->driver->start() might need to use rhdev, * none of the current drivers do. @@ -1844,6 +1842,9 @@ int usb_add_hcd(struct usb_hcd *hcd, dev_dbg(hcd->self.controller, "supports USB remote wakeup\n"); hcd->remote_wakeup = hcd->can_wakeup; + rhdev->speed = (hcd->driver->flags & HCD_USB2) ? USB_SPEED_HIGH : + USB_SPEED_FULL; + rhdev->bus_mA = min(500u, hcd->power_budget); if ((retval = register_root_hub(rhdev, hcd)) != 0) goto err_register_root_hub; diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 895ac829b9cf..b311005ff1a6 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -702,26 +702,40 @@ static int hub_configure(struct usb_hub *hub, * and battery-powered root hubs (may provide just 8 mA). */ ret = usb_get_status(hdev, USB_RECIP_DEVICE, 0, &hubstatus); - if (ret < 0) { + if (ret < 2) { message = "can't get hub status"; goto fail; } le16_to_cpus(&hubstatus); if (hdev == hdev->bus->root_hub) { - struct usb_hcd *hcd = - container_of(hdev->bus, struct usb_hcd, self); - - hub->power_budget = min(500u, hcd->power_budget) / 2; + if (hdev->bus_mA == 0 || hdev->bus_mA >= 500) + hub->mA_per_port = 500; + else { + hub->mA_per_port = hdev->bus_mA; + hub->limited_power = 1; + } } else if ((hubstatus & (1 << USB_DEVICE_SELF_POWERED)) == 0) { dev_dbg(hub_dev, "hub controller current requirement: %dmA\n", hub->descriptor->bHubContrCurrent); - hub->power_budget = (501 - hub->descriptor->bHubContrCurrent) - / 2; + hub->limited_power = 1; + if (hdev->maxchild > 0) { + int remaining = hdev->bus_mA - + hub->descriptor->bHubContrCurrent; + + if (remaining < hdev->maxchild * 100) + dev_warn(hub_dev, + "insufficient power available " + "to use all downstream ports\n"); + hub->mA_per_port = 100; /* 7.2.1.1 */ + } + } else { /* Self-powered external hub */ + /* FIXME: What about battery-powered external hubs that + * provide less current per port? 
*/ + hub->mA_per_port = 500; } - if (hub->power_budget) - dev_dbg(hub_dev, "%dmA bus power budget for children\n", - hub->power_budget * 2); - + if (hub->mA_per_port < 500) + dev_dbg(hub_dev, "%umA bus power budget for each child\n", + hub->mA_per_port); ret = hub_hub_status(hub, &hubstatus, &hubchange); if (ret < 0) { @@ -1136,45 +1150,107 @@ void usb_disconnect(struct usb_device **pdev) device_unregister(&udev->dev); } +static inline const char *plural(int n) +{ + return (n == 1 ? "" : "s"); +} + static int choose_configuration(struct usb_device *udev) { - int c, i; + int i; + u16 devstatus; + int bus_powered; + int num_configs; + struct usb_host_config *c, *best; + + /* If this fails, assume the device is bus-powered */ + devstatus = 0; + usb_get_status(udev, USB_RECIP_DEVICE, 0, &devstatus); + le16_to_cpus(&devstatus); + bus_powered = ((devstatus & (1 << USB_DEVICE_SELF_POWERED)) == 0); + dev_dbg(&udev->dev, "device is %s-powered\n", + bus_powered ? "bus" : "self"); + + best = NULL; + c = udev->config; + num_configs = udev->descriptor.bNumConfigurations; + for (i = 0; i < num_configs; (i++, c++)) { + struct usb_interface_descriptor *desc = + &c->intf_cache[0]->altsetting->desc; + + /* + * HP's USB bus-powered keyboard has only one configuration + * and it claims to be self-powered; other devices may have + * similar errors in their descriptors. If the next test + * were allowed to execute, such configurations would always + * be rejected and the devices would not work as expected. + */ +#if 0 + /* Rule out self-powered configs for a bus-powered device */ + if (bus_powered && (c->desc.bmAttributes & + USB_CONFIG_ATT_SELFPOWER)) + continue; +#endif - /* NOTE: this should interact with hub power budgeting */ + /* + * The next test may not be as effective as it should be. + * Some hubs have errors in their descriptor, claiming + * to be self-powered when they are really bus-powered. 
+ * We will overestimate the amount of current such hubs + * make available for each port. + * + * This is a fairly benign sort of failure. It won't + * cause us to reject configurations that we should have + * accepted. + */ - c = udev->config[0].desc.bConfigurationValue; - if (udev->descriptor.bNumConfigurations != 1) { - for (i = 0; i < udev->descriptor.bNumConfigurations; i++) { - struct usb_interface_descriptor *desc; + /* Rule out configs that draw too much bus current */ + if (c->desc.bMaxPower * 2 > udev->bus_mA) + continue; - /* heuristic: Linux is more likely to have class - * drivers, so avoid vendor-specific interfaces. - */ - desc = &udev->config[i].intf_cache[0] - ->altsetting->desc; - if (desc->bInterfaceClass == USB_CLASS_VENDOR_SPEC) - continue; - /* COMM/2/all is CDC ACM, except 0xff is MSFT RNDIS. - * MSFT needs this to be the first config; never use - * it as the default unless Linux has host-side RNDIS. - * A second config would ideally be CDC-Ethernet, but - * may instead be the "vendor specific" CDC subset - * long used by ARM Linux for sa1100 or pxa255. - */ - if (desc->bInterfaceClass == USB_CLASS_COMM - && desc->bInterfaceSubClass == 2 - && desc->bInterfaceProtocol == 0xff) { - c = udev->config[1].desc.bConfigurationValue; - continue; - } - c = udev->config[i].desc.bConfigurationValue; + /* If the first config's first interface is COMM/2/0xff + * (MSFT RNDIS), rule it out unless Linux has host-side + * RNDIS support. */ + if (i == 0 && desc->bInterfaceClass == USB_CLASS_COMM + && desc->bInterfaceSubClass == 2 + && desc->bInterfaceProtocol == 0xff) { +#ifndef CONFIG_USB_NET_RNDIS + continue; +#else + best = c; +#endif + } + + /* From the remaining configs, choose the first one whose + * first interface is for a non-vendor-specific class. + * Reason: Linux is more likely to have a class driver + * than a vendor-specific driver. 
*/ + else if (udev->descriptor.bDeviceClass != + USB_CLASS_VENDOR_SPEC && + desc->bInterfaceClass != + USB_CLASS_VENDOR_SPEC) { + best = c; break; } + + /* If all the remaining configs are vendor-specific, + * choose the first one. */ + else if (!best) + best = c; + } + + if (best) { + i = best->desc.bConfigurationValue; dev_info(&udev->dev, - "configuration #%d chosen from %d choices\n", - c, udev->descriptor.bNumConfigurations); + "configuration #%d chosen from %d choice%s\n", + i, num_configs, plural(num_configs)); + } else { + i = -1; + dev_warn(&udev->dev, + "no configuration chosen from %d choice%s\n", + num_configs, plural(num_configs)); } - return c; + return i; } #ifdef DEBUG @@ -1327,17 +1403,13 @@ int usb_new_device(struct usb_device *udev) * with the driver core, and lets usb device drivers bind to them. */ c = choose_configuration(udev); - if (c < 0) - dev_warn(&udev->dev, - "can't choose an initial configuration\n"); - else { + if (c >= 0) { err = usb_set_configuration(udev, c); if (err) { dev_err(&udev->dev, "can't set config #%d, error %d\n", c, err); - usb_remove_sysfs_dev_files(udev); - device_del(&udev->dev); - goto fail; + /* This need not be fatal. The user can try to + * set other configurations. */ } } @@ -1702,7 +1774,7 @@ static int finish_device_resume(struct usb_device *udev) * and device drivers will know about any resume quirks. */ status = usb_get_status(udev, USB_RECIP_DEVICE, 0, &devstatus); - if (status < 0) + if (status < 2) dev_dbg(&udev->dev, "gone after usb resume? 
status %d\n", status); @@ -1711,7 +1783,7 @@ static int finish_device_resume(struct usb_device *udev) int (*resume)(struct device *); le16_to_cpus(&devstatus); - if (devstatus & (1 << USB_DEVICE_REMOTE_WAKEUP) + if ((devstatus & (1 << USB_DEVICE_REMOTE_WAKEUP)) && udev->parent) { status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), @@ -2374,39 +2446,36 @@ hub_power_remaining (struct usb_hub *hub) { struct usb_device *hdev = hub->hdev; int remaining; - unsigned i; + int port1; - remaining = hub->power_budget; - if (!remaining) /* self-powered */ + if (!hub->limited_power) return 0; - for (i = 0; i < hdev->maxchild; i++) { - struct usb_device *udev = hdev->children[i]; - int delta, ceiling; + remaining = hdev->bus_mA - hub->descriptor->bHubContrCurrent; + for (port1 = 1; port1 <= hdev->maxchild; ++port1) { + struct usb_device *udev = hdev->children[port1 - 1]; + int delta; if (!udev) continue; - /* 100mA per-port ceiling, or 8mA for OTG ports */ - if (i != (udev->bus->otg_port - 1) || hdev->parent) - ceiling = 50; - else - ceiling = 4; - + /* Unconfigured devices may not use more than 100mA, + * or 8mA for OTG ports */ if (udev->actconfig) - delta = udev->actconfig->desc.bMaxPower; + delta = udev->actconfig->desc.bMaxPower * 2; + else if (port1 != udev->bus->otg_port || hdev->parent) + delta = 100; else - delta = ceiling; - // dev_dbg(&udev->dev, "budgeted %dmA\n", 2 * delta); - if (delta > ceiling) - dev_warn(&udev->dev, "%dmA over %dmA budget!\n", - 2 * (delta - ceiling), 2 * ceiling); + delta = 8; + if (delta > hub->mA_per_port) + dev_warn(&udev->dev, "%dmA is over %umA budget " + "for port %d!\n", + delta, hub->mA_per_port, port1); remaining -= delta; } if (remaining < 0) { - dev_warn(hub->intfdev, - "%dmA over power budget!\n", - -2 * remaining); + dev_warn(hub->intfdev, "%dmA over power budget!\n", + - remaining); remaining = 0; } return remaining; @@ -2501,7 +2570,8 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1, 
usb_set_device_state(udev, USB_STATE_POWERED); udev->speed = USB_SPEED_UNKNOWN; - + udev->bus_mA = hub->mA_per_port; + /* set the address */ choose_address(udev); if (udev->devnum <= 0) { @@ -2521,16 +2591,16 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1, * on the parent. */ if (udev->descriptor.bDeviceClass == USB_CLASS_HUB - && hub->power_budget) { + && udev->bus_mA <= 100) { u16 devstat; status = usb_get_status(udev, USB_RECIP_DEVICE, 0, &devstat); - if (status < 0) { + if (status < 2) { dev_dbg(&udev->dev, "get status %d ?\n", status); goto loop_disable; } - cpu_to_le16s(&devstat); + le16_to_cpus(&devstat); if ((devstat & (1 << USB_DEVICE_SELF_POWERED)) == 0) { dev_err(&udev->dev, "can't connect bus-powered hub " @@ -2583,9 +2653,7 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1, status = hub_power_remaining(hub); if (status) - dev_dbg(hub_dev, - "%dmA power budget left\n", - 2 * status); + dev_dbg(hub_dev, "%dmA power budget left\n", status); return; @@ -2797,6 +2865,11 @@ static void hub_events(void) if (hubchange & HUB_CHANGE_LOCAL_POWER) { dev_dbg (hub_dev, "power change\n"); clear_hub_feature(hdev, C_HUB_LOCAL_POWER); + if (hubstatus & HUB_STATUS_LOCAL_POWER) + /* FIXME: Is this always true? 
*/ + hub->limited_power = 0; + else + hub->limited_power = 1; } if (hubchange & HUB_CHANGE_OVERCURRENT) { dev_dbg (hub_dev, "overcurrent change\n"); diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index bf23f8978024..29d5f45a8456 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -220,8 +220,9 @@ struct usb_hub { struct usb_hub_descriptor *descriptor; /* class descriptor */ struct usb_tt tt; /* Transaction Translator */ - u8 power_budget; /* in 2mA units; or zero */ + unsigned mA_per_port; /* current for each child */ + unsigned limited_power:1; unsigned quiescing:1; unsigned activating:1; unsigned resume_root_hub:1; diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index fe74f99ca5f4..99ab774d4fdb 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1387,6 +1387,12 @@ free_interfaces: if (dev->state != USB_STATE_ADDRESS) usb_disable_device (dev, 1); // Skip ep0 + n = dev->bus_mA - cp->desc.bMaxPower * 2; + if (n < 0) + dev_warn(&dev->dev, "new config #%d exceeds power " + "limit by %dmA\n", + configuration, -n); + if ((ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), USB_REQ_SET_CONFIGURATION, 0, configuration, 0, NULL, 0, USB_CTRL_SET_TIMEOUT)) < 0) diff --git a/include/linux/usb.h b/include/linux/usb.h index 46dc0421d19e..27575e678a7c 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -347,6 +347,8 @@ struct usb_device { char **rawdescriptors; /* Raw descriptors for each config */ + unsigned short bus_mA; /* Current available from the bus */ + int have_langid; /* whether string_langid is valid */ int string_langid; /* language ID for strings */ -- cgit v1.2.3-71-gd317 From 12c3da346eb81b6a281031f62eda3bca993dff5a Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 23 Nov 2005 12:09:52 -0500 Subject: [PATCH] USB: Store port number in usb_device This patch (as610) adds a field to struct usb_device to store the device's port number. 
This allows us to remove several loops in the hub driver (searching for a particular device among all the entries in the parent's array of children). Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 79 ++++++++++++-------------------------------------- drivers/usb/core/usb.c | 1 + include/linux/usb.h | 1 + 3 files changed, 20 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b311005ff1a6..a523c8f20b5d 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -946,24 +946,21 @@ static int locktree(struct usb_device *udev) t = locktree(hdev); if (t < 0) return t; - for (t = 0; t < hdev->maxchild; t++) { - if (hdev->children[t] == udev) { - /* everything is fail-fast once disconnect - * processing starts - */ - if (udev->state == USB_STATE_NOTATTACHED) - break; - /* when everyone grabs locks top->bottom, - * non-overlapping work may be concurrent - */ - usb_lock_device(udev); - usb_unlock_device(hdev); - return t + 1; - } + /* everything is fail-fast once disconnect + * processing starts + */ + if (udev->state == USB_STATE_NOTATTACHED) { + usb_unlock_device(hdev); + return -ENODEV; } + + /* when everyone grabs locks top->bottom, + * non-overlapping work may be concurrent + */ + usb_lock_device(udev); usb_unlock_device(hdev); - return -ENODEV; + return udev->portnum; } static void recursively_mark_NOTATTACHED(struct usb_device *udev) @@ -1335,15 +1332,9 @@ int usb_new_device(struct usb_device *udev) le16_to_cpu(udev->config[0].desc.wTotalLength), USB_DT_OTG, (void **) &desc) == 0) { if (desc->bmAttributes & USB_OTG_HNP) { - unsigned port1; + unsigned port1 = udev->portnum; struct usb_device *root = udev->parent; - for (port1 = 1; port1 <= root->maxchild; - port1++) { - if (root->children[port1-1] == udev) - break; - } - dev_info(&udev->dev, "Dual-Role OTG device on %sHNP port\n", (port1 == bus->otg_port) @@ -1720,22 +1711,9 @@ static int 
__usb_suspend_device (struct usb_device *udev, int port1) int usb_suspend_device(struct usb_device *udev) { #ifdef CONFIG_USB_SUSPEND - int port1; - if (udev->state == USB_STATE_NOTATTACHED) return -ENODEV; - if (!udev->parent) - port1 = 0; - else { - for (port1 = udev->parent->maxchild; port1 > 0; --port1) { - if (udev->parent->children[port1-1] == udev) - break; - } - if (port1 == 0) - return -ENODEV; - } - - return __usb_suspend_device(udev, port1); + return __usb_suspend_device(udev, udev->portnum); #else /* NOTE: udev->state unchanged, it's not lying ... */ udev->dev.power.power_state = PMSG_SUSPEND; @@ -1893,20 +1871,10 @@ hub_port_resume(struct usb_hub *hub, int port1, struct usb_device *udev) */ int usb_resume_device(struct usb_device *udev) { - int port1, status; + int status; if (udev->state == USB_STATE_NOTATTACHED) return -ENODEV; - if (!udev->parent) - port1 = 0; - else { - for (port1 = udev->parent->maxchild; port1 > 0; --port1) { - if (udev->parent->children[port1-1] == udev) - break; - } - if (port1 == 0) - return -ENODEV; - } #ifdef CONFIG_USB_SUSPEND /* selective resume of one downstream hub-to-device port */ @@ -1915,7 +1883,7 @@ int usb_resume_device(struct usb_device *udev) // NOTE swsusp may bork us, device state being wrong... // NOTE this fails if parent is also suspended... 
status = hub_port_resume(hdev_to_hub(udev->parent), - port1, udev); + udev->portnum, udev); } else status = 0; } else @@ -3029,7 +2997,8 @@ int usb_reset_device(struct usb_device *udev) struct usb_hub *parent_hub; struct usb_device_descriptor descriptor = udev->descriptor; struct usb_hub *hub = NULL; - int i, ret = 0, port1 = -1; + int i, ret = 0; + int port1 = udev->portnum; if (udev->state == USB_STATE_NOTATTACHED || udev->state == USB_STATE_SUSPENDED) { @@ -3043,18 +3012,6 @@ int usb_reset_device(struct usb_device *udev) dev_dbg(&udev->dev, "%s for root hub!\n", __FUNCTION__); return -EISDIR; } - - for (i = 0; i < parent_hdev->maxchild; i++) - if (parent_hdev->children[i] == udev) { - port1 = i + 1; - break; - } - - if (port1 < 0) { - /* If this ever happens, it's very bad */ - dev_err(&udev->dev, "Can't locate device's port!\n"); - return -ENOENT; - } parent_hub = hdev_to_hub(parent_hdev); /* If we're resetting an active hub, take some special actions */ diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index fcfda21be499..39e6b61b898a 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -439,6 +439,7 @@ usb_alloc_dev(struct usb_device *parent, struct usb_bus *bus, unsigned port1) /* hub driver sets up TT records */ } + dev->portnum = port1; dev->bus = bus; dev->parent = parent; INIT_LIST_HEAD(&dev->filelist); diff --git a/include/linux/usb.h b/include/linux/usb.h index 27575e678a7c..e59d1bd52d4f 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -348,6 +348,7 @@ struct usb_device { char **rawdescriptors; /* Raw descriptors for each config */ unsigned short bus_mA; /* Current available from the bus */ + u8 portnum; /* Parent port number (origin 1) */ int have_langid; /* whether string_langid is valid */ int string_langid; /* language ID for strings */ -- cgit v1.2.3-71-gd317 From e80b0fade09ef1ee67b0898d480d4c588f124d5f Mon Sep 17 00:00:00 2001 From: Matthew Dharm Date: Sun, 4 Dec 2005 22:02:44 -0800 Subject: [PATCH] USB 
Storage: add alauda support This patch adds another usb-storage subdriver, which supports two fairly old dual-XD/SmartMedia reader-writers (USB1.1 devices). This driver was written by Daniel Drake -- he notes that he wrote this driver without specs, however a vendor-supplied GPL driver for the previous generation of products ("sma03") did prove to be quite useful, as did the sddr09 driver which also has to deal with low-level physical block layout on SmartMedia. The original patch has been reformed by me, as it clashed with the libusual patches. We really need to consolidate some of this common SmartMedia code, and get together with the MTD guys to share it with them as well. Signed-off-by: Daniel Drake Signed-off-by: Matthew Dharm Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/Kconfig | 9 + drivers/usb/storage/Makefile | 1 + drivers/usb/storage/alauda.c | 1119 ++++++++++++++++++++++++++++++++++++ drivers/usb/storage/alauda.h | 100 ++++ drivers/usb/storage/unusual_devs.h | 14 + drivers/usb/storage/usb.c | 12 + include/linux/usb_usual.h | 3 + 7 files changed, 1258 insertions(+) create mode 100644 drivers/usb/storage/alauda.c create mode 100644 drivers/usb/storage/alauda.h (limited to 'include/linux') diff --git a/drivers/usb/storage/Kconfig b/drivers/usb/storage/Kconfig index bdfcb95d9c12..92be101feba7 100644 --- a/drivers/usb/storage/Kconfig +++ b/drivers/usb/storage/Kconfig @@ -112,6 +112,15 @@ config USB_STORAGE_JUMPSHOT Say Y here to include additional code to support the Lexar Jumpshot USB CompactFlash reader. +config USB_STORAGE_ALAUDA + bool "Olympus MAUSB-10/Fuji DPC-R1 support (EXPERIMENTAL)" + depends on USB_STORAGE && EXPERIMENTAL + help + Say Y here to include additional code to support the Olympus MAUSB-10 + and Fujifilm DPC-R1 USB Card reader/writer devices. + + These devices are based on the Alauda chip and support both + XD and SmartMedia cards.
config USB_STORAGE_ONETOUCH bool "Support OneTouch Button on Maxtor Hard Drives (EXPERIMENTAL)" diff --git a/drivers/usb/storage/Makefile b/drivers/usb/storage/Makefile index 2d416e9028bb..8cbba22508a4 100644 --- a/drivers/usb/storage/Makefile +++ b/drivers/usb/storage/Makefile @@ -18,6 +18,7 @@ usb-storage-obj-$(CONFIG_USB_STORAGE_DPCM) += dpcm.o usb-storage-obj-$(CONFIG_USB_STORAGE_ISD200) += isd200.o usb-storage-obj-$(CONFIG_USB_STORAGE_DATAFAB) += datafab.o usb-storage-obj-$(CONFIG_USB_STORAGE_JUMPSHOT) += jumpshot.o +usb-storage-obj-$(CONFIG_USB_STORAGE_ALAUDA) += alauda.o usb-storage-obj-$(CONFIG_USB_STORAGE_ONETOUCH) += onetouch.o usb-storage-objs := scsiglue.o protocol.o transport.o usb.o \ diff --git a/drivers/usb/storage/alauda.c b/drivers/usb/storage/alauda.c new file mode 100644 index 000000000000..4d3cbb12b713 --- /dev/null +++ b/drivers/usb/storage/alauda.c @@ -0,0 +1,1119 @@ +/* + * Driver for Alauda-based card readers + * + * Current development and maintenance by: + * (c) 2005 Daniel Drake + * + * The 'Alauda' is a chip manufactured by RATOC for OEM use. + * + * Alauda implements a vendor-specific command set to access two media reader + * ports (XD, SmartMedia). This driver converts SCSI commands to the commands + * which are accepted by these devices. + * + * The driver was developed through reverse-engineering, with the help of the + * sddr09 driver which has many similarities, and with some help from the + * (very old) vendor-supplied GPL sma03 driver. + * + * For protocol info, see http://alauda.sourceforge.net + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version.
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include + +#include "usb.h" +#include "transport.h" +#include "protocol.h" +#include "debug.h" +#include "alauda.h" + +#define short_pack(lsb,msb) ( ((u16)(lsb)) | ( ((u16)(msb))<<8 ) ) +#define LSB_of(s) ((s)&0xFF) +#define MSB_of(s) ((s)>>8) + +#define MEDIA_PORT(us) us->srb->device->lun +#define MEDIA_INFO(us) ((struct alauda_info *)us->extra)->port[MEDIA_PORT(us)] + +#define PBA_LO(pba) ((pba & 0xF) << 5) +#define PBA_HI(pba) (pba >> 3) +#define PBA_ZONE(pba) (pba >> 11) + +/* + * Media handling + */ + +struct alauda_card_info { + unsigned char id; /* id byte */ + unsigned char chipshift; /* 1< LBA mappings for a particular port + */ +static void alauda_free_maps (struct alauda_media_info *media_info) +{ + unsigned int shift = media_info->zoneshift + + media_info->blockshift + media_info->pageshift; + unsigned int num_zones = media_info->capacity >> shift; + unsigned int i; + + if (media_info->lba_to_pba != NULL) + for (i = 0; i < num_zones; i++) { + kfree(media_info->lba_to_pba[i]); + media_info->lba_to_pba[i] = NULL; + } + + if (media_info->pba_to_lba != NULL) + for (i = 0; i < num_zones; i++) { + kfree(media_info->pba_to_lba[i]); + media_info->pba_to_lba[i] = NULL; + } +} + +/* + * Returns 2 bytes of status data + * The first byte describes media status, and second byte describes door status + */ +static int alauda_get_media_status(struct us_data *us, unsigned char *data) +{ + int rc; + unsigned char command; + + if (MEDIA_PORT(us) == ALAUDA_PORT_XD) + command = 
ALAUDA_GET_XD_MEDIA_STATUS; + else + command = ALAUDA_GET_SM_MEDIA_STATUS; + + rc = usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe, + command, 0xc0, 0, 1, data, 2); + + US_DEBUGP("alauda_get_media_status: Media status %02X %02X\n", + data[0], data[1]); + + return rc; +} + +/* + * Clears the "media was changed" bit so that we know when it changes again + * in the future. + */ +static int alauda_ack_media(struct us_data *us) +{ + unsigned char command; + + if (MEDIA_PORT(us) == ALAUDA_PORT_XD) + command = ALAUDA_ACK_XD_MEDIA_CHANGE; + else + command = ALAUDA_ACK_SM_MEDIA_CHANGE; + + return usb_stor_ctrl_transfer(us, us->send_ctrl_pipe, + command, 0x40, 0, 1, NULL, 0); +} + +/* + * Retrieves a 4-byte media signature, which indicates manufacturer, capacity, + * and some other details. + */ +static int alauda_get_media_signature(struct us_data *us, unsigned char *data) +{ + unsigned char command; + + if (MEDIA_PORT(us) == ALAUDA_PORT_XD) + command = ALAUDA_GET_XD_MEDIA_SIG; + else + command = ALAUDA_GET_SM_MEDIA_SIG; + + return usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe, + command, 0xc0, 0, 0, data, 4); +} + +/* + * Resets the media status (but not the whole device?) + */ +static int alauda_reset_media(struct us_data *us) +{ + unsigned char *command = us->iobuf; + + memset(command, 0, 9); + command[0] = ALAUDA_BULK_CMD; + command[1] = ALAUDA_BULK_RESET_MEDIA; + command[8] = MEDIA_PORT(us); + + return usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, + command, 9, NULL); +} + +/* + * Examines the media and deduces capacity, etc. 
+ */ +static int alauda_init_media(struct us_data *us) +{ + unsigned char *data = us->iobuf; + int ready = 0; + struct alauda_card_info *media_info; + unsigned int num_zones; + + while (ready == 0) { + msleep(20); + + if (alauda_get_media_status(us, data) != USB_STOR_XFER_GOOD) + return USB_STOR_TRANSPORT_ERROR; + + if (data[0] & 0x10) + ready = 1; + } + + US_DEBUGP("alauda_init_media: We are ready for action!\n"); + + if (alauda_ack_media(us) != USB_STOR_XFER_GOOD) + return USB_STOR_TRANSPORT_ERROR; + + msleep(10); + + if (alauda_get_media_status(us, data) != USB_STOR_XFER_GOOD) + return USB_STOR_TRANSPORT_ERROR; + + if (data[0] != 0x14) { + US_DEBUGP("alauda_init_media: Media not ready after ack\n"); + return USB_STOR_TRANSPORT_ERROR; + } + + if (alauda_get_media_signature(us, data) != USB_STOR_XFER_GOOD) + return USB_STOR_TRANSPORT_ERROR; + + US_DEBUGP("alauda_init_media: Media signature: %02X %02X %02X %02X\n", + data[0], data[1], data[2], data[3]); + media_info = alauda_card_find_id(data[1]); + if (media_info == NULL) { + printk("alauda_init_media: Unrecognised media signature: " + "%02X %02X %02X %02X\n", + data[0], data[1], data[2], data[3]); + return USB_STOR_TRANSPORT_ERROR; + } + + MEDIA_INFO(us).capacity = 1 << media_info->chipshift; + US_DEBUGP("Found media with capacity: %ldMB\n", + MEDIA_INFO(us).capacity >> 20); + + MEDIA_INFO(us).pageshift = media_info->pageshift; + MEDIA_INFO(us).blockshift = media_info->blockshift; + MEDIA_INFO(us).zoneshift = media_info->zoneshift; + + MEDIA_INFO(us).pagesize = 1 << media_info->pageshift; + MEDIA_INFO(us).blocksize = 1 << media_info->blockshift; + MEDIA_INFO(us).zonesize = 1 << media_info->zoneshift; + + MEDIA_INFO(us).uzonesize = ((1 << media_info->zoneshift) / 128) * 125; + MEDIA_INFO(us).blockmask = MEDIA_INFO(us).blocksize - 1; + + num_zones = MEDIA_INFO(us).capacity >> (MEDIA_INFO(us).zoneshift + + MEDIA_INFO(us).blockshift + MEDIA_INFO(us).pageshift); + MEDIA_INFO(us).pba_to_lba = kcalloc(num_zones, 
sizeof(u16*), GFP_NOIO); + MEDIA_INFO(us).lba_to_pba = kcalloc(num_zones, sizeof(u16*), GFP_NOIO); + + if (alauda_reset_media(us) != USB_STOR_XFER_GOOD) + return USB_STOR_TRANSPORT_ERROR; + + return USB_STOR_TRANSPORT_GOOD; +} + +/* + * Examines the media status and does the right thing when the media has gone, + * appeared, or changed. + */ +static int alauda_check_media(struct us_data *us) +{ + struct alauda_info *info = (struct alauda_info *) us->extra; + unsigned char status[2]; + int rc; + + rc = alauda_get_media_status(us, status); + + /* Check for no media or door open */ + if ((status[0] & 0x80) || ((status[0] & 0x1F) == 0x10) + || ((status[1] & 0x01) == 0)) { + US_DEBUGP("alauda_check_media: No media, or door open\n"); + alauda_free_maps(&MEDIA_INFO(us)); + info->sense_key = 0x02; + info->sense_asc = 0x3A; + info->sense_ascq = 0x00; + return USB_STOR_TRANSPORT_FAILED; + } + + /* Check for media change */ + if (status[0] & 0x08) { + US_DEBUGP("alauda_check_media: Media change detected\n"); + alauda_free_maps(&MEDIA_INFO(us)); + alauda_init_media(us); + + info->sense_key = UNIT_ATTENTION; + info->sense_asc = 0x28; + info->sense_ascq = 0x00; + return USB_STOR_TRANSPORT_FAILED; + } + + return USB_STOR_TRANSPORT_GOOD; +} + +/* + * Checks the status from the 2nd status register + * Returns 3 bytes of status data, only the first is known + */ +static int alauda_check_status2(struct us_data *us) +{ + int rc; + unsigned char command[] = { + ALAUDA_BULK_CMD, ALAUDA_BULK_GET_STATUS2, + 0, 0, 0, 0, 3, 0, MEDIA_PORT(us) + }; + unsigned char data[3]; + + rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, + command, 9, NULL); + if (rc != USB_STOR_XFER_GOOD) + return rc; + + rc = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, + data, 3, NULL); + if (rc != USB_STOR_XFER_GOOD) + return rc; + + US_DEBUGP("alauda_check_status2: %02X %02X %02X\n", data[0], data[1], data[2]); + if (data[0] & ALAUDA_STATUS_ERROR) + return USB_STOR_XFER_ERROR; + + return 
USB_STOR_XFER_GOOD; +} + +/* + * Gets the redundancy data for the first page of a PBA + * Returns 16 bytes. + */ +static int alauda_get_redu_data(struct us_data *us, u16 pba, unsigned char *data) +{ + int rc; + unsigned char command[] = { + ALAUDA_BULK_CMD, ALAUDA_BULK_GET_REDU_DATA, + PBA_HI(pba), PBA_ZONE(pba), 0, PBA_LO(pba), 0, 0, MEDIA_PORT(us) + }; + + rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, + command, 9, NULL); + if (rc != USB_STOR_XFER_GOOD) + return rc; + + return usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, + data, 16, NULL); +} + +/* + * Finds the first unused PBA in a zone + * Returns the absolute PBA of an unused PBA, or 0 if none found. + */ +static u16 alauda_find_unused_pba(struct alauda_media_info *info, + unsigned int zone) +{ + u16 *pba_to_lba = info->pba_to_lba[zone]; + unsigned int i; + + for (i = 0; i < info->zonesize; i++) + if (pba_to_lba[i] == UNDEF) + return (zone << info->zoneshift) + i; + + return 0; +} + +/* + * Reads the redundancy data for all PBA's in a zone + * Produces lba <--> pba mappings + */ +static int alauda_read_map(struct us_data *us, unsigned int zone) +{ + unsigned char *data = us->iobuf; + int result; + int i, j; + unsigned int zonesize = MEDIA_INFO(us).zonesize; + unsigned int uzonesize = MEDIA_INFO(us).uzonesize; + unsigned int lba_offset, lba_real, blocknum; + unsigned int zone_base_lba = zone * uzonesize; + unsigned int zone_base_pba = zone * zonesize; + u16 *lba_to_pba = kcalloc(zonesize, sizeof(u16), GFP_NOIO); + u16 *pba_to_lba = kcalloc(zonesize, sizeof(u16), GFP_NOIO); + if (lba_to_pba == NULL || pba_to_lba == NULL) { + result = USB_STOR_TRANSPORT_ERROR; + goto error; + } + + US_DEBUGP("alauda_read_map: Mapping blocks for zone %d\n", zone); + + /* 1024 PBA's per zone */ + for (i = 0; i < zonesize; i++) + lba_to_pba[i] = pba_to_lba[i] = UNDEF; + + for (i = 0; i < zonesize; i++) { + blocknum = zone_base_pba + i; + + result = alauda_get_redu_data(us, blocknum, data); + if (result != 
USB_STOR_XFER_GOOD) { + result = USB_STOR_TRANSPORT_ERROR; + goto error; + } + + /* special PBAs have control field 0^16 */ + for (j = 0; j < 16; j++) + if (data[j] != 0) + goto nonz; + pba_to_lba[i] = UNUSABLE; + US_DEBUGP("alauda_read_map: PBA %d has no logical mapping\n", blocknum); + continue; + + nonz: + /* unwritten PBAs have control field FF^16 */ + for (j = 0; j < 16; j++) + if (data[j] != 0xff) + goto nonff; + continue; + + nonff: + /* normal PBAs start with six FFs */ + if (j < 6) { + US_DEBUGP("alauda_read_map: PBA %d has no logical mapping: " + "reserved area = %02X%02X%02X%02X " + "data status %02X block status %02X\n", + blocknum, data[0], data[1], data[2], data[3], + data[4], data[5]); + pba_to_lba[i] = UNUSABLE; + continue; + } + + if ((data[6] >> 4) != 0x01) { + US_DEBUGP("alauda_read_map: PBA %d has invalid address " + "field %02X%02X/%02X%02X\n", + blocknum, data[6], data[7], data[11], data[12]); + pba_to_lba[i] = UNUSABLE; + continue; + } + + /* check even parity */ + if (parity[data[6] ^ data[7]]) { + printk("alauda_read_map: Bad parity in LBA for block %d" + " (%02X %02X)\n", i, data[6], data[7]); + pba_to_lba[i] = UNUSABLE; + continue; + } + + lba_offset = short_pack(data[7], data[6]); + lba_offset = (lba_offset & 0x07FF) >> 1; + lba_real = lba_offset + zone_base_lba; + + /* + * Every 1024 physical blocks ("zone"), the LBA numbers + * go back to zero, but are within a higher block of LBA's. + * Also, there is a maximum of 1000 LBA's per zone. + * In other words, in PBA 1024-2047 you will find LBA 0-999 + * which are really LBA 1000-1999. This allows for 24 bad + * or special physical blocks per zone. 
+ */ + + if (lba_offset >= uzonesize) { + printk("alauda_read_map: Bad low LBA %d for block %d\n", + lba_real, blocknum); + continue; + } + + if (lba_to_pba[lba_offset] != UNDEF) { + printk("alauda_read_map: LBA %d seen for PBA %d and %d\n", + lba_real, lba_to_pba[lba_offset], blocknum); + continue; + } + + pba_to_lba[i] = lba_real; + lba_to_pba[lba_offset] = blocknum; + continue; + } + + MEDIA_INFO(us).lba_to_pba[zone] = lba_to_pba; + MEDIA_INFO(us).pba_to_lba[zone] = pba_to_lba; + result = 0; + goto out; + +error: + kfree(lba_to_pba); + kfree(pba_to_lba); +out: + return result; +} + +/* + * Checks to see whether we have already mapped a certain zone + * If we haven't, the map is generated + */ +static void alauda_ensure_map_for_zone(struct us_data *us, unsigned int zone) +{ + if (MEDIA_INFO(us).lba_to_pba[zone] == NULL + || MEDIA_INFO(us).pba_to_lba[zone] == NULL) + alauda_read_map(us, zone); +} + +/* + * Erases an entire block + */ +static int alauda_erase_block(struct us_data *us, u16 pba) +{ + int rc; + unsigned char command[] = { + ALAUDA_BULK_CMD, ALAUDA_BULK_ERASE_BLOCK, PBA_HI(pba), + PBA_ZONE(pba), 0, PBA_LO(pba), 0x02, 0, MEDIA_PORT(us) + }; + unsigned char buf[2]; + + US_DEBUGP("alauda_erase_block: Erasing PBA %d\n", pba); + + rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, + command, 9, NULL); + if (rc != USB_STOR_XFER_GOOD) + return rc; + + rc = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, + buf, 2, NULL); + if (rc != USB_STOR_XFER_GOOD) + return rc; + + US_DEBUGP("alauda_erase_block: Erase result: %02X %02X\n", + buf[0], buf[1]); + return rc; +} + +/* + * Reads data from a certain offset page inside a PBA, including interleaved + * redundancy data. Returns (pagesize+64)*pages bytes in data. 
+ */
+static int alauda_read_block_raw(struct us_data *us, u16 pba,
+		unsigned int page, unsigned int pages, unsigned char *data)
+{
+	/* 9-byte bulk command: opcode, block address, first page, page count */
+	unsigned char cmd[] = {
+		ALAUDA_BULK_CMD, ALAUDA_BULK_READ_BLOCK, PBA_HI(pba),
+		PBA_ZONE(pba), 0, PBA_LO(pba) + page, pages, 0, MEDIA_PORT(us)
+	};
+	int status;
+
+	US_DEBUGP("alauda_read_block: pba %d page %d count %d\n",
+		pba, page, pages);
+
+	status = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe,
+		cmd, sizeof(cmd), NULL);
+	if (status == USB_STOR_XFER_GOOD)
+		status = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe,
+			data, (MEDIA_INFO(us).pagesize + 64) * pages, NULL);
+
+	return status;
+}
+
+/*
+ * Same as alauda_read_block_raw(), but the 64 bytes of redundancy data that
+ * follow every page are squeezed out afterwards, leaving pagesize*pages bytes
+ * of payload at the start of data.  The buffer still has to be large enough
+ * for the raw transfer, i.e. (pagesize+64)*pages bytes; whatever remains
+ * behind the payload can be ignored by the caller.
+ */
+static int alauda_read_block(struct us_data *us, u16 pba,
+		unsigned int page, unsigned int pages, unsigned char *data)
+{
+	unsigned int pagesize = MEDIA_INFO(us).pagesize;
+	unsigned char *dst = data;
+	unsigned char *src = data;
+	unsigned int n;
+	int status;
+
+	status = alauda_read_block_raw(us, pba, page, pages, data);
+	if (status != USB_STOR_XFER_GOOD)
+		return status;
+
+	/* Pack the pages together; regions overlap, hence memmove */
+	for (n = 0; n < pages; n++) {
+		memmove(dst, src, pagesize);
+		dst += pagesize;
+		src += pagesize + 64;
+	}
+
+	return USB_STOR_XFER_GOOD;
+}
+
+/*
+ * Writes an entire block of data and checks status after write.
+ * Redundancy data must be already included in data. Data should be
+ * (pagesize+64)*blocksize bytes in length.
+ */ +static int alauda_write_block(struct us_data *us, u16 pba, unsigned char *data) +{ + int rc; + struct alauda_info *info = (struct alauda_info *) us->extra; + unsigned char command[] = { + ALAUDA_BULK_CMD, ALAUDA_BULK_WRITE_BLOCK, PBA_HI(pba), + PBA_ZONE(pba), 0, PBA_LO(pba), 32, 0, MEDIA_PORT(us) + }; + + US_DEBUGP("alauda_write_block: pba %d\n", pba); + + rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, + command, 9, NULL); + if (rc != USB_STOR_XFER_GOOD) + return rc; + + rc = usb_stor_bulk_transfer_buf(us, info->wr_ep, data, + (MEDIA_INFO(us).pagesize + 64) * MEDIA_INFO(us).blocksize, + NULL); + if (rc != USB_STOR_XFER_GOOD) + return rc; + + return alauda_check_status2(us); +} + +/* + * Write some data to a specific LBA. + */ +static int alauda_write_lba(struct us_data *us, u16 lba, + unsigned int page, unsigned int pages, + unsigned char *ptr, unsigned char *blockbuffer) +{ + u16 pba, lbap, new_pba; + unsigned char *bptr, *cptr, *xptr; + unsigned char ecc[3]; + int i, result; + unsigned int uzonesize = MEDIA_INFO(us).uzonesize; + unsigned int zonesize = MEDIA_INFO(us).zonesize; + unsigned int pagesize = MEDIA_INFO(us).pagesize; + unsigned int blocksize = MEDIA_INFO(us).blocksize; + unsigned int lba_offset = lba % uzonesize; + unsigned int new_pba_offset; + unsigned int zone = lba / uzonesize; + + alauda_ensure_map_for_zone(us, zone); + + pba = MEDIA_INFO(us).lba_to_pba[zone][lba_offset]; + if (pba == 1) { + /* Maybe it is impossible to write to PBA 1. + Fake success, but don't do anything. 
*/ + printk("alauda_write_lba: avoid writing to pba 1\n"); + return USB_STOR_TRANSPORT_GOOD; + } + + new_pba = alauda_find_unused_pba(&MEDIA_INFO(us), zone); + if (!new_pba) { + printk("alauda_write_lba: Out of unused blocks\n"); + return USB_STOR_TRANSPORT_ERROR; + } + + /* read old contents */ + if (pba != UNDEF) { + result = alauda_read_block_raw(us, pba, 0, + blocksize, blockbuffer); + if (result != USB_STOR_XFER_GOOD) + return result; + } else { + memset(blockbuffer, 0, blocksize * (pagesize + 64)); + } + + lbap = (lba_offset << 1) | 0x1000; + if (parity[MSB_of(lbap) ^ LSB_of(lbap)]) + lbap ^= 1; + + /* check old contents and fill lba */ + for (i = 0; i < blocksize; i++) { + bptr = blockbuffer + (i * (pagesize + 64)); + cptr = bptr + pagesize; + nand_compute_ecc(bptr, ecc); + if (!nand_compare_ecc(cptr+13, ecc)) { + US_DEBUGP("Warning: bad ecc in page %d- of pba %d\n", + i, pba); + nand_store_ecc(cptr+13, ecc); + } + nand_compute_ecc(bptr + (pagesize / 2), ecc); + if (!nand_compare_ecc(cptr+8, ecc)) { + US_DEBUGP("Warning: bad ecc in page %d+ of pba %d\n", + i, pba); + nand_store_ecc(cptr+8, ecc); + } + cptr[6] = cptr[11] = MSB_of(lbap); + cptr[7] = cptr[12] = LSB_of(lbap); + } + + /* copy in new stuff and compute ECC */ + xptr = ptr; + for (i = page; i < page+pages; i++) { + bptr = blockbuffer + (i * (pagesize + 64)); + cptr = bptr + pagesize; + memcpy(bptr, xptr, pagesize); + xptr += pagesize; + nand_compute_ecc(bptr, ecc); + nand_store_ecc(cptr+13, ecc); + nand_compute_ecc(bptr + (pagesize / 2), ecc); + nand_store_ecc(cptr+8, ecc); + } + + result = alauda_write_block(us, new_pba, blockbuffer); + if (result != USB_STOR_XFER_GOOD) + return result; + + new_pba_offset = new_pba - (zone * zonesize); + MEDIA_INFO(us).pba_to_lba[zone][new_pba_offset] = lba; + MEDIA_INFO(us).lba_to_pba[zone][lba_offset] = new_pba; + US_DEBUGP("alauda_write_lba: Remapped LBA %d to PBA %d\n", + lba, new_pba); + + if (pba != UNDEF) { + unsigned int pba_offset = pba - (zone * 
zonesize); + result = alauda_erase_block(us, pba); + if (result != USB_STOR_XFER_GOOD) + return result; + MEDIA_INFO(us).pba_to_lba[zone][pba_offset] = UNDEF; + } + + return USB_STOR_TRANSPORT_GOOD; +} + +/* + * Read data from a specific sector address + */ +static int alauda_read_data(struct us_data *us, unsigned long address, + unsigned int sectors) +{ + unsigned char *buffer; + u16 lba, max_lba; + unsigned int page, len, index, offset; + unsigned int blockshift = MEDIA_INFO(us).blockshift; + unsigned int pageshift = MEDIA_INFO(us).pageshift; + unsigned int blocksize = MEDIA_INFO(us).blocksize; + unsigned int pagesize = MEDIA_INFO(us).pagesize; + unsigned int uzonesize = MEDIA_INFO(us).uzonesize; + int result; + + /* + * Since we only read in one block at a time, we have to create + * a bounce buffer and move the data a piece at a time between the + * bounce buffer and the actual transfer buffer. + * We make this buffer big enough to hold temporary redundancy data, + * which we use when reading the data blocks. + */ + + len = min(sectors, blocksize) * (pagesize + 64); + buffer = kmalloc(len, GFP_NOIO); + if (buffer == NULL) { + printk("alauda_read_data: Out of memory\n"); + return USB_STOR_TRANSPORT_ERROR; + } + + /* Figure out the initial LBA and page */ + lba = address >> blockshift; + page = (address & MEDIA_INFO(us).blockmask); + max_lba = MEDIA_INFO(us).capacity >> (blockshift + pageshift); + + result = USB_STOR_TRANSPORT_GOOD; + index = offset = 0; + + while (sectors > 0) { + unsigned int zone = lba / uzonesize; /* integer division */ + unsigned int lba_offset = lba - (zone * uzonesize); + unsigned int pages; + u16 pba; + alauda_ensure_map_for_zone(us, zone); + + /* Not overflowing capacity? 
*/ + if (lba >= max_lba) { + US_DEBUGP("Error: Requested lba %u exceeds " + "maximum %u\n", lba, max_lba); + result = USB_STOR_TRANSPORT_ERROR; + break; + } + + /* Find number of pages we can read in this block */ + pages = min(sectors, blocksize - page); + len = pages << pageshift; + + /* Find where this lba lives on disk */ + pba = MEDIA_INFO(us).lba_to_pba[zone][lba_offset]; + + if (pba == UNDEF) { /* this lba was never written */ + US_DEBUGP("Read %d zero pages (LBA %d) page %d\n", + pages, lba, page); + + /* This is not really an error. It just means + that the block has never been written. + Instead of returning USB_STOR_TRANSPORT_ERROR + it is better to return all zero data. */ + + memset(buffer, 0, len); + } else { + US_DEBUGP("Read %d pages, from PBA %d" + " (LBA %d) page %d\n", + pages, pba, lba, page); + + result = alauda_read_block(us, pba, page, pages, buffer); + if (result != USB_STOR_TRANSPORT_GOOD) + break; + } + + /* Store the data in the transfer buffer */ + usb_stor_access_xfer_buf(buffer, len, us->srb, + &index, &offset, TO_XFER_BUF); + + page = 0; + lba++; + sectors -= pages; + } + + kfree(buffer); + return result; +} + +/* + * Write data to a specific sector address + */ +static int alauda_write_data(struct us_data *us, unsigned long address, + unsigned int sectors) +{ + unsigned char *buffer, *blockbuffer; + unsigned int page, len, index, offset; + unsigned int blockshift = MEDIA_INFO(us).blockshift; + unsigned int pageshift = MEDIA_INFO(us).pageshift; + unsigned int blocksize = MEDIA_INFO(us).blocksize; + unsigned int pagesize = MEDIA_INFO(us).pagesize; + u16 lba, max_lba; + int result; + + /* + * Since we don't write the user data directly to the device, + * we have to create a bounce buffer and move the data a piece + * at a time between the bounce buffer and the actual transfer buffer. 
+ */ + + len = min(sectors, blocksize) * pagesize; + buffer = kmalloc(len, GFP_NOIO); + if (buffer == NULL) { + printk("alauda_write_data: Out of memory\n"); + return USB_STOR_TRANSPORT_ERROR; + } + + /* + * We also need a temporary block buffer, where we read in the old data, + * overwrite parts with the new data, and manipulate the redundancy data + */ + blockbuffer = kmalloc((pagesize + 64) * blocksize, GFP_NOIO); + if (blockbuffer == NULL) { + printk("alauda_write_data: Out of memory\n"); + kfree(buffer); + return USB_STOR_TRANSPORT_ERROR; + } + + /* Figure out the initial LBA and page */ + lba = address >> blockshift; + page = (address & MEDIA_INFO(us).blockmask); + max_lba = MEDIA_INFO(us).capacity >> (pageshift + blockshift); + + result = USB_STOR_TRANSPORT_GOOD; + index = offset = 0; + + while (sectors > 0) { + /* Write as many sectors as possible in this block */ + unsigned int pages = min(sectors, blocksize - page); + len = pages << pageshift; + + /* Not overflowing capacity? */ + if (lba >= max_lba) { + US_DEBUGP("alauda_write_data: Requested lba %u exceeds " + "maximum %u\n", lba, max_lba); + result = USB_STOR_TRANSPORT_ERROR; + break; + } + + /* Get the data from the transfer buffer */ + usb_stor_access_xfer_buf(buffer, len, us->srb, + &index, &offset, FROM_XFER_BUF); + + result = alauda_write_lba(us, lba, page, pages, buffer, + blockbuffer); + if (result != USB_STOR_TRANSPORT_GOOD) + break; + + page = 0; + lba++; + sectors -= pages; + } + + kfree(buffer); + kfree(blockbuffer); + return result; +} + +/* + * Our interface with the rest of the world + */ + +static void alauda_info_destructor(void *extra) +{ + struct alauda_info *info = (struct alauda_info *) extra; + int port; + + if (!info) + return; + + for (port = 0; port < 2; port++) { + struct alauda_media_info *media_info = &info->port[port]; + + alauda_free_maps(media_info); + kfree(media_info->lba_to_pba); + kfree(media_info->pba_to_lba); + } +} + +/* + * Initialize alauda_info struct and find 
the data-write endpoint + */ +int init_alauda(struct us_data *us) +{ + struct alauda_info *info; + struct usb_host_interface *altsetting = us->pusb_intf->cur_altsetting; + nand_init_ecc(); + + us->extra = kzalloc(sizeof(struct alauda_info), GFP_NOIO); + if (!us->extra) { + US_DEBUGP("init_alauda: Gah! Can't allocate storage for" + "alauda info struct!\n"); + return USB_STOR_TRANSPORT_ERROR; + } + info = (struct alauda_info *) us->extra; + us->extra_destructor = alauda_info_destructor; + + info->wr_ep = usb_sndbulkpipe(us->pusb_dev, + altsetting->endpoint[0].desc.bEndpointAddress + & USB_ENDPOINT_NUMBER_MASK); + + return USB_STOR_TRANSPORT_GOOD; +} + +int alauda_transport(struct scsi_cmnd *srb, struct us_data *us) +{ + int rc; + struct alauda_info *info = (struct alauda_info *) us->extra; + unsigned char *ptr = us->iobuf; + static unsigned char inquiry_response[36] = { + 0x00, 0x80, 0x00, 0x01, 0x1F, 0x00, 0x00, 0x00 + }; + + if (srb->cmnd[0] == INQUIRY) { + US_DEBUGP("alauda_transport: INQUIRY. 
" + "Returning bogus response.\n"); + memcpy(ptr, inquiry_response, sizeof(inquiry_response)); + fill_inquiry_response(us, ptr, 36); + return USB_STOR_TRANSPORT_GOOD; + } + + if (srb->cmnd[0] == TEST_UNIT_READY) { + US_DEBUGP("alauda_transport: TEST_UNIT_READY.\n"); + return alauda_check_media(us); + } + + if (srb->cmnd[0] == READ_CAPACITY) { + unsigned int num_zones; + unsigned long capacity; + + rc = alauda_check_media(us); + if (rc != USB_STOR_TRANSPORT_GOOD) + return rc; + + num_zones = MEDIA_INFO(us).capacity >> (MEDIA_INFO(us).zoneshift + + MEDIA_INFO(us).blockshift + MEDIA_INFO(us).pageshift); + + capacity = num_zones * MEDIA_INFO(us).uzonesize + * MEDIA_INFO(us).blocksize; + + /* Report capacity and page size */ + ((__be32 *) ptr)[0] = cpu_to_be32(capacity - 1); + ((__be32 *) ptr)[1] = cpu_to_be32(512); + + usb_stor_set_xfer_buf(ptr, 8, srb); + return USB_STOR_TRANSPORT_GOOD; + } + + if (srb->cmnd[0] == READ_10) { + unsigned int page, pages; + + rc = alauda_check_media(us); + if (rc != USB_STOR_TRANSPORT_GOOD) + return rc; + + page = short_pack(srb->cmnd[3], srb->cmnd[2]); + page <<= 16; + page |= short_pack(srb->cmnd[5], srb->cmnd[4]); + pages = short_pack(srb->cmnd[8], srb->cmnd[7]); + + US_DEBUGP("alauda_transport: READ_10: page %d pagect %d\n", + page, pages); + + return alauda_read_data(us, page, pages); + } + + if (srb->cmnd[0] == WRITE_10) { + unsigned int page, pages; + + rc = alauda_check_media(us); + if (rc != USB_STOR_TRANSPORT_GOOD) + return rc; + + page = short_pack(srb->cmnd[3], srb->cmnd[2]); + page <<= 16; + page |= short_pack(srb->cmnd[5], srb->cmnd[4]); + pages = short_pack(srb->cmnd[8], srb->cmnd[7]); + + US_DEBUGP("alauda_transport: WRITE_10: page %d pagect %d\n", + page, pages); + + return alauda_write_data(us, page, pages); + } + + if (srb->cmnd[0] == REQUEST_SENSE) { + US_DEBUGP("alauda_transport: REQUEST_SENSE.\n"); + + memset(ptr, 0, 18); + ptr[0] = 0xF0; + ptr[2] = info->sense_key; + ptr[7] = 11; + ptr[12] = info->sense_asc; + 
ptr[13] = info->sense_ascq; + usb_stor_set_xfer_buf(ptr, 18, srb); + + return USB_STOR_TRANSPORT_GOOD; + } + + if (srb->cmnd[0] == ALLOW_MEDIUM_REMOVAL) { + /* sure. whatever. not like we can stop the user from popping + the media out of the device (no locking doors, etc) */ + return USB_STOR_TRANSPORT_GOOD; + } + + US_DEBUGP("alauda_transport: Gah! Unknown command: %d (0x%x)\n", + srb->cmnd[0], srb->cmnd[0]); + info->sense_key = 0x05; + info->sense_asc = 0x20; + info->sense_ascq = 0x00; + return USB_STOR_TRANSPORT_FAILED; +} + diff --git a/drivers/usb/storage/alauda.h b/drivers/usb/storage/alauda.h new file mode 100644 index 000000000000..a700f87d0803 --- /dev/null +++ b/drivers/usb/storage/alauda.h @@ -0,0 +1,100 @@ +/* + * Driver for Alauda-based card readers + * + * Current development and maintenance by: + * (c) 2005 Daniel Drake + * + * See alauda.c for more explanation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */
+
+#ifndef _USB_ALAUDA_H
+#define _USB_ALAUDA_H
+
+/*
+ * Status bytes
+ */
+#define ALAUDA_STATUS_ERROR		0x01
+#define ALAUDA_STATUS_READY		0x40
+
+/*
+ * Control opcodes (for request field)
+ * Each operation has one opcode for the xD port and one for the SM port.
+ */
+#define ALAUDA_GET_XD_MEDIA_STATUS	0x08
+#define ALAUDA_GET_SM_MEDIA_STATUS	0x98
+#define ALAUDA_ACK_XD_MEDIA_CHANGE	0x0a
+#define ALAUDA_ACK_SM_MEDIA_CHANGE	0x9a
+#define ALAUDA_GET_XD_MEDIA_SIG		0x86
+#define ALAUDA_GET_SM_MEDIA_SIG		0x96
+
+/*
+ * Bulk command identity (byte 0)
+ */
+#define ALAUDA_BULK_CMD			0x40
+
+/*
+ * Bulk opcodes (byte 1)
+ */
+#define ALAUDA_BULK_GET_REDU_DATA	0x85
+#define ALAUDA_BULK_READ_BLOCK		0x94
+#define ALAUDA_BULK_ERASE_BLOCK		0xa3
+#define ALAUDA_BULK_WRITE_BLOCK		0xb4
+#define ALAUDA_BULK_GET_STATUS2		0xb7
+#define ALAUDA_BULK_RESET_MEDIA		0xe0
+
+/*
+ * Port to operate on (byte 8)
+ */
+#define ALAUDA_PORT_XD			0x00
+#define ALAUDA_PORT_SM			0x01
+
+/*
+ * LBA and PBA map entries are u16 values (see the maps in
+ * struct alauda_media_info). Special values.
+ */
+#define UNDEF    0xffff
+#define SPARE    0xfffe
+#define UNUSABLE 0xfffd
+
+int init_alauda(struct us_data *us);
+int alauda_transport(struct scsi_cmnd *srb, struct us_data *us);
+
+/* Geometry and block maps of the media in one port */
+struct alauda_media_info {
+	unsigned long capacity;		/* total media size in bytes */
+	unsigned int pagesize;		/* page size in bytes */
+	unsigned int blocksize;		/* number of pages per block */
+	unsigned int uzonesize;		/* number of usable blocks per zone */
+	unsigned int zonesize;		/* number of blocks per zone */
+	unsigned int blockmask;		/* mask to get page from address */
+
+	unsigned char pageshift;	/* log2 of pagesize */
+	unsigned char blockshift;	/* log2 of blocksize */
+	unsigned char zoneshift;	/* log2 of zonesize */
+
+	u16 **lba_to_pba;		/* logical to physical block map */
+	u16 **pba_to_lba;		/* physical to logical block map */
+};
+
+/* Per-device state, stored in us->extra */
+struct alauda_info {
+	struct alauda_media_info port[2];	/* indexed by ALAUDA_PORT_* */
+	int wr_ep;			/* endpoint to write data out of */
+
+	unsigned char sense_key;
+	unsigned long sense_asc;	/* additional sense code */
+	unsigned long sense_ascq;	/* additional sense code qualifier */
+};
+
+#endif
+
diff --git
a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index be3c06d17533..7a865dd04683 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -535,6 +535,13 @@ UNUSUAL_DEV( 0x057b, 0x0022, 0x0000, 0x9999, "Silicon Media R/W", US_SC_DEVICE, US_PR_DEVICE, NULL, 0), +#ifdef CONFIG_USB_STORAGE_ALAUDA +UNUSUAL_DEV( 0x0584, 0x0008, 0x0102, 0x0102, + "Fujifilm", + "DPC-R1 (Alauda)", + US_SC_SCSI, US_PR_ALAUDA, init_alauda, 0 ), +#endif + /* Fabrizio Fellini */ UNUSUAL_DEV( 0x0595, 0x4343, 0x0000, 0x2210, "Fujifilm", @@ -784,6 +791,13 @@ UNUSUAL_DEV( 0x07af, 0x0006, 0x0100, 0x0100, US_SC_SCSI, US_PR_DPCM_USB, NULL, 0 ), #endif +#ifdef CONFIG_USB_STORAGE_ALAUDA +UNUSUAL_DEV( 0x07b4, 0x010a, 0x0102, 0x0102, + "Olympus", + "MAUSB-10 (Alauda)", + US_SC_SCSI, US_PR_ALAUDA, init_alauda, 0 ), +#endif + #ifdef CONFIG_USB_STORAGE_DATAFAB UNUSUAL_DEV( 0x07c4, 0xa000, 0x0000, 0x0015, "Datafab", diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 85c8c17b3c0c..dbcf23980ff1 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -94,6 +94,9 @@ #ifdef CONFIG_USB_STORAGE_ONETOUCH #include "onetouch.h" #endif +#ifdef CONFIG_USB_STORAGE_ALAUDA +#include "alauda.h" +#endif /* Some informational data */ MODULE_AUTHOR("Matthew Dharm "); @@ -644,6 +647,15 @@ static int get_protocol(struct us_data *us) break; #endif +#ifdef CONFIG_USB_STORAGE_ALAUDA + case US_PR_ALAUDA: + us->transport_name = "Alauda Control/Bulk"; + us->transport = alauda_transport; + us->transport_reset = usb_stor_Bulk_reset; + us->max_lun = 1; + break; +#endif + default: return -EIO; } diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index f9c058f33712..b2d08984a9f7 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -102,6 +102,9 @@ enum { US_DO_ALL_FLAGS }; #ifdef CONFIG_USB_STORAGE_JUMPSHOT #define US_PR_JUMPSHOT 0xf3 /* Lexar Jumpshot */ #endif +#ifdef CONFIG_USB_STORAGE_ALAUDA +#define 
US_PR_ALAUDA 0xf4 /* Alauda chipsets */ +#endif #define US_PR_DEVICE 0xff /* Use device's value */ -- cgit v1.2.3-71-gd317 From 0296b2281352e4794e174b393c37f131502e09f0 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Fri, 11 Nov 2005 05:33:52 +0100 Subject: [PATCH] remove CONFIG_KOBJECT_UEVENT option It makes zero sense to have hotplug, but not the netlink events enabled today. Remove this option and merge the kobject_uevent.h header into the kobject.h header file. Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 6 ----- drivers/input/input.c | 1 - drivers/s390/crypto/z90main.c | 1 - include/linux/kobject.h | 35 +++++++++++++++++++++++++- include/linux/kobject_uevent.h | 57 ------------------------------------------ init/Kconfig | 19 -------------- kernel/sysctl.c | 4 +-- lib/kobject_uevent.c | 24 ++++-------------- 8 files changed, 40 insertions(+), 107 deletions(-) delete mode 100644 include/linux/kobject_uevent.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 6af683025ae0..b49a4ad3b872 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1476,12 +1476,6 @@ W: http://nfs.sourceforge.net/ W: http://www.cse.unsw.edu.au/~neilb/patches/linux-devel/ S: Maintained -KERNEL EVENT LAYER (KOBJECT_UEVENT) -P: Robert Love -M: rml@novell.com -L: linux-kernel@vger.kernel.org -S: Maintained - KEXEC P: Eric Biederman P: Randy Dunlap diff --git a/drivers/input/input.c b/drivers/input/input.c index bdd2a7fc268d..43b49ccd7dad 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/s390/crypto/z90main.c b/drivers/s390/crypto/z90main.c index 4010f2bb85af..790fcbb74b43 100644 --- a/drivers/s390/crypto/z90main.c +++ b/drivers/s390/crypto/z90main.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include "z90crypt.h" diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 
7f7403aa4a41..baf5251d9f63 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -23,15 +23,31 @@ #include #include #include -#include #include #include #define KOBJ_NAME_LEN 20 +#define HOTPLUG_PATH_LEN 256 + +/* path to the userspace helper executed on an event */ +extern char hotplug_path[]; + /* counter to tag the hotplug event, read only except for the kobject core */ extern u64 hotplug_seqnum; +/* the actions here must match the proper string in lib/kobject_uevent.c */ +typedef int __bitwise kobject_action_t; +enum kobject_action { + KOBJ_ADD = (__force kobject_action_t) 0x01, /* add event, for hotplug */ + KOBJ_REMOVE = (__force kobject_action_t) 0x02, /* remove event, for hotplug */ + KOBJ_CHANGE = (__force kobject_action_t) 0x03, /* a sysfs attribute file has changed */ + KOBJ_MOUNT = (__force kobject_action_t) 0x04, /* mount event for block devices */ + KOBJ_UMOUNT = (__force kobject_action_t) 0x05, /* umount event for block devices */ + KOBJ_OFFLINE = (__force kobject_action_t) 0x06, /* offline event for hotplug devices */ + KOBJ_ONLINE = (__force kobject_action_t) 0x07, /* online event for hotplug devices */ +}; + struct kobject { const char * k_name; char name[KOBJ_NAME_LEN]; @@ -243,16 +259,33 @@ extern void subsys_remove_file(struct subsystem * , struct subsys_attribute *); #ifdef CONFIG_HOTPLUG void kobject_hotplug(struct kobject *kobj, enum kobject_action action); + int add_hotplug_env_var(char **envp, int num_envp, int *cur_index, char *buffer, int buffer_size, int *cur_len, const char *format, ...) 
__attribute__((format (printf, 7, 8))); + +int kobject_uevent(struct kobject *kobj, + enum kobject_action action, + struct attribute *attr); +int kobject_uevent_atomic(struct kobject *kobj, + enum kobject_action action, + struct attribute *attr); + #else static inline void kobject_hotplug(struct kobject *kobj, enum kobject_action action) { } static inline int add_hotplug_env_var(char **envp, int num_envp, int *cur_index, char *buffer, int buffer_size, int *cur_len, const char *format, ...) { return 0; } +int kobject_uevent(struct kobject *kobj, + enum kobject_action action, + struct attribute *attr) +{ return 0; } +int kobject_uevent_atomic(struct kobject *kobj, + enum kobject_action action, + struct attribute *attr) +{ return 0; } #endif #endif /* __KERNEL__ */ diff --git a/include/linux/kobject_uevent.h b/include/linux/kobject_uevent.h deleted file mode 100644 index aa664fe7e561..000000000000 --- a/include/linux/kobject_uevent.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * kobject_uevent.h - list of kobject user events that can be generated - * - * Copyright (C) 2004 IBM Corp. - * Copyright (C) 2004 Greg Kroah-Hartman - * - * This file is released under the GPLv2. - * - */ - -#ifndef _KOBJECT_EVENT_H_ -#define _KOBJECT_EVENT_H_ - -#define HOTPLUG_PATH_LEN 256 - -/* path to the hotplug userspace helper executed on an event */ -extern char hotplug_path[]; - -/* - * If you add an action here, you must also add the proper string to the - * lib/kobject_uevent.c file. 
- */ -typedef int __bitwise kobject_action_t; -enum kobject_action { - KOBJ_ADD = (__force kobject_action_t) 0x01, /* add event, for hotplug */ - KOBJ_REMOVE = (__force kobject_action_t) 0x02, /* remove event, for hotplug */ - KOBJ_CHANGE = (__force kobject_action_t) 0x03, /* a sysfs attribute file has changed */ - KOBJ_MOUNT = (__force kobject_action_t) 0x04, /* mount event for block devices */ - KOBJ_UMOUNT = (__force kobject_action_t) 0x05, /* umount event for block devices */ - KOBJ_OFFLINE = (__force kobject_action_t) 0x06, /* offline event for hotplug devices */ - KOBJ_ONLINE = (__force kobject_action_t) 0x07, /* online event for hotplug devices */ -}; - - -#ifdef CONFIG_KOBJECT_UEVENT -int kobject_uevent(struct kobject *kobj, - enum kobject_action action, - struct attribute *attr); -int kobject_uevent_atomic(struct kobject *kobj, - enum kobject_action action, - struct attribute *attr); -#else -static inline int kobject_uevent(struct kobject *kobj, - enum kobject_action action, - struct attribute *attr) -{ - return 0; -} -static inline int kobject_uevent_atomic(struct kobject *kobj, - enum kobject_action action, - struct attribute *attr) -{ - return 0; -} -#endif - -#endif diff --git a/init/Kconfig b/init/Kconfig index 9fc0759fa942..0de8b7765ae4 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -205,25 +205,6 @@ config HOTPLUG modules require HOTPLUG functionality, but a module built outside the kernel tree does. Such modules require Y here. -config KOBJECT_UEVENT - bool "Kernel Userspace Events" if EMBEDDED - depends on NET - default y - help - This option enables the kernel userspace event layer, which is a - simple mechanism for kernel-to-user communication over a netlink - socket. - The goal of the kernel userspace events layer is to provide a simple - and efficient events system, that notifies userspace about kobject - state changes. This will enable applications to just listen for - events instead of polling system devices and files. 
- Hotplug events (kobject addition and removal) are also available on - the netlink socket in addition to the execution of /sbin/hotplug if - CONFIG_HOTPLUG is enabled. - - Say Y, unless you are building a system requiring minimal memory - consumption. - config IKCONFIG bool "Kernel .config support" ---help--- diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b53115b882e1..6a51e25d4466 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -83,9 +84,6 @@ static int ngroups_max = NGROUPS_MAX; #ifdef CONFIG_KMOD extern char modprobe_path[]; #endif -#ifdef CONFIG_HOTPLUG -extern char hotplug_path[]; -#endif #ifdef CONFIG_CHR_DEV_SG extern int sg_big_buff; #endif diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 3ab375411e38..1f90eea7eebc 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -19,14 +19,17 @@ #include #include #include -#include #include #include #define BUFFER_SIZE 1024 /* buffer for the hotplug env */ #define NUM_ENVP 32 /* number of env pointers */ -#if defined(CONFIG_KOBJECT_UEVENT) || defined(CONFIG_HOTPLUG) +#if defined(CONFIG_HOTPLUG) +char hotplug_path[HOTPLUG_PATH_LEN] = "/sbin/hotplug"; +u64 hotplug_seqnum; +static DEFINE_SPINLOCK(sequence_lock); + static char *action_to_string(enum kobject_action action) { switch (action) { @@ -48,9 +51,7 @@ static char *action_to_string(enum kobject_action action) return NULL; } } -#endif -#ifdef CONFIG_KOBJECT_UEVENT static struct sock *uevent_sock; /** @@ -168,21 +169,6 @@ static int __init kobject_uevent_init(void) postcore_initcall(kobject_uevent_init); -#else -static inline int send_uevent(const char *signal, const char *obj, - char **envp, int gfp_mask) -{ - return 0; -} - -#endif /* CONFIG_KOBJECT_UEVENT */ - - -#ifdef CONFIG_HOTPLUG -char hotplug_path[HOTPLUG_PATH_LEN] = "/sbin/hotplug"; -u64 hotplug_seqnum; -static DEFINE_SPINLOCK(sequence_lock); - /** * kobject_hotplug - notify userspace by 
executing /sbin/hotplug * -- cgit v1.2.3-71-gd317 From 033b96fd30db52a710d97b06f87d16fc59fee0f1 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Fri, 11 Nov 2005 06:09:55 +0100 Subject: [PATCH] remove mount/umount uevents from superblock handling The names of these events have been confusing from the beginning on, as they have been more like claim/release events. We needed these events for notifying HAL if storage devices have been mounted. Thanks to Al, we have the proper solution now and can poll() /proc/mounts instead to get notified about mount tree changes. Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- fs/super.c | 15 +-------------- include/linux/kobject.h | 6 ++---- lib/kobject_uevent.c | 4 ---- 3 files changed, 3 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/super.c b/fs/super.c index 6689dded3c84..5a347a4f673a 100644 --- a/fs/super.c +++ b/fs/super.c @@ -665,16 +665,6 @@ static int test_bdev_super(struct super_block *s, void *data) return (void *)s->s_bdev == data; } -static void bdev_uevent(struct block_device *bdev, enum kobject_action action) -{ - if (bdev->bd_disk) { - if (bdev->bd_part) - kobject_uevent(&bdev->bd_part->kobj, action, NULL); - else - kobject_uevent(&bdev->bd_disk->kobj, action, NULL); - } -} - struct super_block *get_sb_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)) @@ -717,10 +707,8 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, up_write(&s->s_umount); deactivate_super(s); s = ERR_PTR(error); - } else { + } else s->s_flags |= MS_ACTIVE; - bdev_uevent(bdev, KOBJ_MOUNT); - } } return s; @@ -736,7 +724,6 @@ void kill_block_super(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; - bdev_uevent(bdev, KOBJ_UMOUNT); generic_shutdown_super(sb); sync_blockdev(bdev); close_bdev_excl(bdev); diff --git a/include/linux/kobject.h b/include/linux/kobject.h index
baf5251d9f63..e6926b327538 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -42,10 +42,8 @@ enum kobject_action { KOBJ_ADD = (__force kobject_action_t) 0x01, /* add event, for hotplug */ KOBJ_REMOVE = (__force kobject_action_t) 0x02, /* remove event, for hotplug */ KOBJ_CHANGE = (__force kobject_action_t) 0x03, /* a sysfs attribute file has changed */ - KOBJ_MOUNT = (__force kobject_action_t) 0x04, /* mount event for block devices */ - KOBJ_UMOUNT = (__force kobject_action_t) 0x05, /* umount event for block devices */ - KOBJ_OFFLINE = (__force kobject_action_t) 0x06, /* offline event for hotplug devices */ - KOBJ_ONLINE = (__force kobject_action_t) 0x07, /* online event for hotplug devices */ + KOBJ_OFFLINE = (__force kobject_action_t) 0x04, /* offline event for hotplug devices */ + KOBJ_ONLINE = (__force kobject_action_t) 0x05, /* online event for hotplug devices */ }; struct kobject { diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 1f90eea7eebc..845bf67d94ca 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -39,10 +39,6 @@ static char *action_to_string(enum kobject_action action) return "remove"; case KOBJ_CHANGE: return "change"; - case KOBJ_MOUNT: - return "mount"; - case KOBJ_UMOUNT: - return "umount"; case KOBJ_OFFLINE: return "offline"; case KOBJ_ONLINE: -- cgit v1.2.3-71-gd317 From 5f123fbd80f4f788554636f02bf73e40f914e0d6 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Fri, 11 Nov 2005 14:43:07 +0100 Subject: [PATCH] merge kobject_uevent and kobject_hotplug The distinction between hotplug and uevent does not make sense these days, netlink events are the default. udev depends entirely on netlink uevents. Only during early boot and in initramfs, /sbin/hotplug is needed. So merge the two functions and provide only one interface without all the options. The netlink layer got a nice generic interface with named slots recently, which is probably a better facility to plug events for subsystem specific events. 
Also the new poll() interface to /proc/mounts is a nicer way to notify about changes than sending events through the core. The uevents should only be used for driver core related requests to userspace now. Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ipr.c | 2 +- include/linux/kobject.h | 27 ++--- lib/kobject_uevent.c | 279 +++++++++++++++++------------------------------- 3 files changed, 102 insertions(+), 206 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index fa2cb3582cfa..bf44a409ba0d 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -2132,7 +2132,7 @@ restart: } spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); - kobject_uevent(&ioa_cfg->host->shost_classdev.kobj, KOBJ_CHANGE, NULL); + kobject_hotplug(&ioa_cfg->host->shost_classdev.kobj, KOBJ_CHANGE); LEAVE; } diff --git a/include/linux/kobject.h b/include/linux/kobject.h index e6926b327538..5b08248fba72 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -39,11 +39,11 @@ extern u64 hotplug_seqnum; /* the actions here must match the proper string in lib/kobject_uevent.c */ typedef int __bitwise kobject_action_t; enum kobject_action { - KOBJ_ADD = (__force kobject_action_t) 0x01, /* add event, for hotplug */ - KOBJ_REMOVE = (__force kobject_action_t) 0x02, /* remove event, for hotplug */ - KOBJ_CHANGE = (__force kobject_action_t) 0x03, /* a sysfs attribute file has changed */ - KOBJ_OFFLINE = (__force kobject_action_t) 0x04, /* offline event for hotplug devices */ - KOBJ_ONLINE = (__force kobject_action_t) 0x05, /* online event for hotplug devices */ + KOBJ_ADD = (__force kobject_action_t) 0x01, /* exclusive to core */ + KOBJ_REMOVE = (__force kobject_action_t) 0x02, /* exclusive to core */ + KOBJ_CHANGE = (__force kobject_action_t) 0x03, /* device state change */ + KOBJ_OFFLINE = (__force kobject_action_t) 0x04, /* device offline */ + KOBJ_ONLINE = (__force kobject_action_t) 0x05, /* device 
online */ }; struct kobject { @@ -262,28 +262,13 @@ int add_hotplug_env_var(char **envp, int num_envp, int *cur_index, char *buffer, int buffer_size, int *cur_len, const char *format, ...) __attribute__((format (printf, 7, 8))); - -int kobject_uevent(struct kobject *kobj, - enum kobject_action action, - struct attribute *attr); -int kobject_uevent_atomic(struct kobject *kobj, - enum kobject_action action, - struct attribute *attr); - #else static inline void kobject_hotplug(struct kobject *kobj, enum kobject_action action) { } + static inline int add_hotplug_env_var(char **envp, int num_envp, int *cur_index, char *buffer, int buffer_size, int *cur_len, const char *format, ...) { return 0; } -int kobject_uevent(struct kobject *kobj, - enum kobject_action action, - struct attribute *attr) -{ return 0; } -int kobject_uevent_atomic(struct kobject *kobj, - enum kobject_action action, - struct attribute *attr) -{ return 0; } #endif #endif /* __KERNEL__ */ diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 845bf67d94ca..dd061da3aba9 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -29,6 +29,7 @@ char hotplug_path[HOTPLUG_PATH_LEN] = "/sbin/hotplug"; u64 hotplug_seqnum; static DEFINE_SPINLOCK(sequence_lock); +static struct sock *uevent_sock; static char *action_to_string(enum kobject_action action) { @@ -48,123 +49,6 @@ static char *action_to_string(enum kobject_action action) } } -static struct sock *uevent_sock; - -/** - * send_uevent - notify userspace by sending event through netlink socket - * - * @signal: signal name - * @obj: object path (kobject) - * @envp: possible hotplug environment to pass with the message - * @gfp_mask: - */ -static int send_uevent(const char *signal, const char *obj, - char **envp, gfp_t gfp_mask) -{ - struct sk_buff *skb; - char *pos; - int len; - - if (!uevent_sock) - return -EIO; - - len = strlen(signal) + 1; - len += strlen(obj) + 1; - - /* allocate buffer with the maximum possible message size */ - skb = 
alloc_skb(len + BUFFER_SIZE, gfp_mask); - if (!skb) - return -ENOMEM; - - pos = skb_put(skb, len); - sprintf(pos, "%s@%s", signal, obj); - - /* copy the environment key by key to our continuous buffer */ - if (envp) { - int i; - - for (i = 2; envp[i]; i++) { - len = strlen(envp[i]) + 1; - pos = skb_put(skb, len); - strcpy(pos, envp[i]); - } - } - - NETLINK_CB(skb).dst_group = 1; - return netlink_broadcast(uevent_sock, skb, 0, 1, gfp_mask); -} - -static int do_kobject_uevent(struct kobject *kobj, enum kobject_action action, - struct attribute *attr, gfp_t gfp_mask) -{ - char *path; - char *attrpath; - char *signal; - int len; - int rc = -ENOMEM; - - path = kobject_get_path(kobj, gfp_mask); - if (!path) - return -ENOMEM; - - signal = action_to_string(action); - if (!signal) - return -EINVAL; - - if (attr) { - len = strlen(path); - len += strlen(attr->name) + 2; - attrpath = kmalloc(len, gfp_mask); - if (!attrpath) - goto exit; - sprintf(attrpath, "%s/%s", path, attr->name); - rc = send_uevent(signal, attrpath, NULL, gfp_mask); - kfree(attrpath); - } else - rc = send_uevent(signal, path, NULL, gfp_mask); - -exit: - kfree(path); - return rc; -} - -/** - * kobject_uevent - notify userspace by sending event through netlink socket - * - * @signal: signal name - * @kobj: struct kobject that the event is happening to - * @attr: optional struct attribute the event belongs to - */ -int kobject_uevent(struct kobject *kobj, enum kobject_action action, - struct attribute *attr) -{ - return do_kobject_uevent(kobj, action, attr, GFP_KERNEL); -} -EXPORT_SYMBOL_GPL(kobject_uevent); - -int kobject_uevent_atomic(struct kobject *kobj, enum kobject_action action, - struct attribute *attr) -{ - return do_kobject_uevent(kobj, action, attr, GFP_ATOMIC); -} -EXPORT_SYMBOL_GPL(kobject_uevent_atomic); - -static int __init kobject_uevent_init(void) -{ - uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL, - THIS_MODULE); - - if (!uevent_sock) { - printk(KERN_ERR - 
"kobject_uevent: unable to create netlink socket!\n"); - return -ENODEV; - } - - return 0; -} - -postcore_initcall(kobject_uevent_init); - /** * kobject_hotplug - notify userspace by executing /sbin/hotplug * @@ -173,95 +57,84 @@ postcore_initcall(kobject_uevent_init); */ void kobject_hotplug(struct kobject *kobj, enum kobject_action action) { - char *argv [3]; - char **envp = NULL; - char *buffer = NULL; - char *seq_buff; + char **envp; + char *buffer; char *scratch; + const char *action_string; + const char *devpath = NULL; + const char *subsystem; + struct kobject *top_kobj; + struct kset *kset; + struct kset_hotplug_ops *hotplug_ops; + u64 seq; + char *seq_buff; int i = 0; int retval; - char *kobj_path = NULL; - const char *name = NULL; - char *action_string; - u64 seq; - struct kobject *top_kobj = kobj; - struct kset *kset; - static struct kset_hotplug_ops null_hotplug_ops; - struct kset_hotplug_ops *hotplug_ops = &null_hotplug_ops; - /* If this kobj does not belong to a kset, - try to find a parent that does. */ + pr_debug("%s\n", __FUNCTION__); + + action_string = action_to_string(action); + if (!action_string) + return; + + /* search the kset we belong to */ + top_kobj = kobj; if (!top_kobj->kset && top_kobj->parent) { do { top_kobj = top_kobj->parent; } while (!top_kobj->kset && top_kobj->parent); } - - if (top_kobj->kset) - kset = top_kobj->kset; - else + if (!top_kobj->kset) return; - if (kset->hotplug_ops) - hotplug_ops = kset->hotplug_ops; + kset = top_kobj->kset; + hotplug_ops = kset->hotplug_ops; - /* If the kset has a filter operation, call it. - Skip the event, if the filter returns zero. */ - if (hotplug_ops->filter) { + /* skip the event, if the filter returns zero. 
*/ + if (hotplug_ops && hotplug_ops->filter) if (!hotplug_ops->filter(kset, kobj)) return; - } - - pr_debug ("%s\n", __FUNCTION__); - - action_string = action_to_string(action); - if (!action_string) - return; - envp = kmalloc(NUM_ENVP * sizeof (char *), GFP_KERNEL); + /* environment index */ + envp = kzalloc(NUM_ENVP * sizeof (char *), GFP_KERNEL); if (!envp) return; - memset (envp, 0x00, NUM_ENVP * sizeof (char *)); + /* environment values */ buffer = kmalloc(BUFFER_SIZE, GFP_KERNEL); if (!buffer) goto exit; - if (hotplug_ops->name) - name = hotplug_ops->name(kset, kobj); - if (name == NULL) - name = kobject_name(&kset->kobj); + /* complete object path */ + devpath = kobject_get_path(kobj, GFP_KERNEL); + if (!devpath) + goto exit; - argv [0] = hotplug_path; - argv [1] = (char *)name; /* won't be changed but 'const' has to go */ - argv [2] = NULL; + /* originating subsystem */ + if (hotplug_ops && hotplug_ops->name) + subsystem = hotplug_ops->name(kset, kobj); + else + subsystem = kobject_name(&kset->kobj); - /* minimal command environment */ - envp [i++] = "HOME=/"; - envp [i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; + /* event environemnt for helper process only */ + envp[i++] = "HOME=/"; + envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; + /* default keys */ scratch = buffer; - envp [i++] = scratch; scratch += sprintf(scratch, "ACTION=%s", action_string) + 1; - - kobj_path = kobject_get_path(kobj, GFP_KERNEL); - if (!kobj_path) - goto exit; - envp [i++] = scratch; - scratch += sprintf (scratch, "DEVPATH=%s", kobj_path) + 1; - + scratch += sprintf (scratch, "DEVPATH=%s", devpath) + 1; envp [i++] = scratch; - scratch += sprintf(scratch, "SUBSYSTEM=%s", name) + 1; + scratch += sprintf(scratch, "SUBSYSTEM=%s", subsystem) + 1; - /* reserve space for the sequence, - * put the real one in after the hotplug call */ + /* just reserve the space, overwrite it after kset call has returned */ envp[i++] = seq_buff = scratch; scratch += strlen("SEQNUM=18446744073709551616") 
+ 1; - if (hotplug_ops->hotplug) { - /* have the kset specific function add its stuff */ + /* let the kset specific function add its stuff */ + if (hotplug_ops && hotplug_ops->hotplug) { retval = hotplug_ops->hotplug (kset, kobj, &envp[i], NUM_ENVP - i, scratch, BUFFER_SIZE - (scratch - buffer)); @@ -272,27 +145,49 @@ void kobject_hotplug(struct kobject *kobj, enum kobject_action action) } } + /* we will send an event, request a new sequence number */ spin_lock(&sequence_lock); seq = ++hotplug_seqnum; spin_unlock(&sequence_lock); sprintf(seq_buff, "SEQNUM=%llu", (unsigned long long)seq); - pr_debug ("%s: %s %s seq=%llu %s %s %s %s %s\n", - __FUNCTION__, argv[0], argv[1], (unsigned long long)seq, - envp[0], envp[1], envp[2], envp[3], envp[4]); - - send_uevent(action_string, kobj_path, envp, GFP_KERNEL); + /* send netlink message */ + if (uevent_sock) { + struct sk_buff *skb; + size_t len; + + /* allocate message with the maximum possible size */ + len = strlen(action_string) + strlen(devpath) + 2; + skb = alloc_skb(len + BUFFER_SIZE, GFP_KERNEL); + if (skb) { + /* add header */ + scratch = skb_put(skb, len); + sprintf(scratch, "%s@%s", action_string, devpath); + + /* copy keys to our continuous event payload buffer */ + for (i = 2; envp[i]; i++) { + len = strlen(envp[i]) + 1; + scratch = skb_put(skb, len); + strcpy(scratch, envp[i]); + } + + NETLINK_CB(skb).dst_group = 1; + netlink_broadcast(uevent_sock, skb, 0, 1, GFP_KERNEL); + } + } - if (!hotplug_path[0]) - goto exit; + /* call uevent_helper, usually only enabled during early boot */ + if (hotplug_path[0]) { + char *argv [3]; - retval = call_usermodehelper (argv[0], argv, envp, 0); - if (retval) - pr_debug ("%s - call_usermodehelper returned %d\n", - __FUNCTION__, retval); + argv [0] = hotplug_path; + argv [1] = (char *)subsystem; + argv [2] = NULL; + call_usermodehelper (argv[0], argv, envp, 0); + } exit: - kfree(kobj_path); + kfree(devpath); kfree(buffer); kfree(envp); return; @@ -350,4 +245,20 @@ int 
add_hotplug_env_var(char **envp, int num_envp, int *cur_index, } EXPORT_SYMBOL(add_hotplug_env_var); +static int __init kobject_uevent_init(void) +{ + uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL, + THIS_MODULE); + + if (!uevent_sock) { + printk(KERN_ERR + "kobject_uevent: unable to create netlink socket!\n"); + return -ENODEV; + } + + return 0; +} + +postcore_initcall(kobject_uevent_init); + #endif /* CONFIG_HOTPLUG */ -- cgit v1.2.3-71-gd317 From 312c004d36ce6c739512bac83b452f4c20ab1f62 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Wed, 16 Nov 2005 09:00:00 +0100 Subject: [PATCH] driver core: replace "hotplug" by "uevent" Leave the overloaded "hotplug" word to subsystems which are handling real devices. The driver core does not "plug" anything, it just exports the state to userspace and generates events. Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- Documentation/powerpc/eeh-pci-error-recovery.txt | 31 ++++----- arch/powerpc/kernel/vio.c | 2 +- block/genhd.c | 48 ++++++------- drivers/acpi/container.c | 8 +-- drivers/acpi/processor_core.c | 8 +-- drivers/acpi/scan.c | 14 ++-- drivers/base/Kconfig | 4 +- drivers/base/class.c | 66 +++++++++--------- drivers/base/core.c | 42 ++++++------ drivers/base/cpu.c | 4 +- drivers/base/firmware_class.c | 45 ++++++------- drivers/base/memory.c | 12 ++-- drivers/ieee1394/nodemgr.c | 20 +++--- drivers/infiniband/core/sysfs.c | 16 ++--- drivers/input/input.c | 14 ++-- drivers/input/serio/serio.c | 22 +++--- drivers/macintosh/macio_asic.c | 4 +- drivers/mmc/mmc_sysfs.c | 4 +- drivers/pci/hotplug.c | 44 ++++++------ drivers/pci/pci-driver.c | 6 +- drivers/pci/pci.h | 4 +- drivers/pcmcia/cs.c | 10 +-- drivers/pcmcia/ds.c | 50 +++++++------- drivers/scsi/ipr.c | 2 +- drivers/usb/core/usb.c | 86 +++++++++++------------- drivers/usb/host/hc_crisv10.c | 2 +- drivers/w1/w1.c | 14 ++-- fs/partitions/check.c | 6 +- include/linux/device.h | 14 ++-- include/linux/firmware.h | 2 +-
include/linux/kobject.h | 40 ++++++----- include/linux/sysctl.h | 2 +- include/linux/usb.h | 2 +- kernel/ksysfs.c | 14 ++-- kernel/sysctl.c | 4 +- lib/kobject.c | 4 +- lib/kobject_uevent.c | 64 +++++++++--------- net/bluetooth/hci_sysfs.c | 4 +- net/bridge/br_sysfs_if.c | 4 +- net/core/net-sysfs.c | 8 +-- 40 files changed, 372 insertions(+), 378 deletions(-) (limited to 'include/linux') diff --git a/Documentation/powerpc/eeh-pci-error-recovery.txt b/Documentation/powerpc/eeh-pci-error-recovery.txt index e75d7474322c..67a11a36270c 100644 --- a/Documentation/powerpc/eeh-pci-error-recovery.txt +++ b/Documentation/powerpc/eeh-pci-error-recovery.txt @@ -115,7 +115,7 @@ Current PPC64 Linux EEH Implementation At this time, a generic EEH recovery mechanism has been implemented, so that individual device drivers do not need to be modified to support EEH recovery. This generic mechanism piggy-backs on the PCI hotplug -infrastructure, and percolates events up through the hotplug/udev +infrastructure, and percolates events up through the userspace/udev infrastructure. Followiing is a detailed description of how this is accomplished. @@ -172,7 +172,7 @@ A handler for the EEH notifier_block events is implemented in drivers/pci/hotplug/pSeries_pci.c, called handle_eeh_events(). It saves the device BAR's and then calls rpaphp_unconfig_pci_adapter(). This last call causes the device driver for the card to be stopped, -which causes hotplug events to go out to user space. This triggers +which causes uevents to go out to user space. This triggers user-space scripts that might issue commands such as "ifdown eth0" for ethernet cards, and so on. This handler then sleeps for 5 seconds, hoping to give the user-space scripts enough time to complete. 
@@ -258,29 +258,30 @@ rpa_php_unconfig_pci_adapter() { // in rpaphp_pci.c calls pci_destroy_dev (struct pci_dev *) { calls - device_unregister (&dev->dev) { // in /drivers/base/core.c + device_unregister (&dev->dev) { // in /drivers/base/core.c calls - device_del(struct device * dev) { // in /drivers/base/core.c + device_del(struct device * dev) { // in /drivers/base/core.c calls - kobject_del() { //in /libs/kobject.c + kobject_del() { //in /libs/kobject.c calls - kobject_hotplug() { // in /libs/kobject.c + kobject_uevent() { // in /libs/kobject.c calls - kset_hotplug() { // in /lib/kobject.c + kset_uevent() { // in /lib/kobject.c calls - kset->hotplug_ops->hotplug() which is really just + kset->uevent_ops->uevent() // which is really just a call to - dev_hotplug() { // in /drivers/base/core.c + dev_uevent() { // in /drivers/base/core.c calls - dev->bus->hotplug() which is really just a call to - pci_hotplug () { // in drivers/pci/hotplug.c + dev->bus->uevent() which is really just a call to + pci_uevent () { // in drivers/pci/hotplug.c which prints device name, etc.... 
} } - then kset_hotplug() calls - call_usermodehelper () with - argv[0]=hotplug_path[] which is "/sbin/hotplug" - --> event to userspace, + then kobject_uevent() sends a netlink uevent to userspace + --> userspace uevent + (during early boot, nobody listens to netlink events and + kobject_uevent() executes uevent_helper[], which runs the + event process /sbin/hotplug) } } kobject_del() then calls sysfs_remove_dir(), which would diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 71a6addf9f7f..13c41495fe06 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -293,6 +293,6 @@ static int vio_hotplug(struct device *dev, char **envp, int num_envp, struct bus_type vio_bus_type = { .name = "vio", - .hotplug = vio_hotplug, + .uevent = vio_hotplug, .match = vio_bus_match, }; diff --git a/block/genhd.c b/block/genhd.c index f04609d553b8..f1ed83f3f083 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -358,7 +358,7 @@ static struct sysfs_ops disk_sysfs_ops = { static ssize_t disk_uevent_store(struct gendisk * disk, const char *buf, size_t count) { - kobject_hotplug(&disk->kobj, KOBJ_ADD); + kobject_uevent(&disk->kobj, KOBJ_ADD); return count; } static ssize_t disk_dev_read(struct gendisk * disk, char *page) @@ -455,14 +455,14 @@ static struct kobj_type ktype_block = { extern struct kobj_type ktype_part; -static int block_hotplug_filter(struct kset *kset, struct kobject *kobj) +static int block_uevent_filter(struct kset *kset, struct kobject *kobj) { struct kobj_type *ktype = get_ktype(kobj); return ((ktype == &ktype_block) || (ktype == &ktype_part)); } -static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp, +static int block_uevent(struct kset *kset, struct kobject *kobj, char **envp, int num_envp, char *buffer, int buffer_size) { struct kobj_type *ktype = get_ktype(kobj); @@ -474,40 +474,40 @@ static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp, if (ktype == &ktype_block) { disk = 
container_of(kobj, struct gendisk, kobj); - add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, - &length, "MINOR=%u", disk->first_minor); + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, + &length, "MINOR=%u", disk->first_minor); } else if (ktype == &ktype_part) { disk = container_of(kobj->parent, struct gendisk, kobj); part = container_of(kobj, struct hd_struct, kobj); - add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, - &length, "MINOR=%u", - disk->first_minor + part->partno); + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, + &length, "MINOR=%u", + disk->first_minor + part->partno); } else return 0; - add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &length, - "MAJOR=%u", disk->major); + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "MAJOR=%u", disk->major); /* add physical device, backing this device */ physdev = disk->driverfs_dev; if (physdev) { char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL); - add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, - &length, "PHYSDEVPATH=%s", path); + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, + &length, "PHYSDEVPATH=%s", path); kfree(path); if (physdev->bus) - add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "PHYSDEVBUS=%s", - physdev->bus->name); + add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "PHYSDEVBUS=%s", + physdev->bus->name); if (physdev->driver) - add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "PHYSDEVDRIVER=%s", - physdev->driver->name); + add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "PHYSDEVDRIVER=%s", + physdev->driver->name); } /* terminate, set to next free slot, shrink available space */ @@ -520,13 +520,13 @@ static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp, return 0; } -static struct kset_hotplug_ops block_hotplug_ops = { - .filter = block_hotplug_filter, - .hotplug = 
block_hotplug, +static struct kset_uevent_ops block_uevent_ops = { + .filter = block_uevent_filter, + .uevent = block_uevent, }; /* declare block_subsys. */ -static decl_subsys(block, &ktype_block, &block_hotplug_ops); +static decl_subsys(block, &ktype_block, &block_uevent_ops); /* diff --git a/drivers/acpi/container.c b/drivers/acpi/container.c index 27ec12c1fab0..b69a8cad82b7 100644 --- a/drivers/acpi/container.c +++ b/drivers/acpi/container.c @@ -172,21 +172,21 @@ static void container_notify_cb(acpi_handle handle, u32 type, void *context) if (ACPI_FAILURE(status) || !device) { result = container_device_add(&device, handle); if (!result) - kobject_hotplug(&device->kobj, - KOBJ_ONLINE); + kobject_uevent(&device->kobj, + KOBJ_ONLINE); else printk("Failed to add container\n"); } } else { if (ACPI_SUCCESS(status)) { /* device exist and this is a remove request */ - kobject_hotplug(&device->kobj, KOBJ_OFFLINE); + kobject_uevent(&device->kobj, KOBJ_OFFLINE); } } break; case ACPI_NOTIFY_EJECT_REQUEST: if (!acpi_bus_get_device(handle, &device) && device) { - kobject_hotplug(&device->kobj, KOBJ_OFFLINE); + kobject_uevent(&device->kobj, KOBJ_OFFLINE); } break; default: diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 0c561c571f29..1278aca96fe3 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -748,7 +748,7 @@ int acpi_processor_device_add(acpi_handle handle, struct acpi_device **device) return_VALUE(-ENODEV); if ((pr->id >= 0) && (pr->id < NR_CPUS)) { - kobject_hotplug(&(*device)->kobj, KOBJ_ONLINE); + kobject_uevent(&(*device)->kobj, KOBJ_ONLINE); } return_VALUE(0); } @@ -788,13 +788,13 @@ acpi_processor_hotplug_notify(acpi_handle handle, u32 event, void *data) } if (pr->id >= 0 && (pr->id < NR_CPUS)) { - kobject_hotplug(&device->kobj, KOBJ_OFFLINE); + kobject_uevent(&device->kobj, KOBJ_OFFLINE); break; } result = acpi_processor_start(device); if ((!result) && ((pr->id >= 0) && (pr->id < NR_CPUS))) { - 
kobject_hotplug(&device->kobj, KOBJ_ONLINE); + kobject_uevent(&device->kobj, KOBJ_ONLINE); } else { ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Device [%s] failed to start\n", @@ -818,7 +818,7 @@ acpi_processor_hotplug_notify(acpi_handle handle, u32 event, void *data) } if ((pr->id < NR_CPUS) && (cpu_present(pr->id))) - kobject_hotplug(&device->kobj, KOBJ_OFFLINE); + kobject_uevent(&device->kobj, KOBJ_OFFLINE); break; default: ACPI_DEBUG_PRINT((ACPI_DB_INFO, diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 31218e1d2a18..0745d20afb8c 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -78,7 +78,7 @@ static struct kobj_type ktype_acpi_ns = { .release = acpi_device_release, }; -static int namespace_hotplug(struct kset *kset, struct kobject *kobj, +static int namespace_uevent(struct kset *kset, struct kobject *kobj, char **envp, int num_envp, char *buffer, int buffer_size) { @@ -89,8 +89,8 @@ static int namespace_hotplug(struct kset *kset, struct kobject *kobj, if (!dev->driver) return 0; - if (add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &len, - "PHYSDEVDRIVER=%s", dev->driver->name)) + if (add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len, + "PHYSDEVDRIVER=%s", dev->driver->name)) return -ENOMEM; envp[i] = NULL; @@ -98,8 +98,8 @@ static int namespace_hotplug(struct kset *kset, struct kobject *kobj, return 0; } -static struct kset_hotplug_ops namespace_hotplug_ops = { - .hotplug = &namespace_hotplug, +static struct kset_uevent_ops namespace_uevent_ops = { + .uevent = &namespace_uevent, }; static struct kset acpi_namespace_kset = { @@ -108,7 +108,7 @@ static struct kset acpi_namespace_kset = { }, .subsys = &acpi_subsys, .ktype = &ktype_acpi_ns, - .hotplug_ops = &namespace_hotplug_ops, + .uevent_ops = &namespace_uevent_ops, }; static void acpi_device_register(struct acpi_device *device, @@ -347,7 +347,7 @@ static int acpi_bus_get_wakeup_device_flags(struct acpi_device *device) } /* 
-------------------------------------------------------------------------- - ACPI hotplug sysfs device file support + ACPI sysfs device file support -------------------------------------------------------------------------- */ static ssize_t acpi_eject_store(struct acpi_device *device, const char *buf, size_t count); diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 934149c1512b..f0eff3dac58d 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -19,11 +19,11 @@ config PREVENT_FIRMWARE_BUILD If unsure say Y here. config FW_LOADER - tristate "Hotplug firmware loading support" + tristate "Userspace firmware loading support" select HOTPLUG ---help--- This option is provided for the case where no in-kernel-tree modules - require hotplug firmware loading support, but a module built outside + require userspace firmware loading support, but a module built outside the kernel tree does. config DEBUG_DRIVER diff --git a/drivers/base/class.c b/drivers/base/class.c index db65fd0babe9..df7fdabd0730 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -178,7 +178,7 @@ static void class_device_create_release(struct class_device *class_dev) } /* needed to allow these devices to have parent class devices */ -static int class_device_create_hotplug(struct class_device *class_dev, +static int class_device_create_uevent(struct class_device *class_dev, char **envp, int num_envp, char *buffer, int buffer_size) { @@ -331,7 +331,7 @@ static struct kobj_type ktype_class_device = { .release = class_dev_release, }; -static int class_hotplug_filter(struct kset *kset, struct kobject *kobj) +static int class_uevent_filter(struct kset *kset, struct kobject *kobj) { struct kobj_type *ktype = get_ktype(kobj); @@ -343,14 +343,14 @@ static int class_hotplug_filter(struct kset *kset, struct kobject *kobj) return 0; } -static const char *class_hotplug_name(struct kset *kset, struct kobject *kobj) +static const char *class_uevent_name(struct kset *kset, struct kobject 
*kobj) { struct class_device *class_dev = to_class_dev(kobj); return class_dev->class->name; } -static int class_hotplug(struct kset *kset, struct kobject *kobj, char **envp, +static int class_uevent(struct kset *kset, struct kobject *kobj, char **envp, int num_envp, char *buffer, int buffer_size) { struct class_device *class_dev = to_class_dev(kobj); @@ -365,29 +365,29 @@ static int class_hotplug(struct kset *kset, struct kobject *kobj, char **envp, struct device *dev = class_dev->dev; char *path = kobject_get_path(&dev->kobj, GFP_KERNEL); - add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, - &length, "PHYSDEVPATH=%s", path); + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, + &length, "PHYSDEVPATH=%s", path); kfree(path); if (dev->bus) - add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "PHYSDEVBUS=%s", dev->bus->name); + add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "PHYSDEVBUS=%s", dev->bus->name); if (dev->driver) - add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "PHYSDEVDRIVER=%s", dev->driver->name); + add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "PHYSDEVDRIVER=%s", dev->driver->name); } if (MAJOR(class_dev->devt)) { - add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "MAJOR=%u", MAJOR(class_dev->devt)); + add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "MAJOR=%u", MAJOR(class_dev->devt)); - add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "MINOR=%u", MINOR(class_dev->devt)); + add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "MINOR=%u", MINOR(class_dev->devt)); } /* terminate, set to next free slot, shrink available space */ @@ -397,30 +397,30 @@ static int class_hotplug(struct kset *kset, struct kobject *kobj, char **envp, buffer = &buffer[length]; buffer_size -= length; - if (class_dev->hotplug) { + if (class_dev->uevent) { /* have the class device 
specific function add its stuff */ - retval = class_dev->hotplug(class_dev, envp, num_envp, + retval = class_dev->uevent(class_dev, envp, num_envp, buffer, buffer_size); if (retval) - pr_debug("class_dev->hotplug() returned %d\n", retval); - } else if (class_dev->class->hotplug) { + pr_debug("class_dev->uevent() returned %d\n", retval); + } else if (class_dev->class->uevent) { /* have the class specific function add its stuff */ - retval = class_dev->class->hotplug(class_dev, envp, num_envp, + retval = class_dev->class->uevent(class_dev, envp, num_envp, buffer, buffer_size); if (retval) - pr_debug("class->hotplug() returned %d\n", retval); + pr_debug("class->uevent() returned %d\n", retval); } return retval; } -static struct kset_hotplug_ops class_hotplug_ops = { - .filter = class_hotplug_filter, - .name = class_hotplug_name, - .hotplug = class_hotplug, +static struct kset_uevent_ops class_uevent_ops = { + .filter = class_uevent_filter, + .name = class_uevent_name, + .uevent = class_uevent, }; -static decl_subsys(class_obj, &ktype_class_device, &class_hotplug_ops); +static decl_subsys(class_obj, &ktype_class_device, &class_uevent_ops); static int class_device_add_attrs(struct class_device * cd) @@ -464,7 +464,7 @@ static ssize_t show_dev(struct class_device *class_dev, char *buf) static ssize_t store_uevent(struct class_device *class_dev, const char *buf, size_t count) { - kobject_hotplug(&class_dev->kobj, KOBJ_ADD); + kobject_uevent(&class_dev->kobj, KOBJ_ADD); return count; } @@ -559,7 +559,7 @@ int class_device_add(struct class_device *class_dev) class_name); } - kobject_hotplug(&class_dev->kobj, KOBJ_ADD); + kobject_uevent(&class_dev->kobj, KOBJ_ADD); /* notify any interfaces this device is now here */ if (parent_class) { @@ -632,7 +632,7 @@ struct class_device *class_device_create(struct class *cls, class_dev->class = cls; class_dev->parent = parent; class_dev->release = class_device_create_release; - class_dev->hotplug = class_device_create_hotplug; + 
class_dev->uevent = class_device_create_uevent; va_start(args, fmt); vsnprintf(class_dev->class_id, BUS_ID_SIZE, fmt, args); @@ -674,7 +674,7 @@ void class_device_del(struct class_device *class_dev) class_device_remove_file(class_dev, class_dev->devt_attr); class_device_remove_attrs(class_dev); - kobject_hotplug(&class_dev->kobj, KOBJ_REMOVE); + kobject_uevent(&class_dev->kobj, KOBJ_REMOVE); kobject_del(&class_dev->kobj); class_device_put(parent_device); diff --git a/drivers/base/core.c b/drivers/base/core.c index 8615b42b517a..fd8059920dbf 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -90,7 +90,7 @@ static struct kobj_type ktype_device = { }; -static int dev_hotplug_filter(struct kset *kset, struct kobject *kobj) +static int dev_uevent_filter(struct kset *kset, struct kobject *kobj) { struct kobj_type *ktype = get_ktype(kobj); @@ -102,14 +102,14 @@ static int dev_hotplug_filter(struct kset *kset, struct kobject *kobj) return 0; } -static const char *dev_hotplug_name(struct kset *kset, struct kobject *kobj) +static const char *dev_uevent_name(struct kset *kset, struct kobject *kobj) { struct device *dev = to_dev(kobj); return dev->bus->name; } -static int dev_hotplug(struct kset *kset, struct kobject *kobj, char **envp, +static int dev_uevent(struct kset *kset, struct kobject *kobj, char **envp, int num_envp, char *buffer, int buffer_size) { struct device *dev = to_dev(kobj); @@ -119,15 +119,15 @@ static int dev_hotplug(struct kset *kset, struct kobject *kobj, char **envp, /* add bus name of physical device */ if (dev->bus) - add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "PHYSDEVBUS=%s", dev->bus->name); + add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "PHYSDEVBUS=%s", dev->bus->name); /* add driver name of physical device */ if (dev->driver) - add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "PHYSDEVDRIVER=%s", dev->driver->name); + add_uevent_var(envp, num_envp, &i, + 
buffer, buffer_size, &length, + "PHYSDEVDRIVER=%s", dev->driver->name); /* terminate, set to next free slot, shrink available space */ envp[i] = NULL; @@ -136,11 +136,11 @@ static int dev_hotplug(struct kset *kset, struct kobject *kobj, char **envp, buffer = &buffer[length]; buffer_size -= length; - if (dev->bus && dev->bus->hotplug) { + if (dev->bus && dev->bus->uevent) { /* have the bus specific function add its stuff */ - retval = dev->bus->hotplug (dev, envp, num_envp, buffer, buffer_size); + retval = dev->bus->uevent(dev, envp, num_envp, buffer, buffer_size); if (retval) { - pr_debug ("%s - hotplug() returned %d\n", + pr_debug ("%s - uevent() returned %d\n", __FUNCTION__, retval); } } @@ -148,16 +148,16 @@ static int dev_hotplug(struct kset *kset, struct kobject *kobj, char **envp, return retval; } -static struct kset_hotplug_ops device_hotplug_ops = { - .filter = dev_hotplug_filter, - .name = dev_hotplug_name, - .hotplug = dev_hotplug, +static struct kset_uevent_ops device_uevent_ops = { + .filter = dev_uevent_filter, + .name = dev_uevent_name, + .uevent = dev_uevent, }; static ssize_t store_uevent(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - kobject_hotplug(&dev->kobj, KOBJ_ADD); + kobject_uevent(&dev->kobj, KOBJ_ADD); return count; } @@ -165,7 +165,7 @@ static ssize_t store_uevent(struct device *dev, struct device_attribute *attr, * device_subsys - structure to be registered with kobject core. 
*/ -decl_subsys(devices, &ktype_device, &device_hotplug_ops); +decl_subsys(devices, &ktype_device, &device_uevent_ops); /** @@ -274,7 +274,7 @@ int device_add(struct device *dev) dev->uevent_attr.store = store_uevent; device_create_file(dev, &dev->uevent_attr); - kobject_hotplug(&dev->kobj, KOBJ_ADD); + kobject_uevent(&dev->kobj, KOBJ_ADD); if ((error = device_pm_add(dev))) goto PMError; if ((error = bus_add_device(dev))) @@ -291,7 +291,7 @@ int device_add(struct device *dev) BusError: device_pm_remove(dev); PMError: - kobject_hotplug(&dev->kobj, KOBJ_REMOVE); + kobject_uevent(&dev->kobj, KOBJ_REMOVE); kobject_del(&dev->kobj); Error: if (parent) @@ -374,7 +374,7 @@ void device_del(struct device * dev) platform_notify_remove(dev); bus_remove_device(dev); device_pm_remove(dev); - kobject_hotplug(&dev->kobj, KOBJ_REMOVE); + kobject_uevent(&dev->kobj, KOBJ_REMOVE); kobject_del(&dev->kobj); if (parent) put_device(parent); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index a95844790f7b..281d26784d25 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -41,14 +41,14 @@ static ssize_t store_online(struct sys_device *dev, const char *buf, case '0': ret = cpu_down(cpu->sysdev.id); if (!ret) - kobject_hotplug(&dev->kobj, KOBJ_OFFLINE); + kobject_uevent(&dev->kobj, KOBJ_OFFLINE); break; case '1': ret = smp_prepare_cpu(cpu->sysdev.id); if (!ret) ret = cpu_up(cpu->sysdev.id); if (!ret) - kobject_hotplug(&dev->kobj, KOBJ_ONLINE); + kobject_uevent(&dev->kobj, KOBJ_ONLINE); break; default: ret = -EINVAL; diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 59dacb6552c0..5b3d5e9ddcb6 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -85,17 +85,17 @@ firmware_timeout_store(struct class *class, const char *buf, size_t count) static CLASS_ATTR(timeout, 0644, firmware_timeout_show, firmware_timeout_store); static void fw_class_dev_release(struct class_device *class_dev); -int firmware_class_hotplug(struct 
class_device *dev, char **envp, +int firmware_class_uevent(struct class_device *dev, char **envp, int num_envp, char *buffer, int buffer_size); static struct class firmware_class = { .name = "firmware", - .hotplug = firmware_class_hotplug, + .uevent = firmware_class_uevent, .release = fw_class_dev_release, }; int -firmware_class_hotplug(struct class_device *class_dev, char **envp, +firmware_class_uevent(struct class_device *class_dev, char **envp, int num_envp, char *buffer, int buffer_size) { struct firmware_priv *fw_priv = class_get_devdata(class_dev); @@ -104,13 +104,12 @@ firmware_class_hotplug(struct class_device *class_dev, char **envp, if (!test_bit(FW_STATUS_READY, &fw_priv->status)) return -ENODEV; - if (add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &len, - "FIRMWARE=%s", fw_priv->fw_id)) + if (add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len, + "FIRMWARE=%s", fw_priv->fw_id)) return -ENOMEM; - if (add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &len, - "TIMEOUT=%i", loading_timeout)) + if (add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len, + "TIMEOUT=%i", loading_timeout)) return -ENOMEM; - envp[i] = NULL; return 0; @@ -352,7 +351,7 @@ error_kfree: static int fw_setup_class_device(struct firmware *fw, struct class_device **class_dev_p, - const char *fw_name, struct device *device, int hotplug) + const char *fw_name, struct device *device, int uevent) { struct class_device *class_dev; struct firmware_priv *fw_priv; @@ -384,7 +383,7 @@ fw_setup_class_device(struct firmware *fw, struct class_device **class_dev_p, goto error_unreg; } - if (hotplug) + if (uevent) set_bit(FW_STATUS_READY, &fw_priv->status); else set_bit(FW_STATUS_READY_NOHOTPLUG, &fw_priv->status); @@ -399,7 +398,7 @@ out: static int _request_firmware(const struct firmware **firmware_p, const char *name, - struct device *device, int hotplug) + struct device *device, int uevent) { struct class_device *class_dev; struct firmware_priv *fw_priv; @@ 
-418,19 +417,19 @@ _request_firmware(const struct firmware **firmware_p, const char *name, } retval = fw_setup_class_device(firmware, &class_dev, name, device, - hotplug); + uevent); if (retval) goto error_kfree_fw; fw_priv = class_get_devdata(class_dev); - if (hotplug) { + if (uevent) { if (loading_timeout > 0) { fw_priv->timeout.expires = jiffies + loading_timeout * HZ; add_timer(&fw_priv->timeout); } - kobject_hotplug(&class_dev->kobj, KOBJ_ADD); + kobject_uevent(&class_dev->kobj, KOBJ_ADD); wait_for_completion(&fw_priv->completion); set_bit(FW_STATUS_DONE, &fw_priv->status); del_timer_sync(&fw_priv->timeout); @@ -456,7 +455,7 @@ out: } /** - * request_firmware: - request firmware to hotplug and wait for it + * request_firmware: - send firmware request and wait for it * @firmware_p: pointer to firmware image * @name: name of firmware file * @device: device for which firmware is being loaded @@ -466,7 +465,7 @@ out: * * Should be called from user context where sleeping is allowed. * - * @name will be used as $FIRMWARE in the hotplug environment and + * @name will be used as $FIRMWARE in the uevent environment and * should be distinctive enough not to be confused with any other * firmware image for this or any other device. 
**/ @@ -474,8 +473,8 @@ int request_firmware(const struct firmware **firmware_p, const char *name, struct device *device) { - int hotplug = 1; - return _request_firmware(firmware_p, name, device, hotplug); + int uevent = 1; + return _request_firmware(firmware_p, name, device, uevent); } /** @@ -518,7 +517,7 @@ struct firmware_work { struct device *device; void *context; void (*cont)(const struct firmware *fw, void *context); - int hotplug; + int uevent; }; static int @@ -533,7 +532,7 @@ request_firmware_work_func(void *arg) } daemonize("%s/%s", "firmware", fw_work->name); ret = _request_firmware(&fw, fw_work->name, fw_work->device, - fw_work->hotplug); + fw_work->uevent); if (ret < 0) fw_work->cont(NULL, fw_work->context); else { @@ -548,7 +547,7 @@ request_firmware_work_func(void *arg) /** * request_firmware_nowait: asynchronous version of request_firmware * @module: module requesting the firmware - * @hotplug: invokes hotplug event to copy the firmware image if this flag + * @uevent: sends uevent to copy the firmware image if this flag * is non-zero else the firmware copy must be done manually. 
* @name: name of firmware file * @device: device for which firmware is being loaded @@ -562,7 +561,7 @@ request_firmware_work_func(void *arg) **/ int request_firmware_nowait( - struct module *module, int hotplug, + struct module *module, int uevent, const char *name, struct device *device, void *context, void (*cont)(const struct firmware *fw, void *context)) { @@ -583,7 +582,7 @@ request_firmware_nowait( .device = device, .context = context, .cont = cont, - .hotplug = hotplug, + .uevent = uevent, }; ret = kernel_thread(request_firmware_work_func, fw_work, diff --git a/drivers/base/memory.c b/drivers/base/memory.c index bc3ca6a656b2..7e1d077874df 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -29,12 +29,12 @@ static struct sysdev_class memory_sysdev_class = { set_kset_name(MEMORY_CLASS_NAME), }; -static char *memory_hotplug_name(struct kset *kset, struct kobject *kobj) +static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj) { return MEMORY_CLASS_NAME; } -static int memory_hotplug(struct kset *kset, struct kobject *kobj, char **envp, +static int memory_uevent(struct kset *kset, struct kobject *kobj, char **envp, int num_envp, char *buffer, int buffer_size) { int retval = 0; @@ -42,9 +42,9 @@ static int memory_hotplug(struct kset *kset, struct kobject *kobj, char **envp, return retval; } -static struct kset_hotplug_ops memory_hotplug_ops = { - .name = memory_hotplug_name, - .hotplug = memory_hotplug, +static struct kset_uevent_ops memory_uevent_ops = { + .name = memory_uevent_name, + .uevent = memory_uevent, }; static struct notifier_block *memory_chain; @@ -431,7 +431,7 @@ int __init memory_dev_init(void) unsigned int i; int ret; - memory_sysdev_class.kset.hotplug_ops = &memory_hotplug_ops; + memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops; ret = sysdev_class_register(&memory_sysdev_class); /* diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c index 0ea37b1bccb2..f2453668acf5 100644 --- 
a/drivers/ieee1394/nodemgr.c +++ b/drivers/ieee1394/nodemgr.c @@ -121,8 +121,8 @@ struct host_info { }; static int nodemgr_bus_match(struct device * dev, struct device_driver * drv); -static int nodemgr_hotplug(struct class_device *cdev, char **envp, int num_envp, - char *buffer, int buffer_size); +static int nodemgr_uevent(struct class_device *cdev, char **envp, int num_envp, + char *buffer, int buffer_size); static void nodemgr_resume_ne(struct node_entry *ne); static void nodemgr_remove_ne(struct node_entry *ne); static struct node_entry *find_entry_by_guid(u64 guid); @@ -162,7 +162,7 @@ static void ud_cls_release(struct class_device *class_dev) static struct class nodemgr_ud_class = { .name = "ieee1394", .release = ud_cls_release, - .hotplug = nodemgr_hotplug, + .uevent = nodemgr_uevent, }; static struct hpsb_highlevel nodemgr_highlevel; @@ -966,7 +966,7 @@ static struct unit_directory *nodemgr_process_unit_directory if (ud_child == NULL) break; - /* inherit unspecified values so hotplug picks it up */ + /* inherit unspecified values, the driver core picks it up */ if ((ud->flags & UNIT_DIRECTORY_MODEL_ID) && !(ud_child->flags & UNIT_DIRECTORY_MODEL_ID)) { @@ -1062,8 +1062,8 @@ static void nodemgr_process_root_directory(struct host_info *hi, struct node_ent #ifdef CONFIG_HOTPLUG -static int nodemgr_hotplug(struct class_device *cdev, char **envp, int num_envp, - char *buffer, int buffer_size) +static int nodemgr_uevent(struct class_device *cdev, char **envp, int num_envp, + char *buffer, int buffer_size) { struct unit_directory *ud; int i = 0; @@ -1112,8 +1112,8 @@ do { \ #else -static int nodemgr_hotplug(struct class_device *cdev, char **envp, int num_envp, - char *buffer, int buffer_size) +static int nodemgr_uevent(struct class_device *cdev, char **envp, int num_envp, + char *buffer, int buffer_size) { return -ENODEV; } @@ -1618,8 +1618,8 @@ static int nodemgr_host_thread(void *__hi) /* Scan our nodes to get the bus options and create node * entries. 
This does not do the sysfs stuff, since that - * would trigger hotplug callbacks and such, which is a - * bad idea at this point. */ + * would trigger uevents and such, which is a bad idea at + * this point. */ nodemgr_node_scan(hi, generation); /* This actually does the full probe, with sysfs diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 08648b1a387e..1f1743c5c9a3 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -434,24 +434,24 @@ static void ib_device_release(struct class_device *cdev) kfree(dev); } -static int ib_device_hotplug(struct class_device *cdev, char **envp, - int num_envp, char *buf, int size) +static int ib_device_uevent(struct class_device *cdev, char **envp, + int num_envp, char *buf, int size) { struct ib_device *dev = container_of(cdev, struct ib_device, class_dev); int i = 0, len = 0; - if (add_hotplug_env_var(envp, num_envp, &i, buf, size, &len, - "NAME=%s", dev->name)) + if (add_uevent_var(envp, num_envp, &i, buf, size, &len, + "NAME=%s", dev->name)) return -ENOMEM; /* - * It might be nice to pass the node GUID to hotplug, but + * It might be nice to pass the node GUID with the event, but * right now the only way to get it is to query the device * provider, and this can crash during device removal because * we are will be running after driver removal has started. * We could add a node_guid field to struct ib_device, or we - * could just let the hotplug script read the node GUID from - * sysfs when devices are added. + * could just let userspace read the node GUID from sysfs when + * devices are added. 
*/ envp[i] = NULL; @@ -653,7 +653,7 @@ static struct class_device_attribute *ib_class_attributes[] = { static struct class ib_class = { .name = "infiniband", .release = ib_device_release, - .hotplug = ib_device_hotplug, + .uevent = ib_device_uevent, }; int ib_device_register_sysfs(struct ib_device *device) diff --git a/drivers/input/input.c b/drivers/input/input.c index 43b49ccd7dad..2d37b394e384 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -610,10 +610,10 @@ static void input_dev_release(struct class_device *class_dev) } /* - * Input hotplugging interface - loading event handlers based on + * Input uevent interface - loading event handlers based on * device bitfields. */ -static int input_add_hotplug_bm_var(char **envp, int num_envp, int *cur_index, +static int input_add_uevent_bm_var(char **envp, int num_envp, int *cur_index, char *buffer, int buffer_size, int *cur_len, const char *name, unsigned long *bitmap, int max) { @@ -638,7 +638,7 @@ static int input_add_hotplug_bm_var(char **envp, int num_envp, int *cur_index, #define INPUT_ADD_HOTPLUG_VAR(fmt, val...) 
\ do { \ - int err = add_hotplug_env_var(envp, num_envp, &i, \ + int err = add_uevent_var(envp, num_envp, &i, \ buffer, buffer_size, &len, \ fmt, val); \ if (err) \ @@ -647,15 +647,15 @@ static int input_add_hotplug_bm_var(char **envp, int num_envp, int *cur_index, #define INPUT_ADD_HOTPLUG_BM_VAR(name, bm, max) \ do { \ - int err = input_add_hotplug_bm_var(envp, num_envp, &i, \ + int err = input_add_uevent_bm_var(envp, num_envp, &i, \ buffer, buffer_size, &len, \ name, bm, max); \ if (err) \ return err; \ } while (0) -static int input_dev_hotplug(struct class_device *cdev, char **envp, - int num_envp, char *buffer, int buffer_size) +static int input_dev_uevent(struct class_device *cdev, char **envp, + int num_envp, char *buffer, int buffer_size) { struct input_dev *dev = to_input_dev(cdev); int i = 0; @@ -697,7 +697,7 @@ static int input_dev_hotplug(struct class_device *cdev, char **envp, struct class input_class = { .name = "input", .release = input_dev_release, - .hotplug = input_dev_hotplug, + .uevent = input_dev_uevent, }; struct input_dev *input_allocate_device(void) diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index fbb69ef6a77b..8e530cc970e1 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -800,16 +800,16 @@ static int serio_bus_match(struct device *dev, struct device_driver *drv) #ifdef CONFIG_HOTPLUG -#define SERIO_ADD_HOTPLUG_VAR(fmt, val...) \ +#define SERIO_ADD_UEVENT_VAR(fmt, val...) 
\ do { \ - int err = add_hotplug_env_var(envp, num_envp, &i, \ + int err = add_uevent_var(envp, num_envp, &i, \ buffer, buffer_size, &len, \ fmt, val); \ if (err) \ return err; \ } while (0) -static int serio_hotplug(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size) +static int serio_uevent(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size) { struct serio *serio; int i = 0; @@ -820,21 +820,21 @@ static int serio_hotplug(struct device *dev, char **envp, int num_envp, char *bu serio = to_serio_port(dev); - SERIO_ADD_HOTPLUG_VAR("SERIO_TYPE=%02x", serio->id.type); - SERIO_ADD_HOTPLUG_VAR("SERIO_PROTO=%02x", serio->id.proto); - SERIO_ADD_HOTPLUG_VAR("SERIO_ID=%02x", serio->id.id); - SERIO_ADD_HOTPLUG_VAR("SERIO_EXTRA=%02x", serio->id.extra); - SERIO_ADD_HOTPLUG_VAR("MODALIAS=serio:ty%02Xpr%02Xid%02Xex%02X", + SERIO_ADD_UEVENT_VAR("SERIO_TYPE=%02x", serio->id.type); + SERIO_ADD_UEVENT_VAR("SERIO_PROTO=%02x", serio->id.proto); + SERIO_ADD_UEVENT_VAR("SERIO_ID=%02x", serio->id.id); + SERIO_ADD_UEVENT_VAR("SERIO_EXTRA=%02x", serio->id.extra); + SERIO_ADD_UEVENT_VAR("MODALIAS=serio:ty%02Xpr%02Xid%02Xex%02X", serio->id.type, serio->id.proto, serio->id.id, serio->id.extra); envp[i] = NULL; return 0; } -#undef SERIO_ADD_HOTPLUG_VAR +#undef SERIO_ADD_UEVENT_VAR #else -static int serio_hotplug(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size) +static int serio_uevent(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size) { return -ENODEV; } @@ -908,7 +908,7 @@ static int __init serio_init(void) serio_bus.dev_attrs = serio_device_attrs; serio_bus.drv_attrs = serio_driver_attrs; serio_bus.match = serio_bus_match; - serio_bus.hotplug = serio_hotplug; + serio_bus.uevent = serio_uevent; serio_bus.resume = serio_resume; bus_register(&serio_bus); diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c index c34c96d18907..228e1852a836 100644 --- 
a/drivers/macintosh/macio_asic.c +++ b/drivers/macintosh/macio_asic.c @@ -128,7 +128,7 @@ static int macio_device_resume(struct device * dev) return 0; } -static int macio_hotplug (struct device *dev, char **envp, int num_envp, +static int macio_uevent(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size) { struct macio_dev * macio_dev; @@ -203,7 +203,7 @@ extern struct device_attribute macio_dev_attrs[]; struct bus_type macio_bus_type = { .name = "macio", .match = macio_bus_match, - .hotplug = macio_hotplug, + .uevent = macio_uevent, .suspend = macio_device_suspend, .resume = macio_device_resume, .dev_attrs = macio_dev_attrs, diff --git a/drivers/mmc/mmc_sysfs.c b/drivers/mmc/mmc_sysfs.c index 3f4a66ca9555..ec701667abfc 100644 --- a/drivers/mmc/mmc_sysfs.c +++ b/drivers/mmc/mmc_sysfs.c @@ -80,7 +80,7 @@ static int mmc_bus_match(struct device *dev, struct device_driver *drv) } static int -mmc_bus_hotplug(struct device *dev, char **envp, int num_envp, char *buf, +mmc_bus_uevent(struct device *dev, char **envp, int num_envp, char *buf, int buf_size) { struct mmc_card *card = dev_to_mmc_card(dev); @@ -140,7 +140,7 @@ static struct bus_type mmc_bus_type = { .name = "mmc", .dev_attrs = mmc_dev_attrs, .match = mmc_bus_match, - .hotplug = mmc_bus_hotplug, + .uevent = mmc_bus_uevent, .suspend = mmc_bus_suspend, .resume = mmc_bus_resume, }; diff --git a/drivers/pci/hotplug.c b/drivers/pci/hotplug.c index e1743be31909..1c97e7dd130b 100644 --- a/drivers/pci/hotplug.c +++ b/drivers/pci/hotplug.c @@ -3,8 +3,8 @@ #include #include "pci.h" -int pci_hotplug (struct device *dev, char **envp, int num_envp, - char *buffer, int buffer_size) +int pci_uevent(struct device *dev, char **envp, int num_envp, + char *buffer, int buffer_size) { struct pci_dev *pdev; int i = 0; @@ -17,34 +17,34 @@ int pci_hotplug (struct device *dev, char **envp, int num_envp, if (!pdev) return -ENODEV; - if (add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - 
"PCI_CLASS=%04X", pdev->class)) + if (add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "PCI_CLASS=%04X", pdev->class)) return -ENOMEM; - if (add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "PCI_ID=%04X:%04X", pdev->vendor, pdev->device)) + if (add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "PCI_ID=%04X:%04X", pdev->vendor, pdev->device)) return -ENOMEM; - if (add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "PCI_SUBSYS_ID=%04X:%04X", pdev->subsystem_vendor, - pdev->subsystem_device)) + if (add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "PCI_SUBSYS_ID=%04X:%04X", pdev->subsystem_vendor, + pdev->subsystem_device)) return -ENOMEM; - if (add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "PCI_SLOT_NAME=%s", pci_name(pdev))) + if (add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "PCI_SLOT_NAME=%s", pci_name(pdev))) return -ENOMEM; - if (add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "MODALIAS=pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02x", - pdev->vendor, pdev->device, - pdev->subsystem_vendor, pdev->subsystem_device, - (u8)(pdev->class >> 16), (u8)(pdev->class >> 8), - (u8)(pdev->class))) + if (add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "MODALIAS=pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02x", + pdev->vendor, pdev->device, + pdev->subsystem_vendor, pdev->subsystem_device, + (u8)(pdev->class >> 16), (u8)(pdev->class >> 8), + (u8)(pdev->class))) return -ENOMEM; envp[i] = NULL; diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index a9046d4b8af3..7146b69b812c 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -502,8 +502,8 @@ void pci_dev_put(struct pci_dev *dev) } #ifndef CONFIG_HOTPLUG -int pci_hotplug (struct device *dev, char **envp, int num_envp, - char *buffer, int buffer_size) +int pci_uevent(struct device *dev, char **envp, 
int num_envp, + char *buffer, int buffer_size) { return -ENODEV; } @@ -512,7 +512,7 @@ int pci_hotplug (struct device *dev, char **envp, int num_envp, struct bus_type pci_bus_type = { .name = "pci", .match = pci_bus_match, - .hotplug = pci_hotplug, + .uevent = pci_uevent, .suspend = pci_device_suspend, .resume = pci_device_resume, .dev_attrs = pci_dev_attrs, diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 6527b36c9a61..294849d24590 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -1,7 +1,7 @@ /* Functions internal to the PCI core code */ -extern int pci_hotplug (struct device *dev, char **envp, int num_envp, - char *buffer, int buffer_size); +extern int pci_uevent(struct device *dev, char **envp, int num_envp, + char *buffer, int buffer_size); extern int pci_create_sysfs_dev_files(struct pci_dev *pdev); extern void pci_remove_sysfs_dev_files(struct pci_dev *pdev); extern void pci_cleanup_rom(struct pci_dev *dev); diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index a30aa74304a2..7cf09084ef61 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -901,14 +901,14 @@ int pcmcia_insert_card(struct pcmcia_socket *skt) EXPORT_SYMBOL(pcmcia_insert_card); -static int pcmcia_socket_hotplug(struct class_device *dev, char **envp, - int num_envp, char *buffer, int buffer_size) +static int pcmcia_socket_uevent(struct class_device *dev, char **envp, + int num_envp, char *buffer, int buffer_size) { struct pcmcia_socket *s = container_of(dev, struct pcmcia_socket, dev); int i = 0, length = 0; - if (add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, - &length, "SOCKET_NO=%u", s->sock)) + if (add_uevent_var(envp, num_envp, &i, buffer, buffer_size, + &length, "SOCKET_NO=%u", s->sock)) return -ENOMEM; envp[i] = NULL; @@ -927,7 +927,7 @@ static void pcmcia_release_socket_class(struct class *data) struct class pcmcia_socket_class = { .name = "pcmcia_socket", - .hotplug = pcmcia_socket_hotplug, + .uevent = pcmcia_socket_uevent, .release = 
pcmcia_release_socket, .class_release = pcmcia_release_socket_class, }; diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 7f8219f3fd9e..6fb76399547e 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -779,8 +779,8 @@ static int pcmcia_bus_match(struct device * dev, struct device_driver * drv) { #ifdef CONFIG_HOTPLUG -static int pcmcia_bus_hotplug(struct device *dev, char **envp, int num_envp, - char *buffer, int buffer_size) +static int pcmcia_bus_uevent(struct device *dev, char **envp, int num_envp, + char *buffer, int buffer_size) { struct pcmcia_device *p_dev; int i, length = 0; @@ -800,31 +800,31 @@ static int pcmcia_bus_hotplug(struct device *dev, char **envp, int num_envp, i = 0; - if (add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "SOCKET_NO=%u", - p_dev->socket->sock)) + if (add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "SOCKET_NO=%u", + p_dev->socket->sock)) return -ENOMEM; - if (add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "DEVICE_NO=%02X", - p_dev->device_no)) + if (add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "DEVICE_NO=%02X", + p_dev->device_no)) return -ENOMEM; - if (add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "MODALIAS=pcmcia:m%04Xc%04Xf%02Xfn%02Xpfn%02X" - "pa%08Xpb%08Xpc%08Xpd%08X", - p_dev->has_manf_id ? p_dev->manf_id : 0, - p_dev->has_card_id ? p_dev->card_id : 0, - p_dev->has_func_id ? p_dev->func_id : 0, - p_dev->func, - p_dev->device_no, - hash[0], - hash[1], - hash[2], - hash[3])) + if (add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "MODALIAS=pcmcia:m%04Xc%04Xf%02Xfn%02Xpfn%02X" + "pa%08Xpb%08Xpc%08Xpd%08X", + p_dev->has_manf_id ? p_dev->manf_id : 0, + p_dev->has_card_id ? p_dev->card_id : 0, + p_dev->has_func_id ? 
p_dev->func_id : 0, + p_dev->func, + p_dev->device_no, + hash[0], + hash[1], + hash[2], + hash[3])) return -ENOMEM; envp[i] = NULL; @@ -834,7 +834,7 @@ static int pcmcia_bus_hotplug(struct device *dev, char **envp, int num_envp, #else -static int pcmcia_bus_hotplug(struct device *dev, char **envp, int num_envp, +static int pcmcia_bus_uevent(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size) { return -ENODEV; @@ -1223,7 +1223,7 @@ static struct class_interface pcmcia_bus_interface = { struct bus_type pcmcia_bus_type = { .name = "pcmcia", - .hotplug = pcmcia_bus_hotplug, + .uevent = pcmcia_bus_uevent, .match = pcmcia_bus_match, .dev_attrs = pcmcia_dev_attrs, }; diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index bf44a409ba0d..07ddf9a38758 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -2132,7 +2132,7 @@ restart: } spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); - kobject_hotplug(&ioa_cfg->host->shost_classdev.kobj, KOBJ_CHANGE); + kobject_uevent(&ioa_cfg->host->shost_classdev.kobj, KOBJ_CHANGE); LEAVE; } diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index e80ef9467825..af2f0941baac 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -363,8 +363,7 @@ void usb_driver_release_interface(struct usb_driver *driver, * Most USB device drivers will use this indirectly, through the usb core, * but some layered driver frameworks use it directly. * These device tables are exported with MODULE_DEVICE_TABLE, through - * modutils and "modules.usbmap", to support the driver loading - * functionality of USB hotplugging. + * modutils, to support the driver loading functionality of USB hotplugging. * * What Matches: * @@ -545,10 +544,7 @@ static int usb_device_match (struct device *dev, struct device_driver *drv) #ifdef CONFIG_HOTPLUG /* - * USB hotplugging invokes what /proc/sys/kernel/hotplug says - * (normally /sbin/hotplug) when USB devices get added or removed. 
- * - * This invokes a user mode policy agent, typically helping to load driver + * This sends an uevent to userspace, typically helping to load driver * or other modules, configure the device, and more. Drivers can provide * a MODULE_DEVICE_TABLE to help with module loading subtasks. * @@ -557,8 +553,8 @@ static int usb_device_match (struct device *dev, struct device_driver *drv) * delays in event delivery. Use sysfs (and DEVPATH) to make sure the * device (and this configuration!) are still present. */ -static int usb_hotplug (struct device *dev, char **envp, int num_envp, - char *buffer, int buffer_size) +static int usb_uevent(struct device *dev, char **envp, int num_envp, + char *buffer, int buffer_size) { struct usb_interface *intf; struct usb_device *usb_dev; @@ -570,7 +566,7 @@ static int usb_hotplug (struct device *dev, char **envp, int num_envp, return -ENODEV; /* driver is often null here; dev_dbg() would oops */ - pr_debug ("usb %s: hotplug\n", dev->bus_id); + pr_debug ("usb %s: uevent\n", dev->bus_id); /* Must check driver_data here, as on remove driver is always NULL */ if ((dev->driver == &usb_generic_driver) || @@ -597,51 +593,51 @@ static int usb_hotplug (struct device *dev, char **envp, int num_envp, * * FIXME reduce hardwired intelligence here */ - if (add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "DEVICE=/proc/bus/usb/%03d/%03d", - usb_dev->bus->busnum, usb_dev->devnum)) + if (add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "DEVICE=/proc/bus/usb/%03d/%03d", + usb_dev->bus->busnum, usb_dev->devnum)) return -ENOMEM; #endif /* per-device configurations are common */ - if (add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "PRODUCT=%x/%x/%x", - le16_to_cpu(usb_dev->descriptor.idVendor), - le16_to_cpu(usb_dev->descriptor.idProduct), - le16_to_cpu(usb_dev->descriptor.bcdDevice))) + if (add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "PRODUCT=%x/%x/%x", + 
le16_to_cpu(usb_dev->descriptor.idVendor), + le16_to_cpu(usb_dev->descriptor.idProduct), + le16_to_cpu(usb_dev->descriptor.bcdDevice))) return -ENOMEM; /* class-based driver binding models */ - if (add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "TYPE=%d/%d/%d", - usb_dev->descriptor.bDeviceClass, - usb_dev->descriptor.bDeviceSubClass, - usb_dev->descriptor.bDeviceProtocol)) + if (add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "TYPE=%d/%d/%d", + usb_dev->descriptor.bDeviceClass, + usb_dev->descriptor.bDeviceSubClass, + usb_dev->descriptor.bDeviceProtocol)) return -ENOMEM; - if (add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "INTERFACE=%d/%d/%d", - alt->desc.bInterfaceClass, - alt->desc.bInterfaceSubClass, - alt->desc.bInterfaceProtocol)) + if (add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "INTERFACE=%d/%d/%d", + alt->desc.bInterfaceClass, + alt->desc.bInterfaceSubClass, + alt->desc.bInterfaceProtocol)) return -ENOMEM; - if (add_hotplug_env_var(envp, num_envp, &i, - buffer, buffer_size, &length, - "MODALIAS=usb:v%04Xp%04Xd%04Xdc%02Xdsc%02Xdp%02Xic%02Xisc%02Xip%02X", - le16_to_cpu(usb_dev->descriptor.idVendor), - le16_to_cpu(usb_dev->descriptor.idProduct), - le16_to_cpu(usb_dev->descriptor.bcdDevice), - usb_dev->descriptor.bDeviceClass, - usb_dev->descriptor.bDeviceSubClass, - usb_dev->descriptor.bDeviceProtocol, - alt->desc.bInterfaceClass, - alt->desc.bInterfaceSubClass, - alt->desc.bInterfaceProtocol)) + if (add_uevent_var(envp, num_envp, &i, + buffer, buffer_size, &length, + "MODALIAS=usb:v%04Xp%04Xd%04Xdc%02Xdsc%02Xdp%02Xic%02Xisc%02Xip%02X", + le16_to_cpu(usb_dev->descriptor.idVendor), + le16_to_cpu(usb_dev->descriptor.idProduct), + le16_to_cpu(usb_dev->descriptor.bcdDevice), + usb_dev->descriptor.bDeviceClass, + usb_dev->descriptor.bDeviceSubClass, + usb_dev->descriptor.bDeviceProtocol, + alt->desc.bInterfaceClass, + alt->desc.bInterfaceSubClass, + 
alt->desc.bInterfaceProtocol)) return -ENOMEM; envp[i] = NULL; @@ -651,7 +647,7 @@ static int usb_hotplug (struct device *dev, char **envp, int num_envp, #else -static int usb_hotplug (struct device *dev, char **envp, +static int usb_uevent(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size) { return -ENODEV; @@ -1491,7 +1487,7 @@ static int usb_generic_resume(struct device *dev) struct bus_type usb_bus_type = { .name = "usb", .match = usb_device_match, - .hotplug = usb_hotplug, + .uevent = usb_uevent, .suspend = usb_generic_suspend, .resume = usb_generic_resume, }; diff --git a/drivers/usb/host/hc_crisv10.c b/drivers/usb/host/hc_crisv10.c index 0eaabeb37ac3..641268d7e6f3 100644 --- a/drivers/usb/host/hc_crisv10.c +++ b/drivers/usb/host/hc_crisv10.c @@ -4397,7 +4397,7 @@ static int __init etrax_usb_hc_init(void) device_initialize(&fake_device); kobject_set_name(&fake_device.kobj, "etrax_usb"); kobject_add(&fake_device.kobj); - kobject_hotplug(&fake_device.kobj, KOBJ_ADD); + kobject_uevent(&fake_device.kobj, KOBJ_ADD); hc->bus->controller = &fake_device; usb_register_bus(hc->bus); diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index 14016b1cd948..024206c4a0e4 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -142,12 +142,12 @@ static struct bin_attribute w1_slave_attr_bin_id = { /* Default family */ static struct w1_family w1_default_family; -static int w1_hotplug(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size); +static int w1_uevent(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size); static struct bus_type w1_bus_type = { .name = "w1", .match = w1_master_match, - .hotplug = w1_hotplug, + .uevent = w1_uevent, }; struct device_driver w1_master_driver = { @@ -361,7 +361,7 @@ void w1_destroy_master_attributes(struct w1_master *master) } #ifdef CONFIG_HOTPLUG -static int w1_hotplug(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size) +static int 
w1_uevent(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size) { struct w1_master *md = NULL; struct w1_slave *sl = NULL; @@ -377,7 +377,7 @@ static int w1_hotplug(struct device *dev, char **envp, int num_envp, char *buffe event_owner = "slave"; name = sl->name; } else { - dev_dbg(dev, "Unknown hotplug event.\n"); + dev_dbg(dev, "Unknown event.\n"); return -EINVAL; } @@ -386,18 +386,18 @@ static int w1_hotplug(struct device *dev, char **envp, int num_envp, char *buffe if (dev->driver != &w1_slave_driver || !sl) return 0; - err = add_hotplug_env_var(envp, num_envp, &cur_index, buffer, buffer_size, &cur_len, "W1_FID=%02X", sl->reg_num.family); + err = add_uevent_var(envp, num_envp, &cur_index, buffer, buffer_size, &cur_len, "W1_FID=%02X", sl->reg_num.family); if (err) return err; - err = add_hotplug_env_var(envp, num_envp, &cur_index, buffer, buffer_size, &cur_len, "W1_SLAVE_ID=%024LX", (u64)sl->reg_num.id); + err = add_uevent_var(envp, num_envp, &cur_index, buffer, buffer_size, &cur_len, "W1_SLAVE_ID=%024LX", (u64)sl->reg_num.id); if (err) return err; return 0; }; #else -static int w1_hotplug(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size) +static int w1_uevent(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size) { return 0; } diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 8dc1822a7022..7187a57d51e8 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -226,7 +226,7 @@ static struct sysfs_ops part_sysfs_ops = { static ssize_t part_uevent_store(struct hd_struct * p, const char *page, size_t count) { - kobject_hotplug(&p->kobj, KOBJ_ADD); + kobject_uevent(&p->kobj, KOBJ_ADD); return count; } static ssize_t part_dev_read(struct hd_struct * p, char *page) @@ -360,7 +360,7 @@ void register_disk(struct gendisk *disk) if ((err = kobject_add(&disk->kobj))) return; disk_sysfs_symlinks(disk); - kobject_hotplug(&disk->kobj, KOBJ_ADD); + kobject_uevent(&disk->kobj, 
KOBJ_ADD); /* No minors to use for partitions */ if (disk->minors == 1) { @@ -465,6 +465,6 @@ void del_gendisk(struct gendisk *disk) sysfs_remove_link(&disk->driverfs_dev->kobj, "block"); put_device(disk->driverfs_dev); } - kobject_hotplug(&disk->kobj, KOBJ_REMOVE); + kobject_uevent(&disk->kobj, KOBJ_REMOVE); kobject_del(&disk->kobj); } diff --git a/include/linux/device.h b/include/linux/device.h index 17cbc6db67b4..0cdee78e5ce1 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -47,8 +47,8 @@ struct bus_type { struct driver_attribute * drv_attrs; int (*match)(struct device * dev, struct device_driver * drv); - int (*hotplug) (struct device *dev, char **envp, - int num_envp, char *buffer, int buffer_size); + int (*uevent)(struct device *dev, char **envp, + int num_envp, char *buffer, int buffer_size); int (*suspend)(struct device * dev, pm_message_t state); int (*resume)(struct device * dev); }; @@ -151,7 +151,7 @@ struct class { struct class_attribute * class_attrs; struct class_device_attribute * class_dev_attrs; - int (*hotplug)(struct class_device *dev, char **envp, + int (*uevent)(struct class_device *dev, char **envp, int num_envp, char *buffer, int buffer_size); void (*release)(struct class_device *dev); @@ -209,9 +209,9 @@ extern int class_device_create_file(struct class_device *, * set, this will be called instead of the class specific release function. * Only use this if you want to override the default release function, like * when you are nesting class_device structures. - * @hotplug: pointer to a hotplug function for this struct class_device. If - * set, this will be called instead of the class specific hotplug function. - * Only use this if you want to override the default hotplug function, like + * @uevent: pointer to a uevent function for this struct class_device. If + * set, this will be called instead of the class specific uevent function. 
+ * Only use this if you want to override the default uevent function, like * when you are nesting class_device structures. */ struct class_device { @@ -227,7 +227,7 @@ struct class_device { struct class_device *parent; /* parent of this child device, if there is one */ void (*release)(struct class_device *dev); - int (*hotplug)(struct class_device *dev, char **envp, + int (*uevent)(struct class_device *dev, char **envp, int num_envp, char *buffer, int buffer_size); char class_id[BUS_ID_SIZE]; /* unique to this class */ }; diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 2063c0839d4f..2d716080be4a 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -14,7 +14,7 @@ struct device; int request_firmware(const struct firmware **fw, const char *name, struct device *device); int request_firmware_nowait( - struct module *module, int hotplug, + struct module *module, int uevent, const char *name, struct device *device, void *context, void (*cont)(const struct firmware *fw, void *context)); diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 5b08248fba72..8eb21f2f25e1 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -26,15 +26,14 @@ #include #include -#define KOBJ_NAME_LEN 20 - -#define HOTPLUG_PATH_LEN 256 +#define KOBJ_NAME_LEN 20 +#define UEVENT_HELPER_PATH_LEN 256 /* path to the userspace helper executed on an event */ -extern char hotplug_path[]; +extern char uevent_helper[]; -/* counter to tag the hotplug event, read only except for the kobject core */ -extern u64 hotplug_seqnum; +/* counter to tag the uevent, read only except for the kobject core */ +extern u64 uevent_seqnum; /* the actions here must match the proper string in lib/kobject_uevent.c */ typedef int __bitwise kobject_action_t; @@ -101,15 +100,14 @@ struct kobj_type { * of object; multiple ksets can belong to one subsystem. All * ksets of a subsystem share the subsystem's lock. 
* - * Each kset can support hotplugging; if it does, it will be given - * the opportunity to filter out specific kobjects from being - * reported, as well as to add its own "data" elements to the - * environment being passed to the hotplug helper. + * Each kset can support specific event variables; it can + * supress the event generation or add subsystem specific + * variables carried with the event. */ -struct kset_hotplug_ops { +struct kset_uevent_ops { int (*filter)(struct kset *kset, struct kobject *kobj); const char *(*name)(struct kset *kset, struct kobject *kobj); - int (*hotplug)(struct kset *kset, struct kobject *kobj, char **envp, + int (*uevent)(struct kset *kset, struct kobject *kobj, char **envp, int num_envp, char *buffer, int buffer_size); }; @@ -119,7 +117,7 @@ struct kset { struct list_head list; spinlock_t list_lock; struct kobject kobj; - struct kset_hotplug_ops * hotplug_ops; + struct kset_uevent_ops * uevent_ops; }; @@ -167,20 +165,20 @@ struct subsystem { struct rw_semaphore rwsem; }; -#define decl_subsys(_name,_type,_hotplug_ops) \ +#define decl_subsys(_name,_type,_uevent_ops) \ struct subsystem _name##_subsys = { \ .kset = { \ .kobj = { .name = __stringify(_name) }, \ .ktype = _type, \ - .hotplug_ops =_hotplug_ops, \ + .uevent_ops =_uevent_ops, \ } \ } -#define decl_subsys_name(_varname,_name,_type,_hotplug_ops) \ +#define decl_subsys_name(_varname,_name,_type,_uevent_ops) \ struct subsystem _varname##_subsys = { \ .kset = { \ .kobj = { .name = __stringify(_name) }, \ .ktype = _type, \ - .hotplug_ops =_hotplug_ops, \ + .uevent_ops =_uevent_ops, \ } \ } @@ -256,16 +254,16 @@ extern int subsys_create_file(struct subsystem * , struct subsys_attribute *); extern void subsys_remove_file(struct subsystem * , struct subsys_attribute *); #ifdef CONFIG_HOTPLUG -void kobject_hotplug(struct kobject *kobj, enum kobject_action action); +void kobject_uevent(struct kobject *kobj, enum kobject_action action); -int add_hotplug_env_var(char **envp, int 
num_envp, int *cur_index, +int add_uevent_var(char **envp, int num_envp, int *cur_index, char *buffer, int buffer_size, int *cur_len, const char *format, ...) __attribute__((format (printf, 7, 8))); #else -static inline void kobject_hotplug(struct kobject *kobj, enum kobject_action action) { } +static inline void kobject_uevent(struct kobject *kobj, enum kobject_action action) { } -static inline int add_hotplug_env_var(char **envp, int num_envp, int *cur_index, +static inline int add_uevent_var(char **envp, int num_envp, int *cur_index, char *buffer, int buffer_size, int *cur_len, const char *format, ...) { return 0; } diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 4be34ef8c2f7..501564264518 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -124,7 +124,7 @@ enum KERN_OVERFLOWUID=46, /* int: overflow UID */ KERN_OVERFLOWGID=47, /* int: overflow GID */ KERN_SHMPATH=48, /* string: path to shm fs */ - KERN_HOTPLUG=49, /* string: path to hotplug policy agent */ + KERN_HOTPLUG=49, /* string: path to uevent helper (deprecated) */ KERN_IEEE_EMULATION_WARNINGS=50, /* int: unimplemented ieee instructions */ KERN_S390_USER_DEBUG_LOGGING=51, /* int: dumps of user faults */ KERN_CORE_USES_PID=52, /* int: use core or core.%pid */ diff --git a/include/linux/usb.h b/include/linux/usb.h index d81b050e5955..7a20997e8071 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -225,7 +225,7 @@ struct usb_interface_cache { * Device drivers should not attempt to activate configurations. The choice * of which configuration to install is a policy decision based on such * considerations as available power, functionality provided, and the user's - * desires (expressed through hotplug scripts). However, drivers can call + * desires (expressed through userspace tools). However, drivers can call * usb_reset_configuration() to reinitialize the current configuration and * all its interfaces. 
*/ diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index e975a76a9d5b..bfb4a7a54e22 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -26,23 +26,23 @@ static struct subsys_attribute _name##_attr = \ /* current uevent sequence number */ static ssize_t uevent_seqnum_show(struct subsystem *subsys, char *page) { - return sprintf(page, "%llu\n", (unsigned long long)hotplug_seqnum); + return sprintf(page, "%llu\n", (unsigned long long)uevent_seqnum); } KERNEL_ATTR_RO(uevent_seqnum); /* uevent helper program, used during early boo */ static ssize_t uevent_helper_show(struct subsystem *subsys, char *page) { - return sprintf(page, "%s\n", hotplug_path); + return sprintf(page, "%s\n", uevent_helper); } static ssize_t uevent_helper_store(struct subsystem *subsys, const char *page, size_t count) { - if (count+1 > HOTPLUG_PATH_LEN) + if (count+1 > UEVENT_HELPER_PATH_LEN) return -ENOENT; - memcpy(hotplug_path, page, count); - hotplug_path[count] = '\0'; - if (count && hotplug_path[count-1] == '\n') - hotplug_path[count-1] = '\0'; + memcpy(uevent_helper, page, count); + uevent_helper[count] = '\0'; + if (count && uevent_helper[count-1] == '\n') + uevent_helper[count-1] = '\0'; return count; } KERNEL_ATTR_RW(uevent_helper); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6a51e25d4466..345f4a1d533f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -395,8 +395,8 @@ static ctl_table kern_table[] = { { .ctl_name = KERN_HOTPLUG, .procname = "hotplug", - .data = &hotplug_path, - .maxlen = HOTPLUG_PATH_LEN, + .data = &uevent_helper, + .maxlen = UEVENT_HELPER_PATH_LEN, .mode = 0644, .proc_handler = &proc_dostring, .strategy = &sysctl_string, diff --git a/lib/kobject.c b/lib/kobject.c index a181abed89f6..7a0e6809490d 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -207,7 +207,7 @@ int kobject_register(struct kobject * kobj) kobject_name(kobj),error); dump_stack(); } else - kobject_hotplug(kobj, KOBJ_ADD); + kobject_uevent(kobj, KOBJ_ADD); } else error = -EINVAL; return error; 
@@ -312,7 +312,7 @@ void kobject_del(struct kobject * kobj) void kobject_unregister(struct kobject * kobj) { pr_debug("kobject %s: unregistering\n",kobject_name(kobj)); - kobject_hotplug(kobj, KOBJ_REMOVE); + kobject_uevent(kobj, KOBJ_REMOVE); kobject_del(kobj); kobject_put(kobj); } diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index dd061da3aba9..01479e5c6d18 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -22,12 +22,12 @@ #include #include -#define BUFFER_SIZE 1024 /* buffer for the hotplug env */ +#define BUFFER_SIZE 1024 /* buffer for the variables */ #define NUM_ENVP 32 /* number of env pointers */ #if defined(CONFIG_HOTPLUG) -char hotplug_path[HOTPLUG_PATH_LEN] = "/sbin/hotplug"; -u64 hotplug_seqnum; +char uevent_helper[UEVENT_HELPER_PATH_LEN] = "/sbin/hotplug"; +u64 uevent_seqnum; static DEFINE_SPINLOCK(sequence_lock); static struct sock *uevent_sock; @@ -50,12 +50,12 @@ static char *action_to_string(enum kobject_action action) } /** - * kobject_hotplug - notify userspace by executing /sbin/hotplug + * kobject_uevent - notify userspace by ending an uevent * - * @action: action that is happening (usually "ADD" or "REMOVE") + * @action: action that is happening (usually KOBJ_ADD and KOBJ_REMOVE) * @kobj: struct kobject that the action is happening to */ -void kobject_hotplug(struct kobject *kobj, enum kobject_action action) +void kobject_uevent(struct kobject *kobj, enum kobject_action action) { char **envp; char *buffer; @@ -65,7 +65,7 @@ void kobject_hotplug(struct kobject *kobj, enum kobject_action action) const char *subsystem; struct kobject *top_kobj; struct kset *kset; - struct kset_hotplug_ops *hotplug_ops; + struct kset_uevent_ops *uevent_ops; u64 seq; char *seq_buff; int i = 0; @@ -88,11 +88,11 @@ void kobject_hotplug(struct kobject *kobj, enum kobject_action action) return; kset = top_kobj->kset; - hotplug_ops = kset->hotplug_ops; + uevent_ops = kset->uevent_ops; /* skip the event, if the filter returns zero. 
*/ - if (hotplug_ops && hotplug_ops->filter) - if (!hotplug_ops->filter(kset, kobj)) + if (uevent_ops && uevent_ops->filter) + if (!uevent_ops->filter(kset, kobj)) return; /* environment index */ @@ -111,8 +111,8 @@ void kobject_hotplug(struct kobject *kobj, enum kobject_action action) goto exit; /* originating subsystem */ - if (hotplug_ops && hotplug_ops->name) - subsystem = hotplug_ops->name(kset, kobj); + if (uevent_ops && uevent_ops->name) + subsystem = uevent_ops->name(kset, kobj); else subsystem = kobject_name(&kset->kobj); @@ -134,12 +134,12 @@ void kobject_hotplug(struct kobject *kobj, enum kobject_action action) scratch += strlen("SEQNUM=18446744073709551616") + 1; /* let the kset specific function add its stuff */ - if (hotplug_ops && hotplug_ops->hotplug) { - retval = hotplug_ops->hotplug (kset, kobj, + if (uevent_ops && uevent_ops->uevent) { + retval = uevent_ops->uevent(kset, kobj, &envp[i], NUM_ENVP - i, scratch, BUFFER_SIZE - (scratch - buffer)); if (retval) { - pr_debug ("%s - hotplug() returned %d\n", + pr_debug ("%s - uevent() returned %d\n", __FUNCTION__, retval); goto exit; } @@ -147,7 +147,7 @@ void kobject_hotplug(struct kobject *kobj, enum kobject_action action) /* we will send an event, request a new sequence number */ spin_lock(&sequence_lock); - seq = ++hotplug_seqnum; + seq = ++uevent_seqnum; spin_unlock(&sequence_lock); sprintf(seq_buff, "SEQNUM=%llu", (unsigned long long)seq); @@ -177,10 +177,10 @@ void kobject_hotplug(struct kobject *kobj, enum kobject_action action) } /* call uevent_helper, usually only enabled during early boot */ - if (hotplug_path[0]) { + if (uevent_helper[0]) { char *argv [3]; - argv [0] = hotplug_path; + argv [0] = uevent_helper; argv [1] = (char *)subsystem; argv [2] = NULL; call_usermodehelper (argv[0], argv, envp, 0); @@ -192,39 +192,39 @@ exit: kfree(envp); return; } -EXPORT_SYMBOL(kobject_hotplug); +EXPORT_SYMBOL_GPL(kobject_uevent); /** - * add_hotplug_env_var - helper for creating hotplug environment 
variables + * add_uevent_var - helper for creating event variables * @envp: Pointer to table of environment variables, as passed into - * hotplug() method. + * uevent() method. * @num_envp: Number of environment variable slots available, as - * passed into hotplug() method. + * passed into uevent() method. * @cur_index: Pointer to current index into @envp. It should be - * initialized to 0 before the first call to add_hotplug_env_var(), + * initialized to 0 before the first call to add_uevent_var(), * and will be incremented on success. * @buffer: Pointer to buffer for environment variables, as passed - * into hotplug() method. - * @buffer_size: Length of @buffer, as passed into hotplug() method. + * into uevent() method. + * @buffer_size: Length of @buffer, as passed into uevent() method. * @cur_len: Pointer to current length of space used in @buffer. * Should be initialized to 0 before the first call to - * add_hotplug_env_var(), and will be incremented on success. + * add_uevent_var(), and will be incremented on success. * @format: Format for creating environment variable (of the form * "XXX=%x") for snprintf(). * * Returns 0 if environment variable was added successfully or -ENOMEM * if no space was available. */ -int add_hotplug_env_var(char **envp, int num_envp, int *cur_index, - char *buffer, int buffer_size, int *cur_len, - const char *format, ...) +int add_uevent_var(char **envp, int num_envp, int *cur_index, + char *buffer, int buffer_size, int *cur_len, + const char *format, ...) { va_list args; /* * We check against num_envp - 1 to make sure there is at - * least one slot left after we return, since the hotplug - * method needs to set the last slot to NULL. + * least one slot left after we return, since kobject_uevent() + * needs to set the last slot to NULL. 
*/ if (*cur_index >= num_envp - 1) return -ENOMEM; @@ -243,7 +243,7 @@ int add_hotplug_env_var(char **envp, int num_envp, int *cur_index, (*cur_index)++; return 0; } -EXPORT_SYMBOL(add_hotplug_env_var); +EXPORT_SYMBOL_GPL(add_uevent_var); static int __init kobject_uevent_init(void) { diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index bd7568ac87fc..0ed38740388c 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -78,7 +78,7 @@ static struct class_device_attribute *bt_attrs[] = { }; #ifdef CONFIG_HOTPLUG -static int bt_hotplug(struct class_device *cdev, char **envp, int num_envp, char *buf, int size) +static int bt_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size) { struct hci_dev *hdev = class_get_devdata(cdev); int n, i = 0; @@ -107,7 +107,7 @@ struct class bt_class = { .name = "bluetooth", .release = bt_release, #ifdef CONFIG_HOTPLUG - .hotplug = bt_hotplug, + .uevent = bt_uevent, #endif }; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index f6a19d53eaeb..2ebdc23bbe26 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -248,7 +248,7 @@ int br_sysfs_addif(struct net_bridge_port *p) if (err) goto out2; - kobject_hotplug(&p->kobj, KOBJ_ADD); + kobject_uevent(&p->kobj, KOBJ_ADD); return 0; out2: kobject_del(&p->kobj); @@ -260,7 +260,7 @@ void br_sysfs_removeif(struct net_bridge_port *p) { pr_debug("br_sysfs_removeif\n"); sysfs_remove_link(&p->br->ifobj, p->dev->name); - kobject_hotplug(&p->kobj, KOBJ_REMOVE); + kobject_uevent(&p->kobj, KOBJ_REMOVE); kobject_del(&p->kobj); } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e2137f3e489d..198655dd9a77 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -369,14 +369,14 @@ static struct attribute_group wireless_group = { #endif #ifdef CONFIG_HOTPLUG -static int netdev_hotplug(struct class_device *cd, char **envp, - int num_envp, char *buf, int size) +static int netdev_uevent(struct 
class_device *cd, char **envp, + int num_envp, char *buf, int size) { struct net_device *dev = to_net_dev(cd); int i = 0; int n; - /* pass interface in env to hotplug. */ + /* pass interface to uevent. */ envp[i++] = buf; n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1; buf += n; @@ -408,7 +408,7 @@ static struct class net_class = { .name = "net", .release = netdev_release, #ifdef CONFIG_HOTPLUG - .hotplug = netdev_hotplug, + .uevent = netdev_uevent, #endif }; -- cgit v1.2.3-71-gd317 From f743ca5e10f4145e0b3e6d11b9b46171e16af7ce Mon Sep 17 00:00:00 2001 From: "akpm@osdl.org" Date: Tue, 22 Nov 2005 23:36:13 -0800 Subject: [PATCH] kobject_uevent CONFIG_NET=n fix lib/lib.a(kobject_uevent.o)(.text+0x25f): In function `kobject_uevent': : undefined reference to `__alloc_skb' lib/lib.a(kobject_uevent.o)(.text+0x2a1): In function `kobject_uevent': : undefined reference to `skb_over_panic' lib/lib.a(kobject_uevent.o)(.text+0x31d): In function `kobject_uevent': : undefined reference to `skb_over_panic' lib/lib.a(kobject_uevent.o)(.text+0x356): In function `kobject_uevent': : undefined reference to `netlink_broadcast' lib/lib.a(kobject_uevent.o)(.init.text+0x9): In function `kobject_uevent_init': : undefined reference to `netlink_kernel_create' make: *** [.tmp_vmlinux1] Error 1 Netlink is unconditionally enabled if CONFIG_NET, so that's OK. kobject_uevent.o is compiled even if !CONFIG_HOTPLUG, which is lazy. Let's compound the sin. 
Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 2 +- kernel/ksysfs.c | 3 +++ lib/kobject_uevent.c | 4 +--- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 8eb21f2f25e1..2a8d8da70961 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -253,7 +253,7 @@ struct subsys_attribute { extern int subsys_create_file(struct subsystem * , struct subsys_attribute *); extern void subsys_remove_file(struct subsystem * , struct subsys_attribute *); -#ifdef CONFIG_HOTPLUG +#if defined(CONFIG_HOTPLUG) & defined(CONFIG_NET) void kobject_uevent(struct kobject *kobj, enum kobject_action action); int add_uevent_var(char **envp, int num_envp, int *cur_index, diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index bfb4a7a54e22..99af8b05eeaa 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -15,6 +15,9 @@ #include #include +u64 uevent_seqnum; +char uevent_helper[UEVENT_HELPER_PATH_LEN] = "/sbin/hotplug"; + #define KERNEL_ATTR_RO(_name) \ static struct subsys_attribute _name##_attr = __ATTR_RO(_name) diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 01479e5c6d18..f56e27ae9d52 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -25,9 +25,7 @@ #define BUFFER_SIZE 1024 /* buffer for the variables */ #define NUM_ENVP 32 /* number of env pointers */ -#if defined(CONFIG_HOTPLUG) -char uevent_helper[UEVENT_HELPER_PATH_LEN] = "/sbin/hotplug"; -u64 uevent_seqnum; +#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) static DEFINE_SPINLOCK(sequence_lock); static struct sock *uevent_sock; -- cgit v1.2.3-71-gd317 From 1d8f430c15b3a345db990e285742c67c2f52f9a6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 7 Dec 2005 21:40:34 +0100 Subject: [PATCH] Input: add modalias support Here's the patch for modalias support for input classes. 
It uses comma-separated numbers, and doesn't describe all the potential keys (no module currently cares, and that would make the strings huge). The changes to input.h are to move the definitions needed by file2alias outside __KERNEL__. I chose not to move those definitions to mod_devicetable.h, because there are so many that it might break compile of something else in the kernel. The rest is fairly straightforward. Signed-off-by: Rusty Russell CC: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- drivers/input/input.c | 39 ++++++++++++++++++++++++ include/linux/input.h | 79 +++++++++++++++++++++++++----------------------- scripts/mod/file2alias.c | 62 ++++++++++++++++++++++++++++++++++++- 3 files changed, 141 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/input.c b/drivers/input/input.c index 2d37b394e384..ef5824c8846b 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -528,10 +528,49 @@ INPUT_DEV_STRING_ATTR_SHOW(name); INPUT_DEV_STRING_ATTR_SHOW(phys); INPUT_DEV_STRING_ATTR_SHOW(uniq); +static int print_modalias_bits(char *buf, char prefix, unsigned long *arr, + unsigned int min, unsigned int max) +{ + int len, i; + + len = sprintf(buf, "%c", prefix); + for (i = min; i < max; i++) + if (arr[LONG(i)] & BIT(i)) + len += sprintf(buf+len, "%X,", i); + return len; +} + +static ssize_t input_dev_show_modalias(struct class_device *dev, char *buf) +{ + struct input_dev *id = to_input_dev(dev); + ssize_t len = 0; + + len += sprintf(buf+len, "input:b%04Xv%04Xp%04Xe%04X-", + id->id.bustype, + id->id.vendor, + id->id.product, + id->id.version); + + len += print_modalias_bits(buf+len, 'e', id->evbit, 0, EV_MAX); + len += print_modalias_bits(buf+len, 'k', id->keybit, + KEY_MIN_INTERESTING, KEY_MAX); + len += print_modalias_bits(buf+len, 'r', id->relbit, 0, REL_MAX); + len += print_modalias_bits(buf+len, 'a', id->absbit, 0, ABS_MAX); + len += print_modalias_bits(buf+len, 'm', id->mscbit, 0, MSC_MAX); + len += 
print_modalias_bits(buf+len, 'l', id->ledbit, 0, LED_MAX); + len += print_modalias_bits(buf+len, 's', id->sndbit, 0, SND_MAX); + len += print_modalias_bits(buf+len, 'f', id->ffbit, 0, FF_MAX); + len += print_modalias_bits(buf+len, 'w', id->swbit, 0, SW_MAX); + len += sprintf(buf+len, "\n"); + return len; +} +static CLASS_DEVICE_ATTR(modalias, S_IRUGO, input_dev_show_modalias, NULL); + static struct attribute *input_dev_attrs[] = { &class_device_attr_name.attr, &class_device_attr_phys.attr, &class_device_attr_uniq.attr, + &class_device_attr_modalias.attr, NULL }; diff --git a/include/linux/input.h b/include/linux/input.h index 3c5823368ddb..bef08551a33b 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -18,6 +18,7 @@ #include #include #endif +#include /* * The event structure itself @@ -511,6 +512,8 @@ struct input_absinfo { #define KEY_FN_S 0x1e3 #define KEY_FN_B 0x1e4 +/* We avoid low common keys in module aliases so they don't get huge. */ +#define KEY_MIN_INTERESTING KEY_MUTE #define KEY_MAX 0x1ff /* @@ -793,6 +796,44 @@ struct ff_effect { #define FF_MAX 0x7f +struct input_device_id { + + kernel_ulong_t flags; + + struct input_id id; + + kernel_ulong_t evbit[EV_MAX/BITS_PER_LONG+1]; + kernel_ulong_t keybit[KEY_MAX/BITS_PER_LONG+1]; + kernel_ulong_t relbit[REL_MAX/BITS_PER_LONG+1]; + kernel_ulong_t absbit[ABS_MAX/BITS_PER_LONG+1]; + kernel_ulong_t mscbit[MSC_MAX/BITS_PER_LONG+1]; + kernel_ulong_t ledbit[LED_MAX/BITS_PER_LONG+1]; + kernel_ulong_t sndbit[SND_MAX/BITS_PER_LONG+1]; + kernel_ulong_t ffbit[FF_MAX/BITS_PER_LONG+1]; + kernel_ulong_t swbit[SW_MAX/BITS_PER_LONG+1]; + + kernel_ulong_t driver_info; +}; + +/* + * Structure for hotplug & device<->driver matching. 
+ */ + +#define INPUT_DEVICE_ID_MATCH_BUS 1 +#define INPUT_DEVICE_ID_MATCH_VENDOR 2 +#define INPUT_DEVICE_ID_MATCH_PRODUCT 4 +#define INPUT_DEVICE_ID_MATCH_VERSION 8 + +#define INPUT_DEVICE_ID_MATCH_EVBIT 0x010 +#define INPUT_DEVICE_ID_MATCH_KEYBIT 0x020 +#define INPUT_DEVICE_ID_MATCH_RELBIT 0x040 +#define INPUT_DEVICE_ID_MATCH_ABSBIT 0x080 +#define INPUT_DEVICE_ID_MATCH_MSCIT 0x100 +#define INPUT_DEVICE_ID_MATCH_LEDBIT 0x200 +#define INPUT_DEVICE_ID_MATCH_SNDBIT 0x400 +#define INPUT_DEVICE_ID_MATCH_FFBIT 0x800 +#define INPUT_DEVICE_ID_MATCH_SWBIT 0x1000 + #ifdef __KERNEL__ /* @@ -901,49 +942,11 @@ struct input_dev { }; #define to_input_dev(d) container_of(d, struct input_dev, cdev) -/* - * Structure for hotplug & device<->driver matching. - */ - -#define INPUT_DEVICE_ID_MATCH_BUS 1 -#define INPUT_DEVICE_ID_MATCH_VENDOR 2 -#define INPUT_DEVICE_ID_MATCH_PRODUCT 4 -#define INPUT_DEVICE_ID_MATCH_VERSION 8 - -#define INPUT_DEVICE_ID_MATCH_EVBIT 0x010 -#define INPUT_DEVICE_ID_MATCH_KEYBIT 0x020 -#define INPUT_DEVICE_ID_MATCH_RELBIT 0x040 -#define INPUT_DEVICE_ID_MATCH_ABSBIT 0x080 -#define INPUT_DEVICE_ID_MATCH_MSCIT 0x100 -#define INPUT_DEVICE_ID_MATCH_LEDBIT 0x200 -#define INPUT_DEVICE_ID_MATCH_SNDBIT 0x400 -#define INPUT_DEVICE_ID_MATCH_FFBIT 0x800 -#define INPUT_DEVICE_ID_MATCH_SWBIT 0x1000 - #define INPUT_DEVICE_ID_MATCH_DEVICE\ (INPUT_DEVICE_ID_MATCH_BUS | INPUT_DEVICE_ID_MATCH_VENDOR | INPUT_DEVICE_ID_MATCH_PRODUCT) #define INPUT_DEVICE_ID_MATCH_DEVICE_AND_VERSION\ (INPUT_DEVICE_ID_MATCH_DEVICE | INPUT_DEVICE_ID_MATCH_VERSION) -struct input_device_id { - - unsigned long flags; - - struct input_id id; - - unsigned long evbit[NBITS(EV_MAX)]; - unsigned long keybit[NBITS(KEY_MAX)]; - unsigned long relbit[NBITS(REL_MAX)]; - unsigned long absbit[NBITS(ABS_MAX)]; - unsigned long mscbit[NBITS(MSC_MAX)]; - unsigned long ledbit[NBITS(LED_MAX)]; - unsigned long sndbit[NBITS(SND_MAX)]; - unsigned long ffbit[NBITS(FF_MAX)]; - unsigned long swbit[NBITS(SW_MAX)]; - - unsigned 
long driver_info; -}; - struct input_handle; struct input_handler { diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index e3d144a3f10b..e0eedffe565b 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -16,8 +16,10 @@ * use either stdint.h or inttypes.h for the rest. */ #if KERNEL_ELFCLASS == ELFCLASS32 typedef Elf32_Addr kernel_ulong_t; +#define BITS_PER_LONG 32 #else typedef Elf64_Addr kernel_ulong_t; +#define BITS_PER_LONG 64 #endif #ifdef __sun__ #include @@ -35,6 +37,7 @@ typedef unsigned char __u8; * even potentially has different endianness and word sizes, since * we handle those differences explicitly below */ #include "../../include/linux/mod_devicetable.h" +#include "../../include/linux/input.h" #define ADD(str, sep, cond, field) \ do { \ @@ -366,6 +369,61 @@ static int do_i2c_entry(const char *filename, struct i2c_device_id *i2c, char *a return 1; } +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +static void do_input(char *alias, + kernel_ulong_t *arr, unsigned int min, unsigned int max) +{ + unsigned int i; + for (i = min; i < max; i++) { + if (arr[i/BITS_PER_LONG] & (1 << (i%BITS_PER_LONG))) + sprintf(alias+strlen(alias), "%X,*", i); + } +} + +/* input:b0v0p0e0-eXkXrXaXmXlXsXfXwX where X is comma-separated %02X. 
*/ +static int do_input_entry(const char *filename, struct input_device_id *id, + char *alias) +{ + sprintf(alias, "input:"); + + ADD(alias, "b", id->flags&INPUT_DEVICE_ID_MATCH_BUS, id->id.bustype); + ADD(alias, "v", id->flags&INPUT_DEVICE_ID_MATCH_VENDOR, id->id.vendor); + ADD(alias, "p", id->flags&INPUT_DEVICE_ID_MATCH_PRODUCT, + id->id.product); + ADD(alias, "e", id->flags&INPUT_DEVICE_ID_MATCH_VERSION, + id->id.version); + + sprintf(alias + strlen(alias), "-e*"); + if (id->flags&INPUT_DEVICE_ID_MATCH_EVBIT) + do_input(alias, id->evbit, 0, EV_MAX); + sprintf(alias + strlen(alias), "k*"); + if (id->flags&INPUT_DEVICE_ID_MATCH_KEYBIT) + do_input(alias, id->keybit, KEY_MIN_INTERESTING, KEY_MAX); + sprintf(alias + strlen(alias), "r*"); + if (id->flags&INPUT_DEVICE_ID_MATCH_RELBIT) + do_input(alias, id->relbit, 0, REL_MAX); + sprintf(alias + strlen(alias), "a*"); + if (id->flags&INPUT_DEVICE_ID_MATCH_ABSBIT) + do_input(alias, id->absbit, 0, ABS_MAX); + sprintf(alias + strlen(alias), "m*"); + if (id->flags&INPUT_DEVICE_ID_MATCH_MSCIT) + do_input(alias, id->mscbit, 0, MSC_MAX); + sprintf(alias + strlen(alias), "l*"); + if (id->flags&INPUT_DEVICE_ID_MATCH_LEDBIT) + do_input(alias, id->ledbit, 0, LED_MAX); + sprintf(alias + strlen(alias), "s*"); + if (id->flags&INPUT_DEVICE_ID_MATCH_SNDBIT) + do_input(alias, id->sndbit, 0, SND_MAX); + sprintf(alias + strlen(alias), "f*"); + if (id->flags&INPUT_DEVICE_ID_MATCH_FFBIT) + do_input(alias, id->ffbit, 0, SND_MAX); + sprintf(alias + strlen(alias), "w*"); + if (id->flags&INPUT_DEVICE_ID_MATCH_SWBIT) + do_input(alias, id->swbit, 0, SW_MAX); + return 1; +} + /* Ignore any prefix, eg. 
v850 prepends _ */ static inline int sym_is(const char *symbol, const char *name) { @@ -453,7 +511,9 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, else if (sym_is(symname, "__mod_i2c_device_table")) do_table(symval, sym->st_size, sizeof(struct i2c_device_id), do_i2c_entry, mod); - + else if (sym_is(symname, "__mod_input_device_table")) + do_table(symval, sym->st_size, sizeof(struct input_device_id), + do_input_entry, mod); } /* Now add out buffered information to the generated C source */ -- cgit v1.2.3-71-gd317 From e39b84337b8aed3044683a57741a19e5002225b9 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Dec 2005 22:48:20 +1100 Subject: [PATCH] Input: fix add modalias support build error Fix build when scripts/mod/file2alias.c includes linux/input.h, which tries to include /usr/include/linux/mod_devicetable.h: In file included from scripts/mod/file2alias.c:40: include/linux/input.h:21:35: linux/mod_devicetable.h: No such file or directory make[2]: *** [scripts/mod/file2alias.o] Error 1 Signed-off-by: Rusty Russell Signed-off-by: Greg Kroah-Hartman --- include/linux/input.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index bef08551a33b..6d4cc3c110d6 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -13,12 +13,12 @@ #include #include #include +#include #else #include #include #include #endif -#include /* * The event structure itself -- cgit v1.2.3-71-gd317 From 93ce3061be212f6280e7ccafa9a7f698a95c6d75 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 10 Dec 2005 01:36:27 -0500 Subject: [PATCH] Driver Core: Add platform_device_del() Driver core: add platform_device_del function Having platform_device_del() allows more straightforward error handling code in drivers registering platform devices. 
Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 45 +++++++++++++++++++++++++++-------------- include/linux/platform_device.h | 1 + 2 files changed, 31 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 1091af1cbb58..95ecfc490d54 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -168,7 +168,7 @@ struct platform_device *platform_device_alloc(const char *name, unsigned int id) pa->pdev.dev.release = platform_device_release; } - return pa ? &pa->pdev : NULL; + return pa ? &pa->pdev : NULL; } EXPORT_SYMBOL_GPL(platform_device_alloc); @@ -282,24 +282,13 @@ int platform_device_add(struct platform_device *pdev) EXPORT_SYMBOL_GPL(platform_device_add); /** - * platform_device_register - add a platform-level device - * @pdev: platform device we're adding - * - */ -int platform_device_register(struct platform_device * pdev) -{ - device_initialize(&pdev->dev); - return platform_device_add(pdev); -} - -/** - * platform_device_unregister - remove a platform-level device + * platform_device_del - remove a platform-level device * @pdev: platform device we're removing * * Note that this function will also release all memory- and port-based * resources owned by the device (@dev->resource). 
*/ -void platform_device_unregister(struct platform_device * pdev) +void platform_device_del(struct platform_device *pdev) { int i; @@ -310,9 +299,35 @@ void platform_device_unregister(struct platform_device * pdev) release_resource(r); } - device_unregister(&pdev->dev); + device_del(&pdev->dev); } } +EXPORT_SYMBOL_GPL(platform_device_del); + +/** + * platform_device_register - add a platform-level device + * @pdev: platform device we're adding + * + */ +int platform_device_register(struct platform_device * pdev) +{ + device_initialize(&pdev->dev); + return platform_device_add(pdev); +} + +/** + * platform_device_unregister - unregister a platform-level device + * @pdev: platform device we're unregistering + * + * Unregistration is done in 2 steps. First we release all resources + * and remove it from the subsystem, then we drop reference count by + * calling platform_device_put(). + */ +void platform_device_unregister(struct platform_device * pdev) +{ + platform_device_del(pdev); + platform_device_put(pdev); +} /** * platform_device_register_simple diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 17e336f40b47..782090c68932 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -41,6 +41,7 @@ extern struct platform_device *platform_device_alloc(const char *name, unsigned extern int platform_device_add_resources(struct platform_device *pdev, struct resource *res, unsigned int num); extern int platform_device_add_data(struct platform_device *pdev, void *data, size_t size); extern int platform_device_add(struct platform_device *pdev); +extern void platform_device_del(struct platform_device *pdev); extern void platform_device_put(struct platform_device *pdev); struct platform_driver { -- cgit v1.2.3-71-gd317 From c1d10adb4a521de5760112853f42aaeefcec96eb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 5 Jan 2006 12:19:05 -0800 Subject: [NETFILTER]: Add ctnetlink port for nf_conntrack 
Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 3 + include/net/netfilter/nf_conntrack.h | 31 + include/net/netfilter/nf_conntrack_helper.h | 2 + include/net/netfilter/nf_conntrack_l3proto.h | 15 +- include/net/netfilter/nf_conntrack_protocol.h | 26 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 47 + net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 97 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 47 + net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 77 +- net/netfilter/Kconfig | 7 + net/netfilter/Makefile | 3 + net/netfilter/nf_conntrack_core.c | 232 +++- net/netfilter/nf_conntrack_netlink.c | 1642 ++++++++++++++++++++++++ net/netfilter/nf_conntrack_proto_tcp.c | 71 + net/netfilter/nf_conntrack_proto_udp.c | 10 + net/netfilter/nf_conntrack_standalone.c | 42 +- 16 files changed, 2289 insertions(+), 63 deletions(-) create mode 100644 net/netfilter/nf_conntrack_netlink.c (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 116fcaced909..b8e9a5b6fb1e 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -64,6 +64,9 @@ enum ctattr_l4proto { CTA_PROTO_ICMP_ID, CTA_PROTO_ICMP_TYPE, CTA_PROTO_ICMP_CODE, + CTA_PROTO_ICMPV6_ID, + CTA_PROTO_ICMPV6_TYPE, + CTA_PROTO_ICMPV6_CODE, __CTA_PROTO_MAX }; #define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index cc4825610795..64b82b74a650 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -94,6 +94,9 @@ struct nf_conn /* Current number of expected connections */ unsigned int expecting; + /* Unique ID that identifies this conntrack*/ + unsigned int id; + /* Helper. 
if any */ struct nf_conntrack_helper *helper; @@ -140,6 +143,9 @@ struct nf_conntrack_expect /* Usage count. */ atomic_t use; + /* Unique ID */ + unsigned int id; + /* Flags */ unsigned int flags; @@ -190,6 +196,31 @@ static inline void nf_ct_put(struct nf_conn *ct) nf_conntrack_put(&ct->ct_general); } +extern struct nf_conntrack_tuple_hash * +__nf_conntrack_find(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack); + +extern void nf_conntrack_hash_insert(struct nf_conn *ct); + +extern struct nf_conntrack_expect * +__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple); + +extern struct nf_conntrack_expect * +nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple); + +extern void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); + +extern void nf_ct_remove_expectations(struct nf_conn *ct); + +extern void nf_conntrack_flush(void); + +extern struct nf_conntrack_helper * +nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple); +extern void nf_ct_helper_put(struct nf_conntrack_helper *helper); + +extern struct nf_conntrack_helper * +__nf_conntrack_helper_find_byname(const char *name); + /* call to create an explicit dependency on nf_conntrack. 
*/ extern void need_nf_conntrack(void); diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 5a66b2a3a623..86ec8174ad02 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -33,6 +33,8 @@ struct nf_conntrack_helper unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info conntrackinfo); + + int (*to_nfattr)(struct sk_buff *skb, const struct nf_conn *ct); }; extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 01663e5b33df..67856eb93b43 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -14,6 +14,8 @@ #include #include +struct nfattr; + struct nf_conntrack_l3proto { /* Next pointer. */ @@ -70,6 +72,12 @@ struct nf_conntrack_l3proto u_int32_t (*get_features)(const struct nf_conntrack_tuple *tuple); + int (*tuple_to_nfattr)(struct sk_buff *skb, + const struct nf_conntrack_tuple *t); + + int (*nfattr_to_tuple)(struct nfattr *tb[], + struct nf_conntrack_tuple *t); + /* Module (if any) which this is connected to. 
*/ struct module *me; }; @@ -81,11 +89,16 @@ extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto); extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto); static inline struct nf_conntrack_l3proto * -nf_ct_find_l3proto(u_int16_t l3proto) +__nf_ct_l3proto_find(u_int16_t l3proto) { return nf_ct_l3protos[l3proto]; } +extern struct nf_conntrack_l3proto * +nf_ct_l3proto_find_get(u_int16_t l3proto); + +extern void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p); + /* Existing built-in protocols */ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; diff --git a/include/net/netfilter/nf_conntrack_protocol.h b/include/net/netfilter/nf_conntrack_protocol.h index b3afda35397a..1f33737fcea5 100644 --- a/include/net/netfilter/nf_conntrack_protocol.h +++ b/include/net/netfilter/nf_conntrack_protocol.h @@ -12,6 +12,7 @@ #include struct seq_file; +struct nfattr; struct nf_conntrack_protocol { @@ -66,6 +67,18 @@ struct nf_conntrack_protocol enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum); + /* convert protoinfo to nfnetlink attributes */ + int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa, + const struct nf_conn *ct); + + /* convert nfnetlink attributes to protoinfo */ + int (*from_nfattr)(struct nfattr *tb[], struct nf_conn *ct); + + int (*tuple_to_nfattr)(struct sk_buff *skb, + const struct nf_conntrack_tuple *t); + int (*nfattr_to_tuple)(struct nfattr *tb[], + struct nf_conntrack_tuple *t); + /* Module (if any) which this is connected to. 
*/ struct module *me; }; @@ -80,12 +93,23 @@ extern struct nf_conntrack_protocol nf_conntrack_generic_protocol; extern struct nf_conntrack_protocol **nf_ct_protos[PF_MAX]; extern struct nf_conntrack_protocol * -nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol); +__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol); + +extern struct nf_conntrack_protocol * +nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol); + +extern void nf_ct_proto_put(struct nf_conntrack_protocol *p); /* Protocol registration. */ extern int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto); extern void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto); +/* Generic netlink helpers */ +extern int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple); +extern int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[], + struct nf_conntrack_tuple *t); + /* Log invalid packets */ extern unsigned int nf_ct_log_invalid; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 8202c1c0afad..385867efd481 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -392,6 +392,48 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -ENOENT; } +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include +#include + +static int ipv4_tuple_to_nfattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple) +{ + NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), + &tuple->src.u3.ip); + NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), + &tuple->dst.u3.ip); + return 0; + +nfattr_failure: + return -1; +} + +static const size_t cta_min_ip[CTA_IP_MAX] = { + [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), + [CTA_IP_V4_DST-1] = sizeof(u_int32_t), +}; + +static int ipv4_nfattr_to_tuple(struct nfattr *tb[], + struct nf_conntrack_tuple *t) +{ + if (!tb[CTA_IP_V4_SRC-1] || 
!tb[CTA_IP_V4_DST-1]) + return -EINVAL; + + if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) + return -EINVAL; + + t->src.u3.ip = + *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); + t->dst.u3.ip = + *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]); + + return 0; +} +#endif + static struct nf_sockopt_ops so_getorigdst = { .pf = PF_INET, .get_optmin = SO_ORIGINAL_DST, @@ -408,6 +450,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { .print_conntrack = ipv4_print_conntrack, .prepare = ipv4_prepare, .get_features = ipv4_get_features, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = ipv4_tuple_to_nfattr, + .nfattr_to_tuple = ipv4_nfattr_to_tuple, +#endif .me = THIS_MODULE, }; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7ddb5c08f7b8..52dc175be39a 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -50,20 +50,21 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb, return 1; } +/* Add 1; spaces filled with 0. */ +static const u_int8_t invmap[] = { + [ICMP_ECHO] = ICMP_ECHOREPLY + 1, + [ICMP_ECHOREPLY] = ICMP_ECHO + 1, + [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, + [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, + [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, + [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, + [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, + [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1 +}; + static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig) { - /* Add 1; spaces filled with 0. 
*/ - static u_int8_t invmap[] - = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1, - [ICMP_ECHOREPLY] = ICMP_ECHO + 1, - [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, - [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, - [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, - [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, - [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, - [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1}; - if (orig->dst.u.icmp.type >= sizeof(invmap) || !invmap[orig->dst.u.icmp.type]) return 0; @@ -120,11 +121,12 @@ static int icmp_packet(struct nf_conn *ct, static int icmp_new(struct nf_conn *conntrack, const struct sk_buff *skb, unsigned int dataoff) { - static u_int8_t valid_new[] - = { [ICMP_ECHO] = 1, - [ICMP_TIMESTAMP] = 1, - [ICMP_INFO_REQUEST] = 1, - [ICMP_ADDRESS] = 1 }; + static const u_int8_t valid_new[] = { + [ICMP_ECHO] = 1, + [ICMP_TIMESTAMP] = 1, + [ICMP_INFO_REQUEST] = 1, + [ICMP_ADDRESS] = 1 + }; if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { @@ -168,7 +170,7 @@ icmp_error_message(struct sk_buff *skb, return -NF_ACCEPT; } - innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol); + innerproto = __nf_ct_proto_find(PF_INET, inside->ip.protocol); dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp); /* Are they talking about one of our connections? 
*/ if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, @@ -281,6 +283,60 @@ checksum_skipped: return icmp_error_message(skb, ctinfo, hooknum); } +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include +#include + +static int icmp_tuple_to_nfattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *t) +{ + NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), + &t->src.u.icmp.id); + NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), + &t->dst.u.icmp.type); + NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), + &t->dst.u.icmp.code); + + return 0; + +nfattr_failure: + return -1; +} + +static const size_t cta_min_proto[CTA_PROTO_MAX] = { + [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t) +}; + +static int icmp_nfattr_to_tuple(struct nfattr *tb[], + struct nf_conntrack_tuple *tuple) +{ + if (!tb[CTA_PROTO_ICMP_TYPE-1] + || !tb[CTA_PROTO_ICMP_CODE-1] + || !tb[CTA_PROTO_ICMP_ID-1]) + return -EINVAL; + + if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + return -EINVAL; + + tuple->dst.u.icmp.type = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]); + tuple->dst.u.icmp.code = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); + tuple->src.u.icmp.id = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); + + if (tuple->dst.u.icmp.type >= sizeof(invmap) + || !invmap[tuple->dst.u.icmp.type]) + return -EINVAL; + + return 0; +} +#endif + struct nf_conntrack_protocol nf_conntrack_protocol_icmp = { .list = { NULL, NULL }, @@ -295,7 +351,12 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmp = .new = icmp_new, .error = icmp_error, .destroy = NULL, - .me = NULL + .me = NULL, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = icmp_tuple_to_nfattr, + .nfattr_to_tuple = icmp_nfattr_to_tuple, +#endif }; EXPORT_SYMBOL(nf_conntrack_protocol_icmp); diff --git 
a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 753a3ae8502b..704fbbe74874 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -401,6 +401,48 @@ static ctl_table nf_ct_net_table[] = { }; #endif +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include +#include + +static int ipv6_tuple_to_nfattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple) +{ + NFA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, + &tuple->src.u3.ip6); + NFA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, + &tuple->dst.u3.ip6); + return 0; + +nfattr_failure: + return -1; +} + +static const size_t cta_min_ip[CTA_IP_MAX] = { + [CTA_IP_V6_SRC-1] = sizeof(u_int32_t)*4, + [CTA_IP_V6_DST-1] = sizeof(u_int32_t)*4, +}; + +static int ipv6_nfattr_to_tuple(struct nfattr *tb[], + struct nf_conntrack_tuple *t) +{ + if (!tb[CTA_IP_V6_SRC-1] || !tb[CTA_IP_V6_DST-1]) + return -EINVAL; + + if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) + return -EINVAL; + + memcpy(&t->src.u3.ip6, NFA_DATA(tb[CTA_IP_V6_SRC-1]), + sizeof(u_int32_t) * 4); + memcpy(&t->dst.u3.ip6, NFA_DATA(tb[CTA_IP_V6_DST-1]), + sizeof(u_int32_t) * 4); + + return 0; +} +#endif + struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { .l3proto = PF_INET6, .name = "ipv6", @@ -409,6 +451,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { .print_tuple = ipv6_print_tuple, .print_conntrack = ipv6_print_conntrack, .prepare = ipv6_prepare, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = ipv6_tuple_to_nfattr, + .nfattr_to_tuple = ipv6_nfattr_to_tuple, +#endif .get_features = ipv6_get_features, .me = THIS_MODULE, }; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index a7e03cfacd06..09945c333055 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ 
b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -57,17 +57,17 @@ static int icmpv6_pkt_to_tuple(const struct sk_buff *skb, return 1; } +/* Add 1; spaces filled with 0. */ +static u_int8_t invmap[] = { + [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, + [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, + [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1, + [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1 +}; + static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig) { - /* Add 1; spaces filled with 0. */ - static u_int8_t invmap[] = { - [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, - [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, - [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1, - [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1 - }; - int type = orig->dst.u.icmp.type - 128; if (type < 0 || type >= sizeof(invmap) || !invmap[type]) return 0; @@ -185,7 +185,7 @@ icmpv6_error_message(struct sk_buff *skb, return -NF_ACCEPT; } - inproto = nf_ct_find_proto(PF_INET6, inprotonum); + inproto = __nf_ct_proto_find(PF_INET6, inprotonum); /* Are they talking about one of our connections? 
*/ if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum, @@ -255,6 +255,60 @@ skipped: return icmpv6_error_message(skb, dataoff, ctinfo, hooknum); } +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include +#include +static int icmpv6_tuple_to_nfattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *t) +{ + NFA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t), + &t->src.u.icmp.id); + NFA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t), + &t->dst.u.icmp.type); + NFA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t), + &t->dst.u.icmp.code); + + return 0; + +nfattr_failure: + return -1; +} + +static const size_t cta_min_proto[CTA_PROTO_MAX] = { + [CTA_PROTO_ICMPV6_TYPE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMPV6_CODE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMPV6_ID-1] = sizeof(u_int16_t) +}; + +static int icmpv6_nfattr_to_tuple(struct nfattr *tb[], + struct nf_conntrack_tuple *tuple) +{ + if (!tb[CTA_PROTO_ICMPV6_TYPE-1] + || !tb[CTA_PROTO_ICMPV6_CODE-1] + || !tb[CTA_PROTO_ICMPV6_ID-1]) + return -EINVAL; + + if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + return -EINVAL; + + tuple->dst.u.icmp.type = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_TYPE-1]); + tuple->dst.u.icmp.code = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_CODE-1]); + tuple->src.u.icmp.id = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_ID-1]); + + if (tuple->dst.u.icmp.type < 128 + || tuple->dst.u.icmp.type - 128 >= sizeof(invmap) + || !invmap[tuple->dst.u.icmp.type - 128]) + return -EINVAL; + + return 0; +} +#endif + struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 = { .l3proto = PF_INET6, @@ -267,6 +321,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 = .packet = icmpv6_packet, .new = icmpv6_new, .error = icmpv6_error, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = icmpv6_tuple_to_nfattr, + .nfattr_to_tuple = icmpv6_nfattr_to_tuple, +#endif }; 
EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 794c41d19b28..7d55f9cbd853 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -95,4 +95,11 @@ config NF_CONNTRACK_FTP To compile it as a module, choose M here. If unsure, say N. +config NF_CT_NETLINK + tristate 'Connection tracking netlink interface (EXPERIMENTAL)' + depends on EXPERIMENTAL && NF_CONNTRACK && NETFILTER_NETLINK + depends on NF_CONNTRACK!=y || NETFILTER_NETLINK!=m + help + This option enables support for a netlink-based userspace interface + endmenu diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 55f019ad2c08..cb2183145c37 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -13,3 +13,6 @@ obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o # SCTP protocol connection tracking obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o + +# netlink interface for nf_conntrack +obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0c5b01d732d8..62bb509f05d4 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -82,6 +82,8 @@ unsigned int nf_ct_log_invalid; static LIST_HEAD(unconfirmed); static int nf_conntrack_vmalloc; +static unsigned int nf_conntrack_next_id = 1; +static unsigned int nf_conntrack_expect_next_id = 1; #ifdef CONFIG_NF_CONNTRACK_EVENTS struct notifier_block *nf_conntrack_chain; struct notifier_block *nf_conntrack_expect_chain; @@ -184,7 +186,7 @@ DECLARE_MUTEX(nf_ct_cache_mutex); extern struct nf_conntrack_protocol nf_conntrack_generic_protocol; struct nf_conntrack_protocol * -nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol) +__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol) { if (unlikely(nf_ct_protos[l3proto] == NULL)) return &nf_conntrack_generic_protocol; @@ -192,6 +194,50 @@ nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol) return 
nf_ct_protos[l3proto][protocol]; } +/* this is guaranteed to always return a valid protocol helper, since + * it falls back to generic_protocol */ +struct nf_conntrack_protocol * +nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol) +{ + struct nf_conntrack_protocol *p; + + preempt_disable(); + p = __nf_ct_proto_find(l3proto, protocol); + if (p) { + if (!try_module_get(p->me)) + p = &nf_conntrack_generic_protocol; + } + preempt_enable(); + + return p; +} + +void nf_ct_proto_put(struct nf_conntrack_protocol *p) +{ + module_put(p->me); +} + +struct nf_conntrack_l3proto * +nf_ct_l3proto_find_get(u_int16_t l3proto) +{ + struct nf_conntrack_l3proto *p; + + preempt_disable(); + p = __nf_ct_l3proto_find(l3proto); + if (p) { + if (!try_module_get(p->me)) + p = &nf_conntrack_generic_l3proto; + } + preempt_enable(); + + return p; +} + +void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p) +{ + module_put(p->me); +} + static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -384,7 +430,7 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, } /* nf_conntrack_expect helper functions */ -static void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) +void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) { ASSERT_WRITE_LOCK(&nf_conntrack_lock); NF_CT_ASSERT(!timer_pending(&exp->timeout)); @@ -404,6 +450,33 @@ static void expectation_timed_out(unsigned long ul_expect) nf_conntrack_expect_put(exp); } +struct nf_conntrack_expect * +__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_expect *i; + + list_for_each_entry(i, &nf_conntrack_expect_list, list) { + if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { + atomic_inc(&i->use); + return i; + } + } + return NULL; +} + +/* Just find a expectation corresponding to a tuple. 
*/ +struct nf_conntrack_expect * +nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_expect *i; + + read_lock_bh(&nf_conntrack_lock); + i = __nf_conntrack_expect_find(tuple); + read_unlock_bh(&nf_conntrack_lock); + + return i; +} + /* If an expectation for this connection is found, it gets delete from * global list then returned. */ static struct nf_conntrack_expect * @@ -432,7 +505,7 @@ find_expectation(const struct nf_conntrack_tuple *tuple) } /* delete all expectations for this conntrack */ -static void remove_expectations(struct nf_conn *ct) +void nf_ct_remove_expectations(struct nf_conn *ct) { struct nf_conntrack_expect *i, *tmp; @@ -462,7 +535,7 @@ clean_from_lists(struct nf_conn *ct) LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); /* Destroy all pending expectations */ - remove_expectations(ct); + nf_ct_remove_expectations(ct); } static void @@ -482,12 +555,11 @@ destroy_conntrack(struct nf_conntrack *nfct) /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to nf_conntrack_lock!!! -HW */ - l3proto = nf_ct_find_l3proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num); + l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num); if (l3proto && l3proto->destroy) l3proto->destroy(ct); - proto = nf_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, - ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); + proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); if (proto && proto->destroy) proto->destroy(ct); @@ -499,7 +571,7 @@ destroy_conntrack(struct nf_conntrack *nfct) * except TFTP can create an expectation on the first packet, * before connection is in the list, so we need to clean here, * too. */ - remove_expectations(ct); + nf_ct_remove_expectations(ct); /* We overload first tuple to link into unconfirmed list. 
*/ if (!nf_ct_is_confirmed(ct)) { @@ -540,7 +612,7 @@ conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i, && nf_ct_tuple_equal(tuple, &i->tuple); } -static struct nf_conntrack_tuple_hash * +struct nf_conntrack_tuple_hash * __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { @@ -575,6 +647,29 @@ nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple, return h; } +static void __nf_conntrack_hash_insert(struct nf_conn *ct, + unsigned int hash, + unsigned int repl_hash) +{ + ct->id = ++nf_conntrack_next_id; + list_prepend(&nf_conntrack_hash[hash], + &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_prepend(&nf_conntrack_hash[repl_hash], + &ct->tuplehash[IP_CT_DIR_REPLY].list); +} + +void nf_conntrack_hash_insert(struct nf_conn *ct) +{ + unsigned int hash, repl_hash; + + hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + write_lock_bh(&nf_conntrack_lock); + __nf_conntrack_hash_insert(ct, hash, repl_hash); + write_unlock_bh(&nf_conntrack_lock); +} + /* Confirm a connection given skb; places it in hash table */ int __nf_conntrack_confirm(struct sk_buff **pskb) @@ -621,10 +716,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb) /* Remove from unconfirmed list */ list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_prepend(&nf_conntrack_hash[hash], - &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - list_prepend(&nf_conntrack_hash[repl_hash], - &ct->tuplehash[IP_CT_DIR_REPLY]); + __nf_conntrack_hash_insert(ct, hash, repl_hash); /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. 
*/ @@ -708,13 +800,41 @@ static inline int helper_cmp(const struct nf_conntrack_helper *i, } static struct nf_conntrack_helper * -nf_ct_find_helper(const struct nf_conntrack_tuple *tuple) +__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) { return LIST_FIND(&helpers, helper_cmp, struct nf_conntrack_helper *, tuple); } +struct nf_conntrack_helper * +nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_helper *helper; + + /* need nf_conntrack_lock to assure that helper exists until + * try_module_get() is called */ + read_lock_bh(&nf_conntrack_lock); + + helper = __nf_ct_helper_find(tuple); + if (helper) { + /* need to increase module usage count to assure helper will + * not go away while the caller is e.g. busy putting a + * conntrack in the hash that uses the helper */ + if (!try_module_get(helper->me)) + helper = NULL; + } + + read_unlock_bh(&nf_conntrack_lock); + + return helper; +} + +void nf_ct_helper_put(struct nf_conntrack_helper *helper) +{ + module_put(helper->me); +} + static struct nf_conn * __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, @@ -744,7 +864,7 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, /* find features needed by this conntrack. 
*/ features = l3proto->get_features(orig); read_lock_bh(&nf_conntrack_lock); - if (nf_ct_find_helper(repl) != NULL) + if (__nf_ct_helper_find(repl) != NULL) features |= NF_CT_F_HELP; read_unlock_bh(&nf_conntrack_lock); @@ -794,7 +914,7 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, { struct nf_conntrack_l3proto *l3proto; - l3proto = nf_ct_find_l3proto(orig->src.l3num); + l3proto = __nf_ct_l3proto_find(orig->src.l3num); return __nf_conntrack_alloc(orig, repl, l3proto); } @@ -853,7 +973,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, nf_conntrack_get(&conntrack->master->ct_general); NF_CT_STAT_INC(expect_new); } else { - conntrack->helper = nf_ct_find_helper(&repl_tuple); + conntrack->helper = __nf_ct_helper_find(&repl_tuple); NF_CT_STAT_INC(new); } @@ -947,13 +1067,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) return NF_ACCEPT; } - l3proto = nf_ct_find_l3proto((u_int16_t)pf); + l3proto = __nf_ct_l3proto_find((u_int16_t)pf); if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) { DEBUGP("not prepared to track yet or error occured\n"); return -ret; } - proto = nf_ct_find_proto((u_int16_t)pf, protonum); + proto = __nf_ct_proto_find((u_int16_t)pf, protonum); /* It may be an special packet, error, unclean... * inverse of the return code tells to the netfilter @@ -1002,9 +1122,9 @@ int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig) { return nf_ct_invert_tuple(inverse, orig, - nf_ct_find_l3proto(orig->src.l3num), - nf_ct_find_proto(orig->src.l3num, - orig->dst.protonum)); + __nf_ct_l3proto_find(orig->src.l3num), + __nf_ct_proto_find(orig->src.l3num, + orig->dst.protonum)); } /* Would two expected things clash? 
*/ @@ -1096,6 +1216,7 @@ static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp) exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ; add_timer(&exp->timeout); + exp->id = ++nf_conntrack_expect_next_id; atomic_inc(&exp->use); NF_CT_STAT_INC(expect_create); } @@ -1176,7 +1297,7 @@ void nf_conntrack_alter_reply(struct nf_conn *conntrack, conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; if (!conntrack->master && conntrack->expecting == 0) - conntrack->helper = nf_ct_find_helper(newreply); + conntrack->helper = __nf_ct_helper_find(newreply); write_unlock_bh(&nf_conntrack_lock); } @@ -1201,6 +1322,19 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) return 0; } +struct nf_conntrack_helper * +__nf_conntrack_helper_find_byname(const char *name) +{ + struct nf_conntrack_helper *h; + + list_for_each_entry(h, &helpers, list) { + if (!strcmp(h->name, name)) + return h; + } + + return NULL; +} + static inline int unhelp(struct nf_conntrack_tuple_hash *i, const struct nf_conntrack_helper *me) { @@ -1284,6 +1418,51 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, nf_conntrack_event_cache(event, skb); } +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include +#include + +/* Generic function for tcp/udp/sctp/dccp and alike. 
This needs to be + * in ip_conntrack_core, since we don't want the protocols to autoload + * or depend on ctnetlink */ +int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple) +{ + NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), + &tuple->src.u.tcp.port); + NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), + &tuple->dst.u.tcp.port); + return 0; + +nfattr_failure: + return -1; +} + +static const size_t cta_min_proto[CTA_PROTO_MAX] = { + [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t), + [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t) +}; + +int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[], + struct nf_conntrack_tuple *t) +{ + if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1]) + return -EINVAL; + + if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + return -EINVAL; + + t->src.u.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); + t->dst.u.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); + + return 0; +} +#endif + /* Used by ipt_REJECT and ip6t_REJECT. */ void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) { @@ -1366,6 +1545,11 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size) get_order(sizeof(struct list_head) * size)); } +void nf_conntrack_flush() +{ + nf_ct_iterate_cleanup(kill_all, NULL); +} + /* Mishearing the voices in his head, our hero wonders how he's supposed to kill the mall. */ void nf_conntrack_cleanup(void) @@ -1379,7 +1563,7 @@ void nf_conntrack_cleanup(void) nf_ct_event_cache_flush(); i_see_dead_people: - nf_ct_iterate_cleanup(kill_all, NULL); + nf_conntrack_flush(); if (atomic_read(&nf_conntrack_count) != 0) { schedule(); goto i_see_dead_people; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c new file mode 100644 index 000000000000..4f2e50952a12 --- /dev/null +++ b/net/netfilter/nf_conntrack_netlink.c @@ -0,0 +1,1642 @@ +/* Connection tracking via netlink socket. 
Allows for user space + * protocol helpers and general trouble making from userspace. + * + * (C) 2001 by Jay Schulist + * (C) 2002-2005 by Harald Welte + * (C) 2003 by Patrick Mchardy + * (C) 2005 by Pablo Neira Ayuso + * + * I've reworked this stuff to use attributes instead of conntrack + * structures. 5.44 am. I need more tea. --pablo 05/07/11. + * + * Initial connection tracking via netlink development funded and + * generally made possible by Network Robots, Inc. (www.networkrobots.com) + * + * Further development of this code funded by Astaro AG (http://www.astaro.com) + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + * + * Derived from ip_conntrack_netlink.c: Port by Pablo Neira Ayuso (05/11/14) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); + +static char __initdata version[] = "0.92"; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + + +static inline int +ctnetlink_dump_tuples_proto(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_protocol *proto; + int ret = 0; + + NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); + + /* If no protocol helper is found, this function will return the + * generic protocol helper, so proto won't *ever* be NULL */ + proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum); + if (likely(proto->tuple_to_nfattr)) + ret = proto->tuple_to_nfattr(skb, tuple); + + nf_ct_proto_put(proto); + + return ret; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_tuples(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple) +{ + struct nfattr *nest_parms; + struct nf_conntrack_l3proto *l3proto; + int ret = 0; + + l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); + + nest_parms = NFA_NEST(skb, CTA_TUPLE_IP); + if (likely(l3proto->tuple_to_nfattr)) + ret = l3proto->tuple_to_nfattr(skb, tuple); + NFA_NEST_END(skb, nest_parms); + + nf_ct_l3proto_put(l3proto); + + if (unlikely(ret < 0)) + return ret; + + nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO); + ret = ctnetlink_dump_tuples_proto(skb, tuple); + NFA_NEST_END(skb, nest_parms); + + return ret; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) +{ + u_int32_t status = htonl((u_int32_t) ct->status); + NFA_PUT(skb, CTA_STATUS, sizeof(status), &status); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) +{ + long timeout_l = ct->timeout.expires - jiffies; + u_int32_t timeout; + + if (timeout_l < 0) + timeout = 0; + else + timeout = htonl(timeout_l / HZ); + + NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct) +{ + struct 
nf_conntrack_protocol *proto = nf_ct_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); + struct nfattr *nest_proto; + int ret; + + if (!proto->to_nfattr) { + nf_ct_proto_put(proto); + return 0; + } + + nest_proto = NFA_NEST(skb, CTA_PROTOINFO); + + ret = proto->to_nfattr(skb, nest_proto, ct); + + nf_ct_proto_put(proto); + + NFA_NEST_END(skb, nest_proto); + + return ret; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct) +{ + struct nfattr *nest_helper; + + if (!ct->helper) + return 0; + + nest_helper = NFA_NEST(skb, CTA_HELP); + NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name); + + if (ct->helper->to_nfattr) + ct->helper->to_nfattr(skb, ct); + + NFA_NEST_END(skb, nest_helper); + + return 0; + +nfattr_failure: + return -1; +} + +#ifdef CONFIG_NF_CT_ACCT +static inline int +ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, + enum ip_conntrack_dir dir) +{ + enum ctattr_type type = dir ? 
CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; + struct nfattr *nest_count = NFA_NEST(skb, type); + u_int32_t tmp; + + tmp = htonl(ct->counters[dir].packets); + NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); + + tmp = htonl(ct->counters[dir].bytes); + NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); + + NFA_NEST_END(skb, nest_count); + + return 0; + +nfattr_failure: + return -1; +} +#else +#define ctnetlink_dump_counters(a, b, c) (0) +#endif + +#ifdef CONFIG_NF_CONNTRACK_MARK +static inline int +ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) +{ + u_int32_t mark = htonl(ct->mark); + + NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark); + return 0; + +nfattr_failure: + return -1; +} +#else +#define ctnetlink_dump_mark(a, b) (0) +#endif + +static inline int +ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) +{ + u_int32_t id = htonl(ct->id); + NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct) +{ + u_int32_t use = htonl(atomic_read(&ct->ct_general.use)); + + NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); + return 0; + +nfattr_failure: + return -1; +} + +#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple) + +static int +ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, + int event, int nowait, + const struct nf_conn *ct) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct nfattr *nest_parms; + unsigned char *b; + + b = skb->tail; + + event |= NFNL_SUBSYS_CTNETLINK << 8; + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = (nowait && pid) ? 
NLM_F_MULTI : 0; + nfmsg->nfgen_family = + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + if (ctnetlink_dump_status(skb, ct) < 0 || + ctnetlink_dump_timeout(skb, ct) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || + ctnetlink_dump_protoinfo(skb, ct) < 0 || + ctnetlink_dump_helpinfo(skb, ct) < 0 || + ctnetlink_dump_mark(skb, ct) < 0 || + ctnetlink_dump_id(skb, ct) < 0 || + ctnetlink_dump_use(skb, ct) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +nfattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +static int ctnetlink_conntrack_event(struct notifier_block *this, + unsigned long events, void *ptr) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct nfattr *nest_parms; + struct nf_conn *ct = (struct nf_conn *)ptr; + struct sk_buff *skb; + unsigned int type; + unsigned char *b; + unsigned int flags = 0, group; + + /* ignore our fake conntrack entry */ + if (ct == &nf_conntrack_untracked) + return NOTIFY_DONE; + + if (events & IPCT_DESTROY) { + type = IPCTNL_MSG_CT_DELETE; + group = NFNLGRP_CONNTRACK_DESTROY; + } else if (events & (IPCT_NEW | IPCT_RELATED)) { + type = IPCTNL_MSG_CT_NEW; + flags = NLM_F_CREATE|NLM_F_EXCL; + /* dump everything */ + events = ~0UL; + group = NFNLGRP_CONNTRACK_NEW; + } else if (events & (IPCT_STATUS | + IPCT_PROTOINFO | + IPCT_HELPER | + IPCT_HELPINFO | + IPCT_NATINFO)) { + type = IPCTNL_MSG_CT_NEW; + group = NFNLGRP_CONNTRACK_UPDATE; + } else + return 
NOTIFY_DONE; + + /* FIXME: Check if there are any listeners before, don't hurt performance */ + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + return NOTIFY_DONE; + + b = skb->tail; + + type |= NFNL_SUBSYS_CTNETLINK << 8; + nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = flags; + nfmsg->nfgen_family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + /* NAT stuff is now a status flag */ + if ((events & IPCT_STATUS || events & IPCT_NATINFO) + && ctnetlink_dump_status(skb, ct) < 0) + goto nfattr_failure; + if (events & IPCT_REFRESH + && ctnetlink_dump_timeout(skb, ct) < 0) + goto nfattr_failure; + if (events & IPCT_PROTOINFO + && ctnetlink_dump_protoinfo(skb, ct) < 0) + goto nfattr_failure; + if (events & IPCT_HELPINFO + && ctnetlink_dump_helpinfo(skb, ct) < 0) + goto nfattr_failure; + + if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + nfnetlink_send(skb, 0, group, 0); + return NOTIFY_DONE; + +nlmsg_failure: +nfattr_failure: + kfree_skb(skb); + return NOTIFY_DONE; +} +#endif /* CONFIG_NF_CONNTRACK_EVENTS */ + +static int ctnetlink_done(struct netlink_callback *cb) +{ + DEBUGP("entered %s\n", __FUNCTION__); + return 0; +} + +static int +ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_conn *ct = NULL; + struct nf_conntrack_tuple_hash *h; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[1]; + + DEBUGP("entered %s, 
last bucket=%lu id=%u\n", __FUNCTION__, + cb->args[0], *id); + + read_lock_bh(&nf_conntrack_lock); + for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) { + list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) { + h = (struct nf_conntrack_tuple_hash *) i; + if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + ct = nf_ct_tuplehash_to_ctrack(h); + if (ct->id <= *id) + continue; + if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, + 1, ct) < 0) + goto out; + *id = ct->id; + } + } +out: + read_unlock_bh(&nf_conntrack_lock); + + DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); + + return skb->len; +} + +#ifdef CONFIG_NF_CT_ACCT +static int +ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_conn *ct = NULL; + struct nf_conntrack_tuple_hash *h; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[1]; + + DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, + cb->args[0], *id); + + write_lock_bh(&nf_conntrack_lock); + for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) { + list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) { + h = (struct nf_conntrack_tuple_hash *) i; + if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + ct = nf_ct_tuplehash_to_ctrack(h); + if (ct->id <= *id) + continue; + if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, + 1, ct) < 0) + goto out; + *id = ct->id; + + memset(&ct->counters, 0, sizeof(ct->counters)); + } + } +out: + write_unlock_bh(&nf_conntrack_lock); + + DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); + + return skb->len; +} +#endif + +static inline int +ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple) +{ + struct nfattr *tb[CTA_IP_MAX]; + struct nf_conntrack_l3proto *l3proto; + int ret = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + nfattr_parse_nested(tb, CTA_IP_MAX, attr); + + l3proto 
= nf_ct_l3proto_find_get(tuple->src.l3num); + + if (likely(l3proto->nfattr_to_tuple)) + ret = l3proto->nfattr_to_tuple(tb, tuple); + + nf_ct_l3proto_put(l3proto); + + DEBUGP("leaving\n"); + + return ret; +} + +static const size_t cta_min_proto[CTA_PROTO_MAX] = { + [CTA_PROTO_NUM-1] = sizeof(u_int8_t), +}; + +static inline int +ctnetlink_parse_tuple_proto(struct nfattr *attr, + struct nf_conntrack_tuple *tuple) +{ + struct nfattr *tb[CTA_PROTO_MAX]; + struct nf_conntrack_protocol *proto; + int ret = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + nfattr_parse_nested(tb, CTA_PROTO_MAX, attr); + + if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + return -EINVAL; + + if (!tb[CTA_PROTO_NUM-1]) + return -EINVAL; + tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]); + + proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum); + + if (likely(proto->nfattr_to_tuple)) + ret = proto->nfattr_to_tuple(tb, tuple); + + nf_ct_proto_put(proto); + + return ret; +} + +static inline int +ctnetlink_parse_tuple(struct nfattr *cda[], struct nf_conntrack_tuple *tuple, + enum ctattr_tuple type, u_int8_t l3num) +{ + struct nfattr *tb[CTA_TUPLE_MAX]; + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tuple, 0, sizeof(*tuple)); + + nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]); + + if (!tb[CTA_TUPLE_IP-1]) + return -EINVAL; + + tuple->src.l3num = l3num; + + err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple); + if (err < 0) + return err; + + if (!tb[CTA_TUPLE_PROTO-1]) + return -EINVAL; + + err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple); + if (err < 0) + return err; + + /* orig and expect tuples get DIR_ORIGINAL */ + if (type == CTA_TUPLE_REPLY) + tuple->dst.dir = IP_CT_DIR_REPLY; + else + tuple->dst.dir = IP_CT_DIR_ORIGINAL; + + NF_CT_DUMP_TUPLE(tuple); + + DEBUGP("leaving\n"); + + return 0; +} + +#ifdef CONFIG_IP_NF_NAT_NEEDED +static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = { + [CTA_PROTONAT_PORT_MIN-1] = 
sizeof(u_int16_t), + [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t), +}; + +static int ctnetlink_parse_nat_proto(struct nfattr *attr, + const struct nf_conn *ct, + struct ip_nat_range *range) +{ + struct nfattr *tb[CTA_PROTONAT_MAX]; + struct ip_nat_protocol *npt; + + DEBUGP("entered %s\n", __FUNCTION__); + + nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr); + + if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat)) + return -EINVAL; + + npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); + + if (!npt->nfattr_to_range) { + ip_nat_proto_put(npt); + return 0; + } + + /* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */ + if (npt->nfattr_to_range(tb, range) > 0) + range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; + + ip_nat_proto_put(npt); + + DEBUGP("leaving\n"); + return 0; +} + +static const size_t cta_min_nat[CTA_NAT_MAX] = { + [CTA_NAT_MINIP-1] = sizeof(u_int32_t), + [CTA_NAT_MAXIP-1] = sizeof(u_int32_t), +}; + +static inline int +ctnetlink_parse_nat(struct nfattr *cda[], + const struct nf_conn *ct, struct ip_nat_range *range) +{ + struct nfattr *tb[CTA_NAT_MAX]; + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(range, 0, sizeof(*range)); + + nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]); + + if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat)) + return -EINVAL; + + if (tb[CTA_NAT_MINIP-1]) + range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]); + + if (!tb[CTA_NAT_MAXIP-1]) + range->max_ip = range->min_ip; + else + range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); + + if (range->min_ip) + range->flags |= IP_NAT_RANGE_MAP_IPS; + + if (!tb[CTA_NAT_PROTO-1]) + return 0; + + err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range); + if (err < 0) + return err; + + DEBUGP("leaving\n"); + return 0; +} +#endif + +static inline int +ctnetlink_parse_help(struct nfattr *attr, char **helper_name) +{ + struct nfattr *tb[CTA_HELP_MAX]; + + DEBUGP("entered %s\n", __FUNCTION__); + + 
nfattr_parse_nested(tb, CTA_HELP_MAX, attr); + + if (!tb[CTA_HELP_NAME-1]) + return -EINVAL; + + *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]); + + return 0; +} + +static const size_t cta_min[CTA_MAX] = { + [CTA_STATUS-1] = sizeof(u_int32_t), + [CTA_TIMEOUT-1] = sizeof(u_int32_t), + [CTA_MARK-1] = sizeof(u_int32_t), + [CTA_USE-1] = sizeof(u_int32_t), + [CTA_ID-1] = sizeof(u_int32_t) +}; + +static int +ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_tuple tuple; + struct nf_conn *ct; + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int8_t u3 = nfmsg->nfgen_family; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (nfattr_bad_size(cda, CTA_MAX, cta_min)) + return -EINVAL; + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); + else { + /* Flush the whole table */ + nf_conntrack_flush(); + return 0; + } + + if (err < 0) + return err; + + h = nf_conntrack_find_get(&tuple, NULL); + if (!h) { + DEBUGP("tuple not found in conntrack hash\n"); + return -ENOENT; + } + + ct = nf_ct_tuplehash_to_ctrack(h); + + if (cda[CTA_ID-1]) { + u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1])); + if (ct->id != id) { + nf_ct_put(ct); + return -ENOENT; + } + } + if (del_timer(&ct->timeout)) + ct->timeout.function((unsigned long)ct); + + nf_ct_put(ct); + DEBUGP("leaving\n"); + + return 0; +} + +static int +ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_tuple tuple; + struct nf_conn *ct; + struct sk_buff *skb2 = NULL; + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int8_t u3 = nfmsg->nfgen_family; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if 
(nlh->nlmsg_flags & NLM_F_DUMP) { + u32 rlen; + + if (nfmsg->nfgen_family != AF_INET) + return -EAFNOSUPPORT; + + if (NFNL_MSG_TYPE(nlh->nlmsg_type) == + IPCTNL_MSG_CT_GET_CTRZERO) { +#ifdef CONFIG_NF_CT_ACCT + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table_w, + ctnetlink_done)) != 0) + return -EINVAL; +#else + return -ENOTSUPP; +#endif + } else { + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table, + ctnetlink_done)) != 0) + return -EINVAL; + } + + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + skb_pull(skb, rlen); + return 0; + } + + if (nfattr_bad_size(cda, CTA_MAX, cta_min)) + return -EINVAL; + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); + else + return -EINVAL; + + if (err < 0) + return err; + + h = nf_conntrack_find_get(&tuple, NULL); + if (!h) { + DEBUGP("tuple not found in conntrack hash"); + return -ENOENT; + } + DEBUGP("tuple found\n"); + ct = nf_ct_tuplehash_to_ctrack(h); + + err = -ENOMEM; + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) { + nf_ct_put(ct); + return -ENOMEM; + } + NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; + + err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, 1, ct); + nf_ct_put(ct); + if (err <= 0) + goto free; + + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (err < 0) + goto out; + + DEBUGP("leaving\n"); + return 0; + +free: + kfree_skb(skb2); +out: + return err; +} + +static inline int +ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[]) +{ + unsigned long d; + unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1])); + d = ct->status ^ status; + + if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) + /* unchangeable */ + return -EINVAL; + + if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) + /* 
SEEN_REPLY bit can only be set */ + return -EINVAL; + + + if (d & IPS_ASSURED && !(status & IPS_ASSURED)) + /* ASSURED bit can only be set */ + return -EINVAL; + + if (cda[CTA_NAT-1]) { +#ifndef CONFIG_IP_NF_NAT_NEEDED + return -EINVAL; +#else + unsigned int hooknum; + struct ip_nat_range range; + + if (ctnetlink_parse_nat(cda, ct, &range) < 0) + return -EINVAL; + + DEBUGP("NAT: %u.%u.%u.%u-%u.%u.%u.%u:%u-%u\n", + NIPQUAD(range.min_ip), NIPQUAD(range.max_ip), + htons(range.min.all), htons(range.max.all)); + + /* This is tricky but it works. ip_nat_setup_info needs the + * hook number as parameter, so let's do the correct + * conversion and run away */ + if (status & IPS_SRC_NAT_DONE) + hooknum = NF_IP_POST_ROUTING; /* IP_NAT_MANIP_SRC */ + else if (status & IPS_DST_NAT_DONE) + hooknum = NF_IP_PRE_ROUTING; /* IP_NAT_MANIP_DST */ + else + return -EINVAL; /* Missing NAT flags */ + + DEBUGP("NAT status: %lu\n", + status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); + + if (ip_nat_initialized(ct, HOOK2MANIP(hooknum))) + return -EEXIST; + ip_nat_setup_info(ct, &range, hooknum); + + DEBUGP("NAT status after setup_info: %lu\n", + ct->status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); +#endif + } + + /* Be careful here, modifying NAT bits can screw up things, + * so don't let users modify them directly if they don't pass + * ip_nat_range. 
*/ + ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK); + return 0; +} + + +static inline int +ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[]) +{ + struct nf_conntrack_helper *helper; + char *helpname; + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + /* don't change helper of sibling connections */ + if (ct->master) + return -EINVAL; + + err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname); + if (err < 0) + return err; + + helper = __nf_conntrack_helper_find_byname(helpname); + if (!helper) { + if (!strcmp(helpname, "")) + helper = NULL; + else + return -EINVAL; + } + + if (ct->helper) { + if (!helper) { + /* we had a helper before ... */ + nf_ct_remove_expectations(ct); + ct->helper = NULL; + } else { + /* need to zero data of old helper */ + memset(&ct->help, 0, sizeof(ct->help)); + } + } + + ct->helper = helper; + + return 0; +} + +static inline int +ctnetlink_change_timeout(struct nf_conn *ct, struct nfattr *cda[]) +{ + u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + + if (!del_timer(&ct->timeout)) + return -ETIME; + + ct->timeout.expires = jiffies + timeout * HZ; + add_timer(&ct->timeout); + + return 0; +} + +static inline int +ctnetlink_change_protoinfo(struct nf_conn *ct, struct nfattr *cda[]) +{ + struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1]; + struct nf_conntrack_protocol *proto; + u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; + u_int16_t l3num = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + int err = 0; + + nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr); + + proto = nf_ct_proto_find_get(l3num, npt); + + if (proto->from_nfattr) + err = proto->from_nfattr(tb, ct); + nf_ct_proto_put(proto); + + return err; +} + +static int +ctnetlink_change_conntrack(struct nf_conn *ct, struct nfattr *cda[]) +{ + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (cda[CTA_HELP-1]) { + err = ctnetlink_change_helper(ct, cda); + if (err < 0) + return err; 
+ } + + if (cda[CTA_TIMEOUT-1]) { + err = ctnetlink_change_timeout(ct, cda); + if (err < 0) + return err; + } + + if (cda[CTA_STATUS-1]) { + err = ctnetlink_change_status(ct, cda); + if (err < 0) + return err; + } + + if (cda[CTA_PROTOINFO-1]) { + err = ctnetlink_change_protoinfo(ct, cda); + if (err < 0) + return err; + } + +#if defined(CONFIG_IP_NF_CONNTRACK_MARK) + if (cda[CTA_MARK-1]) + ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); +#endif + + DEBUGP("all done\n"); + return 0; +} + +static int +ctnetlink_create_conntrack(struct nfattr *cda[], + struct nf_conntrack_tuple *otuple, + struct nf_conntrack_tuple *rtuple) +{ + struct nf_conn *ct; + int err = -EINVAL; + + DEBUGP("entered %s\n", __FUNCTION__); + + ct = nf_conntrack_alloc(otuple, rtuple); + if (ct == NULL || IS_ERR(ct)) + return -ENOMEM; + + if (!cda[CTA_TIMEOUT-1]) + goto err; + ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + + ct->timeout.expires = jiffies + ct->timeout.expires * HZ; + ct->status |= IPS_CONFIRMED; + + err = ctnetlink_change_status(ct, cda); + if (err < 0) + goto err; + + if (cda[CTA_PROTOINFO-1]) { + err = ctnetlink_change_protoinfo(ct, cda); + if (err < 0) + goto err; /* ct is not hashed yet: free it instead of leaking it */ + } + +#if defined(CONFIG_IP_NF_CONNTRACK_MARK) + if (cda[CTA_MARK-1]) + ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); +#endif + + ct->helper = nf_ct_helper_find_get(rtuple); + + add_timer(&ct->timeout); + nf_conntrack_hash_insert(ct); + + if (ct->helper) + nf_ct_helper_put(ct->helper); + + DEBUGP("conntrack with id %u inserted\n", ct->id); + return 0; + +err: + nf_conntrack_free(ct); + return err; +} + +static int +ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct nf_conntrack_tuple otuple, rtuple; + struct nf_conntrack_tuple_hash *h = NULL; + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int8_t u3 = nfmsg->nfgen_family; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if 
(nfattr_bad_size(cda, CTA_MAX, cta_min)) + return -EINVAL; + + if (cda[CTA_TUPLE_ORIG-1]) { + err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3); + if (err < 0) + return err; + } + + if (cda[CTA_TUPLE_REPLY-1]) { + err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, u3); + if (err < 0) + return err; + } + + write_lock_bh(&nf_conntrack_lock); + if (cda[CTA_TUPLE_ORIG-1]) + h = __nf_conntrack_find(&otuple, NULL); + else if (cda[CTA_TUPLE_REPLY-1]) + h = __nf_conntrack_find(&rtuple, NULL); + + if (h == NULL) { + write_unlock_bh(&nf_conntrack_lock); + DEBUGP("no such conntrack, create new\n"); + err = -ENOENT; + if (nlh->nlmsg_flags & NLM_F_CREATE) + err = ctnetlink_create_conntrack(cda, &otuple, &rtuple); + return err; + } + /* implicit 'else' */ + + /* we only allow nat config for new conntracks */ + if (cda[CTA_NAT-1]) { + err = -EINVAL; + goto out_unlock; + } + + /* We manipulate the conntrack inside the global conntrack table lock, + * so there's no need to increase the refcount */ + DEBUGP("conntrack found\n"); + err = -EEXIST; + if (!(nlh->nlmsg_flags & NLM_F_EXCL)) + err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), cda); + +out_unlock: + write_unlock_bh(&nf_conntrack_lock); + return err; +} + +/*********************************************************************** + * EXPECT + ***********************************************************************/ + +static inline int +ctnetlink_exp_dump_tuple(struct sk_buff *skb, + const struct nf_conntrack_tuple *tuple, + enum ctattr_expect type) +{ + struct nfattr *nest_parms = NFA_NEST(skb, type); + + if (ctnetlink_dump_tuples(skb, tuple) < 0) + goto nfattr_failure; + + NFA_NEST_END(skb, nest_parms); + + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_exp_dump_expect(struct sk_buff *skb, + const struct nf_conntrack_expect *exp) +{ + struct nf_conn *master = exp->master; + u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ); + u_int32_t id = 
htonl(exp->id); + + if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) + goto nfattr_failure; + if (ctnetlink_exp_dump_tuple(skb, &exp->mask, CTA_EXPECT_MASK) < 0) + goto nfattr_failure; + if (ctnetlink_exp_dump_tuple(skb, + &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + CTA_EXPECT_MASTER) < 0) + goto nfattr_failure; + + NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); + NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); + + return 0; + +nfattr_failure: + return -1; +} + +static int +ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, + int event, + int nowait, + const struct nf_conntrack_expect *exp) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned char *b; + + b = skb->tail; + + event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; + nfmsg->nfgen_family = exp->tuple.src.l3num; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (ctnetlink_exp_dump_expect(skb, exp) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +nfattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +static int ctnetlink_expect_event(struct notifier_block *this, + unsigned long events, void *ptr) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr; + struct sk_buff *skb; + unsigned int type; + unsigned char *b; + int flags = 0; + + if (events & IPEXP_NEW) { + type = IPCTNL_MSG_EXP_NEW; + flags = NLM_F_CREATE|NLM_F_EXCL; + } else + return NOTIFY_DONE; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + return NOTIFY_DONE; + + b = skb->tail; + + type |= NFNL_SUBSYS_CTNETLINK_EXP << 8; /* IPCTNL_MSG_EXP_* types belong to the EXP subsystem, as in ctnetlink_exp_fill_info() */ + nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = flags; + nfmsg->nfgen_family 
= exp->tuple.src.l3num; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (ctnetlink_exp_dump_expect(skb, exp) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0); + return NOTIFY_DONE; + +nlmsg_failure: +nfattr_failure: + kfree_skb(skb); + return NOTIFY_DONE; +} +#endif + +static int +ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_conntrack_expect *exp = NULL; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[0]; + + DEBUGP("entered %s, last id=%u\n", __FUNCTION__, *id); /* *id is u_int32_t, so %u rather than %llu */ + + read_lock_bh(&nf_conntrack_lock); + list_for_each_prev(i, &nf_conntrack_expect_list) { + exp = (struct nf_conntrack_expect *) i; + if (exp->id <= *id) + continue; + if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_EXP_NEW, + 1, exp) < 0) + goto out; + *id = exp->id; + } +out: + read_unlock_bh(&nf_conntrack_lock); + + DEBUGP("leaving, last id=%u\n", *id); + + return skb->len; +} + +static const size_t cta_min_exp[CTA_EXPECT_MAX] = { + [CTA_EXPECT_TIMEOUT-1] = sizeof(u_int32_t), + [CTA_EXPECT_ID-1] = sizeof(u_int32_t) +}; + +static int +ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct nf_conntrack_tuple tuple; + struct nf_conntrack_expect *exp; + struct sk_buff *skb2; + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int8_t u3 = nfmsg->nfgen_family; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) + return -EINVAL; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + u32 rlen; + + if (nfmsg->nfgen_family != AF_INET) + return -EAFNOSUPPORT; + + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_exp_dump_table, + ctnetlink_done)) != 0) + return -EINVAL; + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + skb_pull(skb, rlen); + return 0; + } + + 
if (cda[CTA_EXPECT_MASTER-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); + else + return -EINVAL; + + if (err < 0) + return err; + + exp = nf_conntrack_expect_find(&tuple); + if (!exp) + return -ENOENT; + + if (cda[CTA_EXPECT_ID-1]) { + u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + if (exp->id != ntohl(id)) { + nf_conntrack_expect_put(exp); + return -ENOENT; + } + } + + err = -ENOMEM; + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + goto out; + NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; + + err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, + 1, exp); + if (err <= 0) + goto free; + + nf_conntrack_expect_put(exp); + + return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + +free: + kfree_skb(skb2); +out: + nf_conntrack_expect_put(exp); + return err; +} + +static int +ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct nf_conntrack_expect *exp, *tmp; + struct nf_conntrack_tuple tuple; + struct nf_conntrack_helper *h; + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int8_t u3 = nfmsg->nfgen_family; + int err; + + if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) + return -EINVAL; + + if (cda[CTA_EXPECT_TUPLE-1]) { + /* delete a single expect by tuple */ + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + if (err < 0) + return err; + + /* bump usage count to 2 */ + exp = nf_conntrack_expect_find(&tuple); + if (!exp) + return -ENOENT; + + if (cda[CTA_EXPECT_ID-1]) { + u_int32_t id = + *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + if (exp->id != ntohl(id)) { + nf_conntrack_expect_put(exp); + return -ENOENT; + } + } + + /* after list removal, usage count == 1 */ + nf_conntrack_unexpect_related(exp); + /* have to put what we 'get' above. 
+ * after this line usage count == 0 */ + nf_conntrack_expect_put(exp); + } else if (cda[CTA_EXPECT_HELP_NAME-1]) { + char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]); + + /* delete all expectations for this helper */ + write_lock_bh(&nf_conntrack_lock); + h = __nf_conntrack_helper_find_byname(name); + if (!h) { + write_unlock_bh(&nf_conntrack_lock); + return -EINVAL; + } + list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, + list) { + if (exp->master->helper == h + && del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_conntrack_expect_put(exp); + } + } + write_unlock_bh(&nf_conntrack_lock); + } else { + /* This basically means we have to flush everything*/ + write_lock_bh(&nf_conntrack_lock); + list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, + list) { + if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_conntrack_expect_put(exp); + } + } + write_unlock_bh(&nf_conntrack_lock); + } + + return 0; +} +static int +ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nfattr *cda[]) +{ + return -EOPNOTSUPP; +} + +static int +ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3) +{ + struct nf_conntrack_tuple tuple, mask, master_tuple; + struct nf_conntrack_tuple_hash *h = NULL; + struct nf_conntrack_expect *exp; + struct nf_conn *ct; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + /* caller guarantees that those three CTA_EXPECT_* exist */ + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + if (err < 0) + return err; + err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3); + if (err < 0) + return err; + err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3); + if (err < 0) + return err; + + /* Look for master conntrack of this expectation */ + h = nf_conntrack_find_get(&master_tuple, NULL); + if (!h) + return -ENOENT; + ct = nf_ct_tuplehash_to_ctrack(h); + + if (!ct->helper) { + /* such conntrack hasn't got any helper, abort */ + err = -EINVAL; + goto out; 
+ } + + exp = nf_conntrack_expect_alloc(ct); + if (!exp) { + err = -ENOMEM; + goto out; + } + + exp->expectfn = NULL; + exp->flags = 0; + exp->master = ct; + memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple)); + memcpy(&exp->mask, &mask, sizeof(struct nf_conntrack_tuple)); + + err = nf_conntrack_expect_related(exp); + nf_conntrack_expect_put(exp); + +out: + nf_ct_put(nf_ct_tuplehash_to_ctrack(h)); + return err; +} + +static int +ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct nf_conntrack_tuple tuple; + struct nf_conntrack_expect *exp; + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int8_t u3 = nfmsg->nfgen_family; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) + return -EINVAL; + + if (!cda[CTA_EXPECT_TUPLE-1] + || !cda[CTA_EXPECT_MASK-1] + || !cda[CTA_EXPECT_MASTER-1]) + return -EINVAL; + + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + if (err < 0) + return err; + + write_lock_bh(&nf_conntrack_lock); + exp = __nf_conntrack_expect_find(&tuple); + + if (!exp) { + write_unlock_bh(&nf_conntrack_lock); + err = -ENOENT; + if (nlh->nlmsg_flags & NLM_F_CREATE) + err = ctnetlink_create_expect(cda, u3); + return err; + } + + err = -EEXIST; + if (!(nlh->nlmsg_flags & NLM_F_EXCL)) + err = ctnetlink_change_expect(exp, cda); + write_unlock_bh(&nf_conntrack_lock); + + DEBUGP("leaving\n"); + + return err; +} + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +static struct notifier_block ctnl_notifier = { + .notifier_call = ctnetlink_conntrack_event, +}; + +static struct notifier_block ctnl_notifier_exp = { + .notifier_call = ctnetlink_expect_event, +}; +#endif + +static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { + [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack, + .attr_count = CTA_MAX, }, + [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack, + .attr_count = CTA_MAX, }, + 
[IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack, + .attr_count = CTA_MAX, }, + [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack, + .attr_count = CTA_MAX, }, +}; + +static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { + [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, + .attr_count = CTA_EXPECT_MAX, }, + [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect, + .attr_count = CTA_EXPECT_MAX, }, + [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, + .attr_count = CTA_EXPECT_MAX, }, +}; + +static struct nfnetlink_subsystem ctnl_subsys = { + .name = "conntrack", + .subsys_id = NFNL_SUBSYS_CTNETLINK, + .cb_count = IPCTNL_MSG_MAX, + .cb = ctnl_cb, +}; + +static struct nfnetlink_subsystem ctnl_exp_subsys = { + .name = "conntrack_expect", + .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP, + .cb_count = IPCTNL_MSG_EXP_MAX, + .cb = ctnl_exp_cb, +}; + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK); + +static int __init ctnetlink_init(void) +{ + int ret; + + printk("ctnetlink v%s: registering with nfnetlink.\n", version); + ret = nfnetlink_subsys_register(&ctnl_subsys); + if (ret < 0) { + printk("ctnetlink_init: cannot register with nfnetlink.\n"); + goto err_out; + } + + ret = nfnetlink_subsys_register(&ctnl_exp_subsys); + if (ret < 0) { + printk("ctnetlink_init: cannot register exp with nfnetlink.\n"); + goto err_unreg_subsys; + } + +#ifdef CONFIG_NF_CONNTRACK_EVENTS + ret = nf_conntrack_register_notifier(&ctnl_notifier); + if (ret < 0) { + printk("ctnetlink_init: cannot register notifier.\n"); + goto err_unreg_exp_subsys; + } + + ret = nf_conntrack_expect_register_notifier(&ctnl_notifier_exp); + if (ret < 0) { + printk("ctnetlink_init: cannot expect register notifier.\n"); + goto err_unreg_notifier; + } +#endif + + return 0; + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +err_unreg_notifier: + nf_conntrack_unregister_notifier(&ctnl_notifier); +err_unreg_exp_subsys: + nfnetlink_subsys_unregister(&ctnl_exp_subsys); +#endif +err_unreg_subsys: + 
nfnetlink_subsys_unregister(&ctnl_subsys); +err_out: + return ret; +} + +static void __exit ctnetlink_exit(void) +{ + printk("ctnetlink: unregistering from nfnetlink.\n"); + +#ifdef CONFIG_NF_CONNTRACK_EVENTS + nf_conntrack_expect_unregister_notifier(&ctnl_notifier_exp); /* pairs with nf_conntrack_expect_register_notifier() in ctnetlink_init() */ + nf_conntrack_unregister_notifier(&ctnl_notifier); +#endif + + nfnetlink_subsys_unregister(&ctnl_exp_subsys); + nfnetlink_subsys_unregister(&ctnl_subsys); + return; +} + +module_init(ctnetlink_init); +module_exit(ctnetlink_exit); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 6035633d8225..6167137a5cb5 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1147,6 +1147,63 @@ static int tcp_new(struct nf_conn *conntrack, receiver->td_scale); return 1; } + +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include +#include + +static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, + const struct nf_conn *ct) +{ + struct nfattr *nest_parms; + + read_lock_bh(&tcp_lock); + nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); + NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), + &ct->proto.tcp.state); + read_unlock_bh(&tcp_lock); + + NFA_NEST_END(skb, nest_parms); + + return 0; + +nfattr_failure: + read_unlock_bh(&tcp_lock); + return -1; +} + +static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = { + [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t), +}; + +static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct) +{ + struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; + struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; + + /* updates could not contain anything about the private + * protocol info, in that case skip the parsing */ + if (!attr) + return 0; + + nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr); + + if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp)) + return -EINVAL; + + if (!tb[CTA_PROTOINFO_TCP_STATE-1]) + return -EINVAL; + + 
write_lock_bh(&tcp_lock); + ct->proto.tcp.state = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); + write_unlock_bh(&tcp_lock); + + return 0; +} +#endif struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 = { @@ -1160,6 +1217,13 @@ struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 = .packet = tcp_packet, .new = tcp_new, .error = tcp_error4, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .to_nfattr = tcp_to_nfattr, + .from_nfattr = nfattr_to_tcp, + .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, +#endif }; struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 = @@ -1174,6 +1238,13 @@ struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 = .packet = tcp_packet, .new = tcp_new, .error = tcp_error6, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .to_nfattr = tcp_to_nfattr, + .from_nfattr = nfattr_to_tcp, + .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, +#endif }; EXPORT_SYMBOL(nf_conntrack_protocol_tcp4); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 3cae7ce420dd..1a592a556182 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -196,6 +196,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_udp4 = .packet = udp_packet, .new = udp_new, .error = udp_error4, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, +#endif }; struct nf_conntrack_protocol nf_conntrack_protocol_udp6 = @@ -210,6 +215,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_udp6 = .packet = udp_packet, .new = udp_new, .error = udp_error6, +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = 
nf_ct_port_nfattr_to_tuple, +#endif }; EXPORT_SYMBOL(nf_conntrack_protocol_udp4); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 5af381f9fe3d..d17e42b28c79 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -161,14 +161,14 @@ static int ct_seq_show(struct seq_file *s, void *v) if (NF_CT_DIRECTION(hash)) return 0; - l3proto = nf_ct_find_l3proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.src.l3num); + l3proto = __nf_ct_l3proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src.l3num); NF_CT_ASSERT(l3proto); - proto = nf_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.src.l3num, - conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.dst.protonum); + proto = __nf_ct_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src.l3num, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); NF_CT_ASSERT(proto); if (seq_printf(s, "%-8s %u %-8s %u %ld ", @@ -307,9 +307,9 @@ static int exp_seq_show(struct seq_file *s, void *v) expect->tuple.src.l3num, expect->tuple.dst.protonum); print_tuple(s, &expect->tuple, - nf_ct_find_l3proto(expect->tuple.src.l3num), - nf_ct_find_proto(expect->tuple.src.l3num, - expect->tuple.dst.protonum)); + __nf_ct_l3proto_find(expect->tuple.src.l3num), + __nf_ct_proto_find(expect->tuple.src.l3num, + expect->tuple.dst.protonum)); return seq_putc(s, '\n'); } @@ -847,7 +847,11 @@ EXPORT_SYMBOL(nf_conntrack_helper_unregister); EXPORT_SYMBOL(nf_ct_iterate_cleanup); EXPORT_SYMBOL(__nf_ct_refresh_acct); EXPORT_SYMBOL(nf_ct_protos); -EXPORT_SYMBOL(nf_ct_find_proto); +EXPORT_SYMBOL(__nf_ct_proto_find); +EXPORT_SYMBOL(nf_ct_proto_find_get); +EXPORT_SYMBOL(nf_ct_proto_put); +EXPORT_SYMBOL(nf_ct_l3proto_find_get); +EXPORT_SYMBOL(nf_ct_l3proto_put); EXPORT_SYMBOL(nf_ct_l3protos); EXPORT_SYMBOL(nf_conntrack_expect_alloc); EXPORT_SYMBOL(nf_conntrack_expect_put); @@ -867,3 +871,21 @@ EXPORT_SYMBOL(nf_ct_get_tuple); 
EXPORT_SYMBOL(nf_ct_invert_tuple); EXPORT_SYMBOL(nf_conntrack_in); EXPORT_SYMBOL(__nf_conntrack_attach); +EXPORT_SYMBOL(nf_conntrack_alloc); +EXPORT_SYMBOL(nf_conntrack_free); +EXPORT_SYMBOL(nf_conntrack_flush); +EXPORT_SYMBOL(nf_ct_remove_expectations); +EXPORT_SYMBOL(nf_ct_helper_find_get); +EXPORT_SYMBOL(nf_ct_helper_put); +EXPORT_SYMBOL(__nf_conntrack_helper_find_byname); +EXPORT_SYMBOL(__nf_conntrack_find); +EXPORT_SYMBOL(nf_ct_unlink_expect); +EXPORT_SYMBOL(nf_conntrack_hash_insert); +EXPORT_SYMBOL(__nf_conntrack_expect_find); +EXPORT_SYMBOL(nf_conntrack_expect_find); +EXPORT_SYMBOL(nf_conntrack_expect_list); +#if defined(CONFIG_NF_CT_NETLINK) || \ + defined(CONFIG_NF_CT_NETLINK_MODULE) +EXPORT_SYMBOL(nf_ct_port_tuple_to_nfattr); +EXPORT_SYMBOL(nf_ct_port_nfattr_to_tuple); +#endif -- cgit v1.2.3-71-gd317 From a9b305c4e56f97d6a2ae4f21691bc13797498caf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 5 Jan 2006 12:20:02 -0800 Subject: [NETFILTER]: ctnetlink: Fix dumping of helper name Properly dump the helper name instead of internal kernel data. Based on patch by Marcus Sundberg . Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 2 -- net/ipv4/netfilter/ip_conntrack_netlink.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index b8e9a5b6fb1e..668ec946c8e2 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -131,6 +131,4 @@ enum ctattr_help { }; #define CTA_HELP_MAX (__CTA_HELP_MAX - 1) -#define CTA_HELP_MAXNAMESIZE 32 - #endif /* _IPCONNTRACK_NETLINK_H */ diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 703f2d2e3464..c9ebbe0d2d9c 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -161,7 +161,7 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct) return 0; nest_helper = NFA_NEST(skb, CTA_HELP); - NFA_PUT(skb, CTA_HELP_NAME, CTA_HELP_MAXNAMESIZE, &ct->helper->name); + NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name); if (ct->helper->to_nfattr) ct->helper->to_nfattr(skb, ct); -- cgit v1.2.3-71-gd317 From b777e0ce7437a0e788e2aeb42aca9af2cce1f2e1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 5 Jan 2006 12:21:16 -0800 Subject: [NETFILTER]: make ipv6_find_hdr() find transport protocol header The original ipv6_find_hdr() finds the specified header in IPv6 packets. This makes it possible to get transport header so that we can kill similar loop in ip6_match_packet(). Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv6/ip6_tables.h | 2 +- net/ipv6/netfilter/ip6_tables.c | 106 +++++++++++------------------- net/ipv6/netfilter/ip6t_ah.c | 2 +- net/ipv6/netfilter/ip6t_dst.c | 4 +- net/ipv6/netfilter/ip6t_esp.c | 2 +- net/ipv6/netfilter/ip6t_frag.c | 2 +- net/ipv6/netfilter/ip6t_hbh.c | 4 +- net/ipv6/netfilter/ip6t_rt.c | 2 +- 8 files changed, 49 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 2efc046d9e94..a291cb76ef18 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -474,7 +474,7 @@ extern unsigned int ip6t_do_table(struct sk_buff **pskb, extern int ip6t_ext_hdr(u8 nexthdr); /* find specified header and get offset to it */ extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, - u8 target); + int target, unsigned short *fragoff); #define IP6T_ALIGN(s) (((s) + (__alignof__(struct ip6t_entry)-1)) & ~(__alignof__(struct ip6t_entry)-1)) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index ea43ef1d94a7..13b1a525b92c 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -205,69 +205,21 @@ ip6_packet_match(const struct sk_buff *skb, /* look for the desired protocol header */ if((ip6info->flags & IP6T_F_PROTO)) { - u_int8_t currenthdr = ipv6->nexthdr; - struct ipv6_opt_hdr _hdr, *hp; - u_int16_t ptr; /* Header offset in skb */ - u_int16_t hdrlen; /* Header */ - u_int16_t _fragoff = 0, *fp = NULL; - - ptr = IPV6_HDR_LEN; - - while (ip6t_ext_hdr(currenthdr)) { - /* Is there enough space for the next ext header? 
*/ - if (skb->len - ptr < IPV6_OPTHDR_LEN) - return 0; - - /* NONE or ESP: there isn't protocol part */ - /* If we want to count these packets in '-p all', - * we will change the return 0 to 1*/ - if ((currenthdr == IPPROTO_NONE) || - (currenthdr == IPPROTO_ESP)) - break; + int protohdr; + unsigned short _frag_off; - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Size calculation */ - if (currenthdr == IPPROTO_FRAGMENT) { - fp = skb_header_pointer(skb, - ptr+offsetof(struct frag_hdr, - frag_off), - sizeof(_fragoff), - &_fragoff); - if (fp == NULL) - return 0; - - _fragoff = ntohs(*fp) & ~0x7; - hdrlen = 8; - } else if (currenthdr == IPPROTO_AH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - currenthdr = hp->nexthdr; - ptr += hdrlen; - /* ptr is too large */ - if ( ptr > skb->len ) - return 0; - if (_fragoff) { - if (ip6t_ext_hdr(currenthdr)) - return 0; - break; - } - } - - *protoff = ptr; - *fragoff = _fragoff; + protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off); + if (protohdr < 0) + return 0; - /* currenthdr contains the protocol header */ + *fragoff = _frag_off; dprintf("Packet protocol %hi ?= %s%hi.\n", - currenthdr, + protohdr, ip6info->invflags & IP6T_INV_PROTO ? "!":"", ip6info->proto); - if (ip6info->proto == currenthdr) { + if (ip6info->proto == protohdr) { if(ip6info->invflags & IP6T_INV_PROTO) { return 0; } @@ -2098,26 +2050,39 @@ static void __exit fini(void) } /* - * find specified header up to transport protocol header. - * If found target header, the offset to the header is set to *offset - * and return 0. otherwise, return -1. + * find the offset to specified header or the protocol number of last header + * if target < 0. "last header" is transport protocol header, ESP, or + * "No next header". + * + * If target header is found, its offset is set in *offset and return protocol + * number. Otherwise, return -1. 
+ * + * Note that non-1st fragment is special case that "the protocol number + * of last header" is "next header" field in Fragment header. In this case, + * *offset is meaningless and fragment offset is stored in *fragoff if fragoff + * isn't NULL. * - * Notes: - non-1st Fragment Header isn't skipped. - * - ESP header isn't skipped. - * - The target header may be trancated. */ -int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) +int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, + int target, unsigned short *fragoff) { unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data; u8 nexthdr = skb->nh.ipv6h->nexthdr; unsigned int len = skb->len - start; + if (fragoff) + *fragoff = 0; + while (nexthdr != target) { struct ipv6_opt_hdr _hdr, *hp; unsigned int hdrlen; - if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) + if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { + if (target < 0) + break; return -1; + } + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (hp == NULL) return -1; @@ -2131,8 +2096,17 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) if (fp == NULL) return -1; - if (ntohs(*fp) & ~0x7) + _frag_off = ntohs(*fp) & ~0x7; + if (_frag_off) { + if (target < 0 && + ((!ipv6_ext_hdr(hp->nexthdr)) || + nexthdr == NEXTHDR_NONE)) { + if (fragoff) + *fragoff = _frag_off; + return hp->nexthdr; + } return -1; + } hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) hdrlen = (hp->hdrlen + 2) << 2; @@ -2145,7 +2119,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) } *offset = start; - return 0; + return nexthdr; } EXPORT_SYMBOL(ip6t_register_table); diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 268918d5deea..f5c1a7ff4a1f 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -54,7 +54,7 @@ match(const struct sk_buff *skb, unsigned int ptr; unsigned int hdrlen = 0; - if 
(ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL) < 0) return 0; ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c index c450a635e54b..48cf5f9efc95 100644 --- a/net/ipv6/netfilter/ip6t_dst.c +++ b/net/ipv6/netfilter/ip6t_dst.c @@ -71,9 +71,9 @@ match(const struct sk_buff *skb, unsigned int optlen; #if HOPBYHOP - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0) #else - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0) #endif return 0; diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c index 65937de1b58c..e1828f6d0a40 100644 --- a/net/ipv6/netfilter/ip6t_esp.c +++ b/net/ipv6/netfilter/ip6t_esp.c @@ -56,7 +56,7 @@ match(const struct sk_buff *skb, /* Make sure this isn't an evil packet */ /*DEBUGP("ipv6_esp entered \n");*/ - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP, NULL) < 0) return 0; eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 085d5f8eea29..d1549b268669 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -52,7 +52,7 @@ match(const struct sk_buff *skb, const struct ip6t_frag *fraginfo = matchinfo; unsigned int ptr; - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL) < 0) return 0; fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 1d09485111d0..e3bc8e2700e7 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -71,9 +71,9 @@ match(const struct sk_buff *skb, unsigned int optlen; #if HOPBYHOP - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) + if (ipv6_find_hdr(skb, &ptr, 
NEXTHDR_HOP, NULL) < 0) #else - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0) #endif return 0; diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index beb2fd5cebbb..c1e770e45543 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -58,7 +58,7 @@ match(const struct sk_buff *skb, unsigned int ret = 0; struct in6_addr *ap, _addr; - if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL) < 0) return 0; rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); -- cgit v1.2.3-71-gd317 From 22dea562bb56dbc3430c8f23f60ccd38527b1f5a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 5 Jan 2006 12:21:34 -0800 Subject: [NETFILTER]: Export ip6_masked_addrcmp, don't pass IPv6 addresses on stack Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6/ip6_tables.h | 4 ++++ net/ipv6/netfilter/ip6_tables.c | 18 ++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index a291cb76ef18..c163ba31aab7 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -476,6 +476,10 @@ extern int ip6t_ext_hdr(u8 nexthdr); extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, int target, unsigned short *fragoff); +extern int ip6_masked_addrcmp(const struct in6_addr *addr1, + const struct in6_addr *mask, + const struct in6_addr *addr2); + #define IP6T_ALIGN(s) (((s) + (__alignof__(struct ip6t_entry)-1)) & ~(__alignof__(struct ip6t_entry)-1)) #endif /*__KERNEL__*/ diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 13b1a525b92c..925b42d48347 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -119,13 +119,14 @@ static 
LIST_HEAD(ip6t_tables); #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0) #endif -static int ip6_masked_addrcmp(struct in6_addr addr1, struct in6_addr mask, - struct in6_addr addr2) +int +ip6_masked_addrcmp(const struct in6_addr *addr1, const struct in6_addr *mask, + const struct in6_addr *addr2) { int i; for( i = 0; i < 16; i++){ - if((addr1.s6_addr[i] & mask.s6_addr[i]) != - (addr2.s6_addr[i] & mask.s6_addr[i])) + if((addr1->s6_addr[i] & mask->s6_addr[i]) != + (addr2->s6_addr[i] & mask->s6_addr[i])) return 1; } return 0; @@ -159,10 +160,10 @@ ip6_packet_match(const struct sk_buff *skb, #define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg)) - if (FWINV(ip6_masked_addrcmp(ipv6->saddr,ip6info->smsk,ip6info->src), - IP6T_INV_SRCIP) - || FWINV(ip6_masked_addrcmp(ipv6->daddr,ip6info->dmsk,ip6info->dst), - IP6T_INV_DSTIP)) { + if (FWINV(ip6_masked_addrcmp(&ipv6->saddr, &ip6info->smsk, + &ip6info->src), IP6T_INV_SRCIP) + || FWINV(ip6_masked_addrcmp(&ipv6->daddr, &ip6info->dmsk, + &ip6info->dst), IP6T_INV_DSTIP)) { dprintf("Source or dest mismatch.\n"); /* dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr, @@ -2131,6 +2132,7 @@ EXPORT_SYMBOL(ip6t_register_target); EXPORT_SYMBOL(ip6t_unregister_target); EXPORT_SYMBOL(ip6t_ext_hdr); EXPORT_SYMBOL(ipv6_find_hdr); +EXPORT_SYMBOL(ip6_masked_addrcmp); module_init(init); module_exit(fini); -- cgit v1.2.3-71-gd317 From ff179c8cf5caa17bf3d407edbb5872aa2eee6900 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 26 Nov 2005 20:24:59 +0100 Subject: [PATCH] i2c: Drop i2c_driver.flags, 1 of 3 The I2C_DF_DUMMY flag is gone since 2.5.70, it's about time to drop all ifdef'd out references thereto. 
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/media/video/tvmixer.c | 4 ---- include/linux/i2c.h | 5 ----- 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/video/tvmixer.c b/drivers/media/video/tvmixer.c index 8318bd1aad00..5897e5d4d3d2 100644 --- a/drivers/media/video/tvmixer.c +++ b/drivers/media/video/tvmixer.c @@ -232,12 +232,8 @@ static struct i2c_driver driver = { #endif .name = "tv card mixer driver", .id = I2C_DRIVERID_TVMIXER, -#ifdef I2C_DF_DUMMY - .flags = I2C_DF_DUMMY, -#else .flags = I2C_DF_NOTIFY, .detach_adapter = tvmixer_adapters, -#endif .attach_adapter = tvmixer_adapters, .detach_client = tvmixer_clients, }; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 5e19a7ba69b2..0316ba1294ca 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -252,11 +252,6 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data) /*flags for the driver struct: */ #define I2C_DF_NOTIFY 0x01 /* notify on bus (de/a)ttaches */ -#if 0 -/* this flag is gone -- there is a (optional) driver->detach_adapter - * callback now which can be used instead */ -# define I2C_DF_DUMMY 0x02 -#endif /*flags for the client struct: */ #define I2C_CLIENT_ALLOW_USE 0x01 /* Client allows access */ -- cgit v1.2.3-71-gd317 From 8a9947552d43b0d20d5fa23ac0ba435d526be454 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 26 Nov 2005 20:28:06 +0100 Subject: [PATCH] i2c: Drop i2c_driver.flags, 2 of 3 Just about every i2c chip driver sets the I2C_DF_NOTIFY flag, so we can simply make it the default and drop the flag. If any driver really doesn't want to be notified when i2c adapters are added, that driver can simply leave .attach_adapter unset. This approach is also more robust: i2c-core now tests the .attach_adapter pointer itself before calling it, instead of trusting a separate flag, which prevents accidental NULL pointer dereferences.
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- Documentation/i2c/porting-clients | 3 +++ Documentation/i2c/writing-clients | 5 ----- arch/arm/mach-pxa/akita-ioexp.c | 1 - drivers/acorn/char/pcf8583.c | 1 - drivers/hwmon/adm1021.c | 1 - drivers/hwmon/adm1025.c | 1 - drivers/hwmon/adm1026.c | 1 - drivers/hwmon/adm1031.c | 1 - drivers/hwmon/adm9240.c | 1 - drivers/hwmon/asb100.c | 1 - drivers/hwmon/atxp1.c | 1 - drivers/hwmon/ds1621.c | 1 - drivers/hwmon/fscher.c | 1 - drivers/hwmon/fscpos.c | 1 - drivers/hwmon/gl518sm.c | 1 - drivers/hwmon/gl520sm.c | 1 - drivers/hwmon/it87.c | 1 - drivers/hwmon/lm63.c | 1 - drivers/hwmon/lm75.c | 1 - drivers/hwmon/lm77.c | 1 - drivers/hwmon/lm78.c | 1 - drivers/hwmon/lm80.c | 1 - drivers/hwmon/lm83.c | 1 - drivers/hwmon/lm85.c | 1 - drivers/hwmon/lm87.c | 1 - drivers/hwmon/lm90.c | 1 - drivers/hwmon/lm92.c | 1 - drivers/hwmon/max1619.c | 1 - drivers/hwmon/w83781d.c | 1 - drivers/hwmon/w83792d.c | 1 - drivers/hwmon/w83l785ts.c | 1 - drivers/i2c/chips/ds1337.c | 1 - drivers/i2c/chips/ds1374.c | 1 - drivers/i2c/chips/eeprom.c | 1 - drivers/i2c/chips/isp1301_omap.c | 1 - drivers/i2c/chips/m41t00.c | 1 - drivers/i2c/chips/max6875.c | 1 - drivers/i2c/chips/pca9539.c | 1 - drivers/i2c/chips/pcf8574.c | 1 - drivers/i2c/chips/pcf8591.c | 1 - drivers/i2c/chips/rtc8564.c | 1 - drivers/i2c/chips/tps65010.c | 1 - drivers/i2c/chips/x1205.c | 1 - drivers/i2c/i2c-core.c | 4 ++-- drivers/i2c/i2c-dev.c | 1 - drivers/macintosh/therm_adt746x.c | 1 - drivers/macintosh/therm_pm72.c | 1 - drivers/macintosh/therm_windtunnel.c | 1 - drivers/macintosh/windfarm_lm75_sensor.c | 1 - drivers/media/video/adv7170.c | 1 - drivers/media/video/adv7175.c | 1 - drivers/media/video/bt819.c | 1 - drivers/media/video/bt832.c | 1 - drivers/media/video/bt856.c | 1 - drivers/media/video/cs53l32a.c | 1 - drivers/media/video/cx25840/cx25840-core.c | 1 - drivers/media/video/indycam.c | 1 - drivers/media/video/ir-kbd-i2c.c | 1 - drivers/media/video/msp3400.c | 
1 - drivers/media/video/ovcamchip/ovcamchip_core.c | 1 - drivers/media/video/saa5246a.c | 1 - drivers/media/video/saa5249.c | 1 - drivers/media/video/saa6588.c | 1 - drivers/media/video/saa7110.c | 1 - drivers/media/video/saa7111.c | 1 - drivers/media/video/saa7114.c | 1 - drivers/media/video/saa7115.c | 1 - drivers/media/video/saa711x.c | 1 - drivers/media/video/saa7127.c | 1 - drivers/media/video/saa7134/saa6752hs.c | 1 - drivers/media/video/saa7185.c | 1 - drivers/media/video/saa7191.c | 1 - drivers/media/video/tda7432.c | 1 - drivers/media/video/tda9840.c | 1 - drivers/media/video/tda9875.c | 1 - drivers/media/video/tda9887.c | 1 - drivers/media/video/tea6415c.c | 1 - drivers/media/video/tea6420.c | 1 - drivers/media/video/tuner-3036.c | 1 - drivers/media/video/tuner-core.c | 1 - drivers/media/video/tvaudio.c | 1 - drivers/media/video/tveeprom.c | 1 - drivers/media/video/tvmixer.c | 1 - drivers/media/video/tvp5150.c | 1 - drivers/media/video/vpx3220.c | 1 - drivers/media/video/wm8775.c | 1 - drivers/video/matrox/matroxfb_maven.c | 1 - include/linux/i2c.h | 1 - sound/oss/dmasound/dac3550a.c | 1 - sound/oss/dmasound/tas_common.c | 1 - sound/ppc/keywest.c | 1 - 91 files changed, 5 insertions(+), 95 deletions(-) (limited to 'include/linux') diff --git a/Documentation/i2c/porting-clients b/Documentation/i2c/porting-clients index 184fac2377aa..64c610bf2fbc 100644 --- a/Documentation/i2c/porting-clients +++ b/Documentation/i2c/porting-clients @@ -109,6 +109,9 @@ Technical changes: there is a MODULE_LICENSE() line, at the bottom of the file (after MODULE_AUTHOR() and MODULE_DESCRIPTION(), in this order). +* [Driver] The flags field of the i2c_driver structure is gone. + I2C_DF_NOTIFY is now the default behavior. + Coding policy: * [Copyright] Use (C), not (c), for copyright. 
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index d19993cc0604..59d2c169cd61 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients @@ -27,7 +27,6 @@ address. static struct i2c_driver foo_driver = { .owner = THIS_MODULE, .name = "Foo version 2.3 driver", - .flags = I2C_DF_NOTIFY, .attach_adapter = &foo_attach_adapter, .detach_client = &foo_detach_client, .command = &foo_command /* may be NULL */ @@ -36,10 +35,6 @@ static struct i2c_driver foo_driver = { The name field must match the driver name, including the case. It must not contain spaces, and may be up to 31 characters long. -Don't worry about the flags field; just put I2C_DF_NOTIFY into it. This -means that your driver will be notified when new adapters are found. -This is almost always what you want. - All other fields are for call-back functions which will be explained below. diff --git a/arch/arm/mach-pxa/akita-ioexp.c b/arch/arm/mach-pxa/akita-ioexp.c index f6d73cc01f78..440ebb3c3db1 100644 --- a/arch/arm/mach-pxa/akita-ioexp.c +++ b/arch/arm/mach-pxa/akita-ioexp.c @@ -127,7 +127,6 @@ static struct i2c_driver max7310_i2c_driver = { .owner = THIS_MODULE, .name = "akita-max7310", .id = I2C_DRIVERID_AKITAIOEXP, - .flags = I2C_DF_NOTIFY, .attach_adapter = max7310_attach_adapter, .detach_client = max7310_detach_client, }; diff --git a/drivers/acorn/char/pcf8583.c b/drivers/acorn/char/pcf8583.c index e26f007a1417..befc9469b4f2 100644 --- a/drivers/acorn/char/pcf8583.c +++ b/drivers/acorn/char/pcf8583.c @@ -259,7 +259,6 @@ pcf8583_command(struct i2c_client *client, unsigned int cmd, void *arg) static struct i2c_driver pcf8583_driver = { .name = "PCF8583", .id = I2C_DRIVERID_PCF8583, - .flags = I2C_DF_NOTIFY, .attach_adapter = pcf8583_probe, .detach_client = pcf8583_detach, .command = pcf8583_command diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c index 8102876c7c3f..5e6df4b7b857 100644 --- a/drivers/hwmon/adm1021.c +++ 
b/drivers/hwmon/adm1021.c @@ -129,7 +129,6 @@ static struct i2c_driver adm1021_driver = { .owner = THIS_MODULE, .name = "adm1021", .id = I2C_DRIVERID_ADM1021, - .flags = I2C_DF_NOTIFY, .attach_adapter = adm1021_attach_adapter, .detach_client = adm1021_detach_client, }; diff --git a/drivers/hwmon/adm1025.c b/drivers/hwmon/adm1025.c index bf67860e6a20..2be48a7a90b8 100644 --- a/drivers/hwmon/adm1025.c +++ b/drivers/hwmon/adm1025.c @@ -121,7 +121,6 @@ static struct i2c_driver adm1025_driver = { .owner = THIS_MODULE, .name = "adm1025", .id = I2C_DRIVERID_ADM1025, - .flags = I2C_DF_NOTIFY, .attach_adapter = adm1025_attach_adapter, .detach_client = adm1025_detach_client, }; diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c index 5036b17a39cd..5416db809f97 100644 --- a/drivers/hwmon/adm1026.c +++ b/drivers/hwmon/adm1026.c @@ -310,7 +310,6 @@ static void adm1026_init_client(struct i2c_client *client); static struct i2c_driver adm1026_driver = { .owner = THIS_MODULE, .name = "adm1026", - .flags = I2C_DF_NOTIFY, .attach_adapter = adm1026_attach_adapter, .detach_client = adm1026_detach_client, }; diff --git a/drivers/hwmon/adm1031.c b/drivers/hwmon/adm1031.c index 7c545d5eee45..1e24428090c4 100644 --- a/drivers/hwmon/adm1031.c +++ b/drivers/hwmon/adm1031.c @@ -107,7 +107,6 @@ static struct adm1031_data *adm1031_update_device(struct device *dev); static struct i2c_driver adm1031_driver = { .owner = THIS_MODULE, .name = "adm1031", - .flags = I2C_DF_NOTIFY, .attach_adapter = adm1031_attach_adapter, .detach_client = adm1031_detach_client, }; diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c index 11dc95f8a17e..287733fe2c0d 100644 --- a/drivers/hwmon/adm9240.c +++ b/drivers/hwmon/adm9240.c @@ -140,7 +140,6 @@ static struct i2c_driver adm9240_driver = { .owner = THIS_MODULE, .name = "adm9240", .id = I2C_DRIVERID_ADM9240, - .flags = I2C_DF_NOTIFY, .attach_adapter = adm9240_attach_adapter, .detach_client = adm9240_detach_client, }; diff --git 
a/drivers/hwmon/asb100.c b/drivers/hwmon/asb100.c index 52c469722a65..7227f800bef9 100644 --- a/drivers/hwmon/asb100.c +++ b/drivers/hwmon/asb100.c @@ -220,7 +220,6 @@ static struct i2c_driver asb100_driver = { .owner = THIS_MODULE, .name = "asb100", .id = I2C_DRIVERID_ASB100, - .flags = I2C_DF_NOTIFY, .attach_adapter = asb100_attach_adapter, .detach_client = asb100_detach_client, }; diff --git a/drivers/hwmon/atxp1.c b/drivers/hwmon/atxp1.c index 53324f56404e..a60a9f20281b 100644 --- a/drivers/hwmon/atxp1.c +++ b/drivers/hwmon/atxp1.c @@ -52,7 +52,6 @@ static int atxp1_detect(struct i2c_adapter *adapter, int address, int kind); static struct i2c_driver atxp1_driver = { .owner = THIS_MODULE, .name = "atxp1", - .flags = I2C_DF_NOTIFY, .attach_adapter = atxp1_attach_adapter, .detach_client = atxp1_detach_client, }; diff --git a/drivers/hwmon/ds1621.c b/drivers/hwmon/ds1621.c index 34f71b7c7f37..0096eb37c663 100644 --- a/drivers/hwmon/ds1621.c +++ b/drivers/hwmon/ds1621.c @@ -92,7 +92,6 @@ static struct i2c_driver ds1621_driver = { .owner = THIS_MODULE, .name = "ds1621", .id = I2C_DRIVERID_DS1621, - .flags = I2C_DF_NOTIFY, .attach_adapter = ds1621_attach_adapter, .detach_client = ds1621_detach_client, }; diff --git a/drivers/hwmon/fscher.c b/drivers/hwmon/fscher.c index a02e1c34c757..f56ca06dbf88 100644 --- a/drivers/hwmon/fscher.c +++ b/drivers/hwmon/fscher.c @@ -121,7 +121,6 @@ static struct i2c_driver fscher_driver = { .owner = THIS_MODULE, .name = "fscher", .id = I2C_DRIVERID_FSCHER, - .flags = I2C_DF_NOTIFY, .attach_adapter = fscher_attach_adapter, .detach_client = fscher_detach_client, }; diff --git a/drivers/hwmon/fscpos.c b/drivers/hwmon/fscpos.c index 64e4edc64f8d..701dffc2ceed 100644 --- a/drivers/hwmon/fscpos.c +++ b/drivers/hwmon/fscpos.c @@ -103,7 +103,6 @@ static struct i2c_driver fscpos_driver = { .owner = THIS_MODULE, .name = "fscpos", .id = I2C_DRIVERID_FSCPOS, - .flags = I2C_DF_NOTIFY, .attach_adapter = fscpos_attach_adapter, .detach_client = 
fscpos_detach_client, }; diff --git a/drivers/hwmon/gl518sm.c b/drivers/hwmon/gl518sm.c index 2f178dbe3d87..5788bbb77d8d 100644 --- a/drivers/hwmon/gl518sm.c +++ b/drivers/hwmon/gl518sm.c @@ -154,7 +154,6 @@ static struct i2c_driver gl518_driver = { .owner = THIS_MODULE, .name = "gl518sm", .id = I2C_DRIVERID_GL518, - .flags = I2C_DF_NOTIFY, .attach_adapter = gl518_attach_adapter, .detach_client = gl518_detach_client, }; diff --git a/drivers/hwmon/gl520sm.c b/drivers/hwmon/gl520sm.c index c39ba1239426..b3998165193d 100644 --- a/drivers/hwmon/gl520sm.c +++ b/drivers/hwmon/gl520sm.c @@ -112,7 +112,6 @@ static struct i2c_driver gl520_driver = { .owner = THIS_MODULE, .name = "gl520sm", .id = I2C_DRIVERID_GL520, - .flags = I2C_DF_NOTIFY, .attach_adapter = gl520_attach_adapter, .detach_client = gl520_detach_client, }; diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index a61f5d00f10a..d5f0d92378c5 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -237,7 +237,6 @@ static struct i2c_driver it87_driver = { .owner = THIS_MODULE, .name = "it87", .id = I2C_DRIVERID_IT87, - .flags = I2C_DF_NOTIFY, .attach_adapter = it87_attach_adapter, .detach_client = it87_detach_client, }; diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c index 954ec2497249..c2dd4ac8042d 100644 --- a/drivers/hwmon/lm63.c +++ b/drivers/hwmon/lm63.c @@ -141,7 +141,6 @@ static void lm63_init_client(struct i2c_client *client); static struct i2c_driver lm63_driver = { .owner = THIS_MODULE, .name = "lm63", - .flags = I2C_DF_NOTIFY, .attach_adapter = lm63_attach_adapter, .detach_client = lm63_detach_client, }; diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index d70f4c8fc1e6..0bcbd6515139 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -69,7 +69,6 @@ static struct i2c_driver lm75_driver = { .owner = THIS_MODULE, .name = "lm75", .id = I2C_DRIVERID_LM75, - .flags = I2C_DF_NOTIFY, .attach_adapter = lm75_attach_adapter, .detach_client = lm75_detach_client, }; diff 
--git a/drivers/hwmon/lm77.c b/drivers/hwmon/lm77.c index 9380fda7dcd1..6a524e92432e 100644 --- a/drivers/hwmon/lm77.c +++ b/drivers/hwmon/lm77.c @@ -76,7 +76,6 @@ static struct lm77_data *lm77_update_device(struct device *dev); static struct i2c_driver lm77_driver = { .owner = THIS_MODULE, .name = "lm77", - .flags = I2C_DF_NOTIFY, .attach_adapter = lm77_attach_adapter, .detach_client = lm77_detach_client, }; diff --git a/drivers/hwmon/lm78.c b/drivers/hwmon/lm78.c index 78cdd506439f..18448f83a6b0 100644 --- a/drivers/hwmon/lm78.c +++ b/drivers/hwmon/lm78.c @@ -167,7 +167,6 @@ static struct i2c_driver lm78_driver = { .owner = THIS_MODULE, .name = "lm78", .id = I2C_DRIVERID_LM78, - .flags = I2C_DF_NOTIFY, .attach_adapter = lm78_attach_adapter, .detach_client = lm78_detach_client, }; diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c index c359fdea211e..02303fa0c464 100644 --- a/drivers/hwmon/lm80.c +++ b/drivers/hwmon/lm80.c @@ -146,7 +146,6 @@ static struct i2c_driver lm80_driver = { .owner = THIS_MODULE, .name = "lm80", .id = I2C_DRIVERID_LM80, - .flags = I2C_DF_NOTIFY, .attach_adapter = lm80_attach_adapter, .detach_client = lm80_detach_client, }; diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c index 9a70611a9f69..96cb34ea2490 100644 --- a/drivers/hwmon/lm83.c +++ b/drivers/hwmon/lm83.c @@ -127,7 +127,6 @@ static struct i2c_driver lm83_driver = { .owner = THIS_MODULE, .name = "lm83", .id = I2C_DRIVERID_LM83, - .flags = I2C_DF_NOTIFY, .attach_adapter = lm83_attach_adapter, .detach_client = lm83_detach_client, }; diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c index 3f5544a40f3c..131ecab094ac 100644 --- a/drivers/hwmon/lm85.c +++ b/drivers/hwmon/lm85.c @@ -383,7 +383,6 @@ static struct i2c_driver lm85_driver = { .owner = THIS_MODULE, .name = "lm85", .id = I2C_DRIVERID_LM85, - .flags = I2C_DF_NOTIFY, .attach_adapter = lm85_attach_adapter, .detach_client = lm85_detach_client, }; diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c index 
eeec18177861..26fd0b33beaf 100644 --- a/drivers/hwmon/lm87.c +++ b/drivers/hwmon/lm87.c @@ -164,7 +164,6 @@ static struct i2c_driver lm87_driver = { .owner = THIS_MODULE, .name = "lm87", .id = I2C_DRIVERID_LM87, - .flags = I2C_DF_NOTIFY, .attach_adapter = lm87_attach_adapter, .detach_client = lm87_detach_client, }; diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 83cf2e1b09f5..011923b7091d 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -189,7 +189,6 @@ static struct i2c_driver lm90_driver = { .owner = THIS_MODULE, .name = "lm90", .id = I2C_DRIVERID_LM90, - .flags = I2C_DF_NOTIFY, .attach_adapter = lm90_attach_adapter, .detach_client = lm90_detach_client, }; diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c index 7a4b3701ed1a..2005a9ee61fb 100644 --- a/drivers/hwmon/lm92.c +++ b/drivers/hwmon/lm92.c @@ -413,7 +413,6 @@ static struct i2c_driver lm92_driver = { .owner = THIS_MODULE, .name = "lm92", .id = I2C_DRIVERID_LM92, - .flags = I2C_DF_NOTIFY, .attach_adapter = lm92_attach_adapter, .detach_client = lm92_detach_client, }; diff --git a/drivers/hwmon/max1619.c b/drivers/hwmon/max1619.c index 69e7e125683b..d5aebef126d5 100644 --- a/drivers/hwmon/max1619.c +++ b/drivers/hwmon/max1619.c @@ -92,7 +92,6 @@ static struct max1619_data *max1619_update_device(struct device *dev); static struct i2c_driver max1619_driver = { .owner = THIS_MODULE, .name = "max1619", - .flags = I2C_DF_NOTIFY, .attach_adapter = max1619_attach_adapter, .detach_client = max1619_detach_client, }; diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c index ffdb3a03e2b5..a78929f2b3d8 100644 --- a/drivers/hwmon/w83781d.c +++ b/drivers/hwmon/w83781d.c @@ -272,7 +272,6 @@ static struct i2c_driver w83781d_driver = { .owner = THIS_MODULE, .name = "w83781d", .id = I2C_DRIVERID_W83781D, - .flags = I2C_DF_NOTIFY, .attach_adapter = w83781d_attach_adapter, .detach_client = w83781d_detach_client, }; diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c 
index 1ba072630361..6824243d90d7 100644 --- a/drivers/hwmon/w83792d.c +++ b/drivers/hwmon/w83792d.c @@ -319,7 +319,6 @@ static void w83792d_init_client(struct i2c_client *client); static struct i2c_driver w83792d_driver = { .owner = THIS_MODULE, .name = "w83792d", - .flags = I2C_DF_NOTIFY, .attach_adapter = w83792d_attach_adapter, .detach_client = w83792d_detach_client, }; diff --git a/drivers/hwmon/w83l785ts.c b/drivers/hwmon/w83l785ts.c index f495b6378668..35172fb455d0 100644 --- a/drivers/hwmon/w83l785ts.c +++ b/drivers/hwmon/w83l785ts.c @@ -95,7 +95,6 @@ static struct i2c_driver w83l785ts_driver = { .owner = THIS_MODULE, .name = "w83l785ts", .id = I2C_DRIVERID_W83L785TS, - .flags = I2C_DF_NOTIFY, .attach_adapter = w83l785ts_attach_adapter, .detach_client = w83l785ts_detach_client, }; diff --git a/drivers/i2c/chips/ds1337.c b/drivers/i2c/chips/ds1337.c index 18228957606c..65146cbc8390 100644 --- a/drivers/i2c/chips/ds1337.c +++ b/drivers/i2c/chips/ds1337.c @@ -54,7 +54,6 @@ static int ds1337_command(struct i2c_client *client, unsigned int cmd, static struct i2c_driver ds1337_driver = { .owner = THIS_MODULE, .name = "ds1337", - .flags = I2C_DF_NOTIFY, .attach_adapter = ds1337_attach_adapter, .detach_client = ds1337_detach_client, .command = ds1337_command, diff --git a/drivers/i2c/chips/ds1374.c b/drivers/i2c/chips/ds1374.c index da488b735abf..5a270d60b699 100644 --- a/drivers/i2c/chips/ds1374.c +++ b/drivers/i2c/chips/ds1374.c @@ -235,7 +235,6 @@ static struct i2c_driver ds1374_driver = { .owner = THIS_MODULE, .name = DS1374_DRV_NAME, .id = I2C_DRIVERID_DS1374, - .flags = I2C_DF_NOTIFY, .attach_adapter = ds1374_attach, .detach_client = ds1374_detach, }; diff --git a/drivers/i2c/chips/eeprom.c b/drivers/i2c/chips/eeprom.c index 4baf573fa04f..9bb1f8b3f38c 100644 --- a/drivers/i2c/chips/eeprom.c +++ b/drivers/i2c/chips/eeprom.c @@ -71,7 +71,6 @@ static struct i2c_driver eeprom_driver = { .owner = THIS_MODULE, .name = "eeprom", .id = I2C_DRIVERID_EEPROM, - .flags = 
I2C_DF_NOTIFY, .attach_adapter = eeprom_attach_adapter, .detach_client = eeprom_detach_client, }; diff --git a/drivers/i2c/chips/isp1301_omap.c b/drivers/i2c/chips/isp1301_omap.c index d2a100d77839..4f472ba66a02 100644 --- a/drivers/i2c/chips/isp1301_omap.c +++ b/drivers/i2c/chips/isp1301_omap.c @@ -1636,7 +1636,6 @@ static struct i2c_driver isp1301_driver = { .name = "isp1301_omap", .id = 1301, /* FIXME "official", i2c-ids.h */ .class = I2C_CLASS_HWMON, - .flags = I2C_DF_NOTIFY, .attach_adapter = isp1301_scan_bus, .detach_client = isp1301_detach_client, }; diff --git a/drivers/i2c/chips/m41t00.c b/drivers/i2c/chips/m41t00.c index 3df309ae44a6..13e67836b348 100644 --- a/drivers/i2c/chips/m41t00.c +++ b/drivers/i2c/chips/m41t00.c @@ -214,7 +214,6 @@ static struct i2c_driver m41t00_driver = { .owner = THIS_MODULE, .name = M41T00_DRV_NAME, .id = I2C_DRIVERID_STM41T00, - .flags = I2C_DF_NOTIFY, .attach_adapter = m41t00_attach, .detach_client = m41t00_detach, }; diff --git a/drivers/i2c/chips/max6875.c b/drivers/i2c/chips/max6875.c index b376a006883c..7e61019e72dd 100644 --- a/drivers/i2c/chips/max6875.c +++ b/drivers/i2c/chips/max6875.c @@ -69,7 +69,6 @@ static int max6875_detach_client(struct i2c_client *client); static struct i2c_driver max6875_driver = { .owner = THIS_MODULE, .name = "max6875", - .flags = I2C_DF_NOTIFY, .attach_adapter = max6875_attach_adapter, .detach_client = max6875_detach_client, }; diff --git a/drivers/i2c/chips/pca9539.c b/drivers/i2c/chips/pca9539.c index 59a930346229..26feb7a4f942 100644 --- a/drivers/i2c/chips/pca9539.c +++ b/drivers/i2c/chips/pca9539.c @@ -40,7 +40,6 @@ static int pca9539_detach_client(struct i2c_client *client); static struct i2c_driver pca9539_driver = { .owner = THIS_MODULE, .name = "pca9539", - .flags = I2C_DF_NOTIFY, .attach_adapter = pca9539_attach_adapter, .detach_client = pca9539_detach_client, }; diff --git a/drivers/i2c/chips/pcf8574.c b/drivers/i2c/chips/pcf8574.c index c323c2de236c..2fae640cf329 100644 --- 
a/drivers/i2c/chips/pcf8574.c +++ b/drivers/i2c/chips/pcf8574.c @@ -68,7 +68,6 @@ static struct i2c_driver pcf8574_driver = { .owner = THIS_MODULE, .name = "pcf8574", .id = I2C_DRIVERID_PCF8574, - .flags = I2C_DF_NOTIFY, .attach_adapter = pcf8574_attach_adapter, .detach_client = pcf8574_detach_client, }; diff --git a/drivers/i2c/chips/pcf8591.c b/drivers/i2c/chips/pcf8591.c index ce420a67560b..8750f71278e1 100644 --- a/drivers/i2c/chips/pcf8591.c +++ b/drivers/i2c/chips/pcf8591.c @@ -91,7 +91,6 @@ static struct i2c_driver pcf8591_driver = { .owner = THIS_MODULE, .name = "pcf8591", .id = I2C_DRIVERID_PCF8591, - .flags = I2C_DF_NOTIFY, .attach_adapter = pcf8591_attach_adapter, .detach_client = pcf8591_detach_client, }; diff --git a/drivers/i2c/chips/rtc8564.c b/drivers/i2c/chips/rtc8564.c index 26e498d921da..e586f75dd024 100644 --- a/drivers/i2c/chips/rtc8564.c +++ b/drivers/i2c/chips/rtc8564.c @@ -362,7 +362,6 @@ static struct i2c_driver rtc8564_driver = { .owner = THIS_MODULE, .name = "RTC8564", .id = I2C_DRIVERID_RTC8564, - .flags = I2C_DF_NOTIFY, .attach_adapter = rtc8564_probe, .detach_client = rtc8564_detach, .command = rtc8564_command diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c index 280dd7a45db6..439bf6ceb789 100644 --- a/drivers/i2c/chips/tps65010.c +++ b/drivers/i2c/chips/tps65010.c @@ -639,7 +639,6 @@ static int __init tps65010_scan_bus(struct i2c_adapter *bus) static struct i2c_driver tps65010_driver = { .owner = THIS_MODULE, .name = "tps65010", - .flags = I2C_DF_NOTIFY, .attach_adapter = tps65010_scan_bus, .detach_client = __exit_p(tps65010_detach_client), }; diff --git a/drivers/i2c/chips/x1205.c b/drivers/i2c/chips/x1205.c index 7da366cdc18c..c5ff2cee15ae 100644 --- a/drivers/i2c/chips/x1205.c +++ b/drivers/i2c/chips/x1205.c @@ -107,7 +107,6 @@ static int x1205_command(struct i2c_client *client, unsigned int cmd, static struct i2c_driver x1205_driver = { .owner = THIS_MODULE, .name = "x1205", - .flags = I2C_DF_NOTIFY, 
.attach_adapter = &x1205_attach, .detach_client = &x1205_detach, }; diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 82ea1b7ec914..ad68ac00d910 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -197,7 +197,7 @@ int i2c_add_adapter(struct i2c_adapter *adap) /* inform drivers of new adapters */ list_for_each(item,&drivers) { driver = list_entry(item, struct i2c_driver, list); - if (driver->flags & I2C_DF_NOTIFY) + if (driver->attach_adapter) /* We ignore the return code; if it fails, too bad */ driver->attach_adapter(adap); } @@ -309,7 +309,7 @@ int i2c_add_driver(struct i2c_driver *driver) pr_debug("i2c-core: driver [%s] registered\n", driver->name); /* now look for instances of driver on our adapters */ - if (driver->flags & I2C_DF_NOTIFY) { + if (driver->attach_adapter) { list_for_each(item,&adapters) { adapter = list_entry(item, struct i2c_adapter, list); driver->attach_adapter(adapter); diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 8af0bd1424d2..9da51eb37c06 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -484,7 +484,6 @@ static struct i2c_driver i2cdev_driver = { .owner = THIS_MODULE, .name = "dev_driver", .id = I2C_DRIVERID_I2CDEV, - .flags = I2C_DF_NOTIFY, .attach_adapter = i2cdev_attach_adapter, .detach_adapter = i2cdev_detach_adapter, .detach_client = i2cdev_detach_client, diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c index f38696622eb4..f62c16fab42b 100644 --- a/drivers/macintosh/therm_adt746x.c +++ b/drivers/macintosh/therm_adt746x.c @@ -173,7 +173,6 @@ detach_thermostat(struct i2c_adapter *adapter) static struct i2c_driver thermostat_driver = { .owner = THIS_MODULE, .name = "therm_adt746x", - .flags = I2C_DF_NOTIFY, .attach_adapter = attach_thermostat, .detach_adapter = detach_thermostat, }; diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c index 190878eef990..df00c960fc5a 100644 --- a/drivers/macintosh/therm_pm72.c +++ 
b/drivers/macintosh/therm_pm72.c @@ -285,7 +285,6 @@ static struct i2c_driver therm_pm72_driver = { .owner = THIS_MODULE, .name = "therm_pm72", - .flags = I2C_DF_NOTIFY, .attach_adapter = therm_pm72_attach, .detach_adapter = therm_pm72_detach, }; diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c index 6aaa1df1a64e..f3bae0d00ed2 100644 --- a/drivers/macintosh/therm_windtunnel.c +++ b/drivers/macintosh/therm_windtunnel.c @@ -357,7 +357,6 @@ static struct i2c_driver g4fan_driver = { .owner = THIS_MODULE, .name = "therm_windtunnel", .id = I2C_DRIVERID_G4FAN, - .flags = I2C_DF_NOTIFY, .attach_adapter = do_attach, .detach_client = do_detach, }; diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c index a0a41ad0f2b5..2392789ccf32 100644 --- a/drivers/macintosh/windfarm_lm75_sensor.c +++ b/drivers/macintosh/windfarm_lm75_sensor.c @@ -49,7 +49,6 @@ static int wf_lm75_detach(struct i2c_client *client); static struct i2c_driver wf_lm75_driver = { .owner = THIS_MODULE, .name = "wf_lm75", - .flags = I2C_DF_NOTIFY, .attach_adapter = wf_lm75_attach, .detach_client = wf_lm75_detach, }; diff --git a/drivers/media/video/adv7170.c b/drivers/media/video/adv7170.c index 1ca2b67aedfb..c4f2265167a2 100644 --- a/drivers/media/video/adv7170.c +++ b/drivers/media/video/adv7170.c @@ -502,7 +502,6 @@ static struct i2c_driver i2c_driver_adv7170 = { .name = "adv7170", /* name */ .id = I2C_DRIVERID_ADV7170, - .flags = I2C_DF_NOTIFY, .attach_adapter = adv7170_attach_adapter, .detach_client = adv7170_detach_client, diff --git a/drivers/media/video/adv7175.c b/drivers/media/video/adv7175.c index 173bca1e0295..4fc08b17d4d0 100644 --- a/drivers/media/video/adv7175.c +++ b/drivers/media/video/adv7175.c @@ -552,7 +552,6 @@ static struct i2c_driver i2c_driver_adv7175 = { .name = "adv7175", /* name */ .id = I2C_DRIVERID_ADV7175, - .flags = I2C_DF_NOTIFY, .attach_adapter = adv7175_attach_adapter, .detach_client = 
adv7175_detach_client, diff --git a/drivers/media/video/bt819.c b/drivers/media/video/bt819.c index 3ee0afca76a7..7bba69793b78 100644 --- a/drivers/media/video/bt819.c +++ b/drivers/media/video/bt819.c @@ -627,7 +627,6 @@ static struct i2c_driver i2c_driver_bt819 = { .name = "bt819", .id = I2C_DRIVERID_BT819, - .flags = I2C_DF_NOTIFY, .attach_adapter = bt819_attach_adapter, .detach_client = bt819_detach_client, diff --git a/drivers/media/video/bt832.c b/drivers/media/video/bt832.c index 3ca1d768bfd3..0ba8652357e2 100644 --- a/drivers/media/video/bt832.c +++ b/drivers/media/video/bt832.c @@ -233,7 +233,6 @@ static struct i2c_driver driver = { .owner = THIS_MODULE, .name = "i2c bt832 driver", .id = -1, /* FIXME */ - .flags = I2C_DF_NOTIFY, .attach_adapter = bt832_probe, .detach_client = bt832_detach, .command = bt832_command, diff --git a/drivers/media/video/bt856.c b/drivers/media/video/bt856.c index 8eb871d0e85b..4c9acd1e2c70 100644 --- a/drivers/media/video/bt856.c +++ b/drivers/media/video/bt856.c @@ -409,7 +409,6 @@ static struct i2c_driver i2c_driver_bt856 = { .name = "bt856", .id = I2C_DRIVERID_BT856, - .flags = I2C_DF_NOTIFY, .attach_adapter = bt856_attach_adapter, .detach_client = bt856_detach_client, diff --git a/drivers/media/video/cs53l32a.c b/drivers/media/video/cs53l32a.c index 780b352ec119..fce5d89b7b15 100644 --- a/drivers/media/video/cs53l32a.c +++ b/drivers/media/video/cs53l32a.c @@ -218,7 +218,6 @@ static int cs53l32a_detach(struct i2c_client *client) static struct i2c_driver i2c_driver = { .name = "cs53l32a", .id = I2C_DRIVERID_CS53L32A, - .flags = I2C_DF_NOTIFY, .attach_adapter = cs53l32a_probe, .detach_client = cs53l32a_detach, .command = cs53l32a_command, diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c index 5b93723a1768..c66bc147ee71 100644 --- a/drivers/media/video/cx25840/cx25840-core.c +++ b/drivers/media/video/cx25840/cx25840-core.c @@ -847,7 +847,6 @@ static struct i2c_driver 
i2c_driver_cx25840 = { .name = "cx25840", .id = I2C_DRIVERID_CX25840, - .flags = I2C_DF_NOTIFY, .attach_adapter = cx25840_attach_adapter, .detach_client = cx25840_detach_client, diff --git a/drivers/media/video/indycam.c b/drivers/media/video/indycam.c index deeef125eb92..3eba514cdb29 100644 --- a/drivers/media/video/indycam.c +++ b/drivers/media/video/indycam.c @@ -454,7 +454,6 @@ static struct i2c_driver i2c_driver_indycam = { .owner = THIS_MODULE, .name = "indycam", .id = I2C_DRIVERID_INDYCAM, - .flags = I2C_DF_NOTIFY, .attach_adapter = indycam_probe, .detach_client = indycam_detach, .command = indycam_command, diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c index 740e543311af..2e2f78a4afc8 100644 --- a/drivers/media/video/ir-kbd-i2c.c +++ b/drivers/media/video/ir-kbd-i2c.c @@ -280,7 +280,6 @@ static int ir_probe(struct i2c_adapter *adap); static struct i2c_driver driver = { .name = "ir remote kbd driver", .id = I2C_DRIVERID_INFRARED, - .flags = I2C_DF_NOTIFY, .attach_adapter = ir_probe, .detach_client = ir_detach, }; diff --git a/drivers/media/video/msp3400.c b/drivers/media/video/msp3400.c index d86f8e92e534..46328fb6fe80 100644 --- a/drivers/media/video/msp3400.c +++ b/drivers/media/video/msp3400.c @@ -1564,7 +1564,6 @@ static struct i2c_driver driver = { .owner = THIS_MODULE, .name = "msp3400", .id = I2C_DRIVERID_MSP3400, - .flags = I2C_DF_NOTIFY, .attach_adapter = msp_probe, .detach_client = msp_detach, .command = msp_command, diff --git a/drivers/media/video/ovcamchip/ovcamchip_core.c b/drivers/media/video/ovcamchip/ovcamchip_core.c index 2de34ebf0673..390d0d6c7838 100644 --- a/drivers/media/video/ovcamchip/ovcamchip_core.c +++ b/drivers/media/video/ovcamchip/ovcamchip_core.c @@ -414,7 +414,6 @@ static struct i2c_driver driver = { .name = "ovcamchip", .id = I2C_DRIVERID_OVCAMCHIP, .class = I2C_CLASS_CAM_DIGITAL, - .flags = I2C_DF_NOTIFY, .attach_adapter = ovcamchip_attach, .detach_client = ovcamchip_detach, .command = 
ovcamchip_command, diff --git a/drivers/media/video/saa5246a.c b/drivers/media/video/saa5246a.c index b8054da31ffd..9bf686989aab 100644 --- a/drivers/media/video/saa5246a.c +++ b/drivers/media/video/saa5246a.c @@ -166,7 +166,6 @@ static struct i2c_driver i2c_driver_videotext = .owner = THIS_MODULE, .name = IF_NAME, /* name */ .id = I2C_DRIVERID_SAA5249, /* in i2c.h */ - .flags = I2C_DF_NOTIFY, .attach_adapter = saa5246a_probe, .detach_client = saa5246a_detach, .command = saa5246a_command diff --git a/drivers/media/video/saa5249.c b/drivers/media/video/saa5249.c index 7ffa2e9a9bf3..811e86396030 100644 --- a/drivers/media/video/saa5249.c +++ b/drivers/media/video/saa5249.c @@ -239,7 +239,6 @@ static struct i2c_driver i2c_driver_videotext = .owner = THIS_MODULE, .name = IF_NAME, /* name */ .id = I2C_DRIVERID_SAA5249, /* in i2c.h */ - .flags = I2C_DF_NOTIFY, .attach_adapter = saa5249_probe, .detach_client = saa5249_detach, .command = saa5249_command diff --git a/drivers/media/video/saa6588.c b/drivers/media/video/saa6588.c index 923322503e8f..18a0b7143e8b 100644 --- a/drivers/media/video/saa6588.c +++ b/drivers/media/video/saa6588.c @@ -498,7 +498,6 @@ static struct i2c_driver driver = { .owner = THIS_MODULE, .name = "i2c saa6588 driver", .id = -1, /* FIXME */ - .flags = I2C_DF_NOTIFY, .attach_adapter = saa6588_probe, .detach_client = saa6588_detach, .command = saa6588_command, diff --git a/drivers/media/video/saa7110.c b/drivers/media/video/saa7110.c index e116bdbed310..f266b35ceb35 100644 --- a/drivers/media/video/saa7110.c +++ b/drivers/media/video/saa7110.c @@ -591,7 +591,6 @@ static struct i2c_driver i2c_driver_saa7110 = { .name = "saa7110", .id = I2C_DRIVERID_SAA7110, - .flags = I2C_DF_NOTIFY, .attach_adapter = saa7110_attach_adapter, .detach_client = saa7110_detach_client, diff --git a/drivers/media/video/saa7111.c b/drivers/media/video/saa7111.c index fe8a5e453969..687beaf11adc 100644 --- a/drivers/media/video/saa7111.c +++ b/drivers/media/video/saa7111.c @@ 
-594,7 +594,6 @@ static struct i2c_driver i2c_driver_saa7111 = { .name = "saa7111", .id = I2C_DRIVERID_SAA7111A, - .flags = I2C_DF_NOTIFY, .attach_adapter = saa7111_attach_adapter, .detach_client = saa7111_detach_client, diff --git a/drivers/media/video/saa7114.c b/drivers/media/video/saa7114.c index d9f50e2f7b92..4748cf0598c0 100644 --- a/drivers/media/video/saa7114.c +++ b/drivers/media/video/saa7114.c @@ -1208,7 +1208,6 @@ static struct i2c_driver i2c_driver_saa7114 = { .name = "saa7114", .id = I2C_DRIVERID_SAA7114, - .flags = I2C_DF_NOTIFY, .attach_adapter = saa7114_attach_adapter, .detach_client = saa7114_detach_client, diff --git a/drivers/media/video/saa7115.c b/drivers/media/video/saa7115.c index e717e30d8187..b1079de938b7 100644 --- a/drivers/media/video/saa7115.c +++ b/drivers/media/video/saa7115.c @@ -1356,7 +1356,6 @@ static int saa7115_detach(struct i2c_client *client) static struct i2c_driver i2c_driver_saa7115 = { .name = "saa7115", .id = I2C_DRIVERID_SAA711X, - .flags = I2C_DF_NOTIFY, .attach_adapter = saa7115_probe, .detach_client = saa7115_detach, .command = saa7115_command, diff --git a/drivers/media/video/saa711x.c b/drivers/media/video/saa711x.c index 31f7b950b01c..734a70919080 100644 --- a/drivers/media/video/saa711x.c +++ b/drivers/media/video/saa711x.c @@ -569,7 +569,6 @@ static struct i2c_driver i2c_driver_saa711x = { .name = "saa711x", .id = I2C_DRIVERID_SAA711X, - .flags = I2C_DF_NOTIFY, .attach_adapter = saa711x_attach_adapter, .detach_client = saa711x_detach_client, diff --git a/drivers/media/video/saa7127.c b/drivers/media/video/saa7127.c index c36f014f1fdf..a2fab9837507 100644 --- a/drivers/media/video/saa7127.c +++ b/drivers/media/video/saa7127.c @@ -821,7 +821,6 @@ static int saa7127_detach(struct i2c_client *client) static struct i2c_driver i2c_driver_saa7127 = { .name = "saa7127", .id = I2C_DRIVERID_SAA7127, - .flags = I2C_DF_NOTIFY, .attach_adapter = saa7127_probe, .detach_client = saa7127_detach, .command = saa7127_command, diff 
--git a/drivers/media/video/saa7134/saa6752hs.c b/drivers/media/video/saa7134/saa6752hs.c index a61d24f588f7..6fc298e0a03a 100644 --- a/drivers/media/video/saa7134/saa6752hs.c +++ b/drivers/media/video/saa7134/saa6752hs.c @@ -600,7 +600,6 @@ static struct i2c_driver driver = { .owner = THIS_MODULE, .name = "i2c saa6752hs MPEG encoder", .id = I2C_DRIVERID_SAA6752HS, - .flags = I2C_DF_NOTIFY, .attach_adapter = saa6752hs_probe, .detach_client = saa6752hs_detach, .command = saa6752hs_command, diff --git a/drivers/media/video/saa7185.c b/drivers/media/video/saa7185.c index 132aa7943c16..e24aa16f2d8c 100644 --- a/drivers/media/video/saa7185.c +++ b/drivers/media/video/saa7185.c @@ -491,7 +491,6 @@ static struct i2c_driver i2c_driver_saa7185 = { .name = "saa7185", /* name */ .id = I2C_DRIVERID_SAA7185B, - .flags = I2C_DF_NOTIFY, .attach_adapter = saa7185_attach_adapter, .detach_client = saa7185_detach_client, diff --git a/drivers/media/video/saa7191.c b/drivers/media/video/saa7191.c index cbca896e8cfa..6be98fc0fe24 100644 --- a/drivers/media/video/saa7191.c +++ b/drivers/media/video/saa7191.c @@ -791,7 +791,6 @@ static struct i2c_driver i2c_driver_saa7191 = { .owner = THIS_MODULE, .name = "saa7191", .id = I2C_DRIVERID_SAA7191, - .flags = I2C_DF_NOTIFY, .attach_adapter = saa7191_probe, .detach_client = saa7191_detach, .command = saa7191_command diff --git a/drivers/media/video/tda7432.c b/drivers/media/video/tda7432.c index d32737dd2142..239a58666a12 100644 --- a/drivers/media/video/tda7432.c +++ b/drivers/media/video/tda7432.c @@ -504,7 +504,6 @@ static struct i2c_driver driver = { .owner = THIS_MODULE, .name = "i2c tda7432 driver", .id = I2C_DRIVERID_TDA7432, - .flags = I2C_DF_NOTIFY, .attach_adapter = tda7432_probe, .detach_client = tda7432_detach, .command = tda7432_command, diff --git a/drivers/media/video/tda9840.c b/drivers/media/video/tda9840.c index 1794686612c6..f29fb507075d 100644 --- a/drivers/media/video/tda9840.c +++ b/drivers/media/video/tda9840.c @@ -224,7 
+224,6 @@ static struct i2c_driver driver = { .owner = THIS_MODULE, .name = "tda9840", .id = I2C_DRIVERID_TDA9840, - .flags = I2C_DF_NOTIFY, .attach_adapter = attach, .detach_client = detach, .command = command, diff --git a/drivers/media/video/tda9875.c b/drivers/media/video/tda9875.c index a5e37dc91f39..d053b6445502 100644 --- a/drivers/media/video/tda9875.c +++ b/drivers/media/video/tda9875.c @@ -375,7 +375,6 @@ static struct i2c_driver driver = { .owner = THIS_MODULE, .name = "i2c tda9875 driver", .id = I2C_DRIVERID_TDA9875, - .flags = I2C_DF_NOTIFY, .attach_adapter = tda9875_probe, .detach_client = tda9875_detach, .command = tda9875_command, diff --git a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c index 2f2414e90e8b..049b44e0767b 100644 --- a/drivers/media/video/tda9887.c +++ b/drivers/media/video/tda9887.c @@ -822,7 +822,6 @@ static struct i2c_driver driver = { .owner = THIS_MODULE, .name = "i2c tda9887 driver", .id = -1, /* FIXME */ - .flags = I2C_DF_NOTIFY, .attach_adapter = tda9887_probe, .detach_client = tda9887_detach, .command = tda9887_command, diff --git a/drivers/media/video/tea6415c.c b/drivers/media/video/tea6415c.c index ee3688348b66..96d88ce60c98 100644 --- a/drivers/media/video/tea6415c.c +++ b/drivers/media/video/tea6415c.c @@ -193,7 +193,6 @@ static struct i2c_driver driver = { .owner = THIS_MODULE, .name = "tea6415c", .id = I2C_DRIVERID_TEA6415C, - .flags = I2C_DF_NOTIFY, .attach_adapter = attach, .detach_client = detach, .command = command, diff --git a/drivers/media/video/tea6420.c b/drivers/media/video/tea6420.c index 17975c19da5e..fd417de95847 100644 --- a/drivers/media/video/tea6420.c +++ b/drivers/media/video/tea6420.c @@ -170,7 +170,6 @@ static struct i2c_driver driver = { .owner = THIS_MODULE, .name = "tea6420", .id = I2C_DRIVERID_TEA6420, - .flags = I2C_DF_NOTIFY, .attach_adapter = attach, .detach_client = detach, .command = command, diff --git a/drivers/media/video/tuner-3036.c b/drivers/media/video/tuner-3036.c 
index 79203595b9c1..3505cec2e65a 100644 --- a/drivers/media/video/tuner-3036.c +++ b/drivers/media/video/tuner-3036.c @@ -178,7 +178,6 @@ i2c_driver_tuner = .owner = THIS_MODULE, .name = "sab3036", .id = I2C_DRIVERID_SAB3036, - .flags = I2C_DF_NOTIFY, .attach_adapter = tuner_probe, .detach_client = tuner_detach, .command = tuner_command diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c index e58abdfcaab8..3c75121f6383 100644 --- a/drivers/media/video/tuner-core.c +++ b/drivers/media/video/tuner-core.c @@ -745,7 +745,6 @@ static struct i2c_driver driver = { .owner = THIS_MODULE, .name = "tuner", .id = I2C_DRIVERID_TUNER, - .flags = I2C_DF_NOTIFY, .attach_adapter = tuner_probe, .detach_client = tuner_detach, .command = tuner_command, diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c index 5b20e8177cad..3565f35be7a1 100644 --- a/drivers/media/video/tvaudio.c +++ b/drivers/media/video/tvaudio.c @@ -1705,7 +1705,6 @@ static struct i2c_driver driver = { .owner = THIS_MODULE, .name = "generic i2c audio driver", .id = I2C_DRIVERID_TVAUDIO, - .flags = I2C_DF_NOTIFY, .attach_adapter = chip_probe, .detach_client = chip_detach, .command = chip_command, diff --git a/drivers/media/video/tveeprom.c b/drivers/media/video/tveeprom.c index 5ac235365dd8..195bc51d4576 100644 --- a/drivers/media/video/tveeprom.c +++ b/drivers/media/video/tveeprom.c @@ -782,7 +782,6 @@ static struct i2c_driver i2c_driver_tveeprom = { .owner = THIS_MODULE, .name = "tveeprom", .id = I2C_DRIVERID_TVEEPROM, - .flags = I2C_DF_NOTIFY, .attach_adapter = tveeprom_attach_adapter, .detach_client = tveeprom_detach_client, .command = tveeprom_command, diff --git a/drivers/media/video/tvmixer.c b/drivers/media/video/tvmixer.c index 5897e5d4d3d2..936e01d2c785 100644 --- a/drivers/media/video/tvmixer.c +++ b/drivers/media/video/tvmixer.c @@ -232,7 +232,6 @@ static struct i2c_driver driver = { #endif .name = "tv card mixer driver", .id = I2C_DRIVERID_TVMIXER, - 
.flags = I2C_DF_NOTIFY, .detach_adapter = tvmixer_adapters, .attach_adapter = tvmixer_adapters, .detach_client = tvmixer_clients, diff --git a/drivers/media/video/tvp5150.c b/drivers/media/video/tvp5150.c index 97431e26d229..4f3ee2091611 100644 --- a/drivers/media/video/tvp5150.c +++ b/drivers/media/video/tvp5150.c @@ -806,7 +806,6 @@ static struct i2c_driver driver = { /* FIXME */ .id = I2C_DRIVERID_SAA7110, - .flags = I2C_DF_NOTIFY, .attach_adapter = tvp5150_attach_adapter, .detach_client = tvp5150_detach_client, diff --git a/drivers/media/video/vpx3220.c b/drivers/media/video/vpx3220.c index 137b58f2c666..c66d28505bcd 100644 --- a/drivers/media/video/vpx3220.c +++ b/drivers/media/video/vpx3220.c @@ -726,7 +726,6 @@ static struct i2c_driver vpx3220_i2c_driver = { .name = "vpx3220", .id = I2C_DRIVERID_VPX3220, - .flags = I2C_DF_NOTIFY, .attach_adapter = vpx3220_attach_adapter, .detach_client = vpx3220_detach_client, diff --git a/drivers/media/video/wm8775.c b/drivers/media/video/wm8775.c index a6936ad74fcf..7b07717a3c67 100644 --- a/drivers/media/video/wm8775.c +++ b/drivers/media/video/wm8775.c @@ -236,7 +236,6 @@ static struct i2c_driver i2c_driver = { .name = "wm8775", .id = I2C_DRIVERID_WM8775, - .flags = I2C_DF_NOTIFY, .attach_adapter = wm8775_probe, .detach_client = wm8775_detach, diff --git a/drivers/video/matrox/matroxfb_maven.c b/drivers/video/matrox/matroxfb_maven.c index ad60bbb16cdf..78994c5fe932 100644 --- a/drivers/video/matrox/matroxfb_maven.c +++ b/drivers/video/matrox/matroxfb_maven.c @@ -1296,7 +1296,6 @@ static struct i2c_driver maven_driver={ .owner = THIS_MODULE, .name = "maven", .id = I2C_DRIVERID_MGATVO, - .flags = I2C_DF_NOTIFY, .attach_adapter = maven_attach_adapter, .detach_client = maven_detach_client, .command = maven_command, diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 0316ba1294ca..99399fadf13f 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -251,7 +251,6 @@ static inline void i2c_set_adapdata (struct 
i2c_adapter *dev, void *data) } /*flags for the driver struct: */ -#define I2C_DF_NOTIFY 0x01 /* notify on bus (de/a)ttaches */ /*flags for the client struct: */ #define I2C_CLIENT_ALLOW_USE 0x01 /* Client allows access */ diff --git a/sound/oss/dmasound/dac3550a.c b/sound/oss/dmasound/dac3550a.c index 533895eba0eb..3402a663d07f 100644 --- a/sound/oss/dmasound/dac3550a.c +++ b/sound/oss/dmasound/dac3550a.c @@ -44,7 +44,6 @@ struct i2c_driver daca_driver = { .owner = THIS_MODULE, .name = "DAC3550A driver V " DACA_VERSION, .id = I2C_DRIVERID_DACA, - .flags = I2C_DF_NOTIFY, .attach_adapter = daca_attach_adapter, .detach_client = daca_detach_client, }; diff --git a/sound/oss/dmasound/tas_common.c b/sound/oss/dmasound/tas_common.c index d36a1fe2fcf3..7e3d517af4b9 100644 --- a/sound/oss/dmasound/tas_common.c +++ b/sound/oss/dmasound/tas_common.c @@ -49,7 +49,6 @@ static int tas_detach_client(struct i2c_client *); struct i2c_driver tas_driver = { .owner = THIS_MODULE, .name = "tas", - .flags = I2C_DF_NOTIFY, .attach_adapter = tas_attach_adapter, .detach_client = tas_detach_client, }; diff --git a/sound/ppc/keywest.c b/sound/ppc/keywest.c index 097fbcfc5d45..fd8e2e6062f6 100644 --- a/sound/ppc/keywest.c +++ b/sound/ppc/keywest.c @@ -43,7 +43,6 @@ static int keywest_detach_client(struct i2c_client *client); struct i2c_driver keywest_driver = { .name = "PMac Keywest Audio", .id = I2C_DRIVERID_KEYWEST, - .flags = I2C_DF_NOTIFY, .attach_adapter = &keywest_attach_adapter, .detach_client = &keywest_detach_client, }; -- cgit v1.2.3-71-gd317 From 5d7b851dcced3611e4a4432308618b1ed1a9fc31 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 27 Nov 2005 08:57:10 +0100 Subject: [PATCH] i2c: Drop i2c_driver.flags, 3 of 3 The flags member of the i2c_driver structure is no more used. Drop it. 
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- include/linux/i2c.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 99399fadf13f..3c16a8fb95f4 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -112,7 +112,6 @@ struct i2c_driver { char name[32]; int id; unsigned int class; - unsigned int flags; /* div., see below */ /* Notifies the driver that a new bus has appeared. This routine * can be used by the driver to test if the bus meets its conditions @@ -250,8 +249,6 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data) dev_set_drvdata (&dev->dev, data); } -/*flags for the driver struct: */ - /*flags for the client struct: */ #define I2C_CLIENT_ALLOW_USE 0x01 /* Client allows access */ #define I2C_CLIENT_ALLOW_MULTIPLE_USE 0x02 /* Allow multiple access-locks */ -- cgit v1.2.3-71-gd317 From cb748fb20186d4b345c68a7f580429f379fdd268 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 26 Nov 2005 20:58:35 +0100 Subject: [PATCH] i2c: Rework client usage count, 1 of 3 No i2c client uses the I2C_CLIENT_ALLOW_MULTIPLE_USE flag, so drop it.
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-core.c | 4 +--- include/linux/i2c.h | 2 -- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index ad68ac00d910..2f0bc9529376 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -500,9 +500,7 @@ int i2c_use_client(struct i2c_client *client) return ret; if (client->flags & I2C_CLIENT_ALLOW_USE) { - if (client->flags & I2C_CLIENT_ALLOW_MULTIPLE_USE) - client->usage_count++; - else if (client->usage_count > 0) + if (client->usage_count > 0) goto busy; else client->usage_count++; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 3c16a8fb95f4..4487c5189747 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -251,8 +251,6 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data) /*flags for the client struct: */ #define I2C_CLIENT_ALLOW_USE 0x01 /* Client allows access */ -#define I2C_CLIENT_ALLOW_MULTIPLE_USE 0x02 /* Allow multiple access-locks */ - /* on an i2c_client */ #define I2C_CLIENT_PEC 0x04 /* Use Packet Error Checking */ #define I2C_CLIENT_TEN 0x10 /* we have a ten bit chip address */ /* Must equal I2C_M_TEN below */ -- cgit v1.2.3-71-gd317 From cde7859bda0d1124392b44e50aa11df99707e1d9 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 26 Nov 2005 21:00:54 +0100 Subject: [PATCH] i2c: Rework client usage count, 2 of 3 Make I2C_CLIENT_ALLOW_USE the default for all i2c clients. It doesn't hurt if the usage count is actually never used for any given driver, and allows for nice code simplifications in i2c-core. 
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-pxa/akita-ioexp.c | 1 - drivers/i2c/chips/rtc8564.c | 1 - drivers/i2c/i2c-core.c | 28 ++++++++++------------------ drivers/media/video/adv7170.c | 1 - drivers/media/video/adv7175.c | 1 - drivers/media/video/bt819.c | 1 - drivers/media/video/bt832.c | 1 - drivers/media/video/bt856.c | 1 - drivers/media/video/cs53l32a.c | 1 - drivers/media/video/cx25840/cx25840-core.c | 1 - drivers/media/video/em28xx/em28xx-i2c.c | 1 - drivers/media/video/msp3400.c | 1 - drivers/media/video/saa6588.c | 1 - drivers/media/video/saa7110.c | 1 - drivers/media/video/saa7111.c | 1 - drivers/media/video/saa7114.c | 1 - drivers/media/video/saa7115.c | 1 - drivers/media/video/saa711x.c | 1 - drivers/media/video/saa7127.c | 1 - drivers/media/video/saa7134/saa6752hs.c | 1 - drivers/media/video/saa7185.c | 1 - drivers/media/video/tda9887.c | 1 - drivers/media/video/tuner-core.c | 1 - drivers/media/video/tvaudio.c | 1 - drivers/media/video/tveeprom.c | 1 - drivers/media/video/tvp5150.c | 1 - drivers/media/video/vpx3220.c | 1 - drivers/media/video/wm8775.c | 1 - include/linux/i2c.h | 1 - 29 files changed, 10 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-pxa/akita-ioexp.c b/arch/arm/mach-pxa/akita-ioexp.c index 440ebb3c3db1..b6bff550c8e9 100644 --- a/arch/arm/mach-pxa/akita-ioexp.c +++ b/arch/arm/mach-pxa/akita-ioexp.c @@ -133,7 +133,6 @@ static struct i2c_driver max7310_i2c_driver = { static struct i2c_client max7310_template = { name: "akita-max7310", - flags: I2C_CLIENT_ALLOW_USE, driver: &max7310_i2c_driver, }; diff --git a/drivers/i2c/chips/rtc8564.c b/drivers/i2c/chips/rtc8564.c index e586f75dd024..07494d394381 100644 --- a/drivers/i2c/chips/rtc8564.c +++ b/drivers/i2c/chips/rtc8564.c @@ -155,7 +155,6 @@ static int rtc8564_attach(struct i2c_adapter *adap, int addr, int kind) strlcpy(new_client->name, "RTC8564", I2C_NAME_SIZE); i2c_set_clientdata(new_client, d); - 
new_client->flags = I2C_CLIENT_ALLOW_USE; new_client->addr = addr; new_client->adapter = adap; new_client->driver = &rtc8564_driver; diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 2f0bc9529376..d16b4998c4c2 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -419,8 +419,7 @@ int i2c_attach_client(struct i2c_client *client) } } - if (client->flags & I2C_CLIENT_ALLOW_USE) - client->usage_count = 0; + client->usage_count = 0; client->dev.parent = &client->adapter->dev; client->dev.driver = &client->driver->driver; @@ -443,8 +442,7 @@ int i2c_detach_client(struct i2c_client *client) struct i2c_adapter *adapter = client->adapter; int res = 0; - if ((client->flags & I2C_CLIENT_ALLOW_USE) - && (client->usage_count > 0)) { + if (client->usage_count > 0) { dev_warn(&client->dev, "Client [%s] still busy, " "can't detach\n", client->name); return -EBUSY; @@ -499,12 +497,9 @@ int i2c_use_client(struct i2c_client *client) if (ret) return ret; - if (client->flags & I2C_CLIENT_ALLOW_USE) { - if (client->usage_count > 0) - goto busy; - else - client->usage_count++; - } + if (client->usage_count > 0) + goto busy; + client->usage_count++; return 0; busy: @@ -514,16 +509,13 @@ int i2c_use_client(struct i2c_client *client) int i2c_release_client(struct i2c_client *client) { - if(client->flags & I2C_CLIENT_ALLOW_USE) { - if(client->usage_count>0) - client->usage_count--; - else { - pr_debug("i2c-core: %s used one too many times\n", - __FUNCTION__); - return -EPERM; - } + if (!client->usage_count) { + pr_debug("i2c-core: %s used one too many times\n", + __FUNCTION__); + return -EPERM; } + client->usage_count--; i2c_dec_use_client(client); return 0; diff --git a/drivers/media/video/adv7170.c b/drivers/media/video/adv7170.c index c4f2265167a2..622b1619a7e3 100644 --- a/drivers/media/video/adv7170.c +++ b/drivers/media/video/adv7170.c @@ -420,7 +420,6 @@ adv7170_detect_client (struct i2c_adapter *adapter, client->addr = address; client->adapter = 
adapter; client->driver = &i2c_driver_adv7170; - client->flags = I2C_CLIENT_ALLOW_USE; if ((client->addr == I2C_ADV7170 >> 1) || (client->addr == (I2C_ADV7170 >> 1) + 1)) { dname = adv7170_name; diff --git a/drivers/media/video/adv7175.c b/drivers/media/video/adv7175.c index 4fc08b17d4d0..d4859b445715 100644 --- a/drivers/media/video/adv7175.c +++ b/drivers/media/video/adv7175.c @@ -470,7 +470,6 @@ adv7175_detect_client (struct i2c_adapter *adapter, client->addr = address; client->adapter = adapter; client->driver = &i2c_driver_adv7175; - client->flags = I2C_CLIENT_ALLOW_USE; if ((client->addr == I2C_ADV7175 >> 1) || (client->addr == (I2C_ADV7175 >> 1) + 1)) { dname = adv7175_name; diff --git a/drivers/media/video/bt819.c b/drivers/media/video/bt819.c index 7bba69793b78..741e59af0991 100644 --- a/drivers/media/video/bt819.c +++ b/drivers/media/video/bt819.c @@ -535,7 +535,6 @@ bt819_detect_client (struct i2c_adapter *adapter, client->addr = address; client->adapter = adapter; client->driver = &i2c_driver_bt819; - client->flags = I2C_CLIENT_ALLOW_USE; decoder = kmalloc(sizeof(struct bt819), GFP_KERNEL); if (decoder == NULL) { diff --git a/drivers/media/video/bt832.c b/drivers/media/video/bt832.c index 0ba8652357e2..4ed13860b523 100644 --- a/drivers/media/video/bt832.c +++ b/drivers/media/video/bt832.c @@ -240,7 +240,6 @@ static struct i2c_driver driver = { static struct i2c_client client_template = { .name = "bt832", - .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, }; diff --git a/drivers/media/video/bt856.c b/drivers/media/video/bt856.c index 4c9acd1e2c70..d4bba8efac69 100644 --- a/drivers/media/video/bt856.c +++ b/drivers/media/video/bt856.c @@ -323,7 +323,6 @@ bt856_detect_client (struct i2c_adapter *adapter, client->addr = address; client->adapter = adapter; client->driver = &i2c_driver_bt856; - client->flags = I2C_CLIENT_ALLOW_USE; strlcpy(I2C_NAME(client), "bt856", sizeof(I2C_NAME(client))); encoder = kmalloc(sizeof(struct bt856), GFP_KERNEL); diff --git 
a/drivers/media/video/cs53l32a.c b/drivers/media/video/cs53l32a.c index fce5d89b7b15..f442ce3ba74b 100644 --- a/drivers/media/video/cs53l32a.c +++ b/drivers/media/video/cs53l32a.c @@ -154,7 +154,6 @@ static int cs53l32a_attach(struct i2c_adapter *adapter, int address, int kind) client->addr = address; client->adapter = adapter; client->driver = &i2c_driver; - client->flags = I2C_CLIENT_ALLOW_USE; snprintf(client->name, sizeof(client->name) - 1, "cs53l32a"); cs53l32a_info("chip found @ 0x%x (%s)\n", address << 1, adapter->name); diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c index c66bc147ee71..0b278abe879d 100644 --- a/drivers/media/video/cx25840/cx25840-core.c +++ b/drivers/media/video/cx25840/cx25840-core.c @@ -773,7 +773,6 @@ static int cx25840_detect_client(struct i2c_adapter *adapter, int address, client->addr = address; client->adapter = adapter; client->driver = &i2c_driver_cx25840; - client->flags = I2C_CLIENT_ALLOW_USE; snprintf(client->name, sizeof(client->name) - 1, "cx25840"); cx25840_dbg("detecting cx25840 client on address 0x%x\n", address << 1); diff --git a/drivers/media/video/em28xx/em28xx-i2c.c b/drivers/media/video/em28xx/em28xx-i2c.c index 7f5603054f02..d14bcf4ceaea 100644 --- a/drivers/media/video/em28xx/em28xx-i2c.c +++ b/drivers/media/video/em28xx/em28xx-i2c.c @@ -497,7 +497,6 @@ static struct i2c_adapter em28xx_adap_template = { static struct i2c_client em28xx_client_template = { .name = "em28xx internal", - .flags = I2C_CLIENT_ALLOW_USE, }; /* ----------------------------------------------------------- */ diff --git a/drivers/media/video/msp3400.c b/drivers/media/video/msp3400.c index 46328fb6fe80..c5e8ad3aac4a 100644 --- a/drivers/media/video/msp3400.c +++ b/drivers/media/video/msp3400.c @@ -1576,7 +1576,6 @@ static struct i2c_driver driver = { static struct i2c_client client_template = { .name = "(unset)", - .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, }; diff --git 
a/drivers/media/video/saa6588.c b/drivers/media/video/saa6588.c index 18a0b7143e8b..3d4076ca67c3 100644 --- a/drivers/media/video/saa6588.c +++ b/drivers/media/video/saa6588.c @@ -505,7 +505,6 @@ static struct i2c_driver driver = { static struct i2c_client client_template = { .name = "saa6588", - .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, }; diff --git a/drivers/media/video/saa7110.c b/drivers/media/video/saa7110.c index f266b35ceb35..8affa63c8b24 100644 --- a/drivers/media/video/saa7110.c +++ b/drivers/media/video/saa7110.c @@ -501,7 +501,6 @@ saa7110_detect_client (struct i2c_adapter *adapter, client->addr = address; client->adapter = adapter; client->driver = &i2c_driver_saa7110; - client->flags = I2C_CLIENT_ALLOW_USE; strlcpy(I2C_NAME(client), "saa7110", sizeof(I2C_NAME(client))); decoder = kmalloc(sizeof(struct saa7110), GFP_KERNEL); diff --git a/drivers/media/video/saa7111.c b/drivers/media/video/saa7111.c index 687beaf11adc..2b2204564514 100644 --- a/drivers/media/video/saa7111.c +++ b/drivers/media/video/saa7111.c @@ -518,7 +518,6 @@ saa7111_detect_client (struct i2c_adapter *adapter, client->addr = address; client->adapter = adapter; client->driver = &i2c_driver_saa7111; - client->flags = I2C_CLIENT_ALLOW_USE; strlcpy(I2C_NAME(client), "saa7111", sizeof(I2C_NAME(client))); decoder = kmalloc(sizeof(struct saa7111), GFP_KERNEL); diff --git a/drivers/media/video/saa7114.c b/drivers/media/video/saa7114.c index 4748cf0598c0..285f6c7a8f71 100644 --- a/drivers/media/video/saa7114.c +++ b/drivers/media/video/saa7114.c @@ -859,7 +859,6 @@ saa7114_detect_client (struct i2c_adapter *adapter, client->addr = address; client->adapter = adapter; client->driver = &i2c_driver_saa7114; - client->flags = I2C_CLIENT_ALLOW_USE; strlcpy(I2C_NAME(client), "saa7114", sizeof(I2C_NAME(client))); decoder = kmalloc(sizeof(struct saa7114), GFP_KERNEL); diff --git a/drivers/media/video/saa7115.c b/drivers/media/video/saa7115.c index b1079de938b7..79aadd2d408f 100644 --- 
a/drivers/media/video/saa7115.c +++ b/drivers/media/video/saa7115.c @@ -1270,7 +1270,6 @@ static int saa7115_attach(struct i2c_adapter *adapter, int address, int kind) client->addr = address; client->adapter = adapter; client->driver = &i2c_driver_saa7115; - client->flags = I2C_CLIENT_ALLOW_USE; snprintf(client->name, sizeof(client->name) - 1, "saa7115"); saa7115_dbg("detecting saa7115 client on address 0x%x\n", address << 1); diff --git a/drivers/media/video/saa711x.c b/drivers/media/video/saa711x.c index 734a70919080..44bfc047704c 100644 --- a/drivers/media/video/saa711x.c +++ b/drivers/media/video/saa711x.c @@ -494,7 +494,6 @@ saa711x_detect_client (struct i2c_adapter *adapter, client->addr = address; client->adapter = adapter; client->driver = &i2c_driver_saa711x; - client->flags = I2C_CLIENT_ALLOW_USE; strlcpy(I2C_NAME(client), "saa711x", sizeof(I2C_NAME(client))); decoder = kmalloc(sizeof(struct saa711x), GFP_KERNEL); if (decoder == NULL) { diff --git a/drivers/media/video/saa7127.c b/drivers/media/video/saa7127.c index a2fab9837507..1f4b41599445 100644 --- a/drivers/media/video/saa7127.c +++ b/drivers/media/video/saa7127.c @@ -719,7 +719,6 @@ static int saa7127_attach(struct i2c_adapter *adapter, int address, int kind) client->addr = address; client->adapter = adapter; client->driver = &i2c_driver_saa7127; - client->flags = I2C_CLIENT_ALLOW_USE; snprintf(client->name, sizeof(client->name) - 1, "saa7127"); saa7127_dbg("detecting saa7127 client on address 0x%x\n", address << 1); diff --git a/drivers/media/video/saa7134/saa6752hs.c b/drivers/media/video/saa7134/saa6752hs.c index 6fc298e0a03a..68206060c92d 100644 --- a/drivers/media/video/saa7134/saa6752hs.c +++ b/drivers/media/video/saa7134/saa6752hs.c @@ -608,7 +608,6 @@ static struct i2c_driver driver = { static struct i2c_client client_template = { .name = "saa6752hs", - .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, }; diff --git a/drivers/media/video/saa7185.c b/drivers/media/video/saa7185.c index 
e24aa16f2d8c..9f37585d3f73 100644 --- a/drivers/media/video/saa7185.c +++ b/drivers/media/video/saa7185.c @@ -415,7 +415,6 @@ saa7185_detect_client (struct i2c_adapter *adapter, client->addr = address; client->adapter = adapter; client->driver = &i2c_driver_saa7185; - client->flags = I2C_CLIENT_ALLOW_USE; strlcpy(I2C_NAME(client), "saa7185", sizeof(I2C_NAME(client))); encoder = kmalloc(sizeof(struct saa7185), GFP_KERNEL); diff --git a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c index 049b44e0767b..324f61bf714e 100644 --- a/drivers/media/video/tda9887.c +++ b/drivers/media/video/tda9887.c @@ -833,7 +833,6 @@ static struct i2c_driver driver = { static struct i2c_client client_template = { .name = "tda9887", - .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, }; diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c index 3c75121f6383..6328f0954e70 100644 --- a/drivers/media/video/tuner-core.c +++ b/drivers/media/video/tuner-core.c @@ -755,7 +755,6 @@ static struct i2c_driver driver = { }; static struct i2c_client client_template = { .name = "(tuner unset)", - .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, }; diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c index 3565f35be7a1..4f1f339283e0 100644 --- a/drivers/media/video/tvaudio.c +++ b/drivers/media/video/tvaudio.c @@ -1713,7 +1713,6 @@ static struct i2c_driver driver = { static struct i2c_client client_template = { .name = "(unset)", - .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, }; diff --git a/drivers/media/video/tveeprom.c b/drivers/media/video/tveeprom.c index 195bc51d4576..d833b651073a 100644 --- a/drivers/media/video/tveeprom.c +++ b/drivers/media/video/tveeprom.c @@ -751,7 +751,6 @@ tveeprom_detect_client(struct i2c_adapter *adapter, client->addr = address; client->adapter = adapter; client->driver = &i2c_driver_tveeprom; - client->flags = I2C_CLIENT_ALLOW_USE; snprintf(client->name, sizeof(client->name), "tveeprom"); 
i2c_attach_client(client); diff --git a/drivers/media/video/tvp5150.c b/drivers/media/video/tvp5150.c index 4f3ee2091611..3734554fc73b 100644 --- a/drivers/media/video/tvp5150.c +++ b/drivers/media/video/tvp5150.c @@ -714,7 +714,6 @@ static struct i2c_driver driver; static struct i2c_client client_template = { .name = "(unset)", - .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, }; diff --git a/drivers/media/video/vpx3220.c b/drivers/media/video/vpx3220.c index c66d28505bcd..54bc888c3891 100644 --- a/drivers/media/video/vpx3220.c +++ b/drivers/media/video/vpx3220.c @@ -631,7 +631,6 @@ vpx3220_detect_client (struct i2c_adapter *adapter, client->addr = address; client->adapter = adapter; client->driver = &vpx3220_i2c_driver; - client->flags = I2C_CLIENT_ALLOW_USE; /* Check for manufacture ID and part number */ if (kind < 0) { diff --git a/drivers/media/video/wm8775.c b/drivers/media/video/wm8775.c index 7b07717a3c67..527c2591749a 100644 --- a/drivers/media/video/wm8775.c +++ b/drivers/media/video/wm8775.c @@ -168,7 +168,6 @@ static int wm8775_attach(struct i2c_adapter *adapter, int address, int kind) client->addr = address; client->adapter = adapter; client->driver = &i2c_driver; - client->flags = I2C_CLIENT_ALLOW_USE; snprintf(client->name, sizeof(client->name) - 1, "wm8775"); wm8775_info("chip found @ 0x%x (%s)\n", address << 1, adapter->name); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 4487c5189747..8b4d4695de0e 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -250,7 +250,6 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data) } /*flags for the client struct: */ -#define I2C_CLIENT_ALLOW_USE 0x01 /* Client allows access */ #define I2C_CLIENT_PEC 0x04 /* Use Packet Error Checking */ #define I2C_CLIENT_TEN 0x10 /* we have a ten bit chip address */ /* Must equal I2C_M_TEN below */ -- cgit v1.2.3-71-gd317 From cf02df770228350254251fde520007a2709db785 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 26 Nov 
2005 21:03:41 +0100 Subject: [PATCH] i2c: Rework client usage count, 3 of 3 Do not limit the usage count of i2c clients to 1. In other words, change the client usage count behavior from the old I2C_CLIENT_ALLOW_USE to the old I2C_CLIENT_ALLOW_MULTIPLE_USE. The rationale is that no driver actually needs the limiting behavior, and the unlimiting behavior is slightly easier to implement. Update the documentation to reflect this change. Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- Documentation/i2c/porting-clients | 1 + drivers/i2c/i2c-core.c | 5 ----- include/linux/i2c.h | 4 +--- 3 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/Documentation/i2c/porting-clients b/Documentation/i2c/porting-clients index 64c610bf2fbc..6b07f23039d2 100644 --- a/Documentation/i2c/porting-clients +++ b/Documentation/i2c/porting-clients @@ -92,6 +92,7 @@ Technical changes: Drop client->id. Drop any 24RF08 corruption prevention you find, as this is now done at the i2c-core level, and doing it twice voids it. + Don't add I2C_CLIENT_ALLOW_USE to client->flags, it's the default now. * [Init] Limits must not be set by the driver (can be done later in user-space). 
Chip should not be reset default (although a module diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index d16b4998c4c2..a1c5dff85431 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -497,14 +497,9 @@ int i2c_use_client(struct i2c_client *client) if (ret) return ret; - if (client->usage_count > 0) - goto busy; client->usage_count++; return 0; - busy: - i2c_dec_use_client(client); - return -EBUSY; } int i2c_release_client(struct i2c_client *client) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 8b4d4695de0e..85c517a9b05b 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -307,9 +307,7 @@ extern struct i2c_client *i2c_get_client(int driver_id, int adapter_id, extern struct i2c_client *i2c_get_client(int,int,struct i2c_client *); to make sure that client-struct is valid and that it is okay to access the i2c-client. - returns -EACCES if client doesn't allow use (default) - returns -EBUSY if client doesn't allow multiple use (default) and - usage_count >0 */ + returns -ENODEV if client has gone in the meantime */ extern int i2c_use_client(struct i2c_client *); extern int i2c_release_client(struct i2c_client *); -- cgit v1.2.3-71-gd317 From 482c788ded0aa9710722eaf9cf60886d3b923218 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 26 Nov 2005 21:06:08 +0100 Subject: [PATCH] i2c: i2c_get_client is gone The i2c_get_client function doesn't exist anymore, so we shouldn't have a definition for it in i2c.h. 
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- include/linux/i2c.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 85c517a9b05b..a9cea62fd486 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -296,17 +296,8 @@ extern int i2c_del_driver(struct i2c_driver *); extern int i2c_attach_client(struct i2c_client *); extern int i2c_detach_client(struct i2c_client *); -/* New function: This is to get an i2c_client-struct for controlling the - client either by using i2c_control-function or having the - client-module export functions that can be used with the i2c_client - -struct. */ -extern struct i2c_client *i2c_get_client(int driver_id, int adapter_id, - struct i2c_client *prev); - -/* Should be used with new function - extern struct i2c_client *i2c_get_client(int,int,struct i2c_client *); - to make sure that client-struct is valid and that it is okay to access - the i2c-client. +/* Should be used to make sure that client-struct is valid and that it + is okay to access the i2c-client. returns -ENODEV if client has gone in the meantime */ extern int i2c_use_client(struct i2c_client *); extern int i2c_release_client(struct i2c_client *); -- cgit v1.2.3-71-gd317 From 35d8b2e6b8e86b0d5126f36613b5202d4eb978b6 Mon Sep 17 00:00:00 2001 From: Laurent Riffard Date: Sat, 26 Nov 2005 20:34:05 +0100 Subject: [PATCH] i2c: Drop i2c_driver.{owner,name}, 1 of 11 We should use the i2c_driver.driver's .name and .owner fields instead of the i2c_driver's ones. This patch updates the core of the i2c drivers: it removes .name and .owner fields from the struct i2c_driver and modifies various functions to use struct device fields instead. 
Signed-off-by: Laurent Riffard Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-isa.c | 4 +--- drivers/i2c/i2c-core.c | 22 +++++++++++----------- drivers/i2c/i2c-dev.c | 6 ++++-- include/linux/i2c.h | 5 +++-- 4 files changed, 19 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-isa.c b/drivers/i2c/busses/i2c-isa.c index 03672c9ca409..9f93fb85d06e 100644 --- a/drivers/i2c/busses/i2c-isa.c +++ b/drivers/i2c/busses/i2c-isa.c @@ -92,8 +92,6 @@ int i2c_isa_add_driver(struct i2c_driver *driver) int res; /* Add the driver to the list of i2c drivers in the driver core */ - driver->driver.name = driver->name; - driver->driver.owner = driver->owner; driver->driver.bus = &i2c_bus_type; driver->driver.probe = i2c_isa_device_probe; driver->driver.remove = i2c_isa_device_remove; @@ -124,7 +122,7 @@ int i2c_isa_del_driver(struct i2c_driver *driver) if ((res = driver->detach_client(client))) { dev_err(&isa_adapter.dev, "Failed, driver " "%s not unregistered!\n", - driver->name); + driver->driver.name); return res; } } diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index a1c5dff85431..4ce5f0f32fba 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -235,7 +235,8 @@ int i2c_del_adapter(struct i2c_adapter *adap) if (driver->detach_adapter) if ((res = driver->detach_adapter(adap))) { dev_err(&adap->dev, "detach_adapter failed " - "for driver [%s]\n", driver->name); + "for driver [%s]\n", + driver->driver.name); goto out_unlock; } } @@ -295,8 +296,6 @@ int i2c_add_driver(struct i2c_driver *driver) down(&core_lists); /* add the driver to the list of i2c drivers in the driver core */ - driver->driver.owner = driver->owner; - driver->driver.name = driver->name; driver->driver.bus = &i2c_bus_type; driver->driver.probe = i2c_device_probe; driver->driver.remove = i2c_device_remove; @@ -306,7 +305,7 @@ int i2c_add_driver(struct i2c_driver *driver) goto out_unlock; 
list_add_tail(&driver->list,&drivers); - pr_debug("i2c-core: driver [%s] registered\n", driver->name); + pr_debug("i2c-core: driver [%s] registered\n", driver->driver.name); /* now look for instances of driver on our adapters */ if (driver->attach_adapter) { @@ -344,7 +343,8 @@ int i2c_del_driver(struct i2c_driver *driver) if (driver->detach_adapter) { if ((res = driver->detach_adapter(adap))) { dev_err(&adap->dev, "detach_adapter failed " - "for driver [%s]\n", driver->name); + "for driver [%s]\n", + driver->driver.name); goto out_unlock; } } else { @@ -368,7 +368,7 @@ int i2c_del_driver(struct i2c_driver *driver) driver_unregister(&driver->driver); list_del(&driver->list); - pr_debug("i2c-core: driver [%s] unregistered\n", driver->name); + pr_debug("i2c-core: driver [%s] unregistered\n", driver->driver.name); out_unlock: up(&core_lists); @@ -473,10 +473,10 @@ int i2c_detach_client(struct i2c_client *client) static int i2c_inc_use_client(struct i2c_client *client) { - if (!try_module_get(client->driver->owner)) + if (!try_module_get(client->driver->driver.owner)) return -ENODEV; if (!try_module_get(client->adapter->owner)) { - module_put(client->driver->owner); + module_put(client->driver->driver.owner); return -ENODEV; } @@ -485,7 +485,7 @@ static int i2c_inc_use_client(struct i2c_client *client) static void i2c_dec_use_client(struct i2c_client *client) { - module_put(client->driver->owner); + module_put(client->driver->driver.owner); module_put(client->adapter->owner); } @@ -524,14 +524,14 @@ void i2c_clients_command(struct i2c_adapter *adap, unsigned int cmd, void *arg) down(&adap->clist_lock); list_for_each(item,&adap->clients) { client = list_entry(item, struct i2c_client, list); - if (!try_module_get(client->driver->owner)) + if (!try_module_get(client->driver->driver.owner)) continue; if (NULL != client->driver->command) { up(&adap->clist_lock); client->driver->command(client,cmd,arg); down(&adap->clist_lock); } - module_put(client->driver->owner); + 
module_put(client->driver->driver.owner); } up(&adap->clist_lock); } diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 9da51eb37c06..9715217a0343 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -481,8 +481,10 @@ static int i2cdev_command(struct i2c_client *client, unsigned int cmd, } static struct i2c_driver i2cdev_driver = { - .owner = THIS_MODULE, - .name = "dev_driver", + .driver = { + .owner = THIS_MODULE, + .name = "dev_driver", + }, .id = I2C_DRIVERID_I2CDEV, .attach_adapter = i2cdev_attach_adapter, .detach_adapter = i2cdev_detach_adapter, diff --git a/include/linux/i2c.h b/include/linux/i2c.h index a9cea62fd486..75aa18e865da 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -105,11 +105,12 @@ extern s32 i2c_smbus_read_i2c_block_data(struct i2c_client * client, * A driver is capable of handling one or more physical devices present on * I2C adapters. This information is used to inform the driver of adapter * events. + * + * The driver.owner field should be set to the module owner of this driver. + * The driver.name field should be set to the name of this driver. */ struct i2c_driver { - struct module *owner; - char name[32]; int id; unsigned int class; -- cgit v1.2.3-71-gd317 From de59cf9ed44f64991e52a9de6094729537f0420c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 6 Dec 2005 15:33:15 -0800 Subject: [PATCH] I2C: Make i2c_add_driver automatically set the proper module owner This prevents i2c drivers from messing up and forgetting to set the module owner of their driver. 
It also reduces the size of their drivers by one line :) Signed-off-by: Greg Kroah-Hartman Cc: Jean Delvare --- drivers/i2c/i2c-core.c | 5 +++-- include/linux/i2c.h | 7 ++++++- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 4ce5f0f32fba..c23443ee1b33 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -287,7 +287,7 @@ int i2c_del_adapter(struct i2c_adapter *adap) * chips. */ -int i2c_add_driver(struct i2c_driver *driver) +int i2c_register_driver(struct module *owner, struct i2c_driver *driver) { struct list_head *item; struct i2c_adapter *adapter; @@ -296,6 +296,7 @@ int i2c_add_driver(struct i2c_driver *driver) down(&core_lists); /* add the driver to the list of i2c drivers in the driver core */ + driver->driver.owner = owner; driver->driver.bus = &i2c_bus_type; driver->driver.probe = i2c_device_probe; driver->driver.remove = i2c_device_remove; @@ -319,6 +320,7 @@ int i2c_add_driver(struct i2c_driver *driver) up(&core_lists); return res; } +EXPORT_SYMBOL(i2c_register_driver); int i2c_del_driver(struct i2c_driver *driver) { @@ -1132,7 +1134,6 @@ EXPORT_SYMBOL_GPL(i2c_bus_type); EXPORT_SYMBOL(i2c_add_adapter); EXPORT_SYMBOL(i2c_del_adapter); -EXPORT_SYMBOL(i2c_add_driver); EXPORT_SYMBOL(i2c_del_driver); EXPORT_SYMBOL(i2c_attach_client); EXPORT_SYMBOL(i2c_detach_client); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 75aa18e865da..7863a59bd598 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -291,9 +291,14 @@ struct i2c_client_address_data { extern int i2c_add_adapter(struct i2c_adapter *); extern int i2c_del_adapter(struct i2c_adapter *); -extern int i2c_add_driver(struct i2c_driver *); +extern int i2c_register_driver(struct module *, struct i2c_driver *); extern int i2c_del_driver(struct i2c_driver *); +static inline int i2c_add_driver(struct i2c_driver *driver) +{ + return i2c_register_driver(THIS_MODULE, driver); +} + extern 
int i2c_attach_client(struct i2c_client *); extern int i2c_detach_client(struct i2c_client *); -- cgit v1.2.3-71-gd317 From 734a12a36676ad3b9418fa5a829c518afec02b8a Mon Sep 17 00:00:00 2001 From: Rudolf Marek Date: Sun, 18 Dec 2005 16:36:52 +0100 Subject: [PATCH] hwmon: add VRM/VID support for some VIA CPUs This patch adds the VIA CENTAUR CPUs to detection table. Table was updated to treat future Intel x86 CPUs as VRD10. Stepping field was added, because some VIA CPUs have different VRM specs across stepping. I changed the vrm type to u8 because all drivers use u8 anyway. Signed-off-by: Rudolf Marek Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/hwmon-vid.c | 69 ++++++++++++++++++++++++++++------------------- include/linux/hwmon-vid.h | 6 ++--- 2 files changed, 45 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c index 312769ad4dab..e497274916ce 100644 --- a/drivers/hwmon/hwmon-vid.c +++ b/drivers/hwmon/hwmon-vid.c @@ -49,20 +49,22 @@ . . . . 11110 = 0.800 V 11111 = 0.000 V (off) + + The 17 specification is in fact Intel Mobile Voltage Positioning - + (IMVP-II). You can find more information in the datasheet of Max1718 + http://www.maxim-ic.com/quick_view2.cfm/qv_pk/2452 + */ /* vrm is the VRM/VRD document version multiplied by 10. val is the 4-, 5- or 6-bit VID code. Returned value is in mV to avoid floating point in the kernel. */ -int vid_from_reg(int val, int vrm) +int vid_from_reg(int val, u8 vrm) { int vid; switch(vrm) { - case 0: - return 0; - case 100: /* VRD 10.0 */ if((val & 0x1f) == 0x1f) return 0; @@ -91,10 +93,16 @@ int vid_from_reg(int val, int vrm) case 84: /* VRM 8.4 */ val &= 0x0f; /* fall through */ - default: /* VRM 8.2 */ + case 82: /* VRM 8.2 */ return(val == 0x1f ? 0 : val & 0x10 ? 5100 - (val) * 100 : 2050 - (val) * 50); + case 17: /* Intel IMVP-II */ + return(val & 0x10 ? 
975 - (val & 0xF) * 25 : + 1750 - val * 50); + default: /* report 0 for unknown */ + printk(KERN_INFO "hwmon-vid: requested unknown VRM version\n"); + return 0; } } @@ -108,30 +116,36 @@ struct vrm_model { u8 vendor; u8 eff_family; u8 eff_model; - int vrm_type; + u8 eff_stepping; + u8 vrm_type; }; #define ANY 0xFF #ifdef CONFIG_X86 +/* the stepping parameter is highest acceptable stepping for current line */ + static struct vrm_model vrm_models[] = { - {X86_VENDOR_AMD, 0x6, ANY, 90}, /* Athlon Duron etc */ - {X86_VENDOR_AMD, 0xF, ANY, 24}, /* Athlon 64, Opteron */ - {X86_VENDOR_INTEL, 0x6, 0x9, 85}, /* 0.13um too */ - {X86_VENDOR_INTEL, 0x6, 0xB, 85}, /* Tualatin */ - {X86_VENDOR_INTEL, 0x6, ANY, 82}, /* any P6 */ - {X86_VENDOR_INTEL, 0x7, ANY, 0}, /* Itanium */ - {X86_VENDOR_INTEL, 0xF, 0x0, 90}, /* P4 */ - {X86_VENDOR_INTEL, 0xF, 0x1, 90}, /* P4 Willamette */ - {X86_VENDOR_INTEL, 0xF, 0x2, 90}, /* P4 Northwood */ - {X86_VENDOR_INTEL, 0xF, 0x3, 100}, /* P4 Prescott */ - {X86_VENDOR_INTEL, 0xF, 0x4, 100}, /* P4 Prescott */ - {X86_VENDOR_INTEL, 0x10,ANY, 0}, /* Itanium 2 */ - {X86_VENDOR_UNKNOWN, ANY, ANY, 0} /* stop here */ + {X86_VENDOR_AMD, 0x6, ANY, ANY, 90}, /* Athlon Duron etc */ + {X86_VENDOR_AMD, 0xF, ANY, ANY, 24}, /* Athlon 64, Opteron and above VRM 24 */ + {X86_VENDOR_INTEL, 0x6, 0x9, ANY, 85}, /* 0.13um too */ + {X86_VENDOR_INTEL, 0x6, 0xB, ANY, 85}, /* Tualatin */ + {X86_VENDOR_INTEL, 0x6, ANY, ANY, 82}, /* any P6 */ + {X86_VENDOR_INTEL, 0x7, ANY, ANY, 0}, /* Itanium */ + {X86_VENDOR_INTEL, 0xF, 0x0, ANY, 90}, /* P4 */ + {X86_VENDOR_INTEL, 0xF, 0x1, ANY, 90}, /* P4 Willamette */ + {X86_VENDOR_INTEL, 0xF, 0x2, ANY, 90}, /* P4 Northwood */ + {X86_VENDOR_INTEL, 0xF, ANY, ANY, 100}, /* Prescott and above assume VRD 10 */ + {X86_VENDOR_INTEL, 0x10, ANY, ANY, 0}, /* Itanium 2 */ + {X86_VENDOR_CENTAUR, 0x6, 0x7, ANY, 85}, /* Eden ESP/Ezra */ + {X86_VENDOR_CENTAUR, 0x6, 0x8, 0x7, 85}, /* Ezra T */ + {X86_VENDOR_CENTAUR, 0x6, 0x9, 0x7, 85}, /* Nemiah */ + 
{X86_VENDOR_CENTAUR, 0x6, 0x9, ANY, 17}, /* C3-M */ + {X86_VENDOR_UNKNOWN, ANY, ANY, ANY, 0} /* stop here */ }; -static int find_vrm(u8 eff_family, u8 eff_model, u8 vendor) +static u8 find_vrm(u8 eff_family, u8 eff_model, u8 eff_stepping, u8 vendor) { int i = 0; @@ -139,7 +153,8 @@ static int find_vrm(u8 eff_family, u8 eff_model, u8 vendor) if (vrm_models[i].vendor==vendor) if ((vrm_models[i].eff_family==eff_family) && ((vrm_models[i].eff_model==eff_model) || - (vrm_models[i].eff_model==ANY))) + (vrm_models[i].eff_model==ANY)) && + (eff_stepping <= vrm_models[i].eff_stepping)) return vrm_models[i].vrm_type; i++; } @@ -147,12 +162,11 @@ static int find_vrm(u8 eff_family, u8 eff_model, u8 vendor) return 0; } -int vid_which_vrm(void) +u8 vid_which_vrm(void) { struct cpuinfo_x86 *c = cpu_data; u32 eax; - u8 eff_family, eff_model; - int vrm_ret; + u8 eff_family, eff_model, eff_stepping, vrm_ret; if (c->x86 < 6) /* Any CPU with family lower than 6 */ return 0; /* doesn't have VID and/or CPUID */ @@ -160,20 +174,21 @@ int vid_which_vrm(void) eax = cpuid_eax(1); eff_family = ((eax & 0x00000F00)>>8); eff_model = ((eax & 0x000000F0)>>4); + eff_stepping = eax & 0xF; if (eff_family == 0xF) { /* use extended model & family */ eff_family += ((eax & 0x00F00000)>>20); eff_model += ((eax & 0x000F0000)>>16)<<4; } - vrm_ret = find_vrm(eff_family,eff_model,c->x86_vendor); + vrm_ret = find_vrm(eff_family, eff_model, eff_stepping, c->x86_vendor); if (vrm_ret == 0) printk(KERN_INFO "hwmon-vid: Unknown VRM version of your " "x86 CPU\n"); return vrm_ret; } -/* and now something completely different for the non-x86 world */ +/* and now for something completely different for the non-x86 world */ #else -int vid_which_vrm(void) +u8 vid_which_vrm(void) { printk(KERN_INFO "hwmon-vid: Unknown VRM version of your CPU\n"); return 0; diff --git a/include/linux/hwmon-vid.h b/include/linux/hwmon-vid.h index cd4b7a042b86..f346e4d5381c 100644 --- a/include/linux/hwmon-vid.h +++ 
b/include/linux/hwmon-vid.h @@ -23,14 +23,14 @@ #ifndef _LINUX_HWMON_VID_H #define _LINUX_HWMON_VID_H -int vid_from_reg(int val, int vrm); -int vid_which_vrm(void); +int vid_from_reg(int val, u8 vrm); +u8 vid_which_vrm(void); /* vrm is the VRM/VRD document version multiplied by 10. val is in mV to avoid floating point in the kernel. Returned value is the 4-, 5- or 6-bit VID code. Note that only VRM 9.x is supported for now. */ -static inline int vid_to_reg(int val, int vrm) +static inline int vid_to_reg(int val, u8 vrm) { switch (vrm) { case 91: /* VRM 9.1 */ -- cgit v1.2.3-71-gd317 From 04b4b8434a92b9ef127985113c0bd961957778b7 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 18 Dec 2005 16:42:35 +0100 Subject: [PATCH] i2c: driver ID list cleanups Cleanups to i2c driver ID list: * Remove mostly bogus comments about driver ID ranges. * Drop experimental driver IDs, as the concept is pretty broken. * Drop now unused IDs of non-I2C (ISA) drivers. * Drop a few more IDs which are no more used. 
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- include/linux/i2c-id.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index 006c81ef4d50..fb46f8d56999 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -25,12 +25,6 @@ /* * ---- Driver types ----------------------------------------------------- - * device id name + number function description, i2c address(es) - * - * Range 1000-1999 range is defined in sensors/sensors.h - * Range 0x100 - 0x1ff is for V4L2 Common Components - * Range 0xf000 - 0xffff is reserved for local experimentation, and should - * never be used in official drivers */ #define I2C_DRIVERID_MSP3400 1 @@ -110,13 +104,7 @@ #define I2C_DRIVERID_AKITAIOEXP 74 /* IO Expander on Sharp SL-C1000 */ #define I2C_DRIVERID_INFRARED 75 /* I2C InfraRed on Video boards */ -#define I2C_DRIVERID_EXP0 0xF0 /* experimental use id's */ -#define I2C_DRIVERID_EXP1 0xF1 -#define I2C_DRIVERID_EXP2 0xF2 -#define I2C_DRIVERID_EXP3 0xF3 - #define I2C_DRIVERID_I2CDEV 900 -#define I2C_DRIVERID_I2CPROC 901 #define I2C_DRIVERID_ARP 902 /* SMBus ARP Client */ #define I2C_DRIVERID_ALERT 903 /* SMBus Alert Responder Client */ @@ -131,15 +119,12 @@ #define I2C_DRIVERID_ADM1021 1008 #define I2C_DRIVERID_ADM9240 1009 #define I2C_DRIVERID_LTC1710 1010 -#define I2C_DRIVERID_SIS5595 1011 #define I2C_DRIVERID_ICSPLL 1012 #define I2C_DRIVERID_BT869 1013 #define I2C_DRIVERID_MAXILIFE 1014 #define I2C_DRIVERID_MATORB 1015 #define I2C_DRIVERID_GL520 1016 #define I2C_DRIVERID_THMC50 1017 -#define I2C_DRIVERID_DDCMON 1018 -#define I2C_DRIVERID_VIA686A 1019 #define I2C_DRIVERID_ADM1025 1020 #define I2C_DRIVERID_LM87 1021 #define I2C_DRIVERID_PCF8574 1022 @@ -151,21 +136,16 @@ #define I2C_DRIVERID_FSCPOS 1028 #define I2C_DRIVERID_FSCSCY 1029 #define I2C_DRIVERID_PCF8591 1030 -#define I2C_DRIVERID_SMSC47M1 1031 -#define I2C_DRIVERID_VT1211 1032 #define 
I2C_DRIVERID_LM92 1033 -#define I2C_DRIVERID_VT8231 1034 #define I2C_DRIVERID_SMARTBATT 1035 #define I2C_DRIVERID_BMCSENSORS 1036 #define I2C_DRIVERID_FS451 1037 -#define I2C_DRIVERID_W83627HF 1038 #define I2C_DRIVERID_LM85 1039 #define I2C_DRIVERID_LM83 1040 #define I2C_DRIVERID_LM90 1042 #define I2C_DRIVERID_ASB100 1043 #define I2C_DRIVERID_FSCHER 1046 #define I2C_DRIVERID_W83L785TS 1047 -#define I2C_DRIVERID_SMSC47B397 1050 /* * ---- Adapter types ---------------------------------------------------- -- cgit v1.2.3-71-gd317 From 7c72ccf09b6debe55b8e049377ad3183ed4f4cb3 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 18 Dec 2005 17:25:18 +0100 Subject: [PATCH] i2c: i2c-nforce2 add nforce4 MCP-04 device ID One more supported PCI ID for the i2c-nforce2 driver. Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- Documentation/i2c/busses/i2c-nforce2 | 3 ++- drivers/i2c/busses/i2c-nforce2.c | 2 ++ include/linux/pci_ids.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/i2c/busses/i2c-nforce2 b/Documentation/i2c/busses/i2c-nforce2 index e379e182e64f..d751282d9b2a 100644 --- a/Documentation/i2c/busses/i2c-nforce2 +++ b/Documentation/i2c/busses/i2c-nforce2 @@ -5,7 +5,8 @@ Supported adapters: * nForce2 Ultra 400 MCP 10de:0084 * nForce3 Pro150 MCP 10de:00D4 * nForce3 250Gb MCP 10de:00E4 - * nForce4 MCP 10de:0052 + * nForce4 MCP 10de:0052 + * nForce4 MCP-04 10de:0034 Datasheet: not publically available, but seems to be similar to the AMD-8111 SMBus 2.0 adapter. diff --git a/drivers/i2c/busses/i2c-nforce2.c b/drivers/i2c/busses/i2c-nforce2.c index 4d18e6e5f159..2d80eb26f688 100644 --- a/drivers/i2c/busses/i2c-nforce2.c +++ b/drivers/i2c/busses/i2c-nforce2.c @@ -30,6 +30,7 @@ nForce3 Pro150 MCP 00D4 nForce3 250Gb MCP 00E4 nForce4 MCP 0052 + nForce4 MCP-04 0034 This driver supports the 2 SMBuses that are included in the MCP of the nForce2/3/4 chipsets. 
@@ -257,6 +258,7 @@ static struct pci_device_id nforce2_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE3_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE3S_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE4_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SMBUS) }, { 0 } }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4f01710485cd..96a0403f61f6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -976,6 +976,7 @@ #define PCI_DEVICE_ID_NVIDIA_TNT_UNKNOWN 0x002a #define PCI_DEVICE_ID_NVIDIA_VTNT2 0x002C #define PCI_DEVICE_ID_NVIDIA_UVTNT2 0x002D +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SMBUS 0x0034 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE 0x0035 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA 0x0036 #define PCI_DEVICE_ID_NVIDIA_NVENET_10 0x0037 -- cgit v1.2.3-71-gd317 From 8ffdc6550c47f75ca4e6c9f30a2a89063e035cf2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Jan 2006 09:49:03 +0100 Subject: [BLOCK] add @uptodate to end_that_request_last() and @error to rq_end_io_fn() add @uptodate argument to end_that_request_last() and @error to rq_end_io_fn(). there's no generic way to pass error code to request completion function, making generic error handling of non-fs request difficult (rq->errors is driver-specific and each driver uses it differently). this patch adds @uptodate to end_that_request_last() and @error to rq_end_io_fn(). for fs requests, this doesn't really matter, so just using the same uptodate argument used in the last call to end_that_request_first() should suffice. imho, this can also help the generic command-carrying request jens is working on. 
Signed-off-by: tejun heo Signed-Off-By: Jens Axboe --- block/elevator.c | 2 +- block/ll_rw_blk.c | 22 +++++++++++++++------- drivers/block/DAC960.c | 2 +- drivers/block/cciss.c | 2 +- drivers/block/cpqarray.c | 2 +- drivers/block/floppy.c | 2 +- drivers/block/nbd.c | 2 +- drivers/block/sx8.c | 2 +- drivers/block/ub.c | 2 +- drivers/block/viodasd.c | 2 +- drivers/cdrom/cdu31a.c | 2 +- drivers/ide/ide-cd.c | 4 ++-- drivers/ide/ide-io.c | 6 +++--- drivers/message/i2o/i2o_block.c | 2 +- drivers/mmc/mmc_block.c | 4 ++-- drivers/s390/block/dasd.c | 2 +- drivers/s390/char/tape_block.c | 2 +- drivers/scsi/ide-scsi.c | 4 ++-- drivers/scsi/scsi_lib.c | 2 +- drivers/scsi/sd.c | 2 +- include/linux/blkdev.h | 6 +++--- 21 files changed, 42 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/block/elevator.c b/block/elevator.c index 6c3fc8a10bf2..85a11cee7d1c 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -498,7 +498,7 @@ struct request *elv_next_request(request_queue_t *q) blkdev_dequeue_request(rq); rq->flags |= REQ_QUIET; end_that_request_chunk(rq, 0, nr_bytes); - end_that_request_last(rq); + end_that_request_last(rq, 0); } else { printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, ret); diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index e02c88ca8fb5..8b1ae69bc5ac 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -344,7 +344,7 @@ EXPORT_SYMBOL(blk_queue_issue_flush_fn); /* * Cache flushing for ordered writes handling */ -static void blk_pre_flush_end_io(struct request *flush_rq) +static void blk_pre_flush_end_io(struct request *flush_rq, int error) { struct request *rq = flush_rq->end_io_data; request_queue_t *q = rq->q; @@ -362,7 +362,7 @@ static void blk_pre_flush_end_io(struct request *flush_rq) } } -static void blk_post_flush_end_io(struct request *flush_rq) +static void blk_post_flush_end_io(struct request *flush_rq, int error) { struct request *rq = flush_rq->end_io_data; request_queue_t *q = rq->q; @@ -2317,7 +2317,7 @@ 
EXPORT_SYMBOL(blk_rq_map_kern); */ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, struct request *rq, int at_head, - void (*done)(struct request *)) + rq_end_io_fn *done) { int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; @@ -2521,7 +2521,7 @@ EXPORT_SYMBOL(blk_put_request); * blk_end_sync_rq - executes a completion event on a request * @rq: request to complete */ -void blk_end_sync_rq(struct request *rq) +void blk_end_sync_rq(struct request *rq, int error) { struct completion *waiting = rq->waiting; @@ -3183,9 +3183,17 @@ EXPORT_SYMBOL(end_that_request_chunk); /* * queue lock must be held */ -void end_that_request_last(struct request *req) +void end_that_request_last(struct request *req, int uptodate) { struct gendisk *disk = req->rq_disk; + int error; + + /* + * extend uptodate bool to allow < 0 value to be direct io error + */ + error = 0; + if (end_io_error(uptodate)) + error = !uptodate ? -EIO : uptodate; if (unlikely(laptop_mode) && blk_fs_request(req)) laptop_io_completion(); @@ -3200,7 +3208,7 @@ void end_that_request_last(struct request *req) disk->in_flight--; } if (req->end_io) - req->end_io(req); + req->end_io(req, error); else __blk_put_request(req->q, req); } @@ -3212,7 +3220,7 @@ void end_request(struct request *req, int uptodate) if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { add_disk_randomness(req->rq_disk); blkdev_dequeue_request(req); - end_that_request_last(req); + end_that_request_last(req, uptodate); } } diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 70eaa5c7ac08..21097a39a057 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -3471,7 +3471,7 @@ static inline boolean DAC960_ProcessCompletedRequest(DAC960_Command_T *Command, if (!end_that_request_first(Request, UpToDate, Command->BlockCount)) { - end_that_request_last(Request); + end_that_request_last(Request, UpToDate); if (Command->Completion) { complete(Command->Completion); diff --git 
a/drivers/block/cciss.c b/drivers/block/cciss.c index c3441b3f086e..d2815b7a9150 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2310,7 +2310,7 @@ static inline void complete_command( ctlr_info_t *h, CommandList_struct *cmd, printk("Done with %p\n", cmd->rq); #endif /* CCISS_DEBUG */ - end_that_request_last(cmd->rq); + end_that_request_last(cmd->rq, status ? 1 : -EIO); cmd_free(h,cmd,1); } diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index cf1822a6361c..9bddb6874873 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -1036,7 +1036,7 @@ static inline void complete_command(cmdlist_t *cmd, int timeout) complete_buffers(cmd->rq->bio, ok); DBGPX(printk("Done with %p\n", cmd->rq);); - end_that_request_last(cmd->rq); + end_that_request_last(cmd->rq, ok ? 1 : -EIO); } /* diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index f7e765a1d313..a5b857c5c4b8 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2301,7 +2301,7 @@ static void floppy_end_request(struct request *req, int uptodate) add_disk_randomness(req->rq_disk); floppy_off((long)req->rq_disk->private_data); blkdev_dequeue_request(req); - end_that_request_last(req); + end_that_request_last(req, uptodate); /* We're done with the request */ current_req = NULL; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9e268ddedfbd..485345c8e632 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -136,7 +136,7 @@ static void nbd_end_request(struct request *req) spin_lock_irqsave(q->queue_lock, flags); if (!end_that_request_first(req, uptodate, req->nr_sectors)) { - end_that_request_last(req); + end_that_request_last(req, uptodate); } spin_unlock_irqrestore(q->queue_lock, flags); } diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 1ded3b433459..9251f4131b53 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -770,7 +770,7 @@ static inline void carm_end_request_queued(struct carm_host *host, rc = 
end_that_request_first(req, uptodate, req->hard_nr_sectors); assert(rc == 0); - end_that_request_last(req); + end_that_request_last(req, uptodate); rc = carm_put_request(host, crq); assert(rc == 0); diff --git a/drivers/block/ub.c b/drivers/block/ub.c index 10740a065088..a05fe5843e6c 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -951,7 +951,7 @@ static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd) static void ub_end_rq(struct request *rq, int uptodate) { end_that_request_first(rq, uptodate, rq->hard_nr_sectors); - end_that_request_last(rq); + end_that_request_last(rq, uptodate); } static int ub_rw_cmd_retry(struct ub_dev *sc, struct ub_lun *lun, diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index 2d518aa2720a..063f0304a163 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -305,7 +305,7 @@ static void viodasd_end_request(struct request *req, int uptodate, if (end_that_request_first(req, uptodate, num_sectors)) return; add_disk_randomness(req->rq_disk); - end_that_request_last(req); + end_that_request_last(req, uptodate); } /* diff --git a/drivers/cdrom/cdu31a.c b/drivers/cdrom/cdu31a.c index ac96de15d833..378e88d20757 100644 --- a/drivers/cdrom/cdu31a.c +++ b/drivers/cdrom/cdu31a.c @@ -1402,7 +1402,7 @@ static void do_cdu31a_request(request_queue_t * q) if (!end_that_request_first(req, 1, nblock)) { spin_lock_irq(q->queue_lock); blkdev_dequeue_request(req); - end_that_request_last(req); + end_that_request_last(req, 1); spin_unlock_irq(q->queue_lock); } continue; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 70aeb3a60120..d31117eb95aa 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -614,7 +614,7 @@ static void cdrom_end_request (ide_drive_t *drive, int uptodate) */ spin_lock_irqsave(&ide_lock, flags); end_that_request_chunk(failed, 0, failed->data_len); - end_that_request_last(failed); + end_that_request_last(failed, 0); spin_unlock_irqrestore(&ide_lock, flags); } @@ 
-1735,7 +1735,7 @@ end_request: spin_lock_irqsave(&ide_lock, flags); blkdev_dequeue_request(rq); - end_that_request_last(rq); + end_that_request_last(rq, 1); HWGROUP(drive)->rq = NULL; spin_unlock_irqrestore(&ide_lock, flags); return ide_stopped; diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index ecfafcdafea4..8435b44a700b 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -89,7 +89,7 @@ int __ide_end_request(ide_drive_t *drive, struct request *rq, int uptodate, blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; - end_that_request_last(rq); + end_that_request_last(rq, uptodate); ret = 0; } return ret; @@ -247,7 +247,7 @@ static void ide_complete_pm_request (ide_drive_t *drive, struct request *rq) } blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; - end_that_request_last(rq); + end_that_request_last(rq, 1); spin_unlock_irqrestore(&ide_lock, flags); } @@ -379,7 +379,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; rq->errors = err; - end_that_request_last(rq); + end_that_request_last(rq, !rq->errors); spin_unlock_irqrestore(&ide_lock, flags); } diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index f283b5bafdd3..4f522527b7ed 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -466,7 +466,7 @@ static void i2o_block_end_request(struct request *req, int uptodate, spin_lock_irqsave(q->queue_lock, flags); - end_that_request_last(req); + end_that_request_last(req, uptodate); if (likely(dev)) { dev->open_queue_depth--; diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c index abcf19116d70..8e380c14bf65 100644 --- a/drivers/mmc/mmc_block.c +++ b/drivers/mmc/mmc_block.c @@ -263,7 +263,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) */ add_disk_randomness(req->rq_disk); blkdev_dequeue_request(req); - end_that_request_last(req); + end_that_request_last(req, 1); } 
spin_unlock_irq(&md->lock); } while (ret); @@ -289,7 +289,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) add_disk_randomness(req->rq_disk); blkdev_dequeue_request(req); - end_that_request_last(req); + end_that_request_last(req, 0); spin_unlock_irq(&md->lock); return 0; diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 7008d32433bf..fdb61380c523 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1035,7 +1035,7 @@ dasd_end_request(struct request *req, int uptodate) if (end_that_request_first(req, uptodate, req->hard_nr_sectors)) BUG(); add_disk_randomness(req->rq_disk); - end_that_request_last(req); + end_that_request_last(req, uptodate); } /* diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c index 1efc9f21229e..559d51490e2f 100644 --- a/drivers/s390/char/tape_block.c +++ b/drivers/s390/char/tape_block.c @@ -78,7 +78,7 @@ tapeblock_end_request(struct request *req, int uptodate) { if (end_that_request_first(req, uptodate, req->hard_nr_sectors)) BUG(); - end_that_request_last(req); + end_that_request_last(req, uptodate); } static void diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 4cb1f3ed9100..3c688ef54660 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -1046,7 +1046,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd) /* kill current request */ blkdev_dequeue_request(req); - end_that_request_last(req); + end_that_request_last(req, 0); if (req->flags & REQ_SENSE) kfree(scsi->pc->buffer); kfree(scsi->pc); @@ -1056,7 +1056,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd) /* now nuke the drive queue */ while ((req = elv_next_request(drive->queue))) { blkdev_dequeue_request(req); - end_that_request_last(req); + end_that_request_last(req, 0); } HWGROUP(drive)->rq = NULL; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index a7f3f0c84db7..53551f1dfe21 100644 --- a/drivers/scsi/scsi_lib.c +++ 
b/drivers/scsi/scsi_lib.c @@ -791,7 +791,7 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate, spin_lock_irqsave(q->queue_lock, flags); if (blk_rq_tagged(req)) blk_queue_end_tag(q, req); - end_that_request_last(req); + end_that_request_last(req, uptodate); spin_unlock_irqrestore(q->queue_lock, flags); /* diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3d3ad7d1b779..d651150ee76d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -748,7 +748,7 @@ static void sd_end_flush(request_queue_t *q, struct request *flush_rq) * force journal abort of barriers */ end_that_request_first(rq, -EOPNOTSUPP, rq->hard_nr_sectors); - end_that_request_last(rq); + end_that_request_last(rq, -EOPNOTSUPP); } } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a18500d196e1..a0ce8c585165 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -102,7 +102,7 @@ void copy_io_context(struct io_context **pdst, struct io_context **psrc); void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); struct request; -typedef void (rq_end_io_fn)(struct request *); +typedef void (rq_end_io_fn)(struct request *, int); struct request_list { int count[2]; @@ -560,7 +560,7 @@ extern void register_disk(struct gendisk *dev); extern void generic_make_request(struct bio *bio); extern void blk_put_request(struct request *); extern void __blk_put_request(request_queue_t *, struct request *); -extern void blk_end_sync_rq(struct request *rq); +extern void blk_end_sync_rq(struct request *rq, int error); extern void blk_attempt_remerge(request_queue_t *, struct request *); extern struct request *blk_get_request(request_queue_t *, int, gfp_t); extern void blk_insert_request(request_queue_t *, struct request *, int, void *); @@ -614,7 +614,7 @@ static inline void blk_run_address_space(struct address_space *mapping) */ extern int end_that_request_first(struct request *, int, int); extern int end_that_request_chunk(struct request *, 
int, int); -extern void end_that_request_last(struct request *); +extern void end_that_request_last(struct request *, int); extern void end_request(struct request *req, int uptodate); /* -- cgit v1.2.3-71-gd317 From 797e7dbbee0a91fa1349192f18ad5c454997d876 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Jan 2006 09:51:03 +0100 Subject: [BLOCK] reimplement handling of barrier request Reimplement handling of barrier requests. * Flexible handling to deal with various capabilities of target devices. * Retry support for falling back. * Tagged queues which don't support ordered tag can do ordered. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/elevator.c | 84 +++++++---- block/ll_rw_blk.c | 384 ++++++++++++++++++++++++++++++----------------- include/linux/blkdev.h | 82 +++++++--- include/linux/elevator.h | 1 + 4 files changed, 359 insertions(+), 192 deletions(-) (limited to 'include/linux') diff --git a/block/elevator.c b/block/elevator.c index 85a11cee7d1c..39dcccc82ada 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -304,15 +304,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq) rq->flags &= ~REQ_STARTED; - /* - * if this is the flush, requeue the original instead and drop the flush - */ - if (rq->flags & REQ_BAR_FLUSH) { - clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); - rq = rq->end_io_data; - } - - __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); + __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE, 0); } static void elv_drain_elevator(request_queue_t *q) @@ -332,7 +324,18 @@ static void elv_drain_elevator(request_queue_t *q) void __elv_add_request(request_queue_t *q, struct request *rq, int where, int plug) { + struct list_head *pos; + unsigned ordseq; + + if (q->ordcolor) + rq->flags |= REQ_ORDERED_COLOR; + if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { + /* + * toggle ordered color + */ + q->ordcolor ^= 1; + /* * barriers implicitly indicate back insertion */ @@ -393,6 +396,30 @@ void 
__elv_add_request(request_queue_t *q, struct request *rq, int where, q->elevator->ops->elevator_add_req_fn(q, rq); break; + case ELEVATOR_INSERT_REQUEUE: + /* + * If ordered flush isn't in progress, we do front + * insertion; otherwise, requests should be requeued + * in ordseq order. + */ + rq->flags |= REQ_SOFTBARRIER; + + if (q->ordseq == 0) { + list_add(&rq->queuelist, &q->queue_head); + break; + } + + ordseq = blk_ordered_req_seq(rq); + + list_for_each(pos, &q->queue_head) { + struct request *pos_rq = list_entry_rq(pos); + if (ordseq <= blk_ordered_req_seq(pos_rq)) + break; + } + + list_add_tail(&rq->queuelist, pos); + break; + default: printk(KERN_ERR "%s: bad insertion point %d\n", __FUNCTION__, where); @@ -422,25 +449,16 @@ static inline struct request *__elv_next_request(request_queue_t *q) { struct request *rq; - if (unlikely(list_empty(&q->queue_head) && - !q->elevator->ops->elevator_dispatch_fn(q, 0))) - return NULL; - - rq = list_entry_rq(q->queue_head.next); - - /* - * if this is a barrier write and the device has to issue a - * flush sequence to support it, check how far we are - */ - if (blk_fs_request(rq) && blk_barrier_rq(rq)) { - BUG_ON(q->ordered == QUEUE_ORDERED_NONE); + while (1) { + while (!list_empty(&q->queue_head)) { + rq = list_entry_rq(q->queue_head.next); + if (blk_do_ordered(q, &rq)) + return rq; + } - if (q->ordered == QUEUE_ORDERED_FLUSH && - !blk_barrier_preflush(rq)) - rq = blk_start_pre_flush(q, rq); + if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) + return NULL; } - - return rq; } struct request *elv_next_request(request_queue_t *q) @@ -593,7 +611,21 @@ void elv_completed_request(request_queue_t *q, struct request *rq) * request is released from the driver, io must be done */ if (blk_account_rq(rq)) { + struct request *first_rq = list_entry_rq(q->queue_head.next); + q->in_flight--; + + /* + * Check if the queue is waiting for fs requests to be + * drained for flush sequence. 
+ */ + if (q->ordseq && q->in_flight == 0 && + blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && + blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) { + blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); + q->request_fn(q); + } + if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) e->ops->elevator_completed_req_fn(q, rq); } diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 65c4efc02adf..91d3b4828c49 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -290,8 +290,8 @@ static inline void rq_init(request_queue_t *q, struct request *rq) /** * blk_queue_ordered - does this queue support ordered writes - * @q: the request queue - * @flag: see below + * @q: the request queue + * @ordered: one of QUEUE_ORDERED_* * * Description: * For journalled file systems, doing ordered writes on a commit @@ -300,28 +300,30 @@ static inline void rq_init(request_queue_t *q, struct request *rq) * feature should call this function and indicate so. * **/ -void blk_queue_ordered(request_queue_t *q, int flag) -{ - switch (flag) { - case QUEUE_ORDERED_NONE: - if (q->flush_rq) - kmem_cache_free(request_cachep, q->flush_rq); - q->flush_rq = NULL; - q->ordered = flag; - break; - case QUEUE_ORDERED_TAG: - q->ordered = flag; - break; - case QUEUE_ORDERED_FLUSH: - q->ordered = flag; - if (!q->flush_rq) - q->flush_rq = kmem_cache_alloc(request_cachep, - GFP_KERNEL); - break; - default: - printk("blk_queue_ordered: bad value %d\n", flag); - break; +int blk_queue_ordered(request_queue_t *q, unsigned ordered, + prepare_flush_fn *prepare_flush_fn) +{ + if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && + prepare_flush_fn == NULL) { + printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n"); + return -EINVAL; + } + + if (ordered != QUEUE_ORDERED_NONE && + ordered != QUEUE_ORDERED_DRAIN && + ordered != QUEUE_ORDERED_DRAIN_FLUSH && + ordered != QUEUE_ORDERED_DRAIN_FUA && + ordered != QUEUE_ORDERED_TAG && + ordered != QUEUE_ORDERED_TAG_FLUSH && + ordered != 
QUEUE_ORDERED_TAG_FUA) { + printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); + return -EINVAL; } + + q->next_ordered = ordered; + q->prepare_flush_fn = prepare_flush_fn; + + return 0; } EXPORT_SYMBOL(blk_queue_ordered); @@ -346,167 +348,265 @@ EXPORT_SYMBOL(blk_queue_issue_flush_fn); /* * Cache flushing for ordered writes handling */ -static void blk_pre_flush_end_io(struct request *flush_rq, int error) +inline unsigned blk_ordered_cur_seq(request_queue_t *q) { - struct request *rq = flush_rq->end_io_data; - request_queue_t *q = rq->q; - - elv_completed_request(q, flush_rq); - - rq->flags |= REQ_BAR_PREFLUSH; - - if (!flush_rq->errors) - elv_requeue_request(q, rq); - else { - q->end_flush_fn(q, flush_rq); - clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); - q->request_fn(q); - } + if (!q->ordseq) + return 0; + return 1 << ffz(q->ordseq); } -static void blk_post_flush_end_io(struct request *flush_rq, int error) +unsigned blk_ordered_req_seq(struct request *rq) { - struct request *rq = flush_rq->end_io_data; request_queue_t *q = rq->q; - elv_completed_request(q, flush_rq); + BUG_ON(q->ordseq == 0); - rq->flags |= REQ_BAR_POSTFLUSH; + if (rq == &q->pre_flush_rq) + return QUEUE_ORDSEQ_PREFLUSH; + if (rq == &q->bar_rq) + return QUEUE_ORDSEQ_BAR; + if (rq == &q->post_flush_rq) + return QUEUE_ORDSEQ_POSTFLUSH; - q->end_flush_fn(q, flush_rq); - clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); - q->request_fn(q); + if ((rq->flags & REQ_ORDERED_COLOR) == + (q->orig_bar_rq->flags & REQ_ORDERED_COLOR)) + return QUEUE_ORDSEQ_DRAIN; + else + return QUEUE_ORDSEQ_DONE; } -struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq) +void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error) { - struct request *flush_rq = q->flush_rq; - - BUG_ON(!blk_barrier_rq(rq)); + struct request *rq; + int uptodate; - if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags)) - return NULL; + if (error && !q->orderr) + q->orderr = error; - rq_init(q, 
flush_rq); - flush_rq->elevator_private = NULL; - flush_rq->flags = REQ_BAR_FLUSH; - flush_rq->rq_disk = rq->rq_disk; - flush_rq->rl = NULL; + BUG_ON(q->ordseq & seq); + q->ordseq |= seq; - /* - * prepare_flush returns 0 if no flush is needed, just mark both - * pre and post flush as done in that case - */ - if (!q->prepare_flush_fn(q, flush_rq)) { - rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH; - clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); - return rq; - } + if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) + return; /* - * some drivers dequeue requests right away, some only after io - * completion. make sure the request is dequeued. + * Okay, sequence complete. */ - if (!list_empty(&rq->queuelist)) - blkdev_dequeue_request(rq); + rq = q->orig_bar_rq; + uptodate = q->orderr ? q->orderr : 1; - flush_rq->end_io_data = rq; - flush_rq->end_io = blk_pre_flush_end_io; + q->ordseq = 0; - __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); - return flush_rq; + end_that_request_first(rq, uptodate, rq->hard_nr_sectors); + end_that_request_last(rq, uptodate); } -static void blk_start_post_flush(request_queue_t *q, struct request *rq) +static void pre_flush_end_io(struct request *rq, int error) { - struct request *flush_rq = q->flush_rq; + elv_completed_request(rq->q, rq); + blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); +} - BUG_ON(!blk_barrier_rq(rq)); +static void bar_end_io(struct request *rq, int error) +{ + elv_completed_request(rq->q, rq); + blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); +} - rq_init(q, flush_rq); - flush_rq->elevator_private = NULL; - flush_rq->flags = REQ_BAR_FLUSH; - flush_rq->rq_disk = rq->rq_disk; - flush_rq->rl = NULL; +static void post_flush_end_io(struct request *rq, int error) +{ + elv_completed_request(rq->q, rq); + blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); +} - if (q->prepare_flush_fn(q, flush_rq)) { - flush_rq->end_io_data = rq; - flush_rq->end_io = blk_post_flush_end_io; +static 
void queue_flush(request_queue_t *q, unsigned which) +{ + struct request *rq; + rq_end_io_fn *end_io; - __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); - q->request_fn(q); + if (which == QUEUE_ORDERED_PREFLUSH) { + rq = &q->pre_flush_rq; + end_io = pre_flush_end_io; + } else { + rq = &q->post_flush_rq; + end_io = post_flush_end_io; } + + rq_init(q, rq); + rq->flags = REQ_HARDBARRIER; + rq->elevator_private = NULL; + rq->rq_disk = q->bar_rq.rq_disk; + rq->rl = NULL; + rq->end_io = end_io; + q->prepare_flush_fn(q, rq); + + __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); } -static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq, - int sectors) +static inline struct request *start_ordered(request_queue_t *q, + struct request *rq) { - if (sectors > rq->nr_sectors) - sectors = rq->nr_sectors; + q->bi_size = 0; + q->orderr = 0; + q->ordered = q->next_ordered; + q->ordseq |= QUEUE_ORDSEQ_STARTED; + + /* + * Prep proxy barrier request. + */ + blkdev_dequeue_request(rq); + q->orig_bar_rq = rq; + rq = &q->bar_rq; + rq_init(q, rq); + rq->flags = bio_data_dir(q->orig_bar_rq->bio); + rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; + rq->elevator_private = NULL; + rq->rl = NULL; + init_request_from_bio(rq, q->orig_bar_rq->bio); + rq->end_io = bar_end_io; + + /* + * Queue ordered sequence. As we stack them at the head, we + * need to queue in reverse order. Note that we rely on that + * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs + * request gets inbetween ordered sequence. 
+ */ + if (q->ordered & QUEUE_ORDERED_POSTFLUSH) + queue_flush(q, QUEUE_ORDERED_POSTFLUSH); + else + q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; + + __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); + + if (q->ordered & QUEUE_ORDERED_PREFLUSH) { + queue_flush(q, QUEUE_ORDERED_PREFLUSH); + rq = &q->pre_flush_rq; + } else + q->ordseq |= QUEUE_ORDSEQ_PREFLUSH; - rq->nr_sectors -= sectors; - return rq->nr_sectors; + if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0) + q->ordseq |= QUEUE_ORDSEQ_DRAIN; + else + rq = NULL; + + return rq; } -static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq, - int sectors, int queue_locked) +int blk_do_ordered(request_queue_t *q, struct request **rqp) { - if (q->ordered != QUEUE_ORDERED_FLUSH) - return 0; - if (!blk_fs_request(rq) || !blk_barrier_rq(rq)) - return 0; - if (blk_barrier_postflush(rq)) - return 0; + struct request *rq = *rqp, *allowed_rq; + int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); - if (!blk_check_end_barrier(q, rq, sectors)) { - unsigned long flags = 0; + if (!q->ordseq) { + if (!is_barrier) + return 1; - if (!queue_locked) - spin_lock_irqsave(q->queue_lock, flags); + if (q->next_ordered != QUEUE_ORDERED_NONE) { + *rqp = start_ordered(q, rq); + return 1; + } else { + /* + * This can happen when the queue switches to + * ORDERED_NONE while this request is on it. 
+ */ + blkdev_dequeue_request(rq); + end_that_request_first(rq, -EOPNOTSUPP, + rq->hard_nr_sectors); + end_that_request_last(rq, -EOPNOTSUPP); + *rqp = NULL; + return 0; + } + } - blk_start_post_flush(q, rq); + if (q->ordered & QUEUE_ORDERED_TAG) { + if (is_barrier && rq != &q->bar_rq) + *rqp = NULL; + return 1; + } - if (!queue_locked) - spin_unlock_irqrestore(q->queue_lock, flags); + switch (blk_ordered_cur_seq(q)) { + case QUEUE_ORDSEQ_PREFLUSH: + allowed_rq = &q->pre_flush_rq; + break; + case QUEUE_ORDSEQ_BAR: + allowed_rq = &q->bar_rq; + break; + case QUEUE_ORDSEQ_POSTFLUSH: + allowed_rq = &q->post_flush_rq; + break; + default: + allowed_rq = NULL; + break; } + if (rq != allowed_rq && + (blk_fs_request(rq) || rq == &q->pre_flush_rq || + rq == &q->post_flush_rq)) + *rqp = NULL; + return 1; } -/** - * blk_complete_barrier_rq - complete possible barrier request - * @q: the request queue for the device - * @rq: the request - * @sectors: number of sectors to complete - * - * Description: - * Used in driver end_io handling to determine whether to postpone - * completion of a barrier request until a post flush has been done. This - * is the unlocked variant, used if the caller doesn't already hold the - * queue lock. - **/ -int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors) +static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error) { - return __blk_complete_barrier_rq(q, rq, sectors, 0); + request_queue_t *q = bio->bi_private; + struct bio_vec *bvec; + int i; + + /* + * This is dry run, restore bio_sector and size. We'll finish + * this request again with the original bi_end_io after an + * error occurs or post flush is complete. 
+ */ + q->bi_size += bytes; + + if (bio->bi_size) + return 1; + + /* Rewind bvec's */ + bio->bi_idx = 0; + bio_for_each_segment(bvec, bio, i) { + bvec->bv_len += bvec->bv_offset; + bvec->bv_offset = 0; + } + + /* Reset bio */ + set_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_size = q->bi_size; + bio->bi_sector -= (q->bi_size >> 9); + q->bi_size = 0; + + return 0; } -EXPORT_SYMBOL(blk_complete_barrier_rq); -/** - * blk_complete_barrier_rq_locked - complete possible barrier request - * @q: the request queue for the device - * @rq: the request - * @sectors: number of sectors to complete - * - * Description: - * See blk_complete_barrier_rq(). This variant must be used if the caller - * holds the queue lock. - **/ -int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq, - int sectors) +static inline int ordered_bio_endio(struct request *rq, struct bio *bio, + unsigned int nbytes, int error) { - return __blk_complete_barrier_rq(q, rq, sectors, 1); + request_queue_t *q = rq->q; + bio_end_io_t *endio; + void *private; + + if (&q->bar_rq != rq) + return 0; + + /* + * Okay, this is the barrier request in progress, dry finish it. 
+ */ + if (error && !q->orderr) + q->orderr = error; + + endio = bio->bi_end_io; + private = bio->bi_private; + bio->bi_end_io = flush_dry_bio_endio; + bio->bi_private = q; + + bio_endio(bio, nbytes, error); + + bio->bi_end_io = endio; + bio->bi_private = private; + + return 1; } -EXPORT_SYMBOL(blk_complete_barrier_rq_locked); /** * blk_queue_bounce_limit - set bounce buffer limit for queue @@ -1047,6 +1147,7 @@ static const char * const rq_flags[] = { "REQ_SORTED", "REQ_SOFTBARRIER", "REQ_HARDBARRIER", + "REQ_FUA", "REQ_CMD", "REQ_NOMERGE", "REQ_STARTED", @@ -1066,6 +1167,7 @@ static const char * const rq_flags[] = { "REQ_PM_SUSPEND", "REQ_PM_RESUME", "REQ_PM_SHUTDOWN", + "REQ_ORDERED_COLOR", }; void blk_dump_rq_flags(struct request *rq, char *msg) @@ -1643,8 +1745,6 @@ void blk_cleanup_queue(request_queue_t * q) if (q->queue_tags) __blk_queue_free_tags(q); - blk_queue_ordered(q, QUEUE_ORDERED_NONE); - kmem_cache_free(requestq_cachep, q); } @@ -2714,7 +2814,7 @@ static int __make_request(request_queue_t *q, struct bio *bio) spin_lock_prefetch(q->queue_lock); barrier = bio_barrier(bio); - if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) { + if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { err = -EOPNOTSUPP; goto end_io; } @@ -3075,7 +3175,8 @@ static int __end_that_request_first(struct request *req, int uptodate, if (nr_bytes >= bio->bi_size) { req->bio = bio->bi_next; nbytes = bio->bi_size; - bio_endio(bio, nbytes, error); + if (!ordered_bio_endio(req, bio, nbytes, error)) + bio_endio(bio, nbytes, error); next_idx = 0; bio_nbytes = 0; } else { @@ -3130,7 +3231,8 @@ static int __end_that_request_first(struct request *req, int uptodate, * if the request wasn't completed, update state */ if (bio_nbytes) { - bio_endio(bio, bio_nbytes, error); + if (!ordered_bio_endio(req, bio, bio_nbytes, error)) + bio_endio(bio, bio_nbytes, error); bio->bi_idx += next_idx; bio_iovec(bio)->bv_offset += nr_bytes; bio_iovec(bio)->bv_len -= nr_bytes; diff 
--git a/include/linux/blkdev.h b/include/linux/blkdev.h index a0ce8c585165..15db0f112d0a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -207,6 +207,7 @@ enum rq_flag_bits { __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ __REQ_HARDBARRIER, /* may not be passed by drive either */ + __REQ_FUA, /* forced unit access */ __REQ_CMD, /* is a regular fs rw request */ __REQ_NOMERGE, /* don't touch this for merging */ __REQ_STARTED, /* drive already may have started this one */ @@ -230,9 +231,7 @@ enum rq_flag_bits { __REQ_PM_SUSPEND, /* suspend request */ __REQ_PM_RESUME, /* resume request */ __REQ_PM_SHUTDOWN, /* shutdown request */ - __REQ_BAR_PREFLUSH, /* barrier pre-flush done */ - __REQ_BAR_POSTFLUSH, /* barrier post-flush */ - __REQ_BAR_FLUSH, /* rq is the flush request */ + __REQ_ORDERED_COLOR, /* is before or after barrier */ __REQ_NR_BITS, /* stops here */ }; @@ -241,6 +240,7 @@ enum rq_flag_bits { #define REQ_SORTED (1 << __REQ_SORTED) #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) +#define REQ_FUA (1 << __REQ_FUA) #define REQ_CMD (1 << __REQ_CMD) #define REQ_NOMERGE (1 << __REQ_NOMERGE) #define REQ_STARTED (1 << __REQ_STARTED) @@ -260,9 +260,7 @@ enum rq_flag_bits { #define REQ_PM_SUSPEND (1 << __REQ_PM_SUSPEND) #define REQ_PM_RESUME (1 << __REQ_PM_RESUME) #define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN) -#define REQ_BAR_PREFLUSH (1 << __REQ_BAR_PREFLUSH) -#define REQ_BAR_POSTFLUSH (1 << __REQ_BAR_POSTFLUSH) -#define REQ_BAR_FLUSH (1 << __REQ_BAR_FLUSH) +#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) /* * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME @@ -292,8 +290,7 @@ struct bio_vec; typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *); typedef void (activity_fn) (void *data, int rw); typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *); -typedef int 
(prepare_flush_fn) (request_queue_t *, struct request *); -typedef void (end_flush_fn) (request_queue_t *, struct request *); +typedef void (prepare_flush_fn) (request_queue_t *, struct request *); enum blk_queue_state { Queue_down, @@ -335,7 +332,6 @@ struct request_queue activity_fn *activity_fn; issue_flush_fn *issue_flush_fn; prepare_flush_fn *prepare_flush_fn; - end_flush_fn *end_flush_fn; /* * Dispatch queue sorting @@ -420,14 +416,11 @@ struct request_queue /* * reserved for flush operations */ - struct request *flush_rq; - unsigned char ordered; -}; - -enum { - QUEUE_ORDERED_NONE, - QUEUE_ORDERED_TAG, - QUEUE_ORDERED_FLUSH, + unsigned int ordered, next_ordered, ordseq; + int orderr, ordcolor; + struct request pre_flush_rq, bar_rq, post_flush_rq; + struct request *orig_bar_rq; + unsigned int bi_size; }; #define RQ_INACTIVE (-1) @@ -445,12 +438,51 @@ enum { #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ -#define QUEUE_FLAG_FLUSH 9 /* doing barrier flush sequence */ + +enum { + /* + * Hardbarrier is supported with one of the following methods. 
+ * + * NONE : hardbarrier unsupported + * DRAIN : ordering by draining is enough + * DRAIN_FLUSH : ordering by draining w/ pre and post flushes + * DRAIN_FUA : ordering by draining w/ pre flush and FUA write + * TAG : ordering by tag is enough + * TAG_FLUSH : ordering by tag w/ pre and post flushes + * TAG_FUA : ordering by tag w/ pre flush and FUA write + */ + QUEUE_ORDERED_NONE = 0x00, + QUEUE_ORDERED_DRAIN = 0x01, + QUEUE_ORDERED_TAG = 0x02, + + QUEUE_ORDERED_PREFLUSH = 0x10, + QUEUE_ORDERED_POSTFLUSH = 0x20, + QUEUE_ORDERED_FUA = 0x40, + + QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, + QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, + QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, + QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, + + /* + * Ordered operation sequence + */ + QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ + QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ + QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ + QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ + QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ + QUEUE_ORDSEQ_DONE = 0x20, +}; #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) -#define blk_queue_flushing(q) test_bit(QUEUE_FLAG_FLUSH, &(q)->queue_flags) +#define blk_queue_flushing(q) ((q)->ordseq) #define blk_fs_request(rq) ((rq)->flags & REQ_CMD) #define blk_pc_request(rq) ((rq)->flags & REQ_BLOCK_PC) @@ -466,8 +498,7 @@ enum { #define blk_sorted_rq(rq) ((rq)->flags & REQ_SORTED) #define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER) -#define blk_barrier_preflush(rq) ((rq)->flags & 
REQ_BAR_PREFLUSH) -#define blk_barrier_postflush(rq) ((rq)->flags & REQ_BAR_POSTFLUSH) +#define blk_fua_rq(rq) ((rq)->flags & REQ_FUA) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) @@ -665,11 +696,12 @@ extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn); extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *); extern void blk_queue_dma_alignment(request_queue_t *, int); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern void blk_queue_ordered(request_queue_t *, int); +extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *); extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *); -extern struct request *blk_start_pre_flush(request_queue_t *,struct request *); -extern int blk_complete_barrier_rq(request_queue_t *, struct request *, int); -extern int blk_complete_barrier_rq_locked(request_queue_t *, struct request *, int); +extern int blk_do_ordered(request_queue_t *, struct request **); +extern unsigned blk_ordered_cur_seq(request_queue_t *); +extern unsigned blk_ordered_req_seq(struct request *); +extern void blk_ordered_complete_seq(request_queue_t *, unsigned, int); extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index a74c27e460ba..fb80fa44c4dd 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -130,6 +130,7 @@ extern int elv_try_last_merge(request_queue_t *, struct bio *); #define ELEVATOR_INSERT_FRONT 1 #define ELEVATOR_INSERT_BACK 2 #define ELEVATOR_INSERT_SORT 3 +#define ELEVATOR_INSERT_REQUEUE 4 /* * return values from elevator_may_queue_fn -- cgit v1.2.3-71-gd317 From 9a3dccc42556537a48f39ee9a9e7ab90a933f766 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Jan 2006 09:56:18 +0100 Subject: [BLOCK] add FUA support to libata Signed-off-by: 
Tejun Heo Signed-off-by: Jens Axboe --- drivers/scsi/libata-core.c | 31 +++++++++++++++++++++++++------ drivers/scsi/libata-scsi.c | 32 ++++++++++++++++++++++++++------ drivers/scsi/libata.h | 4 +++- include/linux/ata.h | 6 +++++- include/linux/libata.h | 3 ++- 5 files changed, 61 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 9ea102587914..bdfb0a88cd6f 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -562,16 +562,28 @@ static const u8 ata_rw_cmds[] = { ATA_CMD_WRITE_MULTI, ATA_CMD_READ_MULTI_EXT, ATA_CMD_WRITE_MULTI_EXT, + 0, + 0, + 0, + ATA_CMD_WRITE_MULTI_FUA_EXT, /* pio */ ATA_CMD_PIO_READ, ATA_CMD_PIO_WRITE, ATA_CMD_PIO_READ_EXT, ATA_CMD_PIO_WRITE_EXT, + 0, + 0, + 0, + 0, /* dma */ ATA_CMD_READ, ATA_CMD_WRITE, ATA_CMD_READ_EXT, - ATA_CMD_WRITE_EXT + ATA_CMD_WRITE_EXT, + 0, + 0, + 0, + ATA_CMD_WRITE_FUA_EXT }; /** @@ -584,25 +596,32 @@ static const u8 ata_rw_cmds[] = { * LOCKING: * caller. */ -void ata_rwcmd_protocol(struct ata_queued_cmd *qc) +int ata_rwcmd_protocol(struct ata_queued_cmd *qc) { struct ata_taskfile *tf = &qc->tf; struct ata_device *dev = qc->dev; + u8 cmd; - int index, lba48, write; + int index, fua, lba48, write; + fua = (tf->flags & ATA_TFLAG_FUA) ? 4 : 0; lba48 = (tf->flags & ATA_TFLAG_LBA48) ? 2 : 0; write = (tf->flags & ATA_TFLAG_WRITE) ? 1 : 0; if (dev->flags & ATA_DFLAG_PIO) { tf->protocol = ATA_PROT_PIO; - index = dev->multi_count ? 0 : 4; + index = dev->multi_count ? 
0 : 8; } else { tf->protocol = ATA_PROT_DMA; - index = 8; + index = 16; } - tf->command = ata_rw_cmds[index + lba48 + write]; + cmd = ata_rw_cmds[index + fua + lba48 + write]; + if (cmd) { + tf->command = cmd; + return 0; + } + return -1; } static const char * const xfer_mode_str[] = { diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index e0439be4b573..2c644cbb6e9c 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -1080,11 +1080,13 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, const u8 *scsicm scsicmd[0] == WRITE_16) tf->flags |= ATA_TFLAG_WRITE; - /* Calculate the SCSI LBA and transfer length. */ + /* Calculate the SCSI LBA, transfer length and FUA. */ switch (scsicmd[0]) { case READ_10: case WRITE_10: scsi_10_lba_len(scsicmd, &block, &n_block); + if (unlikely(scsicmd[1] & (1 << 3))) + tf->flags |= ATA_TFLAG_FUA; break; case READ_6: case WRITE_6: @@ -1099,6 +1101,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, const u8 *scsicm case READ_16: case WRITE_16: scsi_16_lba_len(scsicmd, &block, &n_block); + if (unlikely(scsicmd[1] & (1 << 3))) + tf->flags |= ATA_TFLAG_FUA; break; default: DPRINTK("no-byte command\n"); @@ -1142,7 +1146,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, const u8 *scsicm tf->device |= (block >> 24) & 0xf; } - ata_rwcmd_protocol(qc); + if (unlikely(ata_rwcmd_protocol(qc) < 0)) + goto invalid_fld; qc->nsect = n_block; tf->nsect = n_block & 0xff; @@ -1160,7 +1165,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, const u8 *scsicm if ((block >> 28) || (n_block > 256)) goto out_of_range; - ata_rwcmd_protocol(qc); + if (unlikely(ata_rwcmd_protocol(qc) < 0)) + goto invalid_fld; /* Convert LBA to CHS */ track = (u32)block / dev->sectors; @@ -1695,6 +1701,7 @@ static unsigned int ata_msense_rw_recovery(u8 **ptr_io, const u8 *last) unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf, unsigned int buflen) { + 
struct ata_device *dev = args->dev; u8 *scsicmd = args->cmd->cmnd, *p, *last; const u8 sat_blk_desc[] = { 0, 0, 0, 0, /* number of blocks: sat unspecified */ @@ -1703,6 +1710,7 @@ unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf, }; u8 pg, spg; unsigned int ebd, page_control, six_byte, output_len, alloc_len, minlen; + u8 dpofua; VPRINTK("ENTER\n"); @@ -1771,9 +1779,17 @@ unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf, if (minlen < 1) return 0; + + dpofua = 0; + if (ata_id_has_fua(args->id) && dev->flags & ATA_DFLAG_LBA48 && + (!(dev->flags & ATA_DFLAG_PIO) || dev->multi_count)) + dpofua = 1 << 4; + if (six_byte) { output_len--; rbuf[0] = output_len; + if (minlen > 2) + rbuf[2] |= dpofua; if (ebd) { if (minlen > 3) rbuf[3] = sizeof(sat_blk_desc); @@ -1786,6 +1802,8 @@ unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf, rbuf[0] = output_len >> 8; if (minlen > 1) rbuf[1] = output_len; + if (minlen > 3) + rbuf[3] |= dpofua; if (ebd) { if (minlen > 7) rbuf[7] = sizeof(sat_blk_desc); @@ -2446,7 +2464,7 @@ int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) if (xlat_func) ata_scsi_translate(ap, dev, cmd, done, xlat_func); else - ata_scsi_simulate(dev->id, cmd, done); + ata_scsi_simulate(ap, dev, cmd, done); } else ata_scsi_translate(ap, dev, cmd, done, atapi_xlat); @@ -2469,14 +2487,16 @@ out_unlock: * spin_lock_irqsave(host_set lock) */ -void ata_scsi_simulate(u16 *id, +void ata_scsi_simulate(struct ata_port *ap, struct ata_device *dev, struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) { struct ata_scsi_args args; const u8 *scsicmd = cmd->cmnd; - args.id = id; + args.ap = ap; + args.dev = dev; + args.id = dev->id; args.cmd = cmd; args.done = done; diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h index 251e53bdc6e0..e03ce48b7b4b 100644 --- a/drivers/scsi/libata.h +++ b/drivers/scsi/libata.h @@ -32,6 +32,8 @@ #define DRV_VERSION "1.20" /* must be exactly four 
chars */ struct ata_scsi_args { + struct ata_port *ap; + struct ata_device *dev; u16 *id; struct scsi_cmnd *cmd; void (*done)(struct scsi_cmnd *); @@ -41,7 +43,7 @@ struct ata_scsi_args { extern int atapi_enabled; extern struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap, struct ata_device *dev); -extern void ata_rwcmd_protocol(struct ata_queued_cmd *qc); +extern int ata_rwcmd_protocol(struct ata_queued_cmd *qc); extern void ata_qc_free(struct ata_queued_cmd *qc); extern int ata_qc_issue(struct ata_queued_cmd *qc); extern int ata_check_atapi_dma(struct ata_queued_cmd *qc); diff --git a/include/linux/ata.h b/include/linux/ata.h index d2873b732bb1..f63dad4165b1 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -129,6 +129,7 @@ enum { ATA_CMD_READ_EXT = 0x25, ATA_CMD_WRITE = 0xCA, ATA_CMD_WRITE_EXT = 0x35, + ATA_CMD_WRITE_FUA_EXT = 0x3D, ATA_CMD_PIO_READ = 0x20, ATA_CMD_PIO_READ_EXT = 0x24, ATA_CMD_PIO_WRITE = 0x30, @@ -137,6 +138,7 @@ enum { ATA_CMD_READ_MULTI_EXT = 0x29, ATA_CMD_WRITE_MULTI = 0xC5, ATA_CMD_WRITE_MULTI_EXT = 0x39, + ATA_CMD_WRITE_MULTI_FUA_EXT = 0xCE, ATA_CMD_SET_FEATURES = 0xEF, ATA_CMD_PACKET = 0xA0, ATA_CMD_VERIFY = 0x40, @@ -192,6 +194,7 @@ enum { ATA_TFLAG_DEVICE = (1 << 2), /* enable r/w to device reg */ ATA_TFLAG_WRITE = (1 << 3), /* data dir: host->dev==1 (write) */ ATA_TFLAG_LBA = (1 << 4), /* enable LBA */ + ATA_TFLAG_FUA = (1 << 5), /* enable FUA */ }; enum ata_tf_protocols { @@ -245,7 +248,8 @@ struct ata_taskfile { #define ata_id_is_sata(id) ((id)[93] == 0) #define ata_id_rahead_enabled(id) ((id)[85] & (1 << 6)) #define ata_id_wcache_enabled(id) ((id)[85] & (1 << 5)) -#define ata_id_has_flush(id) ((id)[83] & (1 << 12)) +#define ata_id_has_fua(id) ((id)[84] & (1 << 6)) +#define ata_id_has_flush(id) ((id)[83] & (1 << 12)) #define ata_id_has_flush_ext(id) ((id)[83] & (1 << 13)) #define ata_id_has_lba48(id) ((id)[83] & (1 << 10)) #define ata_id_has_wcache(id) ((id)[82] & (1 << 5)) diff --git a/include/linux/libata.h 
b/include/linux/libata.h index e828e172ccbf..6db2c0845731 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -480,7 +480,8 @@ extern u8 ata_bmdma_status(struct ata_port *ap); extern void ata_bmdma_irq_clear(struct ata_port *ap); extern void ata_qc_complete(struct ata_queued_cmd *qc); extern void ata_eng_timeout(struct ata_port *ap); -extern void ata_scsi_simulate(u16 *id, struct scsi_cmnd *cmd, +extern void ata_scsi_simulate(struct ata_port *ap, struct ata_device *dev, + struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)); extern int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, -- cgit v1.2.3-71-gd317 From 15fc858a0067c800f410a24551a7b461978abf0b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 6 Jan 2006 10:00:50 +0100 Subject: [BLOCK] Correct blk_execute_rq_nowait() prototype --- include/linux/blkdev.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 15db0f112d0a..fb0985377421 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -613,8 +613,7 @@ extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_io extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *, - struct request *, int, - void (*done)(struct request *)); + struct request *, int, rq_end_io_fn *); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) { -- cgit v1.2.3-71-gd317 From 4b2f0260c74324abca76ccaa42d426af163125e7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 6 Jan 2006 00:09:47 -0800 Subject: [PATCH] nbd: fix TX/RX race condition Janos Haar of First NetCenter Bt. reported numerous crashes involving the NBD driver. With his help, this was tracked down to bogus bio vectors which in turn was the result of a race condition between the receive/transmit routines in the NBD driver. 
The bug manifests itself like this: CPU0 CPU1 do_nbd_request add req to queuelist nbd_send_request send req head for each bio kmap send nbd_read_stat nbd_find_request nbd_end_request kunmap When CPU1 finishes nbd_end_request, the request and all its associated bio's are freed. So when CPU0 calls kunmap whose argument is derived from the last bio, it may crash. Under normal circumstances, the race occurs only on the last bio. However, if an error is encountered on the remote NBD server (such as an incorrect magic number in the request), or if there were a bug in the server, it is possible for the nbd_end_request to occur any time after the request's addition to the queuelist. The following patch fixes this problem by making sure that requests are not added to the queuelist until after they have completed transmission. In order for the receiving side to be ready for responses involving requests still being transmitted, the patch introduces the concept of the active request. When a response matches the current active request, its processing is delayed until after the transmission has come to a stop. This has been tested by Janos and it has been successful in curing this race condition. From: Herbert Xu Here is an updated patch which removes the active_req wait in nbd_clear_queue and the associated memory barrier. I've also clarified this in the comment.
Signed-off-by: Herbert Xu Cc: Cc: Paul Clements Signed-off-by: Herbert Xu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/nbd.c | 122 ++++++++++++++++++++++++++-------------------------- include/linux/nbd.h | 8 ++++ 2 files changed, 68 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9e268ddedfbd..d5c8ee7d9815 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -54,11 +54,15 @@ #include #include #include +#include +#include +#include #include #include #include +#include #include #include @@ -230,14 +234,6 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req) request.len = htonl(size); memcpy(request.handle, &req, sizeof(req)); - down(&lo->tx_lock); - - if (!sock || !lo->sock) { - printk(KERN_ERR "%s: Attempted send on closed socket\n", - lo->disk->disk_name); - goto error_out; - } - dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%luB)\n", lo->disk->disk_name, req, nbdcmd_to_ascii(nbd_cmd(req)), @@ -276,11 +272,9 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req) } } } - up(&lo->tx_lock); return 0; error_out: - up(&lo->tx_lock); return 1; } @@ -289,9 +283,14 @@ static struct request *nbd_find_request(struct nbd_device *lo, char *handle) struct request *req; struct list_head *tmp; struct request *xreq; + int err; memcpy(&xreq, handle, sizeof(xreq)); + err = wait_event_interruptible(lo->active_wq, lo->active_req != xreq); + if (unlikely(err)) + goto out; + spin_lock(&lo->queue_lock); list_for_each(tmp, &lo->queue_head) { req = list_entry(tmp, struct request, queuelist); @@ -302,7 +301,11 @@ static struct request *nbd_find_request(struct nbd_device *lo, char *handle) return req; } spin_unlock(&lo->queue_lock); - return NULL; + + err = -ENOENT; + +out: + return ERR_PTR(err); } static inline int sock_recv_bvec(struct socket *sock, struct bio_vec *bvec) @@ -331,7 +334,11 @@ static struct request 
*nbd_read_stat(struct nbd_device *lo) goto harderror; } req = nbd_find_request(lo, reply.handle); - if (req == NULL) { + if (unlikely(IS_ERR(req))) { + result = PTR_ERR(req); + if (result != -ENOENT) + goto harderror; + printk(KERN_ERR "%s: Unexpected reply (%p)\n", lo->disk->disk_name, reply.handle); result = -EBADR; @@ -395,19 +402,24 @@ static void nbd_clear_que(struct nbd_device *lo) BUG_ON(lo->magic != LO_MAGIC); - do { - req = NULL; - spin_lock(&lo->queue_lock); - if (!list_empty(&lo->queue_head)) { - req = list_entry(lo->queue_head.next, struct request, queuelist); - list_del_init(&req->queuelist); - } - spin_unlock(&lo->queue_lock); - if (req) { - req->errors++; - nbd_end_request(req); - } - } while (req); + /* + * Because we have set lo->sock to NULL under the tx_lock, all + * modifications to the list must have completed by now. For + * the same reason, the active_req must be NULL. + * + * As a consequence, we don't need to take the spin lock while + * purging the list here. + */ + BUG_ON(lo->sock); + BUG_ON(lo->active_req); + + while (!list_empty(&lo->queue_head)) { + req = list_entry(lo->queue_head.next, struct request, + queuelist); + list_del_init(&req->queuelist); + req->errors++; + nbd_end_request(req); + } } /* @@ -435,11 +447,6 @@ static void do_nbd_request(request_queue_t * q) BUG_ON(lo->magic != LO_MAGIC); - if (!lo->file) { - printk(KERN_ERR "%s: Request when not-ready\n", - lo->disk->disk_name); - goto error_out; - } nbd_cmd(req) = NBD_CMD_READ; if (rq_data_dir(req) == WRITE) { nbd_cmd(req) = NBD_CMD_WRITE; @@ -453,32 +460,34 @@ static void do_nbd_request(request_queue_t * q) req->errors = 0; spin_unlock_irq(q->queue_lock); - spin_lock(&lo->queue_lock); - - if (!lo->file) { - spin_unlock(&lo->queue_lock); - printk(KERN_ERR "%s: failed between accept and semaphore, file lost\n", - lo->disk->disk_name); + down(&lo->tx_lock); + if (unlikely(!lo->sock)) { + up(&lo->tx_lock); + printk(KERN_ERR "%s: Attempted send on closed socket\n", + 
lo->disk->disk_name); req->errors++; nbd_end_request(req); spin_lock_irq(q->queue_lock); continue; } - list_add(&req->queuelist, &lo->queue_head); - spin_unlock(&lo->queue_lock); + lo->active_req = req; if (nbd_send_req(lo, req) != 0) { printk(KERN_ERR "%s: Request send failed\n", lo->disk->disk_name); - if (nbd_find_request(lo, (char *)&req) != NULL) { - /* we still own req */ - req->errors++; - nbd_end_request(req); - } else /* we're racing with nbd_clear_que */ - printk(KERN_DEBUG "nbd: can't find req\n"); + req->errors++; + nbd_end_request(req); + } else { + spin_lock(&lo->queue_lock); + list_add(&req->queuelist, &lo->queue_head); + spin_unlock(&lo->queue_lock); } + lo->active_req = NULL; + up(&lo->tx_lock); + wake_up_all(&lo->active_wq); + spin_lock_irq(q->queue_lock); continue; @@ -529,17 +538,10 @@ static int nbd_ioctl(struct inode *inode, struct file *file, down(&lo->tx_lock); lo->sock = NULL; up(&lo->tx_lock); - spin_lock(&lo->queue_lock); file = lo->file; lo->file = NULL; - spin_unlock(&lo->queue_lock); nbd_clear_que(lo); - spin_lock(&lo->queue_lock); - if (!list_empty(&lo->queue_head)) { - printk(KERN_ERR "nbd: disconnect: some requests are in progress -> please try again.\n"); - error = -EBUSY; - } - spin_unlock(&lo->queue_lock); + BUG_ON(!list_empty(&lo->queue_head)); if (file) fput(file); return error; @@ -598,24 +600,19 @@ static int nbd_ioctl(struct inode *inode, struct file *file, lo->sock = NULL; } up(&lo->tx_lock); - spin_lock(&lo->queue_lock); file = lo->file; lo->file = NULL; - spin_unlock(&lo->queue_lock); nbd_clear_que(lo); printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name); if (file) fput(file); return lo->harderror; case NBD_CLEAR_QUE: - down(&lo->tx_lock); - if (lo->sock) { - up(&lo->tx_lock); - return 0; /* probably should be error, but that would - * break "nbd-client -d", so just return 0 */ - } - up(&lo->tx_lock); - nbd_clear_que(lo); + /* + * This is for compatibility only. 
The queue is always cleared + * by NBD_DO_IT or NBD_CLEAR_SOCK. + */ + BUG_ON(!lo->sock && !list_empty(&lo->queue_head)); return 0; case NBD_PRINT_DEBUG: printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n", @@ -688,6 +685,7 @@ static int __init nbd_init(void) spin_lock_init(&nbd_dev[i].queue_lock); INIT_LIST_HEAD(&nbd_dev[i].queue_head); init_MUTEX(&nbd_dev[i].tx_lock); + init_waitqueue_head(&nbd_dev[i].active_wq); nbd_dev[i].blksize = 1024; nbd_dev[i].bytesize = 0x7ffffc00ULL << 10; /* 2TB */ disk->major = NBD_MAJOR; diff --git a/include/linux/nbd.h b/include/linux/nbd.h index 090e210e98f0..f95d51fae733 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -37,18 +37,26 @@ enum { /* userspace doesn't need the nbd_device structure */ #ifdef __KERNEL__ +#include + /* values for flags field */ #define NBD_READ_ONLY 0x0001 #define NBD_WRITE_NOCHK 0x0002 +struct request; + struct nbd_device { int flags; int harderror; /* Code of hard error */ struct socket * sock; struct file * file; /* If == NULL, device is not ready, yet */ int magic; + spinlock_t queue_lock; struct list_head queue_head;/* Requests are added here... */ + struct request *active_req; + wait_queue_head_t active_wq; + struct semaphore tx_lock; struct gendisk *disk; int blksize; -- cgit v1.2.3-71-gd317 From d7339071f6a8b50101d7ba327926b770f22d5d8b Mon Sep 17 00:00:00 2001 From: Hans Reiser Date: Fri, 6 Jan 2006 00:10:36 -0800 Subject: [PATCH] reiser4: vfs: add truncate_inode_pages_range() This patch makes truncate_inode_pages_range from truncate_inode_pages. truncate_inode_pages became a one-liner call to truncate_inode_pages_range. Reiser4 needs truncate_inode_pages_ranges because it tries to keep correspondence between existences of metadata pointing to data pages and pages to which those metadata point to. So, when metadata of certain part of file is removed from filesystem tree, only pages of corresponding range are to be truncated. 
(Needed by the madvise(MADV_REMOVE) patch) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ mm/truncate.c | 44 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 39 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a06a84d347fb..92acae9f1f4c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -896,6 +896,8 @@ extern unsigned long do_brk(unsigned long, unsigned long); /* filemap.c */ extern unsigned long page_unuse(struct page *); extern void truncate_inode_pages(struct address_space *, loff_t); +extern void truncate_inode_pages_range(struct address_space *, + loff_t lstart, loff_t lend); /* generic vm_area_ops exported for stackable file systems */ extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *); diff --git a/mm/truncate.c b/mm/truncate.c index 9173ab500604..7dee32745901 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -82,12 +82,15 @@ invalidate_complete_page(struct address_space *mapping, struct page *page) } /** - * truncate_inode_pages - truncate *all* the pages from an offset + * truncate_inode_pages - truncate range of pages specified by start and + * end byte offsets * @mapping: mapping to truncate * @lstart: offset from which to truncate + * @lend: offset to which to truncate * - * Truncate the page cache at a set offset, removing the pages that are beyond - * that offset (and zeroing out partial pages). + * Truncate the page cache, removing the pages that are between + * specified offsets (and zeroing out partial page + * (if lstart is not page aligned)). * * Truncate takes two passes - the first pass is nonblocking. It will not * block on page locks and it will not block on writeback. The second pass @@ -101,12 +104,12 @@ invalidate_complete_page(struct address_space *mapping, struct page *page) * We pass down the cache-hot hint to the page freeing code. 
Even if the * mapping is large, it is probably the case that the final pages are the most * recently touched, and freeing happens in ascending file offset order. - * - * Called under (and serialised by) inode->i_sem. */ -void truncate_inode_pages(struct address_space *mapping, loff_t lstart) +void truncate_inode_pages_range(struct address_space *mapping, + loff_t lstart, loff_t lend) { const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; + pgoff_t end; const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); struct pagevec pvec; pgoff_t next; @@ -115,13 +118,22 @@ void truncate_inode_pages(struct address_space *mapping, loff_t lstart) if (mapping->nrpages == 0) return; + BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); + end = (lend >> PAGE_CACHE_SHIFT); + pagevec_init(&pvec, 0); next = start; - while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { + while (next <= end && + pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; pgoff_t page_index = page->index; + if (page_index > end) { + next = page_index; + break; + } + if (page_index > next) next = page_index; next++; @@ -157,9 +169,15 @@ void truncate_inode_pages(struct address_space *mapping, loff_t lstart) next = start; continue; } + if (pvec.pages[0]->index > end) { + pagevec_release(&pvec); + break; + } for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; + if (page->index > end) + break; lock_page(page); wait_on_page_writeback(page); if (page->index > next) @@ -171,7 +189,19 @@ void truncate_inode_pages(struct address_space *mapping, loff_t lstart) pagevec_release(&pvec); } } +EXPORT_SYMBOL(truncate_inode_pages_range); +/** + * truncate_inode_pages - truncate *all* the pages from an offset + * @mapping: mapping to truncate + * @lstart: offset from which to truncate + * + * Called under (and serialised by) inode->i_sem. 
+ */ +void truncate_inode_pages(struct address_space *mapping, loff_t lstart) +{ + truncate_inode_pages_range(mapping, lstart, (loff_t)-1); +} EXPORT_SYMBOL(truncate_inode_pages); /** -- cgit v1.2.3-71-gd317 From f6b3ec238d12c8cc6cc71490c6e3127988460349 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Fri, 6 Jan 2006 00:10:38 -0800 Subject: [PATCH] madvise(MADV_REMOVE): remove pages from tmpfs shm backing store Here is the patch to implement madvise(MADV_REMOVE) - which frees up a given range of pages & its associated backing store. Current implementation supports only shmfs/tmpfs and other filesystems return -ENOSYS. "Some app allocates large tmpfs files, then when some task quits and some client disconnect, some memory can be released. However the only way to release tmpfs-swap is to MADV_REMOVE". - Andrea Arcangeli Databases want to use this feature to drop a section of their bufferpool (shared memory segments) - without writing back to disk/swap space. This feature is also useful for supporting hot-plug memory on UML. Concerns raised by Andrew Morton: - "We have no plan for holepunching! If we _do_ have such a plan (or might in the future) then what would the API look like? I think sys_holepunch(fd, start, len), so we should start out with that." - Using madvise is very weird, because people will ask "why do I need to mmap my file before I can stick a hole in it?" - None of the other madvise operations call into the filesystem in this manner. A broad question is: is this capability an MM operation or a filesystem operation? truncate, for example, is a filesystem operation which sometimes has MM side-effects. madvise is an mm operation and with this patch, it gains FS side-effects, only they're really, really significant ones." Comments: - Andrea suggested the fs operation too but then it's more efficient to have it as a mm operation with fs side effects, because they don't immediately know fd and physical offset of the range.
It's possible to fixup in userland and to use the fs operation but it's more expensive, the vmas are already in the kernel and we can use them. Short term plan & Future Direction: - We seem to need this interface only for shmfs/tmpfs files in the short term. We have to add hooks into the filesystem for correctness and completeness. This is what this patch does. - In the future, plan is to support both fs and mmap apis also. This also involves (other) filesystem specific functions to be implemented. - Current patch doesn't support VM_NONLINEAR - which can be addressed in the future. Signed-off-by: Badari Pulavarty Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: Michael Kerrisk Cc: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/mman.h | 1 + include/asm-arm/mman.h | 1 + include/asm-arm26/mman.h | 1 + include/asm-cris/mman.h | 1 + include/asm-frv/mman.h | 1 + include/asm-h8300/mman.h | 1 + include/asm-i386/mman.h | 1 + include/asm-ia64/mman.h | 1 + include/asm-m32r/mman.h | 1 + include/asm-m68k/mman.h | 1 + include/asm-mips/mman.h | 1 + include/asm-parisc/mman.h | 1 + include/asm-powerpc/mman.h | 1 + include/asm-s390/mman.h | 1 + include/asm-sh/mman.h | 1 + include/asm-sparc/mman.h | 1 + include/asm-sparc64/mman.h | 1 + include/asm-v850/mman.h | 1 + include/asm-x86_64/mman.h | 1 + include/asm-xtensa/mman.h | 1 + include/linux/fs.h | 1 + include/linux/mm.h | 1 + mm/madvise.c | 35 +++++++++++++++++++++++++++++++++++ mm/memory.c | 25 ++++++++++++++++++++++++- mm/shmem.c | 32 ++++++++++++++++++++++++-------- 25 files changed, 105 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/asm-alpha/mman.h b/include/asm-alpha/mman.h index eb9c279045ef..f6439532a262 100644 --- a/include/asm-alpha/mman.h +++ b/include/asm-alpha/mman.h @@ -42,6 +42,7 @@ #define MADV_WILLNEED 3 /* will need these pages */ #define MADV_SPACEAVAIL 5 /* ensure resources are available */ #define MADV_DONTNEED 6 /* don't need these pages 
*/ +#define MADV_REMOVE 7 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-arm/mman.h b/include/asm-arm/mman.h index 8e4f69c4fa5f..f0bebca2ac21 100644 --- a/include/asm-arm/mman.h +++ b/include/asm-arm/mman.h @@ -35,6 +35,7 @@ #define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-arm26/mman.h b/include/asm-arm26/mman.h index cc27b8240265..0ed7780541fa 100644 --- a/include/asm-arm26/mman.h +++ b/include/asm-arm26/mman.h @@ -35,6 +35,7 @@ #define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-cris/mman.h b/include/asm-cris/mman.h index 8570e72b9502..5a382b8bf3f7 100644 --- a/include/asm-cris/mman.h +++ b/include/asm-cris/mman.h @@ -37,6 +37,7 @@ #define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-frv/mman.h b/include/asm-frv/mman.h index c684720dfbdd..8af4a41c255e 100644 --- a/include/asm-frv/mman.h +++ b/include/asm-frv/mman.h @@ -35,6 +35,7 @@ #define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-h8300/mman.h 
b/include/asm-h8300/mman.h index 63f727a59850..744a8fb485c2 100644 --- a/include/asm-h8300/mman.h +++ b/include/asm-h8300/mman.h @@ -35,6 +35,7 @@ #define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-i386/mman.h b/include/asm-i386/mman.h index 196619a83854..ba4941e6f643 100644 --- a/include/asm-i386/mman.h +++ b/include/asm-i386/mman.h @@ -35,6 +35,7 @@ #define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-ia64/mman.h b/include/asm-ia64/mman.h index 1c0a73af1461..828beb24a20e 100644 --- a/include/asm-ia64/mman.h +++ b/include/asm-ia64/mman.h @@ -43,6 +43,7 @@ #define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-m32r/mman.h b/include/asm-m32r/mman.h index 011f6d9ec5cc..12e29747bc84 100644 --- a/include/asm-m32r/mman.h +++ b/include/asm-m32r/mman.h @@ -37,6 +37,7 @@ #define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-m68k/mman.h b/include/asm-m68k/mman.h index f831c4eeae6e..ea262ab88b3b 100644 --- a/include/asm-m68k/mman.h +++ b/include/asm-m68k/mman.h @@ -35,6 +35,7 @@ #define 
MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-mips/mman.h b/include/asm-mips/mman.h index 62060957ba93..dd17c8bd62a1 100644 --- a/include/asm-mips/mman.h +++ b/include/asm-mips/mman.h @@ -65,6 +65,7 @@ #define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-parisc/mman.h b/include/asm-parisc/mman.h index e829607eb8bc..736b0abcac05 100644 --- a/include/asm-parisc/mman.h +++ b/include/asm-parisc/mman.h @@ -38,6 +38,7 @@ #define MADV_SPACEAVAIL 5 /* insure that resources are reserved */ #define MADV_VPS_PURGE 6 /* Purge pages from VM page cache */ #define MADV_VPS_INHERIT 7 /* Inherit parents page size */ +#define MADV_REMOVE 8 /* remove these pages & resources */ /* The range 12-64 is reserved for page size specification. 
*/ #define MADV_4K_PAGES 12 /* Use 4K pages */ diff --git a/include/asm-powerpc/mman.h b/include/asm-powerpc/mman.h index f5e5342fcac5..a2e34c21b44f 100644 --- a/include/asm-powerpc/mman.h +++ b/include/asm-powerpc/mman.h @@ -44,6 +44,7 @@ #define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-s390/mman.h b/include/asm-s390/mman.h index ea86bd12204f..c8d5409b5d56 100644 --- a/include/asm-s390/mman.h +++ b/include/asm-s390/mman.h @@ -43,6 +43,7 @@ #define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-sh/mman.h b/include/asm-sh/mman.h index 3ebab5f79db7..693bd55a3710 100644 --- a/include/asm-sh/mman.h +++ b/include/asm-sh/mman.h @@ -35,6 +35,7 @@ #define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-sparc/mman.h b/include/asm-sparc/mman.h index 138eb81dd70d..98435ad8619e 100644 --- a/include/asm-sparc/mman.h +++ b/include/asm-sparc/mman.h @@ -54,6 +54,7 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_FREE 0x5 /* (Solaris) contents can be freed */ +#define MADV_REMOVE 0x6 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-sparc64/mman.h b/include/asm-sparc64/mman.h index 01cecf54357b..cb4b6156194d 100644 
--- a/include/asm-sparc64/mman.h +++ b/include/asm-sparc64/mman.h @@ -54,6 +54,7 @@ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_FREE 0x5 /* (Solaris) contents can be freed */ +#define MADV_REMOVE 0x6 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-v850/mman.h b/include/asm-v850/mman.h index e2b90081b56f..edc79965193a 100644 --- a/include/asm-v850/mman.h +++ b/include/asm-v850/mman.h @@ -32,6 +32,7 @@ #define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-x86_64/mman.h b/include/asm-x86_64/mman.h index 78e60a4fd4ee..d0e97b74f735 100644 --- a/include/asm-x86_64/mman.h +++ b/include/asm-x86_64/mman.h @@ -36,6 +36,7 @@ #define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-xtensa/mman.h b/include/asm-xtensa/mman.h index 9a95a45df996..082a7504925e 100644 --- a/include/asm-xtensa/mman.h +++ b/include/asm-xtensa/mman.h @@ -72,6 +72,7 @@ #define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ #define MADV_WILLNEED 0x3 /* pre-fault pages */ #define MADV_DONTNEED 0x4 /* discard these pages */ +#define MADV_REMOVE 0x5 /* remove these pages & resources */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/linux/fs.h b/include/linux/fs.h index ed9a41a71e8b..115e72be25d0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1050,6 +1050,7 @@ struct inode_operations { ssize_t (*getxattr) (struct dentry *, const char *, 
void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); + void (*truncate_range)(struct inode *, loff_t, loff_t); }; struct seq_file; diff --git a/include/linux/mm.h b/include/linux/mm.h index 92acae9f1f4c..6c9be99429f3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -690,6 +690,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, } extern int vmtruncate(struct inode * inode, loff_t offset); +extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot); extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot); extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access); diff --git a/mm/madvise.c b/mm/madvise.c index 2b7cf0400a21..ae0ae3ea299a 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -140,6 +140,36 @@ static long madvise_dontneed(struct vm_area_struct * vma, return 0; } +/* + * Application wants to free up the pages and associated backing store. + * This is effectively punching a hole into the middle of a file. + * + * NOTE: Currently, only shmfs/tmpfs is supported for this operation. + * Other filesystems return -ENOSYS. 
+ */ +static long madvise_remove(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + struct address_space *mapping; + loff_t offset, endoff; + + if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB)) + return -EINVAL; + + if (!vma->vm_file || !vma->vm_file->f_mapping + || !vma->vm_file->f_mapping->host) { + return -EINVAL; + } + + mapping = vma->vm_file->f_mapping; + + offset = (loff_t)(start - vma->vm_start) + + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); + endoff = (loff_t)(end - vma->vm_start - 1) + + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); + return vmtruncate_range(mapping->host, offset, endoff); +} + static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, int behavior) @@ -152,6 +182,9 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, case MADV_RANDOM: error = madvise_behavior(vma, prev, start, end, behavior); break; + case MADV_REMOVE: + error = madvise_remove(vma, start, end); + break; case MADV_WILLNEED: error = madvise_willneed(vma, prev, start, end); @@ -190,6 +223,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, * some pages ahead. * MADV_DONTNEED - the application is finished with the given range, * so the kernel can free resources associated with it. + * MADV_REMOVE - the application wants to free up the given range of + * pages and associated backing store. * * return values: * zero - success diff --git a/mm/memory.c b/mm/memory.c index d8dde07a3656..e249088908c4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1770,9 +1770,32 @@ out_big: out_busy: return -ETXTBSY; } - EXPORT_SYMBOL(vmtruncate); +int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) +{ + struct address_space *mapping = inode->i_mapping; + + /* + * If the underlying filesystem is not going to provide + * a way to truncate a range of blocks (punch a hole) - + * we should return failure right now. 
+ */ + if (!inode->i_op || !inode->i_op->truncate_range) + return -ENOSYS; + + down(&inode->i_sem); + down_write(&inode->i_alloc_sem); + unmap_mapping_range(mapping, offset, (end - offset), 1); + truncate_inode_pages_range(mapping, offset, end); + inode->i_op->truncate_range(inode, offset, end); + up_write(&inode->i_alloc_sem); + up(&inode->i_sem); + + return 0; +} +EXPORT_SYMBOL(vmtruncate_range); + /* * Primitive swap readahead code. We simply read an aligned block of * (1 << page_cluster) entries in the swap area. This method is chosen diff --git a/mm/shmem.c b/mm/shmem.c index d9fc277940da..65c148efa2ed 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -457,7 +457,7 @@ static void shmem_free_pages(struct list_head *next) } while (next); } -static void shmem_truncate(struct inode *inode) +static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) { struct shmem_inode_info *info = SHMEM_I(inode); unsigned long idx; @@ -475,18 +475,27 @@ static void shmem_truncate(struct inode *inode) long nr_swaps_freed = 0; int offset; int freed; + int punch_hole = 0; inode->i_ctime = inode->i_mtime = CURRENT_TIME; - idx = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; if (idx >= info->next_index) return; spin_lock(&info->lock); info->flags |= SHMEM_TRUNCATE; - limit = info->next_index; - info->next_index = idx; + if (likely(end == (loff_t) -1)) { + limit = info->next_index; + info->next_index = idx; + } else { + limit = (end + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + if (limit > info->next_index) + limit = info->next_index; + punch_hole = 1; + } + topdir = info->i_indirect; - if (topdir && idx <= SHMEM_NR_DIRECT) { + if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) { info->i_indirect = NULL; nr_pages_to_free++; list_add(&topdir->lru, &pages_to_free); @@ -573,11 +582,12 @@ static void shmem_truncate(struct inode *inode) set_page_private(subdir, page_private(subdir) - freed); if (offset) 
spin_unlock(&info->lock); - BUG_ON(page_private(subdir) > offset); + if (!punch_hole) + BUG_ON(page_private(subdir) > offset); } if (offset) offset = 0; - else if (subdir) { + else if (subdir && !page_private(subdir)) { dir[diroff] = NULL; nr_pages_to_free++; list_add(&subdir->lru, &pages_to_free); @@ -594,7 +604,7 @@ done2: * Also, though shmem_getpage checks i_size before adding to * cache, no recheck after: so fix the narrow window there too. */ - truncate_inode_pages(inode->i_mapping, inode->i_size); + truncate_inode_pages_range(inode->i_mapping, start, end); } spin_lock(&info->lock); @@ -614,6 +624,11 @@ done2: } } +static void shmem_truncate(struct inode *inode) +{ + shmem_truncate_range(inode, inode->i_size, (loff_t)-1); +} + static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; @@ -2083,6 +2098,7 @@ static struct file_operations shmem_file_operations = { static struct inode_operations shmem_inode_operations = { .truncate = shmem_truncate, .setattr = shmem_notify_change, + .truncate_range = shmem_truncate_range, }; static struct inode_operations shmem_dir_inode_operations = { -- cgit v1.2.3-71-gd317 From 5da7ca86078964cbfe6c83efc1205904587706fe Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 6 Jan 2006 00:10:46 -0800 Subject: [PATCH] Add NUMA policy support for huge pages. The huge_zonelist() function in the memory policy layer provides a list of zones ordered by NUMA distance. The hugetlb layer will walk that list looking for a zone that has available huge pages but is also in the nodeset of the current cpuset. This patch does not contain the folding of find_or_alloc_huge_page() that was controversial in the earlier discussion.
Signed-off-by: Christoph Lameter Cc: Andi Kleen Acked-by: William Lee Irwin III Cc: Adam Litke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 4 ++-- include/linux/mempolicy.h | 8 ++++++++ mm/hugetlb.c | 24 ++++++++++++++---------- mm/mempolicy.c | 39 ++++++++++++++++++++++++++++++--------- 4 files changed, 54 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 1056717ee501..68d82ad6b17c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -22,7 +22,7 @@ int hugetlb_report_meminfo(char *); int hugetlb_report_node_meminfo(int, char *); int is_hugepage_mem_enough(size_t); unsigned long hugetlb_total_pages(void); -struct page *alloc_huge_page(void); +struct page *alloc_huge_page(struct vm_area_struct *, unsigned long); void free_huge_page(struct page *); int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access); @@ -97,7 +97,7 @@ static inline unsigned long hugetlb_total_pages(void) #define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ do { } while (0) -#define alloc_huge_page() ({ NULL; }) +#define alloc_huge_page(vma, addr) ({ NULL; }) #define free_huge_page(p) ({ (void)(p); BUG(); }) #define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; }) diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 8b67cf837ca9..817db6427113 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -156,6 +156,8 @@ extern void numa_default_policy(void); extern void numa_policy_init(void); extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new); extern struct mempolicy default_policy; +extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, + unsigned long addr); #else @@ -232,6 +234,12 @@ static inline void numa_policy_rebind(const nodemask_t *old, { } +static inline struct 
zonelist *huge_zonelist(struct vm_area_struct *vma, + unsigned long addr) +{ + return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER); +} + #endif /* CONFIG_NUMA */ #endif /* __KERNEL__ */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e93bd63462f0..eb405565949d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include @@ -36,11 +38,12 @@ static void enqueue_huge_page(struct page *page) free_huge_pages_node[nid]++; } -static struct page *dequeue_huge_page(void) +static struct page *dequeue_huge_page(struct vm_area_struct *vma, + unsigned long address) { int nid = numa_node_id(); struct page *page = NULL; - struct zonelist *zonelist = NODE_DATA(nid)->node_zonelists; + struct zonelist *zonelist = huge_zonelist(vma, address); struct zone **z; for (z = zonelist->zones; *z; z++) { @@ -87,13 +90,13 @@ void free_huge_page(struct page *page) spin_unlock(&hugetlb_lock); } -struct page *alloc_huge_page(void) +struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr) { struct page *page; int i; spin_lock(&hugetlb_lock); - page = dequeue_huge_page(); + page = dequeue_huge_page(vma, addr); if (!page) { spin_unlock(&hugetlb_lock); return NULL; @@ -196,7 +199,7 @@ static unsigned long set_max_huge_pages(unsigned long count) spin_lock(&hugetlb_lock); try_to_free_low(count); while (count < nr_huge_pages) { - struct page *page = dequeue_huge_page(); + struct page *page = dequeue_huge_page(NULL, 0); if (!page) break; update_and_free_page(page); @@ -365,8 +368,9 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, flush_tlb_range(vma, start, end); } -static struct page *find_or_alloc_huge_page(struct address_space *mapping, - unsigned long idx, int shared) +static struct page *find_or_alloc_huge_page(struct vm_area_struct *vma, + unsigned long addr, struct address_space *mapping, + unsigned long idx, int shared) { struct page *page; int err; @@ -378,7 +382,7 @@ retry: if 
(hugetlb_get_quota(mapping)) goto out; - page = alloc_huge_page(); + page = alloc_huge_page(vma, addr); if (!page) { hugetlb_put_quota(mapping); goto out; @@ -418,7 +422,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, } page_cache_get(old_page); - new_page = alloc_huge_page(); + new_page = alloc_huge_page(vma, address); if (!new_page) { page_cache_release(old_page); @@ -467,7 +471,7 @@ int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, * Use page lock to guard against racing truncation * before we get page_table_lock. */ - page = find_or_alloc_huge_page(mapping, idx, + page = find_or_alloc_huge_page(vma, address, mapping, idx, vma->vm_flags & VM_SHARED); if (!page) goto out; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 72f402cc9c9a..45c51ac63443 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -785,6 +785,34 @@ static unsigned offset_il_node(struct mempolicy *pol, return nid; } +/* Determine a node number for interleave */ +static inline unsigned interleave_nid(struct mempolicy *pol, + struct vm_area_struct *vma, unsigned long addr, int shift) +{ + if (vma) { + unsigned long off; + + off = vma->vm_pgoff; + off += (addr - vma->vm_start) >> shift; + return offset_il_node(pol, vma, off); + } else + return interleave_nodes(pol); +} + +/* Return a zonelist suitable for a huge page allocation. */ +struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr) +{ + struct mempolicy *pol = get_vma_policy(current, vma, addr); + + if (pol->policy == MPOL_INTERLEAVE) { + unsigned nid; + + nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT); + return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER); + } + return zonelist_policy(GFP_HIGHUSER, pol); +} + /* Allocate a page in interleaved policy. Own path because it needs to do special accounting. 
*/ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, @@ -833,15 +861,8 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) if (unlikely(pol->policy == MPOL_INTERLEAVE)) { unsigned nid; - if (vma) { - unsigned long off; - off = vma->vm_pgoff; - off += (addr - vma->vm_start) >> PAGE_SHIFT; - nid = offset_il_node(pol, vma, off); - } else { - /* fall back to process interleaving */ - nid = interleave_nodes(pol); - } + + nid = interleave_nid(pol, vma, addr, PAGE_SHIFT); return alloc_page_interleave(gfp, 0, nid); } return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol)); -- cgit v1.2.3-71-gd317 From 21abb1478a87e26f5fa71dbcb7cf4264272c2248 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 6 Jan 2006 00:10:47 -0800 Subject: [PATCH] Remove old node based policy interface from mempolicy.c mempolicy.c contains a provisional interface for huge page allocation based on node numbers. This is in use in SLES9 but was never used (AFAIK) in upstream versions of Linux. Huge page allocations now use zonelists to figure out where to allocate pages. The use of zonelists allows us to find the closest hugepage which was the consideration of the NUMA distance for huge page allocations. Remove the obsolete functions. Signed-off-by: Christoph Lameter Cc: Andi Kleen Acked-by: William Lee Irwin III Cc: Adam Litke Acked-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 19 ------------------- mm/mempolicy.c | 48 ----------------------------------------------- 2 files changed, 67 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 817db6427113..b972f985a3c5 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -109,14 +109,6 @@ static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b) #define mpol_set_vma_default(vma) ((vma)->vm_policy = NULL) -/* - * Hugetlb policy.
i386 hugetlb so far works with node numbers - * instead of zone lists, so give it special interfaces for now. - */ -extern int mpol_first_node(struct vm_area_struct *vma, unsigned long addr); -extern int mpol_node_valid(int nid, struct vm_area_struct *vma, - unsigned long addr); - /* * Tree of shared policies for a shared memory region. * Maintain the policies in a pseudo mm that contains vmas. The vmas @@ -184,17 +176,6 @@ static inline struct mempolicy *mpol_copy(struct mempolicy *old) return NULL; } -static inline int mpol_first_node(struct vm_area_struct *vma, unsigned long a) -{ - return numa_node_id(); -} - -static inline int -mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long a) -{ - return 1; -} - struct shared_policy {}; static inline int mpol_set_shared_policy(struct shared_policy *info, diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 45c51ac63443..96714e2646ad 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -960,54 +960,6 @@ void __mpol_free(struct mempolicy *p) kmem_cache_free(policy_cache, p); } -/* - * Hugetlb policy. Same as above, just works with node numbers instead of - * zonelists. - */ - -/* Find first node suitable for an allocation */ -int mpol_first_node(struct vm_area_struct *vma, unsigned long addr) -{ - struct mempolicy *pol = get_vma_policy(current, vma, addr); - - switch (pol->policy) { - case MPOL_DEFAULT: - return numa_node_id(); - case MPOL_BIND: - return pol->v.zonelist->zones[0]->zone_pgdat->node_id; - case MPOL_INTERLEAVE: - return interleave_nodes(pol); - case MPOL_PREFERRED: - return pol->v.preferred_node >= 0 ? 
- pol->v.preferred_node : numa_node_id(); - } - BUG(); - return 0; -} - -/* Find secondary valid nodes for an allocation */ -int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr) -{ - struct mempolicy *pol = get_vma_policy(current, vma, addr); - - switch (pol->policy) { - case MPOL_PREFERRED: - case MPOL_DEFAULT: - case MPOL_INTERLEAVE: - return 1; - case MPOL_BIND: { - struct zone **z; - for (z = pol->v.zonelist->zones; *z; z++) - if ((*z)->zone_pgdat->node_id == nid) - return 1; - return 0; - } - default: - BUG(); - return 0; - } -} - /* * Shared memory backing store policy support. * -- cgit v1.2.3-71-gd317 From 9f3fd602aef96c2a490e3bfd669d06475aeba8d8 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Fri, 6 Jan 2006 00:10:50 -0800 Subject: [PATCH] mm: kvaddr_to_nid not used in common code kvaddr_to_nid() isn't used in common code nor in i386 code. Remove these definitions. Signed-off-by: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/mmzone.h | 5 ----- include/linux/mmzone.h | 5 ----- 2 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h index 620a90641ea8..74f595d80579 100644 --- a/include/asm-i386/mmzone.h +++ b/include/asm-i386/mmzone.h @@ -76,11 +76,6 @@ static inline int pfn_to_nid(unsigned long pfn) * Following are macros that each numa implmentation must define. */ -/* - * Given a kernel address, find the home node of the underlying memory. 
- */ -#define kvaddr_to_nid(kaddr) pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT) - #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) \ ({ \ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9f22090df7dd..3c49f786f90c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -564,11 +564,6 @@ static inline int valid_section_nr(unsigned long nr) return valid_section(__nr_to_section(nr)); } -/* - * Given a kernel address, find the home node of the underlying memory. - */ -#define kvaddr_to_nid(kaddr) pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT) - static inline struct mem_section *__pfn_to_section(unsigned long pfn) { return __nr_to_section(pfn_to_section_nr(pfn)); -- cgit v1.2.3-71-gd317 From d5afa6dcf74c0efb60ce07c63d0a727be93c67c5 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Fri, 6 Jan 2006 00:10:50 -0800 Subject: [PATCH] mm: pfn_to_pgdat not used in common code pfn_to_pgdat() isn't used in common code. Remove definition. Signed-off-by: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3c49f786f90c..28f8496abcb9 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -596,11 +596,6 @@ static inline int pfn_valid(unsigned long pfn) #define pfn_to_nid early_pfn_to_nid #endif -#define pfn_to_pgdat(pfn) \ -({ \ - NODE_DATA(pfn_to_nid(pfn)); \ -}) - #define early_pfn_valid(pfn) pfn_valid(pfn) void sparse_init(void); #else -- cgit v1.2.3-71-gd317 From a94b3ab7eab4edcc9b2cb474b188f774c331adf7 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 6 Jan 2006 00:10:51 -0800 Subject: [PATCH] mm: remove arch independent NODES_SPAN_OTHER_NODES The NODES_SPAN_OTHER_NODES config option was created so that DISCONTIGMEM could handle pSeries numa layouts. However, support for DISCONTIGMEM has been replaced by SPARSEMEM on powerpc. 
As a result, this config option and supporting code is no longer needed. I have already sent a patch to Paul that removes the option from powerpc specific code. This removes the arch independent piece. Doesn't really matter which is applied first. Signed-off-by: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 ------ mm/page_alloc.c | 2 -- 2 files changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 28f8496abcb9..d294b57a4016 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -603,12 +603,6 @@ void sparse_init(void); #define sparse_index_init(_sec, _nid) do {} while (0) #endif /* CONFIG_SPARSEMEM */ -#ifdef CONFIG_NODES_SPAN_OTHER_NODES -#define early_pfn_in_nid(pfn, nid) (early_pfn_to_nid(pfn) == (nid)) -#else -#define early_pfn_in_nid(pfn, nid) (1) -#endif - #ifndef early_pfn_valid #define early_pfn_valid(pfn) (1) #endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1e49dc7cd619..07825c637a58 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1708,8 +1708,6 @@ void __devinit memmap_init_zone(unsigned long size, int nid, unsigned long zone, for (pfn = start_pfn; pfn < end_pfn; pfn++, page++) { if (!early_pfn_valid(pfn)) continue; - if (!early_pfn_in_nid(pfn, nid)) - continue; page = pfn_to_page(pfn); set_page_links(page, zone, nid, pfn); set_page_count(page, 1); -- cgit v1.2.3-71-gd317 From 03b00ebcc804180829d513df9e92e5fe8f72aacf Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 6 Jan 2006 00:10:52 -0800 Subject: [PATCH] Shut up warnings in ipc/shm.c Fix two warnings in ipc/shm.c ipc/shm.c:122: warning: statement with no effect ipc/shm.c:560: warning: statement with no effect by converting the macros to empty inline functions. For safety, let's do all three. This also has the advantage that typechecking gets performed even without CONFIG_SHMEM enabled. 
Signed-off-by: Russell King Cc: Manfred Spraul Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6c9be99429f3..75ec04e2f184 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -634,9 +634,24 @@ struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, int shmem_lock(struct file *file, int lock, struct user_struct *user); #else #define shmem_nopage filemap_nopage -#define shmem_lock(a, b, c) ({0;}) /* always in memory, no need to lock */ -#define shmem_set_policy(a, b) (0) -#define shmem_get_policy(a, b) (NULL) + +static inline int shmem_lock(struct file *file, int lock, + struct user_struct *user) +{ + return 0; +} + +static inline int shmem_set_policy(struct vm_area_struct *vma, + struct mempolicy *new) +{ + return 0; +} + +static inline struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, + unsigned long addr) +{ + return NULL; +} #endif struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags); -- cgit v1.2.3-71-gd317 From 2bdaf115b1c364d89484b59d5b937973f1c5a5c3 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Fri, 6 Jan 2006 00:10:53 -0800 Subject: [PATCH] flatmem split out memory model There are three places we define pfn_to_nid(). Two in linux/mmzone.h and one in asm/mmzone.h. These in essence represent the three memory models. The definition in linux/mmzone.h under !NEED_MULTIPLE_NODES is both the FLATMEM definition and the optimisation for single NUMA nodes; the one under SPARSEMEM is the NUMA sparsemem one; the one in asm/mmzone.h under DISCONTIGMEM is the discontigmem one. This is not in the least bit obvious, particularly the connection between the non-NUMA optimisations and the memory models. Two patches: flatmem-split-out-memory-model: simplifies the selection of pfn_to_nid() implementations. 
The selection is based primarily off the memory model selected. Optimisations for non-NUMA are applied where needed. sparse-provide-pfn_to_nid: implement pfn_to_nid() for SPARSEMEM This patch: pfn_to_nid is memory model specific The pfn_to_nid() call is memory model specific. It represents the locality identifier for the memory passed. Classically this would be a NUMA node, but not a chunk of memory under DISCONTIGMEM. The SPARSEMEM and FLATMEM memory model non-NUMA versions of pfn_to_nid() are folded together under NEED_MULTIPLE_NODES, while DISCONTIGMEM has its own optimisation. This is all very confusing. This patch splits out each implementation of pfn_to_nid() so that we can see them and the optimisations to each. Signed-off-by: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d294b57a4016..ee9f7b74e613 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -435,7 +435,6 @@ extern struct pglist_data contig_page_data; #define NODE_DATA(nid) (&contig_page_data) #define NODE_MEM_MAP(nid) mem_map #define MAX_NODES_SHIFT 1 -#define pfn_to_nid(pfn) (0) #else /* CONFIG_NEED_MULTIPLE_NODES */ @@ -470,6 +469,10 @@ extern struct pglist_data contig_page_data; #define early_pfn_to_nid(nid) (0UL) #endif +#ifdef CONFIG_FLATMEM +#define pfn_to_nid(pfn) (0) +#endif + #define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) #define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT) @@ -594,6 +597,8 @@ static inline int pfn_valid(unsigned long pfn) */ #ifdef CONFIG_NUMA #define pfn_to_nid early_pfn_to_nid +#else +#define pfn_to_nid(pfn) (0) #endif #define early_pfn_valid(pfn) pfn_valid(pfn) -- cgit v1.2.3-71-gd317 From 161599ff39a3c3cdea0a1be05ac53accd2c45cdd Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Fri, 6 Jan 2006 00:10:54 -0800 Subject: [PATCH] 
sparsemem: provide pfn_to_nid Before SPARSEMEM is initialised we cannot provide an efficient pfn_to_nid() implementation; before initialisation is complete we use early_pfn_to_nid() to provide location information. Until recently there was no non-init user of this functionality. Provide a post init pfn_to_nid() implementation. Note that this implementation assumes that the pfn passed has been validated with pfn_valid(). The current single user of this function already has this check. Signed-off-by: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ee9f7b74e613..8cba76c6a28c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -596,7 +596,11 @@ static inline int pfn_valid(unsigned long pfn) * this restriction. */ #ifdef CONFIG_NUMA -#define pfn_to_nid early_pfn_to_nid +#define pfn_to_nid(pfn) \ +({ \ + unsigned long __pfn_to_nid_pfn = (pfn); \ + page_to_nid(pfn_to_page(__pfn_to_nid_pfn)); \ +}) #else #define pfn_to_nid(pfn) (0) #endif -- cgit v1.2.3-71-gd317 From 2d92c5c9150a2a9ca3dc25da58d5042e17a96b6a Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 6 Jan 2006 00:10:59 -0800 Subject: [PATCH] mm: remove pcp low struct per_cpu_pages.low is useless. Remove it. 
Signed-off-by: Nick Piggin Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 - mm/page_alloc.c | 9 ++------- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 8cba76c6a28c..0d1a5981bb94 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -46,7 +46,6 @@ struct zone_padding { struct per_cpu_pages { int count; /* number of pages in the list */ - int low; /* low watermark, refill needed */ int high; /* high watermark, emptying needed */ int batch; /* chunk size for buddy add/remove */ struct list_head list; /* the list of pages */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 088712f2ac02..7cff958e7813 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -740,7 +740,7 @@ again: page = NULL; pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; local_irq_save(flags); - if (pcp->count <= pcp->low) + if (!pcp->count) pcp->count += rmqueue_bulk(zone, 0, pcp->batch, &pcp->list); if (likely(pcp->count)) { @@ -1345,10 +1345,9 @@ void show_free_areas(void) pageset = zone_pcp(zone, cpu); for (temperature = 0; temperature < 2; temperature++) - printk("cpu %d %s: low %d, high %d, batch %d used:%d\n", + printk("cpu %d %s: high %d, batch %d used:%d\n", cpu, temperature ? 
"cold" : "hot", - pageset->pcp[temperature].low, pageset->pcp[temperature].high, pageset->pcp[temperature].batch, pageset->pcp[temperature].count); @@ -1790,14 +1789,12 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) pcp = &p->pcp[0]; /* hot */ pcp->count = 0; - pcp->low = 0; pcp->high = 6 * batch; pcp->batch = max(1UL, 1 * batch); INIT_LIST_HEAD(&pcp->list); pcp = &p->pcp[1]; /* cold*/ pcp->count = 0; - pcp->low = 0; pcp->high = 2 * batch; pcp->batch = max(1UL, batch/2); INIT_LIST_HEAD(&pcp->list); @@ -2193,12 +2190,10 @@ static int zoneinfo_show(struct seq_file *m, void *arg) seq_printf(m, "\n cpu: %i pcp: %i" "\n count: %i" - "\n low: %i" "\n high: %i" "\n batch: %i", i, j, pageset->pcp[j].count, - pageset->pcp[j].low, pageset->pcp[j].high, pageset->pcp[j].batch); } -- cgit v1.2.3-71-gd317 From 008857c1a49ccffc31a54c3ea7e182833bd61304 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Fri, 6 Jan 2006 00:11:01 -0800 Subject: [PATCH] Cleanup bootmem allocator and fix alloc_bootmem_low Patch cleans up the alloc_bootmem fix for swiotlb. Patch removes alloc_bootmem_*_limit api and fixes alloc_boot_*low api to do the right thing -- allocate from low32 memory. 
Signed-off-by: Ravikiran Thirumalai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 46 ++++++++++++---------------------------------- lib/swiotlb.c | 3 +-- mm/bootmem.c | 38 +++++++++++++++++++++++++++++++------- 3 files changed, 44 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 3b03b0b868dd..993da8cc9706 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -43,50 +43,38 @@ typedef struct bootmem_data { extern unsigned long __init bootmem_bootmap_pages (unsigned long); extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend); extern void __init free_bootmem (unsigned long addr, unsigned long size); -extern void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, unsigned long goal, unsigned long limit); +extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal); +extern void * __init __alloc_bootmem_low(unsigned long size, + unsigned long align, + unsigned long goal); +extern void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal); #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE extern void __init reserve_bootmem (unsigned long addr, unsigned long size); #define alloc_bootmem(x) \ __alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low(x) \ - __alloc_bootmem((x), SMP_CACHE_BYTES, 0) + __alloc_bootmem_low((x), SMP_CACHE_BYTES, 0) #define alloc_bootmem_pages(x) \ __alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ - __alloc_bootmem((x), PAGE_SIZE, 0) - -#define alloc_bootmem_limit(x, limit) \ - __alloc_bootmem_limit((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS), (limit)) -#define alloc_bootmem_low_limit(x, limit) \ - __alloc_bootmem_limit((x), SMP_CACHE_BYTES, 0, (limit)) -#define alloc_bootmem_pages_limit(x, limit) \ 
- __alloc_bootmem_limit((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS), (limit)) -#define alloc_bootmem_low_pages_limit(x, limit) \ - __alloc_bootmem_limit((x), PAGE_SIZE, 0, (limit)) - + __alloc_bootmem_low((x), PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ extern unsigned long __init free_all_bootmem (void); - +extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn); extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size); extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size); extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat); -extern void * __init __alloc_bootmem_node_limit (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit); #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(pgdat, x) \ - __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0) - -#define alloc_bootmem_node_limit(pgdat, x, limit) \ - __alloc_bootmem_node_limit((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS), (limit)) -#define alloc_bootmem_pages_node_limit(pgdat, x, limit) \ - __alloc_bootmem_node_limit((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS), (limit)) -#define alloc_bootmem_low_pages_node_limit(pgdat, x, limit) \ - __alloc_bootmem_node_limit((pgdat), (x), PAGE_SIZE, 0, (limit)) - + __alloc_bootmem_low_node((pgdat), (x), PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP @@ -123,15 +111,5 @@ extern void *__init alloc_large_system_hash(const char 
*tablename, #endif extern int __initdata hashdist; /* Distribute hashes across NUMA nodes? */ -static inline void *__alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal) -{ - return __alloc_bootmem_limit(size, align, goal, 0); -} - -static inline void *__alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, - unsigned long goal) -{ - return __alloc_bootmem_node_limit(pgdat, size, align, goal, 0); -} #endif /* _LINUX_BOOTMEM_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 1ff8dcebf7c6..3b482052f403 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -142,8 +142,7 @@ swiotlb_init_with_default_size (size_t default_size) /* * Get IO TLB memory from the low pages */ - io_tlb_start = alloc_bootmem_low_pages_limit(io_tlb_nslabs * - (1 << IO_TLB_SHIFT), 0x100000000); + io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT)); if (!io_tlb_start) panic("Cannot allocate SWIOTLB buffer"); io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); diff --git a/mm/bootmem.c b/mm/bootmem.c index 16b9465eb4eb..cbb82ee14fb5 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -393,15 +393,14 @@ unsigned long __init free_all_bootmem (void) return(free_all_bootmem_core(NODE_DATA(0))); } -void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, unsigned long goal, - unsigned long limit) +void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal) { pg_data_t *pgdat = pgdat_list; void *ptr; for_each_pgdat(pgdat) if ((ptr = __alloc_bootmem_core(pgdat->bdata, size, - align, goal, limit))) + align, goal, 0))) return(ptr); /* @@ -413,15 +412,40 @@ void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, un } -void * __init __alloc_bootmem_node_limit (pg_data_t *pgdat, unsigned long size, unsigned long align, - unsigned long goal, unsigned long limit) +void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, + 
unsigned long goal) { void *ptr; - ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, limit); + ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); if (ptr) return (ptr); - return __alloc_bootmem_limit(size, align, goal, limit); + return __alloc_bootmem(size, align, goal); } +#define LOW32LIMIT 0xffffffff + +void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal) +{ + pg_data_t *pgdat = pgdat_list; + void *ptr; + + for_each_pgdat(pgdat) + if ((ptr = __alloc_bootmem_core(pgdat->bdata, size, + align, goal, LOW32LIMIT))) + return(ptr); + + /* + * Whoops, we cannot satisfy the allocation request. + */ + printk(KERN_ALERT "low bootmem alloc of %lu bytes failed!\n", size); + panic("Out of low memory"); + return NULL; +} + +void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, + unsigned long align, unsigned long goal) +{ + return __alloc_bootmem_core(pgdat->bdata, size, align, goal, LOW32LIMIT); +} -- cgit v1.2.3-71-gd317 From 7756b9e4e321c3c83c7aa5b9532d3e7fd7ddeb4a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 6 Jan 2006 00:11:09 -0800 Subject: [PATCH] kill last zone_reclaim() bits Remove the last bits of Martin's ill-fated sys_set_zone_reclaim(). 
Cc: Martin Hicks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/unistd.h | 2 +- include/asm-ia64/unistd.h | 2 +- include/linux/swap.h | 1 - mm/vmscan.c | 80 ----------------------------------------------- 4 files changed, 2 insertions(+), 83 deletions(-) (limited to 'include/linux') diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index 0f92e78dfea1..fe38b9a96233 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -256,7 +256,7 @@ #define __NR_io_submit 248 #define __NR_io_cancel 249 #define __NR_fadvise64 250 -#define __NR_set_zone_reclaim 251 +/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */ #define __NR_exit_group 252 #define __NR_lookup_dcookie 253 #define __NR_epoll_create 254 diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index 6d96a67439be..2bf543493cb8 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -265,7 +265,7 @@ #define __NR_keyctl 1273 #define __NR_ioprio_set 1274 #define __NR_ioprio_get 1275 -#define __NR_set_zone_reclaim 1276 +/* 1276 is available for reuse (was briefly sys_set_zone_reclaim) */ #define __NR_inotify_init 1277 #define __NR_inotify_add_watch 1278 #define __NR_inotify_rm_watch 1279 diff --git a/include/linux/swap.h b/include/linux/swap.h index 508668f840b6..bd6641784107 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -172,7 +172,6 @@ extern void swap_setup(void); /* linux/mm/vmscan.c */ extern int try_to_free_pages(struct zone **, gfp_t); -extern int zone_reclaim(struct zone *, gfp_t, unsigned int); extern int shrink_all_memory(int); extern int vm_swappiness; diff --git a/mm/vmscan.c b/mm/vmscan.c index 795a050fe471..b2baca7645d7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -74,9 +74,6 @@ struct scan_control { int may_writepage; - /* Can pages be swapped as part of reclaim? */ - int may_swap; - /* This context's SWAP_CLUSTER_MAX. 
If freeing memory for * suspend, we effectively ignore SWAP_CLUSTER_MAX. * In this context, it doesn't matter that we scan the @@ -430,8 +427,6 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) * Try to allocate it some swap space here. */ if (PageAnon(page) && !PageSwapCache(page)) { - if (!sc->may_swap) - goto keep_locked; if (!add_to_swap(page)) goto activate_locked; } @@ -952,7 +947,6 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask) sc.gfp_mask = gfp_mask; sc.may_writepage = 0; - sc.may_swap = 1; inc_page_state(allocstall); @@ -1055,7 +1049,6 @@ loop_again: total_reclaimed = 0; sc.gfp_mask = GFP_KERNEL; sc.may_writepage = 0; - sc.may_swap = 1; sc.nr_mapped = read_page_state(nr_mapped); inc_page_state(pageoutrun); @@ -1353,76 +1346,3 @@ static int __init kswapd_init(void) } module_init(kswapd_init) - - -/* - * Try to free up some pages from this zone through reclaim. - */ -int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) -{ - struct scan_control sc; - int nr_pages = 1 << order; - int total_reclaimed = 0; - - /* The reclaim may sleep, so don't do it if sleep isn't allowed */ - if (!(gfp_mask & __GFP_WAIT)) - return 0; - if (zone->all_unreclaimable) - return 0; - - sc.gfp_mask = gfp_mask; - sc.may_writepage = 0; - sc.may_swap = 0; - sc.nr_mapped = read_page_state(nr_mapped); - sc.nr_scanned = 0; - sc.nr_reclaimed = 0; - /* scan at the highest priority */ - sc.priority = 0; - disable_swap_token(); - - if (nr_pages > SWAP_CLUSTER_MAX) - sc.swap_cluster_max = nr_pages; - else - sc.swap_cluster_max = SWAP_CLUSTER_MAX; - - /* Don't reclaim the zone if there are other reclaimers active */ - if (atomic_read(&zone->reclaim_in_progress) > 0) - goto out; - - shrink_zone(zone, &sc); - total_reclaimed = sc.nr_reclaimed; - - out: - return total_reclaimed; -} - -asmlinkage long sys_set_zone_reclaim(unsigned int node, unsigned int zone, - unsigned int state) -{ - struct zone *z; - int i; - - if 
(!capable(CAP_SYS_ADMIN)) - return -EACCES; - - if (node >= MAX_NUMNODES || !node_online(node)) - return -EINVAL; - - /* This will break if we ever add more zones */ - if (!(zone & (1<node_zones[i]; - - if (state) - z->reclaim_pages = 1; - else - z->reclaim_pages = 0; - } - - return 0; -} -- cgit v1.2.3-71-gd317 From 9328b8faae922e52073785ed6c1eaa8565648a0e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 6 Jan 2006 00:11:10 -0800 Subject: [PATCH] mm: dma32 zone statistics Add dma32 to zone statistics. Also attempt to arrange struct page_state a bit better (visually). Signed-off-by: Nick Piggin Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 11 +++++++++++ include/linux/page-flags.h | 38 ++++++++++++++++++++++++-------------- mm/page_alloc.c | 14 +++++++++++--- 3 files changed, 46 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0d1a5981bb94..8d6caa414c4c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -397,6 +397,7 @@ static inline int is_normal_idx(int idx) { return (idx == ZONE_NORMAL); } + /** * is_highmem - helper function to quickly check if a struct zone is a * highmem zone or not. 
This is an attempt to keep references @@ -413,6 +414,16 @@ static inline int is_normal(struct zone *zone) return zone == zone->zone_pgdat->node_zones + ZONE_NORMAL; } +static inline int is_dma32(struct zone *zone) +{ + return zone == zone->zone_pgdat->node_zones + ZONE_DMA32; +} + +static inline int is_dma(struct zone *zone) +{ + return zone == zone->zone_pgdat->node_zones + ZONE_DMA; +} + /* These two functions are used to setup the per zone pages min values */ struct ctl_table; struct file; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 343083fec258..32d09c8d952b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -97,32 +97,40 @@ struct page_state { unsigned long pgpgout; /* Disk writes */ unsigned long pswpin; /* swap reads */ unsigned long pswpout; /* swap writes */ - unsigned long pgalloc_high; /* page allocations */ + unsigned long pgalloc_high; /* page allocations */ unsigned long pgalloc_normal; + unsigned long pgalloc_dma32; unsigned long pgalloc_dma; + unsigned long pgfree; /* page freeings */ unsigned long pgactivate; /* pages moved inactive->active */ unsigned long pgdeactivate; /* pages moved active->inactive */ unsigned long pgfault; /* faults (major+minor) */ unsigned long pgmajfault; /* faults (major only) */ + unsigned long pgrefill_high; /* inspected in refill_inactive_zone */ unsigned long pgrefill_normal; + unsigned long pgrefill_dma32; unsigned long pgrefill_dma; unsigned long pgsteal_high; /* total highmem pages reclaimed */ unsigned long pgsteal_normal; + unsigned long pgsteal_dma32; unsigned long pgsteal_dma; + unsigned long pgscan_kswapd_high;/* total highmem pages scanned */ unsigned long pgscan_kswapd_normal; - + unsigned long pgscan_kswapd_dma32; unsigned long pgscan_kswapd_dma; + unsigned long pgscan_direct_high;/* total highmem pages scanned */ unsigned long pgscan_direct_normal; + unsigned long pgscan_direct_dma32; unsigned long pgscan_direct_dma; - unsigned long pginodesteal; /* 
pages reclaimed via inode freeing */ + unsigned long pginodesteal; /* pages reclaimed via inode freeing */ unsigned long slabs_scanned; /* slab objects scanned */ unsigned long kswapd_steal; /* pages reclaimed by kswapd */ unsigned long kswapd_inodesteal;/* reclaimed via kswapd inode freeing */ @@ -150,17 +158,19 @@ extern void __mod_page_state(unsigned long offset, unsigned long delta); #define add_page_state(member,delta) mod_page_state(member, (delta)) #define sub_page_state(member,delta) mod_page_state(member, 0UL - (delta)) -#define mod_page_state_zone(zone, member, delta) \ - do { \ - unsigned offset; \ - if (is_highmem(zone)) \ - offset = offsetof(struct page_state, member##_high); \ - else if (is_normal(zone)) \ - offset = offsetof(struct page_state, member##_normal); \ - else \ - offset = offsetof(struct page_state, member##_dma); \ - __mod_page_state(offset, (delta)); \ - } while (0) +#define mod_page_state_zone(zone, member, delta) \ + do { \ + unsigned offset; \ + if (is_highmem(zone)) \ + offset = offsetof(struct page_state, member##_high); \ + else if (is_normal(zone)) \ + offset = offsetof(struct page_state, member##_normal); \ + else if (is_dma32(zone)) \ + offset = offsetof(struct page_state, member##_dma32); \ + else \ + offset = offsetof(struct page_state, member##_dma); \ + __mod_page_state(offset, (delta)); \ + } while (0) /* * Manipulation of page state flags diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cdad3249cf7f..e12154d9c4ed 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2277,32 +2277,40 @@ static char *vmstat_text[] = { "pgpgout", "pswpin", "pswpout", - "pgalloc_high", + "pgalloc_high", "pgalloc_normal", + "pgalloc_dma32", "pgalloc_dma", + "pgfree", "pgactivate", "pgdeactivate", "pgfault", "pgmajfault", + "pgrefill_high", "pgrefill_normal", + "pgrefill_dma32", "pgrefill_dma", "pgsteal_high", "pgsteal_normal", + "pgsteal_dma32", "pgsteal_dma", + "pgscan_kswapd_high", "pgscan_kswapd_normal", - + "pgscan_kswapd_dma32", 
"pgscan_kswapd_dma", + "pgscan_direct_high", "pgscan_direct_normal", + "pgscan_direct_dma32", "pgscan_direct_dma", - "pginodesteal", + "pginodesteal", "slabs_scanned", "kswapd_steal", "kswapd_inodesteal", -- cgit v1.2.3-71-gd317 From 9617d95e6e9ffd883cf90a89724fe60d7ab22f9a Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 6 Jan 2006 00:11:12 -0800 Subject: [PATCH] mm: rmap optimisation Optimise rmap functions by minimising atomic operations when we know there will be no concurrent modifications. Signed-off-by: Nick Piggin Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/linux/rmap.h | 1 + mm/memory.c | 6 +++--- mm/rmap.c | 49 ++++++++++++++++++++++++++++++++++++++----------- 4 files changed, 43 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 22533cce0611..e75a9548da8e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -324,7 +324,7 @@ void install_arg_page(struct vm_area_struct *vma, lru_cache_add_active(page); set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte( page, vma->vm_page_prot)))); - page_add_anon_rmap(page, vma, address); + page_add_new_anon_rmap(page, vma, address); pte_unmap_unlock(pte, ptl); /* no need for flush_tlb */ diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 33261f1d2239..9d6fbeef2104 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -71,6 +71,7 @@ void __anon_vma_link(struct vm_area_struct *); * rmap interfaces called when adding or removing pte of page */ void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); +void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); void page_add_file_rmap(struct page *); void page_remove_rmap(struct page *); diff --git a/mm/memory.c b/mm/memory.c index e249088908c4..d7ca7de10f4d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1498,7 +1498,7 @@ gotten: update_mmu_cache(vma, address, entry); lazy_mmu_prot_update(entry); 
lru_cache_add_active(new_page); - page_add_anon_rmap(new_page, vma, address); + page_add_new_anon_rmap(new_page, vma, address); /* Free the old page.. */ new_page = old_page; @@ -1978,7 +1978,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, inc_mm_counter(mm, anon_rss); lru_cache_add_active(page); SetPageReferenced(page); - page_add_anon_rmap(page, vma, address); + page_add_new_anon_rmap(page, vma, address); } else { /* Map the ZERO_PAGE - vm_page_prot is readonly */ page = ZERO_PAGE(address); @@ -2109,7 +2109,7 @@ retry: if (anon) { inc_mm_counter(mm, anon_rss); lru_cache_add_active(new_page); - page_add_anon_rmap(new_page, vma, address); + page_add_new_anon_rmap(new_page, vma, address); } else { inc_mm_counter(mm, file_rss); page_add_file_rmap(new_page); diff --git a/mm/rmap.c b/mm/rmap.c index f853c6def159..4107f64ff749 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -434,6 +434,26 @@ int page_referenced(struct page *page, int is_locked) return referenced; } +/** + * page_set_anon_rmap - setup new anonymous rmap + * @page: the page to add the mapping to + * @vma: the vm area in which the mapping is added + * @address: the user virtual address mapped + */ +static void __page_set_anon_rmap(struct page *page, + struct vm_area_struct *vma, unsigned long address) +{ + struct anon_vma *anon_vma = vma->anon_vma; + + BUG_ON(!anon_vma); + anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; + page->mapping = (struct address_space *) anon_vma; + + page->index = linear_page_index(vma, address); + + inc_page_state(nr_mapped); +} + /** * page_add_anon_rmap - add pte mapping to an anonymous page * @page: the page to add the mapping to @@ -445,20 +465,27 @@ int page_referenced(struct page *page, int is_locked) void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address) { - if (atomic_inc_and_test(&page->_mapcount)) { - struct anon_vma *anon_vma = vma->anon_vma; - - BUG_ON(!anon_vma); - anon_vma = (void *) anon_vma + 
PAGE_MAPPING_ANON; - page->mapping = (struct address_space *) anon_vma; - - page->index = linear_page_index(vma, address); - - inc_page_state(nr_mapped); - } + if (atomic_inc_and_test(&page->_mapcount)) + __page_set_anon_rmap(page, vma, address); /* else checking page index and mapping is racy */ } +/* + * page_add_new_anon_rmap - add pte mapping to a new anonymous page + * @page: the page to add the mapping to + * @vma: the vm area in which the mapping is added + * @address: the user virtual address mapped + * + * Same as page_add_anon_rmap but must only be called on *new* pages. + * This means the inc-and-test can be bypassed. + */ +void page_add_new_anon_rmap(struct page *page, + struct vm_area_struct *vma, unsigned long address) +{ + atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */ + __page_set_anon_rmap(page, vma, address); +} + /** * page_add_file_rmap - add pte mapping to a file page * @page: the page to add the mapping to -- cgit v1.2.3-71-gd317 From f3fe65122da05e1cd4c9140340d96ea2f95d0c49 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 6 Jan 2006 00:11:15 -0800 Subject: [PATCH] mm: add populated_zone() helper There are numerous places we check whether a zone is populated or not. Provide a helper function to check for populated zones and convert all checks for zone->present_pages. 
Signed-off-by: Con Kolivas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 5 +++++ mm/page_alloc.c | 8 ++++---- mm/vmscan.c | 8 ++++---- 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 8d6caa414c4c..c34f4a2c62f8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -388,6 +388,11 @@ static inline struct zone *next_zone(struct zone *zone) #define for_each_zone(zone) \ for (zone = pgdat_list->node_zones; zone; zone = next_zone(zone)) +static inline int populated_zone(struct zone *zone) +{ + return (!!zone->present_pages); +} + static inline int is_highmem_idx(int idx) { return (idx == ZONE_HIGHMEM); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b9fd2c238f13..8f3de5af92dd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1358,7 +1358,7 @@ void show_free_areas(void) show_node(zone); printk("%s per-cpu:", zone->name); - if (!zone->present_pages) { + if (!populated_zone(zone)) { printk(" empty\n"); continue; } else @@ -1435,7 +1435,7 @@ void show_free_areas(void) show_node(zone); printk("%s: ", zone->name); - if (!zone->present_pages) { + if (!populated_zone(zone)) { printk("empty\n"); continue; } @@ -2134,7 +2134,7 @@ static int frag_show(struct seq_file *m, void *arg) int order; for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { - if (!zone->present_pages) + if (!populated_zone(zone)) continue; spin_lock_irqsave(&zone->lock, flags); @@ -2167,7 +2167,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg) for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) { int i; - if (!zone->present_pages) + if (!populated_zone(zone)) continue; spin_lock_irqsave(&zone->lock, flags); diff --git a/mm/vmscan.c b/mm/vmscan.c index 5c8a412b43f4..7681d8ee04fe 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -897,7 +897,7 @@ shrink_caches(struct zone **zones, struct scan_control *sc) for (i = 0; 
zones[i] != NULL; i++) { struct zone *zone = zones[i]; - if (zone->present_pages == 0) + if (!populated_zone(zone)) continue; if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) @@ -1069,7 +1069,7 @@ loop_again: for (i = pgdat->nr_zones - 1; i >= 0; i--) { struct zone *zone = pgdat->node_zones + i; - if (zone->present_pages == 0) + if (!populated_zone(zone)) continue; if (zone->all_unreclaimable && @@ -1106,7 +1106,7 @@ scan: struct zone *zone = pgdat->node_zones + i; int nr_slab; - if (zone->present_pages == 0) + if (!populated_zone(zone)) continue; if (zone->all_unreclaimable && priority != DEF_PRIORITY) @@ -1258,7 +1258,7 @@ void wakeup_kswapd(struct zone *zone, int order) { pg_data_t *pgdat; - if (zone->present_pages == 0) + if (!populated_zone(zone)) return; pgdat = zone->zone_pgdat; -- cgit v1.2.3-71-gd317 From 4be38e351c5f455f6f490f5aff29053e33ab4f99 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 6 Jan 2006 00:11:17 -0800 Subject: [PATCH] mm: move determination of policy_zone into page allocator Currently the function to build a zonelist for a BIND policy has the side effect to set the policy_zone. This seems to be a bit strange. policy zone seems to not be initialized elsewhere and therefore 0. Do we police ZONE_DMA if no bind policy has been used yet? This patch moves the determination of the zone to apply policies to into the page allocator. We determine the zone while building the zonelist for nodes. 
Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 11 +++++++++++ mm/mempolicy.c | 15 +++------------ mm/page_alloc.c | 2 ++ 3 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index b972f985a3c5..ed00b278cb93 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -151,6 +151,14 @@ extern struct mempolicy default_policy; extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr); +extern int policy_zone; + +static inline void check_highest_zone(int k) +{ + if (k > policy_zone) + policy_zone = k; +} + #else struct mempolicy {}; @@ -221,6 +229,9 @@ static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER); } +static inline void check_highest_zone(int k) +{ +} #endif /* CONFIG_NUMA */ #endif /* __KERNEL__ */ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 96714e2646ad..0f1d2b8a952b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -93,7 +93,7 @@ static kmem_cache_t *sn_cache; /* Highest zone. An specific allocation for a zone below that is not policied. 
*/ -static int policy_zone; +int policy_zone = ZONE_DMA; struct mempolicy default_policy = { .refcnt = ATOMIC_INIT(1), /* never free it */ @@ -131,17 +131,8 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes) if (!zl) return NULL; num = 0; - for_each_node_mask(nd, *nodes) { - int k; - for (k = MAX_NR_ZONES-1; k >= 0; k--) { - struct zone *z = &NODE_DATA(nd)->node_zones[k]; - if (!z->present_pages) - continue; - zl->zones[num++] = z; - if (k > policy_zone) - policy_zone = k; - } - } + for_each_node_mask(nd, *nodes) + zl->zones[num++] = &NODE_DATA(nd)->node_zones[policy_zone]; zl->zones[num] = NULL; return zl; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7adc9526d329..512e3f4d4963 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include "internal.h" @@ -1470,6 +1471,7 @@ static int __init build_zonelists_node(pg_data_t *pgdat, BUG_ON(zone - pgdat->node_zones > ZONE_NORMAL); #endif zonelist->zones[j++] = zone; + check_highest_zone(k); } } return j; -- cgit v1.2.3-71-gd317 From d3cb487149bd706aa6aeb02042332a450978dc1c Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 6 Jan 2006 00:11:20 -0800 Subject: [PATCH] atomic_long_t & include/asm-generic/atomic.h V2 Several counters already have the need to use 64 bit atomic variables on 64 bit platforms (see mm_counter_t in sched.h). We have to do ugly ifdefs to fall back to 32 bit atomic on 32 bit platforms. The VM statistics patch that I am working on will also make more extensive use of atomic64. This patch introduces a new type atomic_long_t by providing definitions in asm-generic/atomic.h that works similarly to the C "long" type. It is 32 bits on 32 bit platforms and 64 bits on 64 bit platforms. Also cleans up the determination of the mm_counter_t in sched.h.
Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/atomic.h | 1 + include/asm-arm/atomic.h | 1 + include/asm-arm26/atomic.h | 1 + include/asm-cris/atomic.h | 1 + include/asm-frv/atomic.h | 1 + include/asm-generic/atomic.h | 116 +++++++++++++++++++++++++++++++++++++++++ include/asm-h8300/atomic.h | 1 + include/asm-i386/atomic.h | 1 + include/asm-ia64/atomic.h | 1 + include/asm-m32r/atomic.h | 1 + include/asm-m68k/atomic.h | 1 + include/asm-m68knommu/atomic.h | 1 + include/asm-mips/atomic.h | 1 + include/asm-parisc/atomic.h | 1 + include/asm-powerpc/atomic.h | 1 + include/asm-s390/atomic.h | 1 + include/asm-sh/atomic.h | 1 + include/asm-sh64/atomic.h | 1 + include/asm-sparc/atomic.h | 1 + include/asm-sparc64/atomic.h | 1 + include/asm-v850/atomic.h | 1 + include/asm-x86_64/atomic.h | 1 + include/asm-xtensa/atomic.h | 1 + include/linux/sched.h | 25 +++------ 24 files changed, 144 insertions(+), 19 deletions(-) create mode 100644 include/asm-generic/atomic.h (limited to 'include/linux') diff --git a/include/asm-alpha/atomic.h b/include/asm-alpha/atomic.h index 6183eab006d4..cb03bbe92cdf 100644 --- a/include/asm-alpha/atomic.h +++ b/include/asm-alpha/atomic.h @@ -216,4 +216,5 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v) #define smp_mb__before_atomic_inc() smp_mb() #define smp_mb__after_atomic_inc() smp_mb() +#include #endif /* _ALPHA_ATOMIC_H */ diff --git a/include/asm-arm/atomic.h b/include/asm-arm/atomic.h index d586f65c8228..f72b63309bc5 100644 --- a/include/asm-arm/atomic.h +++ b/include/asm-arm/atomic.h @@ -205,5 +205,6 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +#include #endif #endif diff --git a/include/asm-arm26/atomic.h b/include/asm-arm26/atomic.h index a47cadc59686..3074b0e76343 100644 --- a/include/asm-arm26/atomic.h +++ b/include/asm-arm26/atomic.h @@ 
-118,5 +118,6 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +#include #endif #endif diff --git a/include/asm-cris/atomic.h b/include/asm-cris/atomic.h index 683b05a57d88..2df2c7aa19b7 100644 --- a/include/asm-cris/atomic.h +++ b/include/asm-cris/atomic.h @@ -156,4 +156,5 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +#include #endif diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h index f6539ff569c5..3f54fea2b051 100644 --- a/include/asm-frv/atomic.h +++ b/include/asm-frv/atomic.h @@ -426,4 +426,5 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new); }) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) +#include #endif /* _ASM_ATOMIC_H */ diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h new file mode 100644 index 000000000000..e0a28b925ef0 --- /dev/null +++ b/include/asm-generic/atomic.h @@ -0,0 +1,116 @@ +#ifndef _ASM_GENERIC_ATOMIC_H +#define _ASM_GENERIC_ATOMIC_H +/* + * Copyright (C) 2005 Silicon Graphics, Inc. + * Christoph Lameter + * + * Allows to provide arch independent atomic definitions without the need to + * edit all arch specific atomic.h files. + */ + + +/* + * Suppport for atomic_long_t + * + * Casts for parameters are avoided for existing atomic functions in order to + * avoid issues with cast-as-lval under gcc 4.x and other limitations that the + * macros of a platform may have. 
+ */ + +#if BITS_PER_LONG == 64 + +typedef atomic64_t atomic_long_t; + +#define ATOMIC_LONG_INIT(i) ATOMIC64_INIT(i) + +static inline long atomic_long_read(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + return (long)atomic64_read(v); +} + +static inline void atomic_long_set(atomic_long_t *l, long i) +{ + atomic64_t *v = (atomic64_t *)l; + + atomic_set(v, i); +} + +static inline void atomic_long_inc(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + atomic64_inc(v); +} + +static inline void atomic_long_dec(atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + atomic64_dec(v); +} + +static inline void atomic_long_add(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + atomic64_add(i, v); +} + +static inline void atomic_long_sub(long i, atomic_long_t *l) +{ + atomic64_t *v = (atomic64_t *)l; + + atomic64_sub(i, v); +} + +#else + +typedef atomic_t atomic_long_t; + +#define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i) +static inline long atomic_long_read(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + return (long)atomic_read(v); +} + +static inline void atomic_long_set(atomic_long_t *l, long i) +{ + atomic_t *v = (atomic_t *)l; + + atomic_set(v, i); +} + +static inline void atomic_long_inc(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + atomic_inc(v); +} + +static inline void atomic_long_dec(atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + atomic_dec(v); +} + +static inline void atomic_long_add(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + atomic_add(i, v); +} + +static inline void atomic_long_sub(long i, atomic_long_t *l) +{ + atomic_t *v = (atomic_t *)l; + + atomic_sub(i, v); +} + +#endif +#endif diff --git a/include/asm-h8300/atomic.h b/include/asm-h8300/atomic.h index f23d86819ea8..d891541e89c3 100644 --- a/include/asm-h8300/atomic.h +++ b/include/asm-h8300/atomic.h @@ -137,4 +137,5 @@ static __inline__ void atomic_set_mask(unsigned long mask, unsigned long *v) #define 
smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +#include #endif /* __ARCH_H8300_ATOMIC __ */ diff --git a/include/asm-i386/atomic.h b/include/asm-i386/atomic.h index c68557aa04b2..7a5472d77091 100644 --- a/include/asm-i386/atomic.h +++ b/include/asm-i386/atomic.h @@ -254,4 +254,5 @@ __asm__ __volatile__(LOCK "orl %0,%1" \ #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +#include #endif diff --git a/include/asm-ia64/atomic.h b/include/asm-ia64/atomic.h index 2fbebf85c31d..15cf7984c48e 100644 --- a/include/asm-ia64/atomic.h +++ b/include/asm-ia64/atomic.h @@ -192,4 +192,5 @@ atomic64_add_negative (__s64 i, atomic64_t *v) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +#include #endif /* _ASM_IA64_ATOMIC_H */ diff --git a/include/asm-m32r/atomic.h b/include/asm-m32r/atomic.h index ef1fb8ea4726..70761278b6cb 100644 --- a/include/asm-m32r/atomic.h +++ b/include/asm-m32r/atomic.h @@ -313,4 +313,5 @@ static __inline__ void atomic_set_mask(unsigned long mask, atomic_t *addr) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +#include #endif /* _ASM_M32R_ATOMIC_H */ diff --git a/include/asm-m68k/atomic.h b/include/asm-m68k/atomic.h index e3c962eeabf3..b8a4e75d679d 100644 --- a/include/asm-m68k/atomic.h +++ b/include/asm-m68k/atomic.h @@ -157,4 +157,5 @@ static inline void atomic_set_mask(unsigned long mask, unsigned long *v) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +#include #endif /* __ARCH_M68K_ATOMIC __ */ diff --git a/include/asm-m68knommu/atomic.h b/include/asm-m68knommu/atomic.h index 3c1cc153c415..1702dbe9318c 100644 --- a/include/asm-m68knommu/atomic.h +++ b/include/asm-m68knommu/atomic.h @@ -143,4 +143,5 @@ static inline int atomic_sub_return(int i, atomic_t * v) #define atomic_dec_return(v) atomic_sub_return(1,(v)) #define atomic_inc_return(v) 
atomic_add_return(1,(v)) +#include #endif /* __ARCH_M68KNOMMU_ATOMIC __ */ diff --git a/include/asm-mips/atomic.h b/include/asm-mips/atomic.h index 55c37c106ef0..92256e43a938 100644 --- a/include/asm-mips/atomic.h +++ b/include/asm-mips/atomic.h @@ -713,4 +713,5 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v) #define smp_mb__before_atomic_inc() smp_mb() #define smp_mb__after_atomic_inc() smp_mb() +#include #endif /* _ASM_ATOMIC_H */ diff --git a/include/asm-parisc/atomic.h b/include/asm-parisc/atomic.h index 983e9a2b6042..64ebd086c40d 100644 --- a/include/asm-parisc/atomic.h +++ b/include/asm-parisc/atomic.h @@ -216,4 +216,5 @@ static __inline__ int atomic_read(const atomic_t *v) #define smp_mb__before_atomic_inc() smp_mb() #define smp_mb__after_atomic_inc() smp_mb() +#include #endif diff --git a/include/asm-powerpc/atomic.h b/include/asm-powerpc/atomic.h index ec4b14468959..ae395a0632a6 100644 --- a/include/asm-powerpc/atomic.h +++ b/include/asm-powerpc/atomic.h @@ -402,5 +402,6 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v) #endif /* __powerpc64__ */ +#include #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_ATOMIC_H_ */ diff --git a/include/asm-s390/atomic.h b/include/asm-s390/atomic.h index b3bd4f679f72..6d07c7df4b40 100644 --- a/include/asm-s390/atomic.h +++ b/include/asm-s390/atomic.h @@ -215,5 +215,6 @@ atomic_compare_and_swap(int expected_oldval,int new_val,atomic_t *v) #define smp_mb__before_atomic_inc() smp_mb() #define smp_mb__after_atomic_inc() smp_mb() +#include #endif /* __KERNEL__ */ #endif /* __ARCH_S390_ATOMIC__ */ diff --git a/include/asm-sh/atomic.h b/include/asm-sh/atomic.h index aabfd334462c..618d8e0de348 100644 --- a/include/asm-sh/atomic.h +++ b/include/asm-sh/atomic.h @@ -140,4 +140,5 @@ static __inline__ void atomic_set_mask(unsigned int mask, atomic_t *v) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +#include #endif /* __ASM_SH_ATOMIC_H */ diff --git 
a/include/asm-sh64/atomic.h b/include/asm-sh64/atomic.h index 927a2bc27b30..f3ce5c0df13a 100644 --- a/include/asm-sh64/atomic.h +++ b/include/asm-sh64/atomic.h @@ -152,4 +152,5 @@ static __inline__ void atomic_set_mask(unsigned int mask, atomic_t *v) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +#include #endif /* __ASM_SH64_ATOMIC_H */ diff --git a/include/asm-sparc/atomic.h b/include/asm-sparc/atomic.h index 62bec7ad271c..accb4967e9d2 100644 --- a/include/asm-sparc/atomic.h +++ b/include/asm-sparc/atomic.h @@ -159,4 +159,5 @@ static inline int __atomic24_sub(int i, atomic24_t *v) #endif /* !(__KERNEL__) */ +#include #endif /* !(__ARCH_SPARC_ATOMIC__) */ diff --git a/include/asm-sparc64/atomic.h b/include/asm-sparc64/atomic.h index 3789fe315992..11f5aa5d108c 100644 --- a/include/asm-sparc64/atomic.h +++ b/include/asm-sparc64/atomic.h @@ -96,4 +96,5 @@ extern int atomic64_sub_ret(int, atomic64_t *); #define smp_mb__after_atomic_inc() barrier() #endif +#include #endif /* !(__ARCH_SPARC64_ATOMIC__) */ diff --git a/include/asm-v850/atomic.h b/include/asm-v850/atomic.h index bede3172ce7f..f5b9ab6f4e70 100644 --- a/include/asm-v850/atomic.h +++ b/include/asm-v850/atomic.h @@ -126,4 +126,5 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +#include #endif /* __V850_ATOMIC_H__ */ diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h index 50db9f39274f..72eb071488c7 100644 --- a/include/asm-x86_64/atomic.h +++ b/include/asm-x86_64/atomic.h @@ -424,4 +424,5 @@ __asm__ __volatile__(LOCK "orl %0,%1" \ #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +#include #endif diff --git a/include/asm-xtensa/atomic.h b/include/asm-xtensa/atomic.h index 3670cc7695da..e2ce06b101ad 100644 --- a/include/asm-xtensa/atomic.h +++ b/include/asm-xtensa/atomic.h @@ -286,6 +286,7 @@ static 
inline void atomic_set_mask(unsigned int mask, atomic_t *v) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +#include #endif /* __KERNEL__ */ #endif /* _XTENSA_ATOMIC_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index b0ad6f30679e..7da33619d5d0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -254,25 +254,12 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); * The mm counters are not protected by its page_table_lock, * so must be incremented atomically. */ -#ifdef ATOMIC64_INIT -#define set_mm_counter(mm, member, value) atomic64_set(&(mm)->_##member, value) -#define get_mm_counter(mm, member) ((unsigned long)atomic64_read(&(mm)->_##member)) -#define add_mm_counter(mm, member, value) atomic64_add(value, &(mm)->_##member) -#define inc_mm_counter(mm, member) atomic64_inc(&(mm)->_##member) -#define dec_mm_counter(mm, member) atomic64_dec(&(mm)->_##member) -typedef atomic64_t mm_counter_t; -#else /* !ATOMIC64_INIT */ -/* - * The counters wrap back to 0 at 2^32 * PAGE_SIZE, - * that is, at 16TB if using 4kB page size. 
- */ -#define set_mm_counter(mm, member, value) atomic_set(&(mm)->_##member, value) -#define get_mm_counter(mm, member) ((unsigned long)atomic_read(&(mm)->_##member)) -#define add_mm_counter(mm, member, value) atomic_add(value, &(mm)->_##member) -#define inc_mm_counter(mm, member) atomic_inc(&(mm)->_##member) -#define dec_mm_counter(mm, member) atomic_dec(&(mm)->_##member) -typedef atomic_t mm_counter_t; -#endif /* !ATOMIC64_INIT */ +#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value) +#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member)) +#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member) +#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member) +#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member) +typedef atomic_long_t mm_counter_t; #else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ /* -- cgit v1.2.3-71-gd317 From a74609fafa2e5cc31d558012abaaa55ec9ad9da4 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 6 Jan 2006 00:11:20 -0800 Subject: [PATCH] mm: page_state opt Optimise page_state manipulations by introducing interrupt unsafe accessors to page_state fields. Callers must provide their own locking (either disable interrupts or not update from interrupt context). Switch over the hot callsites that can easily be moved under interrupts off sections. 
Signed-off-by: Nick Piggin Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 43 ++++++++++++++++------ mm/page_alloc.c | 89 ++++++++++++++++++++++++++-------------------- mm/rmap.c | 10 ++++-- mm/vmscan.c | 27 +++++++------- 4 files changed, 104 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 32d09c8d952b..dede8d412dca 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -144,22 +144,33 @@ struct page_state { extern void get_page_state(struct page_state *ret); extern void get_page_state_node(struct page_state *ret, int node); extern void get_full_page_state(struct page_state *ret); -extern unsigned long __read_page_state(unsigned long offset); -extern void __mod_page_state(unsigned long offset, unsigned long delta); +extern unsigned long read_page_state_offset(unsigned long offset); +extern void mod_page_state_offset(unsigned long offset, unsigned long delta); +extern void __mod_page_state_offset(unsigned long offset, unsigned long delta); #define read_page_state(member) \ - __read_page_state(offsetof(struct page_state, member)) + read_page_state_offset(offsetof(struct page_state, member)) #define mod_page_state(member, delta) \ - __mod_page_state(offsetof(struct page_state, member), (delta)) + mod_page_state_offset(offsetof(struct page_state, member), (delta)) -#define inc_page_state(member) mod_page_state(member, 1UL) -#define dec_page_state(member) mod_page_state(member, 0UL - 1) -#define add_page_state(member,delta) mod_page_state(member, (delta)) -#define sub_page_state(member,delta) mod_page_state(member, 0UL - (delta)) +#define __mod_page_state(member, delta) \ + __mod_page_state_offset(offsetof(struct page_state, member), (delta)) -#define mod_page_state_zone(zone, member, delta) \ - do { \ +#define inc_page_state(member) mod_page_state(member, 1UL) +#define dec_page_state(member) 
mod_page_state(member, 0UL - 1) +#define add_page_state(member,delta) mod_page_state(member, (delta)) +#define sub_page_state(member,delta) mod_page_state(member, 0UL - (delta)) + +#define __inc_page_state(member) __mod_page_state(member, 1UL) +#define __dec_page_state(member) __mod_page_state(member, 0UL - 1) +#define __add_page_state(member,delta) __mod_page_state(member, (delta)) +#define __sub_page_state(member,delta) __mod_page_state(member, 0UL - (delta)) + +#define page_state(member) (*__page_state(offsetof(struct page_state, member))) + +#define state_zone_offset(zone, member) \ +({ \ unsigned offset; \ if (is_highmem(zone)) \ offset = offsetof(struct page_state, member##_high); \ @@ -169,7 +180,17 @@ extern void __mod_page_state(unsigned long offset, unsigned long delta); offset = offsetof(struct page_state, member##_dma32); \ else \ offset = offsetof(struct page_state, member##_dma); \ - __mod_page_state(offset, (delta)); \ + offset; \ +}) + +#define __mod_page_state_zone(zone, member, delta) \ + do { \ + __mod_page_state_offset(state_zone_offset(zone, member), (delta)); \ + } while (0) + +#define mod_page_state_zone(zone, member, delta) \ + do { \ + mod_page_state_offset(state_zone_offset(zone, member), (delta)); \ } while (0) /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7f580779abdb..fd47494cb989 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -424,9 +424,9 @@ void __free_pages_ok(struct page *page, unsigned int order) return; list_add(&page->lru, &list); - mod_page_state(pgfree, 1 << order); kernel_map_pages(page, 1<zone_pgdat; pg_data_t *orig = zonelist->zones[0]->zone_pgdat; struct per_cpu_pageset *p; - local_irq_save(flags); - cpu = smp_processor_id(); - p = zone_pcp(z,cpu); + p = zone_pcp(z, cpu); if (pg == orig) { p->numa_hit++; } else { @@ -696,7 +692,6 @@ static void zone_statistics(struct zonelist *zonelist, struct zone *z) p->local_node++; else p->other_node++; - local_irq_restore(flags); #endif } @@ -716,11 +711,11 @@ static 
void fastcall free_hot_cold_page(struct page *page, int cold) if (free_pages_check(page)) return; - inc_page_state(pgfree); kernel_map_pages(page, 1, 0); pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; local_irq_save(flags); + __inc_page_state(pgfree); list_add(&page->lru, &pcp->list); pcp->count++; if (pcp->count >= pcp->high) @@ -753,49 +748,58 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) * we cheat by calling it from here, in the order > 0 path. Saves a branch * or two. */ -static struct page * -buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags) +static struct page *buffered_rmqueue(struct zonelist *zonelist, + struct zone *zone, int order, gfp_t gfp_flags) { unsigned long flags; struct page *page; int cold = !!(gfp_flags & __GFP_COLD); + int cpu; again: + cpu = get_cpu(); if (order == 0) { struct per_cpu_pages *pcp; - page = NULL; - pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; + pcp = &zone_pcp(zone, cpu)->pcp[cold]; local_irq_save(flags); - if (!pcp->count) + if (!pcp->count) { pcp->count += rmqueue_bulk(zone, 0, pcp->batch, &pcp->list); - if (likely(pcp->count)) { - page = list_entry(pcp->list.next, struct page, lru); - list_del(&page->lru); - pcp->count--; + if (unlikely(!pcp->count)) + goto failed; } - local_irq_restore(flags); - put_cpu(); + page = list_entry(pcp->list.next, struct page, lru); + list_del(&page->lru); + pcp->count--; } else { spin_lock_irqsave(&zone->lock, flags); page = __rmqueue(zone, order); - spin_unlock_irqrestore(&zone->lock, flags); + spin_unlock(&zone->lock); + if (!page) + goto failed; } - if (page != NULL) { - BUG_ON(bad_range(zone, page)); - mod_page_state_zone(zone, pgalloc, 1 << order); - if (prep_new_page(page, order)) - goto again; + __mod_page_state_zone(zone, pgalloc, 1 << order); + zone_statistics(zonelist, zone, cpu); + local_irq_restore(flags); + put_cpu(); - if (gfp_flags & __GFP_ZERO) - prep_zero_page(page, order, gfp_flags); + BUG_ON(bad_range(zone, page)); + if 
(prep_new_page(page, order)) + goto again; - if (order && (gfp_flags & __GFP_COMP)) - prep_compound_page(page, order); - } + if (gfp_flags & __GFP_ZERO) + prep_zero_page(page, order, gfp_flags); + + if (order && (gfp_flags & __GFP_COMP)) + prep_compound_page(page, order); return page; + +failed: + local_irq_restore(flags); + put_cpu(); + return NULL; } #define ALLOC_NO_WATERMARKS 0x01 /* don't check watermarks at all */ @@ -871,9 +875,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, continue; } - page = buffered_rmqueue(*z, order, gfp_mask); + page = buffered_rmqueue(zonelist, *z, order, gfp_mask); if (page) { - zone_statistics(zonelist, *z); break; } } while (*(++z) != NULL); @@ -1248,7 +1251,7 @@ void get_full_page_state(struct page_state *ret) __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask); } -unsigned long __read_page_state(unsigned long offset) +unsigned long read_page_state_offset(unsigned long offset) { unsigned long ret = 0; int cpu; @@ -1262,18 +1265,26 @@ unsigned long __read_page_state(unsigned long offset) return ret; } -void __mod_page_state(unsigned long offset, unsigned long delta) +void __mod_page_state_offset(unsigned long offset, unsigned long delta) +{ + void *ptr; + + ptr = &__get_cpu_var(page_states); + *(unsigned long *)(ptr + offset) += delta; +} +EXPORT_SYMBOL(__mod_page_state_offset); + +void mod_page_state_offset(unsigned long offset, unsigned long delta) { unsigned long flags; - void* ptr; + void *ptr; local_irq_save(flags); ptr = &__get_cpu_var(page_states); - *(unsigned long*)(ptr + offset) += delta; + *(unsigned long *)(ptr + offset) += delta; local_irq_restore(flags); } - -EXPORT_SYMBOL(__mod_page_state); +EXPORT_SYMBOL(mod_page_state_offset); void __get_zone_counts(unsigned long *active, unsigned long *inactive, unsigned long *free, struct pglist_data *pgdat) diff --git a/mm/rmap.c b/mm/rmap.c index 4107f64ff749..6f3f7db27128 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -451,7 +451,11 @@ static void 
__page_set_anon_rmap(struct page *page, page->index = linear_page_index(vma, address); - inc_page_state(nr_mapped); + /* + * nr_mapped state can be updated without turning off + * interrupts because it is not modified via interrupt. + */ + __inc_page_state(nr_mapped); } /** @@ -498,7 +502,7 @@ void page_add_file_rmap(struct page *page) BUG_ON(!pfn_valid(page_to_pfn(page))); if (atomic_inc_and_test(&page->_mapcount)) - inc_page_state(nr_mapped); + __inc_page_state(nr_mapped); } /** @@ -522,7 +526,7 @@ void page_remove_rmap(struct page *page) */ if (page_test_and_clear_dirty(page)) set_page_dirty(page); - dec_page_state(nr_mapped); + __dec_page_state(nr_mapped); } } diff --git a/mm/vmscan.c b/mm/vmscan.c index 7681d8ee04fe..be8235fb1939 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -645,16 +645,17 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc) goto done; max_scan -= nr_scan; - if (current_is_kswapd()) - mod_page_state_zone(zone, pgscan_kswapd, nr_scan); - else - mod_page_state_zone(zone, pgscan_direct, nr_scan); nr_freed = shrink_list(&page_list, sc); - if (current_is_kswapd()) - mod_page_state(kswapd_steal, nr_freed); - mod_page_state_zone(zone, pgsteal, nr_freed); - spin_lock_irq(&zone->lru_lock); + local_irq_disable(); + if (current_is_kswapd()) { + __mod_page_state_zone(zone, pgscan_kswapd, nr_scan); + __mod_page_state(kswapd_steal, nr_freed); + } else + __mod_page_state_zone(zone, pgscan_direct, nr_scan); + __mod_page_state_zone(zone, pgsteal, nr_freed); + + spin_lock(&zone->lru_lock); /* * Put back any unfreeable pages. 
*/ @@ -816,11 +817,13 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) } } zone->nr_active += pgmoved; - spin_unlock_irq(&zone->lru_lock); - pagevec_release(&pvec); + spin_unlock(&zone->lru_lock); + + __mod_page_state_zone(zone, pgrefill, pgscanned); + __mod_page_state(pgdeactivate, pgdeactivate); + local_irq_enable(); - mod_page_state_zone(zone, pgrefill, pgscanned); - mod_page_state(pgdeactivate, pgdeactivate); + pagevec_release(&pvec); } /* -- cgit v1.2.3-71-gd317 From b09eb1c06a14641209e6b86e9a5b28ea8287f193 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 6 Jan 2006 00:11:21 -0800 Subject: [PATCH] mm: page_state opt docs Comment the new locking rules for page_state statistics. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index dede8d412dca..d52999c43336 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -79,13 +79,23 @@ /* * Global page accounting. One instance per CPU. Only unsigned longs are * allowed. + * + * - Fields can be modified with xxx_page_state and xxx_page_state_zone at + * any time safely (which protects the instance from modification by + * interrupt. + * - The __xxx_page_state variants can be used safely when interrupts are + * disabled. + * - The __xxx_page_state variants can be used if the field is only + * modified from process context, or only modified from interrupt context. + * In this case, the field should be commented here. 
*/ struct page_state { unsigned long nr_dirty; /* Dirty writeable pages */ unsigned long nr_writeback; /* Pages under writeback */ unsigned long nr_unstable; /* NFS unstable pages */ unsigned long nr_page_table_pages;/* Pages used for pagetables */ - unsigned long nr_mapped; /* mapped into pagetables */ + unsigned long nr_mapped; /* mapped into pagetables. + * only modified from process context */ unsigned long nr_slab; /* In slab */ #define GET_PAGE_STATE_LAST nr_slab -- cgit v1.2.3-71-gd317 From 8d9067bda99c68e1a17d93e78cf3a5a3f67e0c35 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 6 Jan 2006 00:11:24 -0800 Subject: [PATCH] Keys: Remove key duplication Remove the key duplication stuff since there's nothing that uses it, no way to get at it and it's awkward to deal with for LSM purposes. Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/keys.txt | 18 ------------- include/keys/user-type.h | 1 - include/linux/key.h | 8 ------ security/keys/key.c | 56 +++----------------------------------- security/keys/keyring.c | 64 -------------------------------------------- security/keys/user_defined.c | 33 ----------------------- 6 files changed, 3 insertions(+), 177 deletions(-) (limited to 'include/linux') diff --git a/Documentation/keys.txt b/Documentation/keys.txt index 31154882000a..6304db59bfe4 100644 --- a/Documentation/keys.txt +++ b/Documentation/keys.txt @@ -860,24 +860,6 @@ The structure has a number of fields, some of which are mandatory: It is safe to sleep in this method. - (*) int (*duplicate)(struct key *key, const struct key *source); - - If this type of key can be duplicated, then this method should be - provided. It is called to copy the payload attached to the source into the - new key. The data length on the new key will have been updated and the - quota adjusted already. 
- - This method will be called with the source key's semaphore read-locked to - prevent its payload from being changed, thus RCU constraints need not be - applied to the source key. - - This method does not have to lock the destination key in order to attach a - payload. The fact that KEY_FLAG_INSTANTIATED is not set in key->flags - prevents anything else from gaining access to the key. - - It is safe to sleep in this method. - - (*) int (*update)(struct key *key, const void *data, size_t datalen); If this type of key can be updated, then this method should be provided. diff --git a/include/keys/user-type.h b/include/keys/user-type.h index 26f6ec38577a..a3dae1803f45 100644 --- a/include/keys/user-type.h +++ b/include/keys/user-type.h @@ -35,7 +35,6 @@ struct user_key_payload { extern struct key_type key_type_user; extern int user_instantiate(struct key *key, const void *data, size_t datalen); -extern int user_duplicate(struct key *key, const struct key *source); extern int user_update(struct key *key, const void *data, size_t datalen); extern int user_match(const struct key *key, const void *criterion); extern void user_destroy(struct key *key); diff --git a/include/linux/key.h b/include/linux/key.h index 53513a3be53b..4d189e51bc6c 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -193,14 +193,6 @@ struct key_type { */ int (*instantiate)(struct key *key, const void *data, size_t datalen); - /* duplicate a key of this type (optional) - * - the source key will be locked against change - * - the new description will be attached - * - the quota will have been adjusted automatically from - * source->quotalen - */ - int (*duplicate)(struct key *key, const struct key *source); - /* update a key of this type (optional) * - this method should call key_payload_reserve() to recalculate the * quota consumption diff --git a/security/keys/key.c b/security/keys/key.c index 01bcfecb7eae..bb036623d0a8 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -240,9 
+240,9 @@ static inline void key_alloc_serial(struct key *key) /* * allocate a key of the specified type * - update the user's quota to reflect the existence of the key - * - called from a key-type operation with key_types_sem read-locked by either - * key_create_or_update() or by key_duplicate(); this prevents unregistration - * of the key type + * - called from a key-type operation with key_types_sem read-locked by + * key_create_or_update() + * - this prevents unregistration of the key type * - upon return the key is as yet uninstantiated; the caller needs to either * instantiate the key or discard it before returning */ @@ -887,56 +887,6 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen) EXPORT_SYMBOL(key_update); -/*****************************************************************************/ -/* - * duplicate a key, potentially with a revised description - * - must be supported by the keytype (keyrings for instance can be duplicated) - */ -struct key *key_duplicate(struct key *source, const char *desc) -{ - struct key *key; - int ret; - - key_check(source); - - if (!desc) - desc = source->description; - - down_read(&key_types_sem); - - ret = -EINVAL; - if (!source->type->duplicate) - goto error; - - /* allocate and instantiate a key */ - key = key_alloc(source->type, desc, current->fsuid, current->fsgid, - source->perm, 0); - if (IS_ERR(key)) - goto error_k; - - down_read(&source->sem); - ret = key->type->duplicate(key, source); - up_read(&source->sem); - if (ret < 0) - goto error2; - - atomic_inc(&key->user->nikeys); - set_bit(KEY_FLAG_INSTANTIATED, &key->flags); - - error_k: - up_read(&key_types_sem); - out: - return key; - - error2: - key_put(key); - error: - up_read(&key_types_sem); - key = ERR_PTR(ret); - goto out; - -} /* end key_duplicate() */ - /*****************************************************************************/ /* * revoke a key diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 
4e9fa8be44b8..0acecbd4fa37 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -48,7 +48,6 @@ static inline unsigned keyring_hash(const char *desc) */ static int keyring_instantiate(struct key *keyring, const void *data, size_t datalen); -static int keyring_duplicate(struct key *keyring, const struct key *source); static int keyring_match(const struct key *keyring, const void *criterion); static void keyring_destroy(struct key *keyring); static void keyring_describe(const struct key *keyring, struct seq_file *m); @@ -59,7 +58,6 @@ struct key_type key_type_keyring = { .name = "keyring", .def_datalen = sizeof(struct keyring_list), .instantiate = keyring_instantiate, - .duplicate = keyring_duplicate, .match = keyring_match, .destroy = keyring_destroy, .describe = keyring_describe, @@ -118,68 +116,6 @@ static int keyring_instantiate(struct key *keyring, } /* end keyring_instantiate() */ -/*****************************************************************************/ -/* - * duplicate the list of subscribed keys from a source keyring into this one - */ -static int keyring_duplicate(struct key *keyring, const struct key *source) -{ - struct keyring_list *sklist, *klist; - unsigned max; - size_t size; - int loop, ret; - - const unsigned limit = - (PAGE_SIZE - sizeof(*klist)) / sizeof(struct key *); - - ret = 0; - - /* find out how many keys are currently linked */ - rcu_read_lock(); - sklist = rcu_dereference(source->payload.subscriptions); - max = 0; - if (sklist) - max = sklist->nkeys; - rcu_read_unlock(); - - /* allocate a new payload and stuff load with key links */ - if (max > 0) { - BUG_ON(max > limit); - - max = (max + 3) & ~3; - if (max > limit) - max = limit; - - ret = -ENOMEM; - size = sizeof(*klist) + sizeof(struct key *) * max; - klist = kmalloc(size, GFP_KERNEL); - if (!klist) - goto error; - - /* set links */ - rcu_read_lock(); - sklist = rcu_dereference(source->payload.subscriptions); - - klist->maxkeys = max; - klist->nkeys = 
sklist->nkeys; - memcpy(klist->keys, - sklist->keys, - sklist->nkeys * sizeof(struct key *)); - - for (loop = klist->nkeys - 1; loop >= 0; loop--) - atomic_inc(&klist->keys[loop]->usage); - - rcu_read_unlock(); - - rcu_assign_pointer(keyring->payload.subscriptions, klist); - ret = 0; - } - - error: - return ret; - -} /* end keyring_duplicate() */ - /*****************************************************************************/ /* * match keyrings on their name diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index cbda3b2780a1..8e71895b97a7 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -26,7 +26,6 @@ struct key_type key_type_user = { .name = "user", .instantiate = user_instantiate, - .duplicate = user_duplicate, .update = user_update, .match = user_match, .destroy = user_destroy, @@ -68,40 +67,8 @@ error: return ret; } /* end user_instantiate() */ - EXPORT_SYMBOL_GPL(user_instantiate); -/*****************************************************************************/ -/* - * duplicate a user defined key - * - both keys' semaphores are locked against further modification - * - the new key cannot yet be accessed - */ -int user_duplicate(struct key *key, const struct key *source) -{ - struct user_key_payload *upayload, *spayload; - int ret; - - /* just copy the payload */ - ret = -ENOMEM; - upayload = kmalloc(sizeof(*upayload) + source->datalen, GFP_KERNEL); - if (upayload) { - spayload = rcu_dereference(source->payload.data); - BUG_ON(source->datalen != spayload->datalen); - - upayload->datalen = key->datalen = spayload->datalen; - memcpy(upayload->data, spayload->data, key->datalen); - - key->payload.data = upayload; - ret = 0; - } - - return ret; - -} /* end user_duplicate() */ - -EXPORT_SYMBOL_GPL(user_duplicate); - /*****************************************************************************/ /* * dispose of the old data from an updated user defined key -- cgit v1.2.3-71-gd317 From 
642fb4d1f1dd2417aa69189fe5ceb81e4fb72900 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 6 Jan 2006 00:11:41 -0800 Subject: [PATCH] NOMMU: Provide shared-writable mmap support on ramfs The attached patch makes ramfs support shared-writable mmaps by: (1) Attempting to perform a contiguous block allocation to the requested size when truncate attempts to increase the file from zero size, such as happens when: fd = shm_open("/file/on/ramfs", ...); ftruncate(fd, size_requested); addr = mmap(NULL, subsize, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED, fd, offset); (2) Permitting any shared-writable mapping over any contiguous set of extant pages. get_unmapped_area() will return the address into the actual ramfs pages. The mapping may start anywhere and be of any size, but may not go over the end of file. Multiple mappings may overlap in any way. (3) Not permitting a file to be shrunk if it would truncate any shared mappings (private mappings are copied). Thus this patch provides support for POSIX shared memory on NOMMU kernels, with certain limitations such as there being a large enough block of pages available to support the allocation and it only working on directly mappable filesystems. 
Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ramfs/Makefile | 4 +- fs/ramfs/file-mmu.c | 57 ++++++++++ fs/ramfs/file-nommu.c | 292 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ramfs/inode.c | 22 +--- fs/ramfs/internal.h | 15 +++ include/linux/ramfs.h | 10 ++ 6 files changed, 378 insertions(+), 22 deletions(-) create mode 100644 fs/ramfs/file-mmu.c create mode 100644 fs/ramfs/file-nommu.c create mode 100644 fs/ramfs/internal.h (limited to 'include/linux') diff --git a/fs/ramfs/Makefile b/fs/ramfs/Makefile index f096f3007091..5a0236e02ee1 100644 --- a/fs/ramfs/Makefile +++ b/fs/ramfs/Makefile @@ -4,4 +4,6 @@ obj-$(CONFIG_RAMFS) += ramfs.o -ramfs-objs := inode.o +file-mmu-y := file-nommu.o +file-mmu-$(CONFIG_MMU) := file-mmu.o +ramfs-objs += inode.o $(file-mmu-y) diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c new file mode 100644 index 000000000000..2115383dcc8d --- /dev/null +++ b/fs/ramfs/file-mmu.c @@ -0,0 +1,57 @@ +/* file-mmu.c: ramfs MMU-based file operations + * + * Resizable simple ram filesystem for Linux. + * + * Copyright (C) 2000 Linus Torvalds. + * 2000 Transmeta Corp. + * + * Usage limits added by David Gibson, Linuxcare Australia. + * This file is released under the GPL. + */ + +/* + * NOTE! This filesystem is probably most useful + * not as a real filesystem, but as an example of + * how virtual filesystems can be written. + * + * It doesn't get much simpler than this. Consider + * that this file implements the full semantics of + * a POSIX-compliant read-write filesystem. + * + * Note in particular how the filesystem does not + * need to implement any data structures of its own + * to keep track of the virtual data: using the VFS + * caches is sufficient. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "internal.h" + +struct address_space_operations ramfs_aops = { + .readpage = simple_readpage, + .prepare_write = simple_prepare_write, + .commit_write = simple_commit_write +}; + +struct file_operations ramfs_file_operations = { + .read = generic_file_read, + .write = generic_file_write, + .mmap = generic_file_mmap, + .fsync = simple_sync_file, + .sendfile = generic_file_sendfile, + .llseek = generic_file_llseek, +}; + +struct inode_operations ramfs_file_inode_operations = { + .getattr = simple_getattr, +}; diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c new file mode 100644 index 000000000000..3f810acd0bfa --- /dev/null +++ b/fs/ramfs/file-nommu.c @@ -0,0 +1,292 @@ +/* file-nommu.c: no-MMU version of ramfs + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "internal.h" + +static int ramfs_nommu_setattr(struct dentry *, struct iattr *); + +struct address_space_operations ramfs_aops = { + .readpage = simple_readpage, + .prepare_write = simple_prepare_write, + .commit_write = simple_commit_write +}; + +struct file_operations ramfs_file_operations = { + .mmap = ramfs_nommu_mmap, + .get_unmapped_area = ramfs_nommu_get_unmapped_area, + .read = generic_file_read, + .write = generic_file_write, + .fsync = simple_sync_file, + .sendfile = generic_file_sendfile, + .llseek = generic_file_llseek, +}; + +struct inode_operations ramfs_file_inode_operations = { + .setattr = ramfs_nommu_setattr, + .getattr = simple_getattr, +}; + +/*****************************************************************************/ +/* + * add a contiguous set of pages into a ramfs inode when it's truncated from + * size 0 on the assumption that it's going to be used for an mmap of shared + * memory + */ +static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) +{ + struct pagevec lru_pvec; + unsigned long npages, xpages, loop, limit; + struct page *pages; + unsigned order; + void *data; + int ret; + + /* make various checks */ + order = get_order(newsize); + if (unlikely(order >= MAX_ORDER)) + goto too_big; + + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + if (limit != RLIM_INFINITY && newsize > limit) + goto fsize_exceeded; + + if (newsize > inode->i_sb->s_maxbytes) + goto too_big; + + i_size_write(inode, newsize); + + /* allocate enough contiguous pages to be able to satisfy the + * request */ + pages = alloc_pages(mapping_gfp_mask(inode->i_mapping), order); + if (!pages) + return -ENOMEM; + + /* split the high-order page into an array of single pages */ + xpages = 1UL << order; + npages = (newsize + PAGE_SIZE - 1) >> PAGE_SHIFT; + + for (loop = 0; loop < npages; loop++) + 
set_page_count(pages + loop, 1); + + /* trim off any pages we don't actually require */ + for (loop = npages; loop < xpages; loop++) + __free_page(pages + loop); + + /* clear the memory we allocated */ + newsize = PAGE_SIZE * npages; + data = page_address(pages); + memset(data, 0, newsize); + + /* attach all the pages to the inode's address space */ + pagevec_init(&lru_pvec, 0); + for (loop = 0; loop < npages; loop++) { + struct page *page = pages + loop; + + ret = add_to_page_cache(page, inode->i_mapping, loop, GFP_KERNEL); + if (ret < 0) + goto add_error; + + if (!pagevec_add(&lru_pvec, page)) + __pagevec_lru_add(&lru_pvec); + + unlock_page(page); + } + + pagevec_lru_add(&lru_pvec); + return 0; + + fsize_exceeded: + send_sig(SIGXFSZ, current, 0); + too_big: + return -EFBIG; + + add_error: + page_cache_release(pages + loop); + for (loop++; loop < npages; loop++) + __free_page(pages + loop); + return ret; +} + +/*****************************************************************************/ +/* + * check that file shrinkage doesn't leave any VMAs dangling in midair + */ +static int ramfs_nommu_check_mappings(struct inode *inode, + size_t newsize, size_t size) +{ + struct vm_area_struct *vma; + struct prio_tree_iter iter; + + /* search for VMAs that fall within the dead zone */ + vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, + newsize >> PAGE_SHIFT, + (size + PAGE_SIZE - 1) >> PAGE_SHIFT + ) { + /* found one - only interested if it's shared out of the page + * cache */ + if (vma->vm_flags & VM_SHARED) + return -ETXTBSY; /* not quite true, but near enough */ + } + + return 0; +} + +/*****************************************************************************/ +/* + * + */ +static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) +{ + int ret; + + /* assume a truncate from zero size is going to be for the purposes of + * shared mmap */ + if (size == 0) { + if (unlikely(newsize >> 32)) + return -EFBIG; + + return 
ramfs_nommu_expand_for_mapping(inode, newsize); + } + + /* check that a decrease in size doesn't cut off any shared mappings */ + if (newsize < size) { + ret = ramfs_nommu_check_mappings(inode, newsize, size); + if (ret < 0) + return ret; + } + + ret = vmtruncate(inode, size); + + return ret; +} + +/*****************************************************************************/ +/* + * handle a change of attributes + * - we're specifically interested in a change of size + */ +static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia) +{ + struct inode *inode = dentry->d_inode; + unsigned int old_ia_valid = ia->ia_valid; + int ret = 0; + + /* by providing our own setattr() method, we skip this quotaism */ + if ((old_ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) || + (old_ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid)) + ret = DQUOT_TRANSFER(inode, ia) ? -EDQUOT : 0; + + /* pick out size-changing events */ + if (ia->ia_valid & ATTR_SIZE) { + loff_t size = i_size_read(inode); + if (ia->ia_size != size) { + ret = ramfs_nommu_resize(inode, ia->ia_size, size); + if (ret < 0 || ia->ia_valid == ATTR_SIZE) + goto out; + } else { + /* we skipped the truncate but must still update + * timestamps + */ + ia->ia_valid |= ATTR_MTIME|ATTR_CTIME; + } + } + + ret = inode_setattr(inode, ia); + out: + ia->ia_valid = old_ia_valid; + return ret; +} + +/*****************************************************************************/ +/* + * try to determine where a shared mapping can be made + * - we require that: + * - the pages to be mapped must exist + * - the pages be physically contiguous in sequence + */ +unsigned long ramfs_nommu_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + unsigned long maxpages, lpages, nr, loop, ret; + struct inode *inode = file->f_dentry->d_inode; + struct page **pages = NULL, **ptr, *page; + loff_t isize; + + if (!(flags & MAP_SHARED)) + return addr; + + /* 
the mapping mustn't extend beyond the EOF */ + lpages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + isize = i_size_read(inode); + + ret = -EINVAL; + maxpages = (isize + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (pgoff >= maxpages) + goto out; + + if (maxpages - pgoff < lpages) + goto out; + + /* gang-find the pages */ + ret = -ENOMEM; + pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL); + if (!pages) + goto out; + + nr = find_get_pages(inode->i_mapping, pgoff, lpages, pages); + if (nr != lpages) + goto out; /* leave if some pages were missing */ + + /* check the pages for physical adjacency */ + ptr = pages; + page = *ptr++; + page++; + for (loop = lpages; loop > 1; loop--) + if (*ptr++ != page++) + goto out; + + /* okay - all conditions fulfilled */ + ret = (unsigned long) page_address(pages[0]); + + out: + if (pages) { + ptr = pages; + for (loop = lpages; loop > 0; loop--) + put_page(*ptr++); + kfree(pages); + } + + return ret; +} + +/*****************************************************************************/ +/* + * set up a mapping + */ +int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma) +{ + return 0; +} diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 0a88917605ae..c66bd5e4c05c 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -34,13 +34,12 @@ #include #include +#include "internal.h" /* some random number */ #define RAMFS_MAGIC 0x858458f6 static struct super_operations ramfs_ops; -static struct address_space_operations ramfs_aops; -static struct inode_operations ramfs_file_inode_operations; static struct inode_operations ramfs_dir_inode_operations; static struct backing_dev_info ramfs_backing_dev_info = { @@ -142,25 +141,6 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char * return error; } -static struct address_space_operations ramfs_aops = { - .readpage = simple_readpage, - .prepare_write = simple_prepare_write, - .commit_write = simple_commit_write -}; - -struct file_operations 
ramfs_file_operations = { - .read = generic_file_read, - .write = generic_file_write, - .mmap = generic_file_mmap, - .fsync = simple_sync_file, - .sendfile = generic_file_sendfile, - .llseek = generic_file_llseek, -}; - -static struct inode_operations ramfs_file_inode_operations = { - .getattr = simple_getattr, -}; - static struct inode_operations ramfs_dir_inode_operations = { .create = ramfs_create, .lookup = simple_lookup, diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h new file mode 100644 index 000000000000..272c8a7120b0 --- /dev/null +++ b/fs/ramfs/internal.h @@ -0,0 +1,15 @@ +/* internal.h: ramfs internal definitions + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + + +extern struct address_space_operations ramfs_aops; +extern struct file_operations ramfs_file_operations; +extern struct inode_operations ramfs_file_inode_operations; diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index e0a4faa9610c..953b6df5d037 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -5,6 +5,16 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev); struct super_block *ramfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data); +#ifndef CONFIG_MMU +extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags); + +extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); +#endif + extern struct file_operations ramfs_file_operations; extern struct vm_operations_struct generic_file_vm_ops; -- cgit v1.2.3-71-gd317 From b0e15190ead07056ab0c3844a499ff35e66d27cc Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 6 Jan 2006 00:11:42 -0800 Subject: [PATCH] NOMMU: Make SYSV IPC SHM use ramfs facilities on NOMMU The attached patch makes the SYSV IPC shared memory facilities use the new ramfs facilities on a no-MMU kernel. The following changes are made: (1) There are now shmem_mmap() and shmem_get_unmapped_area() functions to allow the IPC SHM facilities to commune with the tiny-shmem and shmem code. (2) ramfs files now need resizing using do_truncate() rather than by modifying the inode size directly (see shmem_file_setup()). This causes ramfs to attempt to bind a block of pages of sufficient size to the inode. (3) CONFIG_SYSVIPC is no longer contingent on CONFIG_MMU. 
Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 9 +++++++++ init/Kconfig | 1 - ipc/shm.c | 18 +++++++++++++----- mm/nommu.c | 7 +++++++ mm/shmem.c | 2 +- mm/tiny-shmem.c | 29 ++++++++++++++++++++++++++++- 6 files changed, 58 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 75ec04e2f184..26f3094911a5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -654,9 +654,18 @@ static inline struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, } #endif struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags); +extern int shmem_mmap(struct file *file, struct vm_area_struct *vma); int shmem_zero_setup(struct vm_area_struct *); +#ifndef CONFIG_MMU +extern unsigned long shmem_get_unmapped_area(struct file *file, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags); +#endif + static inline int can_do_mlock(void) { if (capable(CAP_IPC_LOCK)) diff --git a/init/Kconfig b/init/Kconfig index ce737e02c5a2..24e0f7c756c0 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -105,7 +105,6 @@ config SWAP config SYSVIPC bool "System V IPC" - depends on MMU ---help--- Inter Process Communication is a suite of library functions and system calls which let processes (running programs) synchronize and diff --git a/ipc/shm.c b/ipc/shm.c index 587d836d80d9..0ef4a1cf3e27 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -157,14 +157,22 @@ static void shm_close (struct vm_area_struct *shmd) static int shm_mmap(struct file * file, struct vm_area_struct * vma) { - file_accessed(file); - vma->vm_ops = &shm_vm_ops; - shm_inc(file->f_dentry->d_inode->i_ino); - return 0; + int ret; + + ret = shmem_mmap(file, vma); + if (ret == 0) { + vma->vm_ops = &shm_vm_ops; + shm_inc(file->f_dentry->d_inode->i_ino); + } + + return ret; } static struct file_operations shm_file_operations = { - .mmap = shm_mmap + .mmap = shm_mmap, 
+#ifndef CONFIG_MMU + .get_unmapped_area = shmem_get_unmapped_area, +#endif }; static struct vm_operations_struct shm_vm_ops = { diff --git a/mm/nommu.c b/mm/nommu.c index c1196812876b..c10262d68232 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1177,3 +1177,10 @@ int in_gate_area_no_task(unsigned long addr) { return 0; } + +struct page *filemap_nopage(struct vm_area_struct *area, + unsigned long address, int *type) +{ + BUG(); + return NULL; +} diff --git a/mm/shmem.c b/mm/shmem.c index 65c148efa2ed..a1f2f02af724 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1270,7 +1270,7 @@ out_nomem: return retval; } -static int shmem_mmap(struct file *file, struct vm_area_struct *vma) +int shmem_mmap(struct file *file, struct vm_area_struct *vma) { file_accessed(file); vma->vm_ops = &shmem_vm_ops; diff --git a/mm/tiny-shmem.c b/mm/tiny-shmem.c index b58abcf44ed6..cdc6d431972b 100644 --- a/mm/tiny-shmem.c +++ b/mm/tiny-shmem.c @@ -81,13 +81,19 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) goto close_file; d_instantiate(dentry, inode); - inode->i_size = size; inode->i_nlink = 0; /* It is unlinked */ + file->f_vfsmnt = mntget(shm_mnt); file->f_dentry = dentry; file->f_mapping = inode->i_mapping; file->f_op = &ramfs_file_operations; file->f_mode = FMODE_WRITE | FMODE_READ; + + /* notify everyone as to the change of file size */ + error = do_truncate(dentry, size, file); + if (error < 0) + goto close_file; + return file; close_file: @@ -123,3 +129,24 @@ int shmem_unuse(swp_entry_t entry, struct page *page) { return 0; } + +int shmem_mmap(struct file *file, struct vm_area_struct *vma) +{ + file_accessed(file); +#ifndef CONFIG_MMU + return ramfs_nommu_mmap(file, vma); +#else + return 0; +#endif +} + +#ifndef CONFIG_MMU +unsigned long shmem_get_unmapped_area(struct file *file, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags) +{ + return ramfs_nommu_get_unmapped_area(file, addr, len, pgoff, flags); +} +#endif -- cgit 
v1.2.3-71-gd317 From 7ee1dd3fee22f15728f545d266403fc977e1eb99 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 6 Jan 2006 00:11:44 -0800 Subject: [PATCH] FRV: Make futex code compilable on nommu [try #2] Make the futex code compilable and usable on NOMMU by making the attempt to handle page faults conditional on CONFIG_MMU. If this is not enabled, then we can assume that EFAULT returned from futex_atomic_op_inuser() is not recoverable, and that the address lies outside of valid memory. handle_mm_fault() is made to BUG if called on NOMMU without attempting to invoke the actual handler (__handle_mm_fault). Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 22 +++++++++++++++++++--- kernel/futex.c | 7 +++++++ 2 files changed, 26 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 26f3094911a5..bc01fff3aa01 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -717,12 +717,28 @@ extern int vmtruncate(struct inode * inode, loff_t offset); extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot); extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot); -extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access); -static inline int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access) +#ifdef CONFIG_MMU +extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, + unsigned long address, int write_access); + +static inline int handle_mm_fault(struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long address, + int write_access) { - return __handle_mm_fault(mm, vma, address, 
write_access) & (~VM_FAULT_WRITE); + return __handle_mm_fault(mm, vma, address, write_access) & + (~VM_FAULT_WRITE); } +#else +static inline int handle_mm_fault(struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long address, + int write_access) +{ + /* should never happen if there's no MMU */ + BUG(); + return VM_FAULT_SIGBUS; +} +#endif extern int make_pages_present(unsigned long addr, unsigned long end); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); diff --git a/kernel/futex.c b/kernel/futex.c index 5e71a6bf6f6b..5efa2f978032 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -356,6 +356,13 @@ retry: if (bh1 != bh2) spin_unlock(&bh2->lock); +#ifndef CONFIG_MMU + /* we don't get EFAULT from MMU faults if we don't have an MMU, + * but we might get them from range checking */ + ret = op_ret; + goto out; +#endif + if (unlikely(op_ret != -EFAULT)) { ret = op_ret; goto out; -- cgit v1.2.3-71-gd317 From f90b8116032f4216d260e31f966a3585319387ac Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Fri, 6 Jan 2006 00:12:14 -0800 Subject: [PATCH] Base support for AMD Geode GX/LX processors Provide basic support for the AMD Geode GX and LX processors. Signed-off-by: Jordan Crouse Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 7 +++++++ arch/i386/Kconfig.cpu | 14 ++++++++++---- arch/i386/kernel/cpu/amd.c | 7 ++++++- arch/i386/kernel/cpu/cyrix.c | 27 ++++++++++++++++++++++++++- include/asm-i386/module.h | 4 +++- include/linux/pci_ids.h | 10 ++++++++++ 6 files changed, 62 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index e9db0d6b928a..cb536bbed9ff 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -258,6 +258,13 @@ P: Ivan Kokshaysky M: ink@jurassic.park.msu.ru S: Maintained for 2.4; PCI support for 2.6. 
+AMD GEODE PROCESSOR/CHIPSET SUPPORT +P: Jordan Crouse +M: info-linux@geode.amd.com +L: info-linux@geode.amd.com +W: http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html +S: Supported + APM DRIVER P: Stephen Rothwell M: sfr@canb.auug.org.au diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu index 53bbb3c008ee..79603b3471f9 100644 --- a/arch/i386/Kconfig.cpu +++ b/arch/i386/Kconfig.cpu @@ -39,6 +39,7 @@ config M386 - "Winchip-2" for IDT Winchip 2. - "Winchip-2A" for IDT Winchips with 3dNow! capabilities. - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). + - "Geode GX/LX" For AMD Geode GX and LX processors. - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above). @@ -171,6 +172,11 @@ config MGEODEGX1 help Select this for a Geode GX1 (Cyrix MediaGX) chip. +config MGEODE_LX + bool "Geode GX/LX" + help + Select this for AMD Geode GX and LX processors. + config MCYRIXIII bool "CyrixIII/VIA-C3" help @@ -220,8 +226,8 @@ config X86_XADD config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || X86_GENERIC - default "4" if X86_ELAN || M486 || M386 - default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1 + default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 + default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX default "6" if MK7 || MK8 || MPENTIUMM config RWSEM_GENERIC_SPINLOCK @@ -290,12 +296,12 @@ config X86_INTEL_USERCOPY config X86_USE_PPRO_CHECKSUM bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 
|| MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX default y config X86_USE_3DNOW bool - depends on MCYRIXIII || MK7 + depends on MCYRIXIII || MK7 || MGEODE_LX default y config X86_OOSTORE diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index e344ef88cfcd..e7697e077f6b 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -161,8 +161,13 @@ static void __init init_amd(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_K6_MTRR, c->x86_capability); break; } - break; + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + break; + } + break; case 6: /* An Athlon/Duron */ /* Bit 15 of Athlon specific MSR 15, needs to be 0 diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index ff87cc22b323..75015975d038 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c @@ -342,6 +342,31 @@ static void __init init_cyrix(struct cpuinfo_x86 *c) return; } +/* + * Handle National Semiconductor branded processors + */ +static void __devinit init_nsc(struct cpuinfo_x86 *c) +{ + /* There may be GX1 processors in the wild that are branded + * NSC and not Cyrix. + * + * This function only handles the GX processor, and kicks every + * thing else to the Cyrix init function above - that should + * cover any processors that might have been branded differently + * after NSC acquired Cyrix. + * + * If this breaks your GX1 horribly, please e-mail + * info-linux@ldcmail.amd.com to tell us. + */ + + /* Handle the GX (Formerly known as the GX2) */ + + if (c->x86 == 5 && c->x86_model == 5) + display_cacheinfo(c); + else + init_cyrix(c); +} + /* * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected * by the fact that they preserve the flags across the division of 5/2. 
@@ -422,7 +447,7 @@ int __init cyrix_init_cpu(void) static struct cpu_dev nsc_cpu_dev __initdata = { .c_vendor = "NSC", .c_ident = { "Geode by NSC" }, - .c_init = init_cyrix, + .c_init = init_nsc, .c_identify = generic_identify, }; diff --git a/include/asm-i386/module.h b/include/asm-i386/module.h index eb7f2b4234aa..424661d25bd3 100644 --- a/include/asm-i386/module.h +++ b/include/asm-i386/module.h @@ -52,8 +52,10 @@ struct mod_arch_specific #define MODULE_PROC_FAMILY "CYRIXIII " #elif defined CONFIG_MVIAC3_2 #define MODULE_PROC_FAMILY "VIAC3-2 " -#elif CONFIG_MGEODEGX1 +#elif defined CONFIG_MGEODEGX1 #define MODULE_PROC_FAMILY "GEODEGX1 " +#elif defined CONFIG_MGEODE_LX +#define MODULE_PROC_FAMILY "GEODE " #else #error unknown processor family #endif diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4f01710485cd..24db7248301a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -394,6 +394,13 @@ #define PCI_DEVICE_ID_NS_87410 0xd001 #define PCI_DEVICE_ID_NS_CS5535_IDE 0x002d +#define PCI_DEVICE_ID_NS_CS5535_HOST_BRIDGE 0x0028 +#define PCI_DEVICE_ID_NS_CS5535_ISA_BRIDGE 0x002b +#define PCI_DEVICE_ID_NS_CS5535_IDE 0x002d +#define PCI_DEVICE_ID_NS_CS5535_AUDIO 0x002e +#define PCI_DEVICE_ID_NS_CS5535_USB 0x002f +#define PCI_DEVICE_ID_NS_CS5535_VIDEO 0x0030 + #define PCI_VENDOR_ID_TSENG 0x100c #define PCI_DEVICE_ID_TSENG_W32P_2 0x3202 #define PCI_DEVICE_ID_TSENG_W32P_b 0x3205 @@ -496,6 +503,9 @@ #define PCI_DEVICE_ID_AMD_CS5536_IDE 0x209A +#define PCI_DEVICE_ID_AMD_LX_VIDEO 0x2081 +#define PCI_DEVICE_ID_AMD_LX_AES 0x2082 + #define PCI_VENDOR_ID_TRIDENT 0x1023 #define PCI_DEVICE_ID_TRIDENT_4DWAVE_DX 0x2000 #define PCI_DEVICE_ID_TRIDENT_4DWAVE_NX 0x2001 -- cgit v1.2.3-71-gd317 From eee45269b0f5979c70bc151c6c2f4e5f4f5ababe Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Fri, 6 Jan 2006 00:12:21 -0800 Subject: [PATCH] Alpha: convert to generic irq framework (generic part) Thanks to Christoph for doing most of the work. 
This allows automatic SMP IRQ affinity assignment other than default "all interrupts on all CPUs" which is rather expensive. This might be useful if the hardware can be programmed to distribute interrupts among different CPUs, like Alpha does. Signed-off-by: Ivan Kokshaysky Cc: Christoph Hellwig Cc: Richard Henderson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/irq.h | 11 +++++++++++ kernel/irq/manage.c | 2 ++ kernel/irq/proc.c | 4 +++- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index f04ba20712a2..60f8bc78a35a 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -221,6 +221,17 @@ extern void note_interrupt(unsigned int irq, irq_desc_t *desc, extern int can_request_irq(unsigned int irq, unsigned long irqflags); extern void init_irq_proc(void); + +#ifdef CONFIG_AUTO_IRQ_AFFINITY +extern int select_smp_affinity(unsigned int irq); +#else +static inline int +select_smp_affinity(unsigned int irq) +{ + return 1; +} +#endif + #endif extern hw_irq_controller no_irq_type; /* needed in every arch ? */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 81c49a4d679e..97d5559997d2 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -366,6 +366,8 @@ int request_irq(unsigned int irq, action->next = NULL; action->dev_id = dev_id; + select_smp_affinity(irq); + retval = setup_irq(irq, action); if (retval) kfree(action); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index f26e534c6585..8a64a4844cde 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -68,7 +68,9 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer, */ cpus_and(tmp, new_value, cpu_online_map); if (cpus_empty(tmp)) - return -EINVAL; + /* Special case for empty set - allow the architecture + code to set default SMP affinity. */ + return select_smp_affinity(irq) ? 
-EINVAL : full_count; proc_set_irq_affinity(irq, new_value); -- cgit v1.2.3-71-gd317 From 7088a5c00103ef48782d6c359cd12b13a10666e6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 6 Jan 2006 00:13:05 -0800 Subject: [PATCH] swsusp: introduce the swap map structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces the swap map structure that can be used by swsusp for keeping track of data pages written to the swap.  The structure itself is described in a comment within the patch. The overall idea is to reduce the amount of metadata written to the swap and to write and read the image pages sequentially, in a file-like way. This makes the swap-handling part of swsusp fairly independent of its snapshot-handling part and will hopefully allow us to completely separate these two parts in the future. This patch is needed to remove the suspend image size limit imposed by the limited size of the swsusp_info structure, which is essential for x86-64 systems with more than 512 MB of RAM. Signed-off-by: Rafael J.
Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/suspend.h | 6 +- kernel/power/disk.c | 8 +- kernel/power/power.h | 13 +- kernel/power/snapshot.c | 14 +- kernel/power/swsusp.c | 558 ++++++++++++++++++++++++++++++++++-------------- 5 files changed, 418 insertions(+), 181 deletions(-) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index a61c04f804b2..33bbaea23aaf 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -14,11 +14,7 @@ typedef struct pbe { unsigned long address; /* address of the copy */ unsigned long orig_address; /* original address of page */ - swp_entry_t swap_address; - - struct pbe *next; /* also used as scratch space at - * end of page (see link, diskpage) - */ + struct pbe *next; } suspend_pagedir_t; #define for_each_pbe(pbe, pblist) \ diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 4d944b281b28..76a5131b0e80 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -25,9 +25,9 @@ extern suspend_disk_method_t pm_disk_mode; extern int swsusp_suspend(void); -extern int swsusp_write(void); +extern int swsusp_write(struct pbe *pblist, unsigned int nr_pages); extern int swsusp_check(void); -extern int swsusp_read(void); +extern int swsusp_read(struct pbe **pblist_ptr); extern void swsusp_close(void); extern int swsusp_resume(void); @@ -176,7 +176,7 @@ int pm_suspend_disk(void) if (in_suspend) { device_resume(); pr_debug("PM: writing image.\n"); - error = swsusp_write(); + error = swsusp_write(pagedir_nosave, nr_copy_pages); if (!error) power_down(pm_disk_mode); else { @@ -247,7 +247,7 @@ static int software_resume(void) pr_debug("PM: Reading swsusp image.\n"); - if ((error = swsusp_read())) { + if ((error = swsusp_read(&pagedir_nosave))) { swsusp_free(); goto Thaw; } diff --git a/kernel/power/power.h b/kernel/power/power.h index 6c042b5ee14b..977877c6dcfc 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h 
@@ -9,19 +9,14 @@ #define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) #endif -#define MAX_PBES ((PAGE_SIZE - sizeof(struct new_utsname) \ - - 4 - 3*sizeof(unsigned long) - sizeof(int) \ - - sizeof(void *)) / sizeof(swp_entry_t)) - struct swsusp_info { struct new_utsname uts; u32 version_code; unsigned long num_physpages; int cpus; unsigned long image_pages; - unsigned long pagedir_pages; - suspend_pagedir_t * suspend_pagedir; - swp_entry_t pagedir[MAX_PBES]; + unsigned long pages; + swp_entry_t start; } __attribute__((aligned(PAGE_SIZE))); @@ -67,6 +62,8 @@ extern asmlinkage int swsusp_arch_resume(void); extern void free_pagedir(struct pbe *pblist); extern struct pbe *alloc_pagedir(unsigned nr_pages, gfp_t gfp_mask, int safe_needed); -extern void create_pbe_list(struct pbe *pblist, unsigned nr_pages); extern void swsusp_free(void); extern int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed); +extern unsigned int snapshot_nr_pages(void); +extern struct pbe *snapshot_pblist(void); +extern void snapshot_pblist_set(struct pbe *pblist); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 4a6dbcefd378..152d56cdf017 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -33,6 +33,9 @@ #include "power.h" +struct pbe *pagedir_nosave; +unsigned int nr_copy_pages; + #ifdef CONFIG_HIGHMEM struct highmem_page { char *data; @@ -244,7 +247,7 @@ static inline void fill_pb_page(struct pbe *pbpage) * of memory pages allocated with alloc_pagedir() */ -void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) +static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) { struct pbe *pbpage, *p; unsigned int num = PBES_PER_PAGE; @@ -261,7 +264,6 @@ void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) p->next = p + 1; p->next = NULL; } - pr_debug("create_pbe_list(): initialized %d PBEs\n", num); } /** @@ -332,7 +334,8 @@ struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed if 
(!pbe) { /* get_zeroed_page() failed */ free_pagedir(pblist); pblist = NULL; - } + } else + create_pbe_list(pblist, nr_pages); return pblist; } @@ -395,7 +398,6 @@ static struct pbe *swsusp_alloc(unsigned int nr_pages) printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); return NULL; } - create_pbe_list(pblist, nr_pages); if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) { printk(KERN_ERR "suspend: Allocating image pages failed.\n"); @@ -421,10 +423,6 @@ asmlinkage int swsusp_save(void) (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE, PAGES_FOR_IO, nr_free_pages()); - /* This is needed because of the fixed size of swsusp_info */ - if (MAX_PBES < (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE) - return -ENOSPC; - if (!enough_free_mem(nr_pages)) { printk(KERN_ERR "swsusp: Not enough free memory\n"); return -ENOMEM; diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index bd3097c583bf..b09bd7c0998d 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -30,6 +30,9 @@ * Alex Badea : * Fixed runaway init * + * Rafael J. Wysocki + * Added the swap map data structure and reworked the handling of swap + * * More state savers are welcome. Especially for the scsi layer... * * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt @@ -76,18 +79,6 @@ static int restore_highmem(void) { return 0; } extern char resume_file[]; -/* Local variables that should not be affected by save */ -unsigned int nr_copy_pages __nosavedata = 0; - -/* Suspend pagedir is allocated before final copy, therefore it - must be freed after resume - - Warning: this is even more evil than it seems. Pagedirs this file - talks about are completely different from page directories used by - MMU hardware. 
- */ -suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; - #define SWSUSP_SIG "S1SUSPEND" static struct swsusp_header { @@ -238,48 +229,205 @@ static int write_page(unsigned long addr, swp_entry_t *loc) } /** - * data_free - Free the swap entries used by the saved image. + * Swap map-handling functions + * + * The swap map is a data structure used for keeping track of each page + * written to the swap. It consists of many swap_map_page structures + * that each contain an array of MAP_PAGE_SIZE swap entries. + * These structures are linked together with the help of either the + * .next (in memory) or the .next_swap (in swap) member. * - * Walk the list of used swap entries and free each one. - * This is only used for cleanup when suspend fails. + * The swap map is created during suspend. At that time we need to keep + * it in memory, because we have to free all of the allocated swap + * entries if an error occurs. The memory needed is preallocated + * so that we know in advance if there's enough of it. + * + * The first swap_map_page structure is filled with the swap entries that + * correspond to the first MAP_PAGE_SIZE data pages written to swap and + * so on. After all of the data pages have been written, the order + * of the swap_map_page structures in the map is reversed so that they + * can be read from swap in the original order. This causes the data + * pages to be loaded in exactly the same order in which they have been + * saved. + * + * During resume we only need to use one swap_map_page structure + * at a time, which means that we only need to use two memory pages for + * reading the image - one for reading the swap_map_page structures + * and the second for reading the data pages from swap.
*/ -static void data_free(void) + +#define MAP_PAGE_SIZE ((PAGE_SIZE - sizeof(swp_entry_t) - sizeof(void *)) \ + / sizeof(swp_entry_t)) + +struct swap_map_page { + swp_entry_t entries[MAP_PAGE_SIZE]; + swp_entry_t next_swap; + struct swap_map_page *next; +}; + +static inline void free_swap_map(struct swap_map_page *swap_map) { - swp_entry_t entry; - struct pbe *p; + struct swap_map_page *swp; - for_each_pbe (p, pagedir_nosave) { - entry = p->swap_address; - if (entry.val) - swap_free(entry); - else - break; + while (swap_map) { + swp = swap_map->next; + free_page((unsigned long)swap_map); + swap_map = swp; + } +} + +static struct swap_map_page *alloc_swap_map(unsigned int nr_pages) +{ + struct swap_map_page *swap_map, *swp; + unsigned n = 0; + + if (!nr_pages) + return NULL; + + pr_debug("alloc_swap_map(): nr_pages = %d\n", nr_pages); + swap_map = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); + swp = swap_map; + for (n = MAP_PAGE_SIZE; n < nr_pages; n += MAP_PAGE_SIZE) { + swp->next = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); + swp = swp->next; + if (!swp) { + free_swap_map(swap_map); + return NULL; + } } + return swap_map; } /** - * data_write - Write saved image to swap. - * - * Walk the list of pages in the image and sync each one to swap. 
+ * reverse_swap_map - reverse the order of pages in the swap map + * @swap_map */ -static int data_write(void) + +static inline struct swap_map_page *reverse_swap_map(struct swap_map_page *swap_map) { - int error = 0, i = 0; - unsigned int mod = nr_copy_pages / 100; - struct pbe *p; + struct swap_map_page *prev, *next; + + prev = NULL; + while (swap_map) { + next = swap_map->next; + swap_map->next = prev; + prev = swap_map; + swap_map = next; + } + return prev; +} - if (!mod) - mod = 1; +/** + * free_swap_map_entries - free the swap entries allocated to store + * the swap map @swap_map (this is only called in case of an error) + */ +static inline void free_swap_map_entries(struct swap_map_page *swap_map) +{ + while (swap_map) { + if (swap_map->next_swap.val) + swap_free(swap_map->next_swap); + swap_map = swap_map->next; + } +} - printk( "Writing data to swap (%d pages)... ", nr_copy_pages ); - for_each_pbe (p, pagedir_nosave) { - if (!(i%mod)) - printk( "\b\b\b\b%3d%%", i / mod ); - if ((error = write_page(p->address, &p->swap_address))) +/** + * save_swap_map - save the swap map used for tracing the data pages + * stored in the swap + */ + +static int save_swap_map(struct swap_map_page *swap_map, swp_entry_t *start) +{ + swp_entry_t entry = (swp_entry_t){0}; + int error; + + while (swap_map) { + swap_map->next_swap = entry; + if ((error = write_page((unsigned long)swap_map, &entry))) return error; - i++; + swap_map = swap_map->next; } - printk("\b\b\b\bdone\n"); + *start = entry; + return 0; +} + +/** + * free_image_entries - free the swap entries allocated to store + * the image data pages (this is only called in case of an error) + */ + +static inline void free_image_entries(struct swap_map_page *swp) +{ + unsigned k; + + while (swp) { + for (k = 0; k < MAP_PAGE_SIZE; k++) + if (swp->entries[k].val) + swap_free(swp->entries[k]); + swp = swp->next; + } +} + +/** + * The swap_map_handle structure is used for handling the swap map in + * a file-alike way + */ + 
+struct swap_map_handle { + struct swap_map_page *cur; + unsigned int k; +}; + +static inline void init_swap_map_handle(struct swap_map_handle *handle, + struct swap_map_page *map) +{ + handle->cur = map; + handle->k = 0; +} + +static inline int swap_map_write_page(struct swap_map_handle *handle, + unsigned long addr) +{ + int error; + + error = write_page(addr, handle->cur->entries + handle->k); + if (error) + return error; + if (++handle->k >= MAP_PAGE_SIZE) { + handle->cur = handle->cur->next; + handle->k = 0; + } + return 0; +} + +/** + * save_image_data - save the data pages pointed to by the PBEs + * from the list @pblist using the swap map handle @handle + * (assume there are @nr_pages data pages to save) + */ + +static int save_image_data(struct pbe *pblist, + struct swap_map_handle *handle, + unsigned int nr_pages) +{ + unsigned int m; + struct pbe *p; + int error = 0; + + printk("Saving image data pages (%u pages) ... ", nr_pages); + m = nr_pages / 100; + if (!m) + m = 1; + nr_pages = 0; + for_each_pbe (p, pblist) { + error = swap_map_write_page(handle, p->address); + if (error) + break; + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + } + if (!error) + printk("\b\b\b\bdone\n"); return error; } @@ -295,19 +443,20 @@ static void dump_info(void) pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname); pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus); pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages); - pr_debug(" swsusp: Pagedir: %ld Pages\n",swsusp_info.pagedir_pages); + pr_debug(" swsusp: Total: %ld Pages\n", swsusp_info.pages); } -static void init_header(void) +static void init_header(unsigned int nr_pages) { memset(&swsusp_info, 0, sizeof(swsusp_info)); swsusp_info.version_code = LINUX_VERSION_CODE; swsusp_info.num_physpages = num_physpages; memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname)); - swsusp_info.suspend_pagedir = pagedir_nosave; swsusp_info.cpus = num_online_cpus(); - 
swsusp_info.image_pages = nr_copy_pages; + swsusp_info.image_pages = nr_pages; + swsusp_info.pages = nr_pages + + ((nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT); } static int close_swap(void) @@ -326,39 +475,53 @@ static int close_swap(void) } /** - * free_pagedir_entries - Free pages used by the page directory. - * - * This is used during suspend for error recovery. + * pack_orig_addresses - the .orig_address fields of the PBEs from the + * list starting at @pbe are stored in the array @buf[] (1 page) */ -static void free_pagedir_entries(void) +static inline struct pbe *pack_orig_addresses(unsigned long *buf, + struct pbe *pbe) { - int i; + int j; - for (i = 0; i < swsusp_info.pagedir_pages; i++) - swap_free(swsusp_info.pagedir[i]); + for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { + buf[j] = pbe->orig_address; + pbe = pbe->next; + } + if (!pbe) + for (; j < PAGE_SIZE / sizeof(long); j++) + buf[j] = 0; + return pbe; } - /** - * write_pagedir - Write the array of pages holding the page directory. - * @last: Last swap entry we write (needed for header). + * save_image_metadata - save the .orig_address fields of the PBEs + * from the list @pblist using the swap map handle @handle */ -static int write_pagedir(void) +static int save_image_metadata(struct pbe *pblist, + struct swap_map_handle *handle) { - int error = 0; + unsigned long *buf; unsigned int n = 0; - struct pbe *pbe; + struct pbe *p; + int error = 0; - printk( "Writing pagedir..."); - for_each_pb_page (pbe, pagedir_nosave) { - if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++]))) - return error; + printk("Saving image metadata ... 
"); + buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC); + if (!buf) + return -ENOMEM; + p = pblist; + while (p) { + p = pack_orig_addresses(buf, p); + error = swap_map_write_page(handle, (unsigned long)buf); + if (error) + break; + n++; } - - swsusp_info.pagedir_pages = n; - printk("done (%u pages)\n", n); + free_page((unsigned long)buf); + if (!error) + printk("done (%u pages saved)\n", n); return error; } @@ -384,33 +547,48 @@ static int enough_swap(unsigned int nr_pages) /** * write_suspend_image - Write entire image and metadata. - * */ -static int write_suspend_image(void) +static int write_suspend_image(struct pbe *pblist, unsigned int nr_pages) { + struct swap_map_page *swap_map; + struct swap_map_handle handle; int error; - if (!enough_swap(nr_copy_pages)) { + if (!enough_swap(nr_pages)) { printk(KERN_ERR "swsusp: Not enough free swap\n"); return -ENOSPC; } - init_header(); - if ((error = data_write())) - goto FreeData; + init_header(nr_pages); + swap_map = alloc_swap_map(swsusp_info.pages); + if (!swap_map) + return -ENOMEM; + init_swap_map_handle(&handle, swap_map); - if ((error = write_pagedir())) - goto FreePagedir; + error = save_image_metadata(pblist, &handle); + if (!error) + error = save_image_data(pblist, &handle, nr_pages); + if (error) + goto Free_image_entries; - if ((error = close_swap())) - goto FreePagedir; - Done: + swap_map = reverse_swap_map(swap_map); + error = save_swap_map(swap_map, &swsusp_info.start); + if (error) + goto Free_map_entries; + + error = close_swap(); + if (error) + goto Free_map_entries; + +Free_swap_map: + free_swap_map(swap_map); return error; - FreePagedir: - free_pagedir_entries(); - FreeData: - data_free(); - goto Done; + +Free_map_entries: + free_swap_map_entries(swap_map); +Free_image_entries: + free_image_entries(swap_map); + goto Free_swap_map; } /* It is important _NOT_ to umount filesystems at this point. We want @@ -418,7 +596,7 @@ static int write_suspend_image(void) * filesystem clean: it is not. 
(And it does not matter, if we resume * correctly, we'll mark system clean, anyway.) */ -int swsusp_write(void) +int swsusp_write(struct pbe *pblist, unsigned int nr_pages) { int error; @@ -427,14 +605,12 @@ int swsusp_write(void) return error; } lock_swapdevices(); - error = write_suspend_image(); + error = write_suspend_image(pblist, nr_pages); /* This will unlock ignored swap devices since writing is finished */ lock_swapdevices(); return error; } - - int swsusp_suspend(void) { int error; @@ -531,7 +707,6 @@ static void copy_page_backup_list(struct pbe *dst, struct pbe *src) /* We assume both lists contain the same number of elements */ while (src) { dst->orig_address = src->orig_address; - dst->swap_address = src->swap_address; dst = dst->next; src = src->next; } @@ -611,6 +786,61 @@ static int bio_write_page(pgoff_t page_off, void *page) return submit(WRITE, page_off, page); } +/** + * The following functions allow us to read data using a swap map + * in a file-alike way + */ + +static inline void release_swap_map_reader(struct swap_map_handle *handle) +{ + if (handle->cur) + free_page((unsigned long)handle->cur); + handle->cur = NULL; +} + +static inline int get_swap_map_reader(struct swap_map_handle *handle, + swp_entry_t start) +{ + int error; + + if (!swp_offset(start)) + return -EINVAL; + handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); + if (!handle->cur) + return -ENOMEM; + error = bio_read_page(swp_offset(start), handle->cur); + if (error) { + release_swap_map_reader(handle); + return error; + } + handle->k = 0; + return 0; +} + +static inline int swap_map_read_page(struct swap_map_handle *handle, void *buf) +{ + unsigned long offset; + int error; + + if (!handle->cur) + return -EINVAL; + offset = swp_offset(handle->cur->entries[handle->k]); + if (!offset) + return -EINVAL; + error = bio_read_page(offset, buf); + if (error) + return error; + if (++handle->k >= MAP_PAGE_SIZE) { + handle->k = 0; + offset = 
swp_offset(handle->cur->next_swap); + if (!offset) + release_swap_map_reader(handle); + else + error = bio_read_page(offset, handle->cur); + } + return error; +} + /* * Sanity check if this image makes sense with this kernel/swap context * I really don't think that it's foolproof but more than nothing.. @@ -639,7 +869,6 @@ static const char *sanity_check(void) return NULL; } - static int check_header(void) { const char *reason = NULL; @@ -653,7 +882,6 @@ static int check_header(void) printk(KERN_ERR "swsusp: Resume mismatch: %s\n",reason); return -EPERM; } - nr_copy_pages = swsusp_info.image_pages; return error; } @@ -680,75 +908,88 @@ static int check_sig(void) } /** - * data_read - Read image pages from swap. - * - * You do not need to check for overlaps, check_pagedir() - * already did that. + * load_image_data - load the image data using the swap map handle + * @handle and store them using the page backup list @pblist + * (assume there are @nr_pages pages to load) */ -static int data_read(struct pbe *pblist) +static int load_image_data(struct pbe *pblist, + struct swap_map_handle *handle, + unsigned int nr_pages) { + int error; + unsigned int m; struct pbe *p; - int error = 0; - int i = 0; - int mod = swsusp_info.image_pages / 100; - - if (!mod) - mod = 1; - - printk("swsusp: Reading image data (%lu pages): ", - swsusp_info.image_pages); - - for_each_pbe (p, pblist) { - if (!(i % mod)) - printk("\b\b\b\b%3d%%", i / mod); - if ((error = bio_read_page(swp_offset(p->swap_address), - (void *)p->address))) - return error; - - i++; + if (!pblist) + return -EINVAL; + printk("Loading image data pages (%u pages) ... 
", nr_pages); + m = nr_pages / 100; + if (!m) + m = 1; + nr_pages = 0; + p = pblist; + while (p) { + error = swap_map_read_page(handle, (void *)p->address); + if (error) + break; + p = p->next; + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; } - printk("\b\b\b\bdone\n"); + if (!error) + printk("\b\b\b\bdone\n"); return error; } /** - * read_pagedir - Read page backup list pages from swap + * unpack_orig_addresses - copy the elements of @buf[] (1 page) to + * the PBEs in the list starting at @pbe */ -static int read_pagedir(struct pbe *pblist) +static inline struct pbe *unpack_orig_addresses(unsigned long *buf, + struct pbe *pbe) { - struct pbe *pbpage, *p; - unsigned int i = 0; - int error; + int j; - if (!pblist) - return -EFAULT; + for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { + pbe->orig_address = buf[j]; + pbe = pbe->next; + } + return pbe; +} - printk("swsusp: Reading pagedir (%lu pages)\n", - swsusp_info.pagedir_pages); +/** + * load_image_metadata - load the image metadata using the swap map + * handle @handle and put them into the PBEs in the list @pblist + */ - for_each_pb_page (pbpage, pblist) { - unsigned long offset = swp_offset(swsusp_info.pagedir[i++]); +static int load_image_metadata(struct pbe *pblist, struct swap_map_handle *handle) +{ + struct pbe *p; + unsigned long *buf; + unsigned int n = 0; + int error = 0; - error = -EFAULT; - if (offset) { - p = (pbpage + PB_PAGE_SKIP)->next; - error = bio_read_page(offset, (void *)pbpage); - (pbpage + PB_PAGE_SKIP)->next = p; - } + printk("Loading image metadata ... 
"); + buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC); + if (!buf) + return -ENOMEM; + p = pblist; + while (p) { + error = swap_map_read_page(handle, buf); if (error) break; + p = unpack_orig_addresses(buf, p); + n++; } - + free_page((unsigned long)buf); if (!error) - BUG_ON(i != swsusp_info.pagedir_pages); - + printk("done (%u pages loaded)\n", n); return error; } - static int check_suspend_image(void) { int error = 0; @@ -762,34 +1003,39 @@ static int check_suspend_image(void) return 0; } -static int read_suspend_image(void) +static int read_suspend_image(struct pbe **pblist_ptr) { int error = 0; - struct pbe *p; + struct pbe *p, *pblist; + struct swap_map_handle handle; + unsigned int nr_pages = swsusp_info.image_pages; - if (!(p = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 0))) + p = alloc_pagedir(nr_pages, GFP_ATOMIC, 0); + if (!p) return -ENOMEM; - - if ((error = read_pagedir(p))) + error = get_swap_map_reader(&handle, swsusp_info.start); + if (error) + /* The PBE list at p will be released by swsusp_free() */ return error; - create_pbe_list(p, nr_copy_pages); - mark_unsafe_pages(p); - pagedir_nosave = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1); - if (pagedir_nosave) { - create_pbe_list(pagedir_nosave, nr_copy_pages); - copy_page_backup_list(pagedir_nosave, p); + error = load_image_metadata(p, &handle); + if (!error) { + mark_unsafe_pages(p); + pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1); + if (pblist) + copy_page_backup_list(pblist, p); + free_pagedir(p); + if (!pblist) + error = -ENOMEM; + + /* Allocate memory for the image and read the data from swap */ + if (!error) + error = alloc_data_pages(pblist, GFP_ATOMIC, 1); + if (!error) + error = load_image_data(pblist, &handle, nr_pages); + if (!error) + *pblist_ptr = pblist; } - free_pagedir(p); - if (!pagedir_nosave) - return -ENOMEM; - - /* Allocate memory for the image and read the data from swap */ - - error = alloc_data_pages(pagedir_nosave, GFP_ATOMIC, 1); - - if (!error) - error = 
data_read(pagedir_nosave); - + release_swap_map_reader(&handle); return error; } @@ -821,7 +1067,7 @@ int swsusp_check(void) * swsusp_read - Read saved image from swap. */ -int swsusp_read(void) +int swsusp_read(struct pbe **pblist_ptr) { int error; @@ -830,7 +1076,7 @@ int swsusp_read(void) return PTR_ERR(resume_bdev); } - error = read_suspend_image(); + error = read_suspend_image(pblist_ptr); blkdev_put(resume_bdev); if (!error) -- cgit v1.2.3-71-gd317 From 72a97e08394a3b2e75481ff680ec2a0591e3cba4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 6 Jan 2006 00:13:46 -0800 Subject: [PATCH] swsusp: improve freeing of memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch makes swsusp free only as much memory as needed to complete the suspend and not as much as possible.  In most cases this should speed up the suspend and make the system much more responsive after resume, especially if a GUI (e.g. X Windows) is used. If needed, the old behavior (i.e. to free as much memory as possible during suspend) can be restored by unsetting FAST_FREE in power.h. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/suspend.h | 2 +- kernel/power/disk.c | 30 +++-------------------- kernel/power/power.h | 14 ++++++++--- kernel/power/snapshot.c | 65 +++++++++++++++++++++++++++++++++++++++++++++---- kernel/power/swsusp.c | 52 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 126 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 33bbaea23aaf..5dc94e777fab 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -73,6 +73,6 @@ unsigned long get_safe_page(gfp_t gfp_mask); * XXX: We try to keep some more pages free so that I/O operations succeed * without paging. Might this be more?
*/ -#define PAGES_FOR_IO 512 +#define PAGES_FOR_IO 1024 #endif /* _LINUX_SWSUSP_H */ diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 76a5131b0e80..9e51cdf7b78d 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -24,6 +24,7 @@ extern suspend_disk_method_t pm_disk_mode; +extern int swsusp_shrink_memory(void); extern int swsusp_suspend(void); extern int swsusp_write(struct pbe *pblist, unsigned int nr_pages); extern int swsusp_check(void); @@ -73,31 +74,6 @@ static void power_down(suspend_disk_method_t mode) static int in_suspend __nosavedata = 0; -/** - * free_some_memory - Try to free as much memory as possible - * - * ... but do not OOM-kill anyone - * - * Notice: all userland should be stopped at this point, or - * livelock is possible. - */ - -static void free_some_memory(void) -{ - unsigned int i = 0; - unsigned int tmp; - unsigned long pages = 0; - char *p = "-\\|/"; - - printk("Freeing memory... "); - while ((tmp = shrink_all_memory(10000))) { - pages += tmp; - printk("\b%c", p[i++ % 4]); - } - printk("\bdone (%li pages freed)\n", pages); -} - - static inline void platform_finish(void) { if (pm_disk_mode == PM_DISK_PLATFORM) { @@ -127,8 +103,8 @@ static int prepare_processes(void) } /* Free memory before shutting down devices. 
*/ - free_some_memory(); - return 0; + if (!(error = swsusp_shrink_memory())) + return 0; thaw: thaw_processes(); enable_nonboot_cpus(); diff --git a/kernel/power/power.h b/kernel/power/power.h index 977877c6dcfc..acdc83b3d890 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -49,18 +49,26 @@ extern void thaw_processes(void); extern int pm_prepare_console(void); extern void pm_restore_console(void); - /* References to section boundaries */ extern const void __nosave_begin, __nosave_end; extern unsigned int nr_copy_pages; -extern suspend_pagedir_t *pagedir_nosave; -extern suspend_pagedir_t *pagedir_save; +extern struct pbe *pagedir_nosave; + +/* + * This compilation switch determines the way in which memory will be freed + * during suspend. If defined, only as much memory will be freed as needed + * to complete the suspend, which will make it go faster. Otherwise, the + * largest possible amount of memory will be freed. + */ +#define FAST_FREE 1 extern asmlinkage int swsusp_arch_suspend(void); extern asmlinkage int swsusp_arch_resume(void); +extern unsigned int count_data_pages(void); extern void free_pagedir(struct pbe *pblist); +extern void release_eaten_pages(void); extern struct pbe *alloc_pagedir(unsigned nr_pages, gfp_t gfp_mask, int safe_needed); extern void swsusp_free(void); extern int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 152d56cdf017..e80d282dbf58 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -37,6 +37,31 @@ struct pbe *pagedir_nosave; unsigned int nr_copy_pages; #ifdef CONFIG_HIGHMEM +unsigned int count_highmem_pages(void) +{ + struct zone *zone; + unsigned long zone_pfn; + unsigned int n = 0; + + for_each_zone (zone) + if (is_highmem(zone)) { + mark_free_pages(zone); + for (zone_pfn = 0; zone_pfn < zone->spanned_pages; zone_pfn++) { + struct page *page; + unsigned long pfn = zone_pfn + zone->zone_start_pfn; + if 
(!pfn_valid(pfn)) + continue; + page = pfn_to_page(pfn); + if (PageReserved(page)) + continue; + if (PageNosaveFree(page)) + continue; + n++; + } + } + return n; +} + struct highmem_page { char *data; struct page *page; @@ -152,17 +177,15 @@ static int saveable(struct zone *zone, unsigned long *zone_pfn) BUG_ON(PageReserved(page) && PageNosave(page)); if (PageNosave(page)) return 0; - if (PageReserved(page) && pfn_is_nosave(pfn)) { - pr_debug("[nosave pfn 0x%lx]", pfn); + if (PageReserved(page) && pfn_is_nosave(pfn)) return 0; - } if (PageNosaveFree(page)) return 0; return 1; } -static unsigned count_data_pages(void) +unsigned int count_data_pages(void) { struct zone *zone; unsigned long zone_pfn; @@ -266,6 +289,35 @@ static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) } } +/** + * On resume it is necessary to trace and eventually free the unsafe + * pages that have been allocated, because they are needed for I/O + * (on x86-64 we likely will "eat" these pages once again while + * creating the temporary page translation tables) + */ + +struct eaten_page { + struct eaten_page *next; + char padding[PAGE_SIZE - sizeof(void *)]; +}; + +static struct eaten_page *eaten_pages = NULL; + +void release_eaten_pages(void) +{ + struct eaten_page *p, *q; + + p = eaten_pages; + while (p) { + q = p->next; + /* We don't want swsusp_free() to free this page again */ + ClearPageNosave(virt_to_page(p)); + free_page((unsigned long)p); + p = q; + } + eaten_pages = NULL; +} + /** * @safe_needed - on resume, for storing the PBE list and the image, * we can only use memory pages that do not conflict with the pages @@ -284,9 +336,12 @@ static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) if (safe_needed) do { res = (void *)get_zeroed_page(gfp_mask); - if (res && PageNosaveFree(virt_to_page(res))) + if (res && PageNosaveFree(virt_to_page(res))) { /* This is for swsusp_free() */ SetPageNosave(virt_to_page(res)); + ((struct eaten_page *)res)->next = 
eaten_pages; + eaten_pages = res; + } } while (res && PageNosaveFree(virt_to_page(res))); else res = (void *)get_zeroed_page(gfp_mask); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index b09bd7c0998d..f77f9397a364 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -70,11 +70,13 @@ #include "power.h" #ifdef CONFIG_HIGHMEM +unsigned int count_highmem_pages(void); int save_highmem(void); int restore_highmem(void); #else static int save_highmem(void) { return 0; } static int restore_highmem(void) { return 0; } +static unsigned int count_highmem_pages(void) { return 0; } #endif extern char resume_file[]; @@ -611,6 +613,52 @@ int swsusp_write(struct pbe *pblist, unsigned int nr_pages) return error; } +/** + * swsusp_shrink_memory - Try to free as much memory as needed + * + * ... but do not OOM-kill anyone + * + * Notice: all userland should be stopped before it is called, or + * livelock is possible. + */ + +#define SHRINK_BITE 10000 + +int swsusp_shrink_memory(void) +{ + long tmp; + struct zone *zone; + unsigned long pages = 0; + unsigned int i = 0; + char *p = "-\\|/"; + + printk("Shrinking memory... 
"); + do { +#ifdef FAST_FREE + tmp = 2 * count_highmem_pages(); + tmp += tmp / 50 + count_data_pages(); + tmp += (tmp + PBES_PER_PAGE - 1) / PBES_PER_PAGE + + PAGES_FOR_IO; + for_each_zone (zone) + if (!is_highmem(zone)) + tmp -= zone->free_pages; + if (tmp > 0) { + tmp = shrink_all_memory(SHRINK_BITE); + if (!tmp) + return -ENOMEM; + pages += tmp; + } +#else + tmp = shrink_all_memory(SHRINK_BITE); + pages += tmp; +#endif + printk("\b%c", p[i++%4]); + } while (tmp > 0); + printk("\bdone (%lu pages freed)\n", pages); + + return 0; +} + int swsusp_suspend(void) { int error; @@ -1030,8 +1078,10 @@ static int read_suspend_image(struct pbe **pblist_ptr) /* Allocate memory for the image and read the data from swap */ if (!error) error = alloc_data_pages(pblist, GFP_ATOMIC, 1); - if (!error) + if (!error) { + release_eaten_pages(); error = load_image_data(pblist, &handle, nr_pages); + } if (!error) *pblist_ptr = pblist; } -- cgit v1.2.3-71-gd317 From 3a291a20bd6fcfafb2109031f0760a0d3e92ecd7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 6 Jan 2006 00:16:37 -0800 Subject: [PATCH] mm: add a new function (needed for swap suspend) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds the function get_swap_page_of_type() allowing us to specify an index in swap_info[] and select a swap_info_struct structure to be used for allocating a swap page. This function (or another one of similar functionality) will be necessary for implementing the image-writing part of swsusp in the user space.  It can also be used for simplifying the current in-kernel implementation of the image-writing part of swsusp. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 + mm/swapfile.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index bd6641784107..556617bcf7ac 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -209,6 +209,7 @@ extern unsigned int nr_swapfiles; extern struct swap_info_struct swap_info[]; extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); +extern swp_entry_t get_swap_page_of_type(int type); extern int swap_duplicate(swp_entry_t); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern void swap_free(swp_entry_t); diff --git a/mm/swapfile.c b/mm/swapfile.c index edafeace301f..6da4b28b896b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -211,6 +211,26 @@ noswap: return (swp_entry_t) {0}; } +swp_entry_t get_swap_page_of_type(int type) +{ + struct swap_info_struct *si; + pgoff_t offset; + + spin_lock(&swap_lock); + si = swap_info + type; + if (si->flags & SWP_WRITEOK) { + nr_swap_pages--; + offset = scan_swap_map(si); + if (offset) { + spin_unlock(&swap_lock); + return swp_entry(type, offset); + } + nr_swap_pages++; + } + spin_unlock(&swap_lock); + return (swp_entry_t) {0}; +} + static struct swap_info_struct * swap_info_get(swp_entry_t entry) { struct swap_info_struct * p; -- cgit v1.2.3-71-gd317 From 347a8dc3b815f0c0fa62a1df075184ffe4cbdcf1 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 6 Jan 2006 00:19:28 -0800 Subject: [PATCH] s390: cleanup Kconfig Sanitize some s390 Kconfig options. We have ARCH_S390, ARCH_S390X, ARCH_S390_31, 64BIT, S390_SUPPORT and COMPAT. Replace these 6 options by S390, 64BIT and COMPAT. 
Signed-off-by: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/Kconfig | 27 +++++++-------------------- arch/s390/Makefile | 6 ++---- arch/s390/appldata/appldata_base.c | 8 ++++---- arch/s390/crypto/crypt_s390.h | 10 +++++----- arch/s390/defconfig | 4 +--- arch/s390/kernel/Makefile | 15 +++++---------- arch/s390/kernel/cpcmd.c | 16 ++++++++-------- arch/s390/kernel/entry64.S | 18 +++++++++--------- arch/s390/kernel/head.S | 4 ++-- arch/s390/kernel/module.c | 12 ++++++------ arch/s390/kernel/process.c | 12 ++++++------ arch/s390/kernel/ptrace.c | 24 ++++++++++++------------ arch/s390/kernel/reipl_diag.c | 2 +- arch/s390/kernel/setup.c | 14 +++++++------- arch/s390/kernel/signal.c | 2 +- arch/s390/kernel/smp.c | 8 ++++---- arch/s390/kernel/sys_s390.c | 12 +++++------- arch/s390/kernel/traps.c | 10 +++++----- arch/s390/kernel/vmlinux.lds.S | 2 +- arch/s390/lib/Makefile | 5 ++--- arch/s390/lib/spinlock.c | 2 +- arch/s390/mm/extmem.c | 2 +- arch/s390/mm/fault.c | 18 +++++++++--------- arch/s390/mm/init.c | 8 ++++---- arch/s390/mm/mmap.c | 2 +- block/Kconfig | 2 +- crypto/Kconfig | 8 ++++---- drivers/char/Kconfig | 2 +- drivers/char/hangcheck-timer.c | 2 +- drivers/char/watchdog/Kconfig | 2 +- drivers/input/evdev.c | 2 +- drivers/net/phy/Kconfig | 2 +- drivers/s390/block/Kconfig | 8 ++++---- drivers/s390/block/dasd.c | 2 +- drivers/s390/block/dasd_diag.c | 2 +- drivers/s390/block/dasd_diag.h | 6 +++--- drivers/s390/block/dasd_eckd.c | 2 +- drivers/s390/block/dasd_fba.c | 2 +- drivers/s390/block/xpram.c | 4 ++-- drivers/s390/char/vmwatchdog.c | 2 +- drivers/s390/cio/cio.c | 2 +- drivers/s390/cio/device_id.c | 2 +- drivers/s390/cio/ioasm.h | 4 ++-- drivers/s390/cio/qdio.c | 2 +- drivers/s390/cio/qdio.h | 34 +++++++++++++++++----------------- drivers/s390/crypto/z90hardware.c | 8 ++++---- drivers/s390/net/Kconfig | 2 +- drivers/s390/net/claw.c | 6 +++--- drivers/s390/s390mach.c | 10 +++++----- drivers/s390/sysinfo.c | 2 +- 
drivers/scsi/Kconfig | 2 +- fs/partitions/Kconfig | 2 +- fs/proc/array.c | 2 +- include/asm-s390/unistd.h | 2 +- include/linux/irq.h | 2 +- init/Kconfig | 2 +- init/do_mounts_rd.c | 4 ++-- kernel/panic.c | 4 ++-- kernel/sysctl.c | 6 +++--- lib/Kconfig.debug | 2 +- 60 files changed, 183 insertions(+), 208 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 1846fbfd6bf2..6fe532d82417 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -23,14 +23,14 @@ config GENERIC_BUST_SPINLOCK mainmenu "Linux Kernel Configuration" -config ARCH_S390 +config S390 bool default y config UID16 bool default y - depends on ARCH_S390X = 'n' + depends on !64BIT source "init/Kconfig" @@ -38,20 +38,12 @@ menu "Base setup" comment "Processor type and features" -config ARCH_S390X +config 64BIT bool "64 bit kernel" help Select this option if you have a 64 bit IBM zSeries machine and want to use the 64 bit addressing mode. -config 64BIT - def_bool ARCH_S390X - -config ARCH_S390_31 - bool - depends on ARCH_S390X = 'n' - default y - config SMP bool "Symmetric multi-processing support" ---help--- @@ -101,20 +93,15 @@ config MATHEMU on older S/390 machines. Say Y unless you know your machine doesn't need this. -config S390_SUPPORT +config COMPAT bool "Kernel support for 31 bit emulation" - depends on ARCH_S390X + depends on 64BIT help Select this option if you want to enable your system kernel to handle system-calls from ELF binaries for 31 bit ESA. This option (and some other stuff like libraries and such) is needed for executing 31 bit applications. It is safe to say "Y". -config COMPAT - bool - depends on S390_SUPPORT - default y - config SYSVIPC_COMPAT bool depends on COMPAT && SYSVIPC @@ -122,7 +109,7 @@ config SYSVIPC_COMPAT config BINFMT_ELF32 tristate "Kernel support for 31 bit ELF binaries" - depends on S390_SUPPORT + depends on COMPAT help This allows you to run 32-bit Linux/ELF binaries on your zSeries in 64 bit mode. 
Everybody wants this; say Y. @@ -135,7 +122,7 @@ choice config MARCH_G5 bool "S/390 model G5 and G6" - depends on ARCH_S390_31 + depends on !64BIT help Select this to build a 31 bit kernel that works on all S/390 and zSeries machines. diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 73a09a6ee6c8..6c6b197898d0 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -13,16 +13,14 @@ # Copyright (C) 1994 by Linus Torvalds # -ifdef CONFIG_ARCH_S390_31 +ifndef CONFIG_64BIT LDFLAGS := -m elf_s390 CFLAGS += -m31 AFLAGS += -m31 UTS_MACHINE := s390 STACK_SIZE := 8192 CHECKFLAGS += -D__s390__ -endif - -ifdef CONFIG_ARCH_S390X +else LDFLAGS := -m elf64_s390 MODFLAGS += -fpic -D__PIC__ CFLAGS += -m64 diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index dee6ab54984d..d06a8d71c71d 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -40,7 +40,7 @@ #define TOD_MICRO 0x01000 /* nr. of TOD clock units for 1 microsecond */ -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT #define APPLDATA_START_INTERVAL_REC 0x00 /* Function codes for */ #define APPLDATA_STOP_REC 0x01 /* DIAG 0xDC */ @@ -54,13 +54,13 @@ #define APPLDATA_GEN_EVENT_RECORD 0x82 #define APPLDATA_START_CONFIG_REC 0x83 -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ /* * Parameter list for DIAGNOSE X'DC' */ -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT struct appldata_parameter_list { u16 diag; /* The DIAGNOSE code X'00DC' */ u8 function; /* The function code for the DIAGNOSE */ @@ -82,7 +82,7 @@ struct appldata_parameter_list { u64 product_id_addr; u64 buffer_addr; }; -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ /* * /proc entries (sysctl) diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index d6712cfa6def..d1c259a7fe33 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -112,7 +112,7 @@ struct crypt_s390_query_status { * [ret] is the variable to 
receive the error code * [ERR] is the error code value */ -#ifndef __s390x__ +#ifndef CONFIG_64BIT #define __crypt_s390_fixup \ ".section .fixup,\"ax\" \n" \ "7: lhi %0,%h[e1] \n" \ @@ -129,7 +129,7 @@ struct crypt_s390_query_status { " .long 0b,7b \n" \ " .long 1b,8b \n" \ ".previous" -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ #define __crypt_s390_fixup \ ".section .fixup,\"ax\" \n" \ "7: lhi %0,%h[e1] \n" \ @@ -142,7 +142,7 @@ struct crypt_s390_query_status { " .quad 0b,7b \n" \ " .quad 1b,8b \n" \ ".previous" -#endif /* __s390x__ */ +#endif /* CONFIG_64BIT */ /* * Standard code for setting the result of s390 crypto instructions. @@ -150,10 +150,10 @@ struct crypt_s390_query_status { * [result]: the register containing the result (e.g. second operand length * to compute number of processed bytes]. */ -#ifndef __s390x__ +#ifndef CONFIG_64BIT #define __crypt_s390_set_result \ " lr %0,%[result] \n" -#else /* __s390x__ */ +#else /* CONFIG_64BIT */ #define __crypt_s390_set_result \ " lgr %0,%[result] \n" #endif diff --git a/arch/s390/defconfig b/arch/s390/defconfig index f195c7ea1d7b..7d23edc6facb 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -6,7 +6,7 @@ CONFIG_MMU=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_ARCH_S390=y +CONFIG_S390=y CONFIG_UID16=y # @@ -89,9 +89,7 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" # # Processor type and features # -# CONFIG_ARCH_S390X is not set # CONFIG_64BIT is not set -CONFIG_ARCH_S390_31=y CONFIG_SMP=y CONFIG_NR_CPUS=32 CONFIG_HOTPLUG_CPU=y diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 7434c32bc631..4865e4b49464 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -8,31 +8,26 @@ obj-y := bitmap.o traps.o time.o process.o \ setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \ semaphore.o s390_ext.o debug.o profile.o irq.o reipl_diag.o +obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o) +obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o) + 
extra-y += head.o init_task.o vmlinux.lds obj-$(CONFIG_MODULES) += s390_ksyms.o module.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_S390_SUPPORT) += compat_linux.o compat_signal.o \ +obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ compat_ioctl.o compat_wrapper.o \ compat_exec_domain.o obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o -obj-$(CONFIG_ARCH_S390_31) += entry.o reipl.o -obj-$(CONFIG_ARCH_S390X) += entry64.o reipl64.o - obj-$(CONFIG_VIRT_TIMER) += vtime.o # Kexec part S390_KEXEC_OBJS := machine_kexec.o crash.o -ifeq ($(CONFIG_ARCH_S390X),y) -S390_KEXEC_OBJS += relocate_kernel64.o -else -S390_KEXEC_OBJS += relocate_kernel.o -endif +S390_KEXEC_OBJS += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o) obj-$(CONFIG_KEXEC) += $(S390_KEXEC_OBJS) - # # This is just to get the dependencies... # diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index d47fecb42cc5..4ef44e536b2c 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -39,7 +39,7 @@ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) if (response != NULL && rlen > 0) { memset(response, 0, rlen); -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT asm volatile ( "lra 2,0(%2)\n" "lr 4,%3\n" "o 4,%6\n" @@ -55,7 +55,7 @@ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) : "a" (cpcmd_buf), "d" (cmdlen), "a" (response), "d" (rlen), "m" (mask) : "cc", "2", "3", "4", "5" ); -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ asm volatile ( "lrag 2,0(%2)\n" "lgr 4,%3\n" "o 4,%6\n" @@ -73,11 +73,11 @@ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) : "a" (cpcmd_buf), "d" (cmdlen), "a" (response), "d" (rlen), "m" (mask) : "cc", "2", "3", "4", "5" ); -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ EBCASC(response, rlen); } else { return_len = 0; -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT asm volatile ( "lra 2,0(%1)\n" "lr 3,%2\n" "diag 2,3,0x8\n" @@ -85,7 +85,7 @@ int 
__cpcmd(const char *cmd, char *response, int rlen, int *response_code) : "=d" (return_code) : "a" (cpcmd_buf), "d" (cmdlen) : "2", "3" ); -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ asm volatile ( "lrag 2,0(%1)\n" "lgr 3,%2\n" "sam31\n" @@ -95,7 +95,7 @@ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) : "=d" (return_code) : "a" (cpcmd_buf), "d" (cmdlen) : "2", "3" ); -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ } spin_unlock_irqrestore(&cpcmd_lock, flags); if (response_code != NULL) @@ -105,7 +105,7 @@ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) EXPORT_SYMBOL(__cpcmd); -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT int cpcmd(const char *cmd, char *response, int rlen, int *response_code) { char *lowbuf; @@ -129,4 +129,4 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code) } EXPORT_SYMBOL(cpcmd); -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 4eb71ffcf484..369ab4413ec7 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -213,7 +213,7 @@ sysc_nr_ok: mvc SP_ARGS(8,%r15),SP_R7(%r15) sysc_do_restart: larl %r10,sys_call_table -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT tm __TI_flags+5(%r9),(_TIF_31BIT>>16) # running in 31 bit mode ? 
jno sysc_noemu larl %r10,sys_call_table_emu # use 31 bit emulation system calls @@ -361,7 +361,7 @@ sys_clone_glue: la %r2,SP_PTREGS(%r15) # load pt_regs jg sys_clone # branch to sys_clone -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT sys32_clone_glue: la %r2,SP_PTREGS(%r15) # load pt_regs jg sys32_clone # branch to sys32_clone @@ -383,7 +383,7 @@ sys_execve_glue: bnz 0(%r12) # it did fail -> store result in gpr2 b 6(%r12) # SKIP STG 2,SP_R2(15) in # system_call/sysc_tracesys -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT sys32_execve_glue: la %r2,SP_PTREGS(%r15) # load pt_regs lgr %r12,%r14 # save return address @@ -398,7 +398,7 @@ sys_sigreturn_glue: la %r2,SP_PTREGS(%r15) # load pt_regs as parameter jg sys_sigreturn # branch to sys_sigreturn -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT sys32_sigreturn_glue: la %r2,SP_PTREGS(%r15) # load pt_regs as parameter jg sys32_sigreturn # branch to sys32_sigreturn @@ -408,7 +408,7 @@ sys_rt_sigreturn_glue: la %r2,SP_PTREGS(%r15) # load pt_regs as parameter jg sys_rt_sigreturn # branch to sys_sigreturn -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT sys32_rt_sigreturn_glue: la %r2,SP_PTREGS(%r15) # load pt_regs as parameter jg sys32_rt_sigreturn # branch to sys32_sigreturn @@ -429,7 +429,7 @@ sys_sigsuspend_glue: la %r14,6(%r14) # skip store of return value jg sys_sigsuspend # branch to sys_sigsuspend -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT sys32_sigsuspend_glue: llgfr %r4,%r4 # unsigned long lgr %r5,%r4 # move mask back @@ -449,7 +449,7 @@ sys_rt_sigsuspend_glue: la %r14,6(%r14) # skip store of return value jg sys_rt_sigsuspend # branch to sys_rt_sigsuspend -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT sys32_rt_sigsuspend_glue: llgfr %r3,%r3 # size_t lgr %r4,%r3 # move sigsetsize parameter @@ -464,7 +464,7 @@ sys_sigaltstack_glue: la %r4,SP_PTREGS(%r15) # load pt_regs as parameter jg sys_sigaltstack # branch to sys_sigreturn -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT 
sys32_sigaltstack_glue: la %r4,SP_PTREGS(%r15) # load pt_regs as parameter jg sys32_sigaltstack_wrapper # branch to sys_sigreturn @@ -1009,7 +1009,7 @@ sys_call_table: #include "syscalls.S" #undef SYSCALL -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT #define SYSCALL(esa,esame,emu) .long emu .globl sys_call_table_emu diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index d31a97c89f68..ea88d066bf04 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -30,7 +30,7 @@ #include #include -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT #define ARCH_OFFSET 4 #else #define ARCH_OFFSET 0 @@ -539,7 +539,7 @@ ipl_devno: .word 0 .endm -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT #include "head64.S" #else #include "head31.S" diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 607d506689c8..c271cdab58e2 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -37,11 +37,11 @@ #define DEBUGP(fmt , ...) #endif -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT #define PLT_ENTRY_SIZE 12 -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ #define PLT_ENTRY_SIZE 20 -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ void *module_alloc(unsigned long size) { @@ -294,17 +294,17 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, unsigned int *ip; ip = me->module_core + me->arch.plt_offset + info->plt_offset; -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT ip[0] = 0x0d105810; /* basr 1,0; l 1,6(1); br 1 */ ip[1] = 0x100607f1; ip[2] = val; -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */ ip[1] = 0x100a0004; ip[2] = 0x07f10000; ip[3] = (unsigned int) (val >> 32); ip[4] = (unsigned int) val; -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ info->plt_initialized = 1; } if (r_type == R_390_PLTOFF16 || diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 78b64fe5e7c2..a942bf2d58e9 100644 --- 
a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -235,7 +235,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp, /* Save access registers to new thread structure. */ save_access_regs(&p->thread.acrs[0]); -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT /* * save fprs to current->thread.fp_regs to merge them with * the emulated registers and then copy the result to the child. @@ -247,7 +247,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp, /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) p->thread.acrs[0] = regs->gprs[6]; -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ /* Save the fpu registers to new thread structure. */ save_fp_regs(&p->thread.fp_regs); p->thread.user_seg = __pa((unsigned long) p->mm->pgd) | _REGION_TABLE; @@ -260,7 +260,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp, p->thread.acrs[1] = (unsigned int) regs->gprs[6]; } } -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ /* start new process with ar4 pointing to the correct address space */ p->thread.mm_segment = get_fs(); /* Don't copy debug registers */ @@ -339,16 +339,16 @@ out: */ int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs) { -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT /* * save fprs to current->thread.fp_regs to merge them with * the emulated registers and then copy the result to the dump. 
*/ save_fp_regs(&current->thread.fp_regs); memcpy(fpregs, &current->thread.fp_regs, sizeof(s390_fp_regs)); -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ save_fp_regs(fpregs); -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ return 1; } diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 06afa3103ace..8ecda6d66de4 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -42,7 +42,7 @@ #include #include -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT #include "compat_ptrace.h" #endif @@ -59,7 +59,7 @@ FixPerRegisters(struct task_struct *task) if (per_info->single_step) { per_info->control_regs.bits.starting_addr = 0; -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT if (test_thread_flag(TIF_31BIT)) per_info->control_regs.bits.ending_addr = 0x7fffffffUL; else @@ -112,7 +112,7 @@ ptrace_disable(struct task_struct *child) clear_single_step(child); } -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT # define __ADDR_MASK 3 #else # define __ADDR_MASK 7 @@ -138,7 +138,7 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) * an alignment of 4. Programmers from hell... */ mask = __ADDR_MASK; -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT if (addr >= (addr_t) &dummy->regs.acrs && addr < (addr_t) &dummy->regs.orig_gpr2) mask = 3; @@ -160,7 +160,7 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data) * access registers are stored in the thread structure */ offset = addr - (addr_t) &dummy->regs.acrs; -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT /* * Very special case: old & broken 64 bit gdb reading * from acrs[15]. Result is a 64 bit value. Read the @@ -218,7 +218,7 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data) * an alignment of 4. Programmers from hell indeed...
*/ mask = __ADDR_MASK; -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT if (addr >= (addr_t) &dummy->regs.acrs && addr < (addr_t) &dummy->regs.orig_gpr2) mask = 3; @@ -231,13 +231,13 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data) * psw and gprs are stored on the stack */ if (addr == (addr_t) &dummy->regs.psw.mask && -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT data != PSW_MASK_MERGE(PSW_USER32_BITS, data) && #endif data != PSW_MASK_MERGE(PSW_USER_BITS, data)) /* Invalid psw mask. */ return -EINVAL; -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT if (addr == (addr_t) &dummy->regs.psw.addr) /* I'd like to reject addresses without the high order bit but older gdb's rely on it */ @@ -250,7 +250,7 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data) * access registers are stored in the thread structure */ offset = addr - (addr_t) &dummy->regs.acrs; -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT /* * Very special case: old & broken 64 bit gdb writing * to acrs[15] with a 64 bit value. Ignore the lower @@ -357,7 +357,7 @@ do_ptrace_normal(struct task_struct *child, long request, long addr, long data) return ptrace_request(child, request, addr, data); } -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT /* * Now the fun part starts... a 31 bit program running in the * 31 bit emulation tracing another program. 
PTRACE_PEEKTEXT, @@ -629,7 +629,7 @@ do_ptrace(struct task_struct *child, long request, long addr, long data) return peek_user(child, addr, data); if (request == PTRACE_POKEUSR && addr == PT_IEEE_IP) return poke_user(child, addr, data); -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT if (request == PTRACE_PEEKUSR && addr == PT32_IEEE_IP && test_thread_flag(TIF_31BIT)) return peek_user_emu31(child, addr, data); @@ -695,7 +695,7 @@ do_ptrace(struct task_struct *child, long request, long addr, long data) /* Do requests that differ for 31/64 bit */ default: -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT if (test_thread_flag(TIF_31BIT)) return do_ptrace_emu31(child, request, addr, data); #endif diff --git a/arch/s390/kernel/reipl_diag.c b/arch/s390/kernel/reipl_diag.c index 83cb42bc0b76..1f33951ba439 100644 --- a/arch/s390/kernel/reipl_diag.c +++ b/arch/s390/kernel/reipl_diag.c @@ -26,7 +26,7 @@ void reipl_diag(void) " st %%r4,%0\n" " st %%r5,%1\n" ".section __ex_table,\"a\"\n" -#ifdef __s390x__ +#ifdef CONFIG_64BIT " .align 8\n" " .quad 0b, 0b\n" #else diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 31e7b19348b7..b03847d100d9 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -427,7 +427,7 @@ setup_lowcore(void) __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE; lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) { lc->extended_save_area_addr = (__u32) __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0); @@ -562,21 +562,21 @@ setup_arch(char **cmdline_p) /* * print what head.S has found out about the machine */ -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT printk((MACHINE_IS_VM) ? "We are running under VM (31 bit mode)\n" : "We are running native (31 bit mode)\n"); printk((MACHINE_HAS_IEEE) ? 
"This machine has an IEEE fpu\n" : "This machine has no IEEE fpu\n"); -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ printk((MACHINE_IS_VM) ? "We are running under VM (64 bit mode)\n" : "We are running native (64 bit mode)\n"); -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ ROOT_DEV = Root_RAM0; -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT memory_end = memory_size & ~0x400000UL; /* align memory end to 4MB */ /* * We need some free virtual space to be able to do vmalloc. @@ -585,9 +585,9 @@ setup_arch(char **cmdline_p) */ if (memory_end > 1920*1024*1024) memory_end = 1920*1024*1024; -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ memory_end = memory_size & ~0x200000UL; /* detected in head.s */ -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ init_mm.start_code = PAGE_OFFSET; init_mm.end_code = (unsigned long) &_etext; diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 13592d00a10f..6ae4a77270b5 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -501,7 +501,7 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) if (signr > 0) { /* Whee! Actually deliver the signal. */ -#ifdef CONFIG_S390_SUPPORT +#ifdef CONFIG_COMPAT if (test_thread_flag(TIF_31BIT)) { extern void handle_signal32(unsigned long sig, struct k_sigaction *ka, diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index bd5b311006be..e10f4ca00499 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -402,7 +402,7 @@ static void smp_ext_bitcall_others(ec_bit_sig sig) } } -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT /* * this function sends a 'purge tlb' signal to another CPU. */ @@ -416,7 +416,7 @@ void smp_ptlb_all(void) on_each_cpu(smp_ptlb_callback, NULL, 0, 1); } EXPORT_SYMBOL(smp_ptlb_all); -#endif /* ! CONFIG_ARCH_S390X */ +#endif /* ! CONFIG_64BIT */ /* * this function sends a 'reschedule' IPI to another CPU. 
@@ -783,7 +783,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (stack == 0ULL) panic("smp_boot_cpus failed to allocate memory\n"); lowcore_ptr[i]->panic_stack = stack + (PAGE_SIZE); -#ifndef __s390x__ +#ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) { lowcore_ptr[i]->extended_save_area_addr = (__u32) __get_free_pages(GFP_KERNEL,0); @@ -793,7 +793,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } #endif } -#ifndef __s390x__ +#ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) ctl_set_bit(14, 29); /* enable extended save area */ #endif diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index efe6b83b53f7..6a63553493c5 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -26,9 +26,7 @@ #include #include #include -#ifdef CONFIG_ARCH_S390X #include -#endif /* CONFIG_ARCH_S390X */ #include #include @@ -121,7 +119,7 @@ out: return error; } -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT struct sel_arg_struct { unsigned long n; fd_set *inp, *outp, *exp; @@ -138,7 +136,7 @@ asmlinkage long old_select(struct sel_arg_struct __user *arg) return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); } -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ /* * sys_ipc() is the de-multiplexer for the SysV IPC calls.. 
@@ -211,7 +209,7 @@ asmlinkage long sys_ipc(uint call, int first, unsigned long second, return -EINVAL; } -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT asmlinkage long s390x_newuname(struct new_utsname __user *name) { int ret = sys_newuname(name); @@ -235,12 +233,12 @@ asmlinkage long s390x_personality(unsigned long personality) return ret; } -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ /* * Wrapper function for sys_fadvise64/fadvise64_64 */ -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT asmlinkage long s390_fadvise64(int fd, u32 offset_high, u32 offset_low, size_t len, int advice) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index c5bd36fae56b..95d109968619 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -67,13 +67,13 @@ extern pgm_check_handler_t do_monitor_call; #define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; }) -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT #define FOURLONG "%08lx %08lx %08lx %08lx\n" static int kstack_depth_to_print = 12; -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ #define FOURLONG "%016lx %016lx %016lx %016lx\n" static int kstack_depth_to_print = 20; -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ /* * For show_trace we have tree different stack to consider: @@ -702,12 +702,12 @@ void __init trap_init(void) pgm_check_table[0x11] = &do_dat_exception; pgm_check_table[0x12] = &translation_exception; pgm_check_table[0x13] = &special_op_exception; -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT pgm_check_table[0x38] = &do_dat_exception; pgm_check_table[0x39] = &do_dat_exception; pgm_check_table[0x3A] = &do_dat_exception; pgm_check_table[0x3B] = &do_dat_exception; -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ pgm_check_table[0x15] = &operand_exception; pgm_check_table[0x1C] = &space_switch_exception; pgm_check_table[0x1D] = &hfp_sqrt_exception; diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 
89fdb3808bc0..9289face3027 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -5,7 +5,7 @@ #include #include -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") OUTPUT_ARCH(s390) ENTRY(_start) diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index b701efa1f00e..d9b97b3c597f 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -4,6 +4,5 @@ EXTRA_AFLAGS := -traditional -lib-y += delay.o string.o -lib-$(CONFIG_ARCH_S390_31) += uaccess.o spinlock.o -lib-$(CONFIG_ARCH_S390X) += uaccess64.o spinlock.o +lib-y += delay.o string.o spinlock.o +lib-y += $(if $(CONFIG_64BIT),uaccess64.o,uaccess.o) diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 2dc14e9c8327..68d79c502081 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -29,7 +29,7 @@ __setup("spin_retry=", spin_retry_setup); static inline void _diag44(void) { -#ifdef __s390x__ +#ifdef CONFIG_64BIT if (MACHINE_HAS_DIAG44) #endif asm volatile("diag 0,0,0x44"); diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 506a33b51e4f..a9566bcab682 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -143,7 +143,7 @@ dcss_diag (__u8 func, void *parameter, rx = (unsigned long) parameter; ry = (unsigned long) func; __asm__ __volatile__( -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT " sam31\n" // switch to 31 bit " diag %0,%1,0x64\n" " sam64\n" // switch back to 64 bit diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index fb2607c369ed..81ade401b073 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -31,17 +31,17 @@ #include #include -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT #define __FAIL_ADDR_MASK 0x7ffff000 #define __FIXUP_MASK 0x7fffffff #define __SUBCODE_MASK 0x0200 #define __PF_RES_FIELD 0ULL -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ #define __FAIL_ADDR_MASK -4096L #define __FIXUP_MASK ~0L #define __SUBCODE_MASK 
0x0600 #define __PF_RES_FIELD 0x8000000000000000ULL -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ #ifdef CONFIG_SYSCTL extern int sysctl_userprocess_debug; @@ -393,11 +393,11 @@ int pfault_init(void) "2:\n" ".section __ex_table,\"a\"\n" " .align 4\n" -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT " .long 0b,1b\n" -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ " .quad 0b,1b\n" -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ ".previous" : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc" ); __ctl_set_bit(0, 9); @@ -417,11 +417,11 @@ void pfault_fini(void) "0:\n" ".section __ex_table,\"a\"\n" " .align 4\n" -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT " .long 0b,0b\n" -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ " .quad 0b,0b\n" -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ ".previous" : : "a" (&refbk), "m" (refbk) : "cc" ); } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 6ec5cd981e74..df953383724d 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -44,7 +44,7 @@ void diag10(unsigned long addr) { if (addr >= 0x7ff00000) return; -#ifdef __s390x__ +#ifdef CONFIG_64BIT asm volatile ( " sam31\n" " diag %0,%0,0x10\n" @@ -106,7 +106,7 @@ extern unsigned long __initdata zholes_size[]; * paging_init() sets up the page tables */ -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT void __init paging_init(void) { pgd_t * pg_dir; @@ -175,7 +175,7 @@ void __init paging_init(void) return; } -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ void __init paging_init(void) { pgd_t * pg_dir; @@ -256,7 +256,7 @@ void __init paging_init(void) return; } -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ void __init mem_init(void) { diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index fb187e5a54b4..356257c171de 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -50,7 +50,7 @@ static inline unsigned long mmap_base(void) static inline int mmap_is_legacy(void) { -#ifdef 
CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT /* * Force standard allocation for 64 bit programs. */ diff --git a/block/Kconfig b/block/Kconfig index eb48edb80c1d..377f6dd20e17 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -5,7 +5,7 @@ #for instance. config LBD bool "Support for Large Block Devices" - depends on X86 || (MIPS && 32BIT) || PPC32 || ARCH_S390_31 || SUPERH || UML + depends on X86 || (MIPS && 32BIT) || PPC32 || (S390 && !64BIT) || SUPERH || UML help Say Y here if you want to attach large (bigger than 2TB) discs to your machine, or if you want to have a raid or loopback device diff --git a/crypto/Kconfig b/crypto/Kconfig index c696f7ab729e..52e1d4108a99 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -42,7 +42,7 @@ config CRYPTO_SHA1 config CRYPTO_SHA1_S390 tristate "SHA1 digest algorithm (s390)" - depends on CRYPTO && ARCH_S390 + depends on CRYPTO && S390 help This is the s390 hardware accelerated implementation of the SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). @@ -58,7 +58,7 @@ config CRYPTO_SHA256 config CRYPTO_SHA256_S390 tristate "SHA256 digest algorithm (s390)" - depends on CRYPTO && ARCH_S390 + depends on CRYPTO && S390 help This is the s390 hardware accelerated implementation of the SHA256 secure hash standard (DFIPS 180-2). @@ -111,7 +111,7 @@ config CRYPTO_DES config CRYPTO_DES_S390 tristate "DES and Triple DES cipher algorithms (s390)" - depends on CRYPTO && ARCH_S390 + depends on CRYPTO && S390 help DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3). @@ -217,7 +217,7 @@ config CRYPTO_AES_X86_64 config CRYPTO_AES_S390 tristate "AES cipher algorithms (s390)" - depends on CRYPTO && ARCH_S390 + depends on CRYPTO && S390 help This is the s390 hardware accelerated implementation of the AES cipher algorithms (FIPS-197). 
AES uses the Rijndael diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 84e68cdd451b..5ebd06b1b4ca 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -985,7 +985,7 @@ config HPET_MMAP config HANGCHECK_TIMER tristate "Hangcheck timer" - depends on X86 || IA64 || PPC64 || ARCH_S390 + depends on X86 || IA64 || PPC64 || S390 help The hangcheck-timer module detects when the system has gone out to lunch past a certain margin. It can reboot the system diff --git a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c index 66e53dd450ff..40a67c86420c 100644 --- a/drivers/char/hangcheck-timer.c +++ b/drivers/char/hangcheck-timer.c @@ -120,7 +120,7 @@ __setup("hcheck_dump_tasks", hangcheck_parse_dump_tasks); #if defined(CONFIG_X86) # define HAVE_MONOTONIC # define TIMER_FREQ 1000000000ULL -#elif defined(CONFIG_ARCH_S390) +#elif defined(CONFIG_S390) /* FA240000 is 1 Second in the IBM time universe (Page 4-38 Principles of Op for zSeries */ # define TIMER_FREQ 0xFA240000ULL #elif defined(CONFIG_IA64) diff --git a/drivers/char/watchdog/Kconfig b/drivers/char/watchdog/Kconfig index 344001b45af9..a6544790af60 100644 --- a/drivers/char/watchdog/Kconfig +++ b/drivers/char/watchdog/Kconfig @@ -438,7 +438,7 @@ config INDYDOG config ZVM_WATCHDOG tristate "z/VM Watchdog Timer" - depends on WATCHDOG && ARCH_S390 + depends on WATCHDOG && S390 help IBM s/390 and zSeries machines running under z/VM 5.1 or later provide a virtual watchdog timer to their guest that cause a diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 9f2352bd8348..a1e660e3531d 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -157,7 +157,7 @@ struct input_event_compat { # define COMPAT_TEST test_thread_flag(TIF_IA32) #elif defined(CONFIG_IA64) # define COMPAT_TEST IS_IA32_PROCESS(ia64_task_regs(current)) -#elif defined(CONFIG_ARCH_S390) +#elif defined(CONFIG_S390) # define COMPAT_TEST test_thread_flag(TIF_31BIT) #elif defined(CONFIG_MIPS) # define 
COMPAT_TEST (current->thread.mflags & MF_32BIT_ADDR) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index c782a6329805..fa39b944bc46 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -6,7 +6,7 @@ menu "PHY device support" config PHYLIB tristate "PHY Device support and infrastructure" - depends on NET_ETHERNET && (BROKEN || !ARCH_S390) + depends on NET_ETHERNET && (BROKEN || !S390) help Ethernet controllers are usually attached to PHY devices. This option provides infrastructure for diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index 6e7d7b06421d..6f50cc9323d9 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -1,11 +1,11 @@ -if ARCH_S390 +if S390 comment "S/390 block device drivers" - depends on ARCH_S390 + depends on S390 config BLK_DEV_XPRAM tristate "XPRAM disk support" - depends on ARCH_S390 + depends on S390 help Select this option if you want to use your expanded storage on S/390 or zSeries as a disk. This is useful as a _fast_ swap device if you @@ -49,7 +49,7 @@ config DASD_FBA config DASD_DIAG tristate "Support for DIAG access to Disks" - depends on DASD && ( ARCH_S390X = 'n' || EXPERIMENTAL) + depends on DASD && ( 64BIT = 'n' || EXPERIMENTAL) help Select this option if you want to use Diagnose250 command to access Disks under VM. If you are not running under VM or unsure what it is, diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 1141a5963b67..041e1a621885 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -604,7 +604,7 @@ dasd_smalloc_request(char *magic, int cplength, int datasize, void dasd_kfree_request(struct dasd_ccw_req * cqr, struct dasd_device * device) { -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT struct ccw1 *ccw; /* Clear any idals used for the request. 
*/ diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index a33d4064b537..ba80fdea7ebf 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -75,7 +75,7 @@ dia250(void *iob, int cmd) int rc; __asm__ __volatile__( -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT " lghi %0,3\n" " lgr 0,%3\n" " diag 0,%2,0x250\n" diff --git a/drivers/s390/block/dasd_diag.h b/drivers/s390/block/dasd_diag.h index 37edf6e91715..a4f80bd735f1 100644 --- a/drivers/s390/block/dasd_diag.h +++ b/drivers/s390/block/dasd_diag.h @@ -45,7 +45,7 @@ struct dasd_diag_characteristics { } __attribute__ ((packed, aligned(4))); -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT #define DASD_DIAG_FLAGA_DEFAULT DASD_DIAG_FLAGA_FORMAT_64BIT typedef u64 blocknum_t; @@ -86,7 +86,7 @@ struct dasd_diag_rw_io { struct dasd_diag_bio *bio_list; u8 spare4[8]; } __attribute__ ((packed, aligned(8))); -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ #define DASD_DIAG_FLAGA_DEFAULT 0x0 typedef u32 blocknum_t; @@ -125,4 +125,4 @@ struct dasd_diag_rw_io { u32 interrupt_params; u8 spare3[20]; } __attribute__ ((packed, aligned(8))); -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index efc4cf62496e..96eb48258580 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1041,7 +1041,7 @@ dasd_eckd_build_cp(struct dasd_device * device, struct request *req) /* Eckd can only do full blocks. 
*/ return ERR_PTR(-EINVAL); count += bv->bv_len >> (device->s2b_shift + 9); -#if defined(CONFIG_ARCH_S390X) +#if defined(CONFIG_64BIT) if (idal_is_needed (page_address(bv->bv_page), bv->bv_len)) cidaw += bv->bv_len >> (device->s2b_shift + 9); diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 9bac8d87a9cc..8ec75dc08e2c 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -271,7 +271,7 @@ dasd_fba_build_cp(struct dasd_device * device, struct request *req) /* Fba can only do full blocks. */ return ERR_PTR(-EINVAL); count += bv->bv_len >> (device->s2b_shift + 9); -#if defined(CONFIG_ARCH_S390X) +#if defined(CONFIG_64BIT) if (idal_is_needed (page_address(bv->bv_page), bv->bv_len)) cidaw += bv->bv_len / blksize; diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index d428c909b8a0..bf3a67c3cc5e 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -160,7 +160,7 @@ static int xpram_page_in (unsigned long page_addr, unsigned int xpage_index) "0: ipm %0\n" " srl %0,28\n" "1:\n" -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT ".section __ex_table,\"a\"\n" " .align 4\n" " .long 0b,1b\n" @@ -208,7 +208,7 @@ static long xpram_page_out (unsigned long page_addr, unsigned int xpage_index) "0: ipm %0\n" " srl %0,28\n" "1:\n" -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT ".section __ex_table,\"a\"\n" " .align 4\n" " .long 0b,1b\n" diff --git a/drivers/s390/char/vmwatchdog.c b/drivers/s390/char/vmwatchdog.c index 5473c23fcb52..5acc0ace3d7d 100644 --- a/drivers/s390/char/vmwatchdog.c +++ b/drivers/s390/char/vmwatchdog.c @@ -66,7 +66,7 @@ static int __diag288(enum vmwdt_func func, unsigned int timeout, __cmdl = len; err = 0; asm volatile ( -#ifdef __s390x__ +#ifdef CONFIG_64BIT "diag %2,%4,0x288\n" "1: \n" ".section .fixup,\"ax\"\n" diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 6f274f4f92eb..7376bc87206d 100644 --- a/drivers/s390/cio/cio.c +++ 
b/drivers/s390/cio/cio.c @@ -195,7 +195,7 @@ cio_start_key (struct subchannel *sch, /* subchannel structure */ sch->orb.spnd = sch->options.suspend; sch->orb.ssic = sch->options.suspend && sch->options.inter; sch->orb.lpm = (lpm != 0) ? (lpm & sch->opm) : sch->lpm; -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT /* * for 64 bit we always support 64 bit IDAWs with 4k page size only */ diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c index 3c77c3fd461d..04ceba343db8 100644 --- a/drivers/s390/cio/device_id.c +++ b/drivers/s390/cio/device_id.c @@ -27,7 +27,7 @@ /* * diag210 is used under VM to get information about a virtual device */ -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT int diag210(struct diag210 * addr) { diff --git a/drivers/s390/cio/ioasm.h b/drivers/s390/cio/ioasm.h index 62b0e2ad507f..95a9462f9a91 100644 --- a/drivers/s390/cio/ioasm.h +++ b/drivers/s390/cio/ioasm.h @@ -50,7 +50,7 @@ static inline int stsch_err(struct subchannel_id schid, "0: ipm %0\n" " srl %0,28\n" "1:\n" -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT ".section __ex_table,\"a\"\n" " .align 8\n" " .quad 0b,1b\n" @@ -95,7 +95,7 @@ static inline int msch_err(struct subchannel_id schid, "0: ipm %0\n" " srl %0,28\n" "1:\n" -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT ".section __ex_table,\"a\"\n" " .align 8\n" " .quad 0b,1b\n" diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c index 035c77af9cd3..30a836ffc31f 100644 --- a/drivers/s390/cio/qdio.c +++ b/drivers/s390/cio/qdio.c @@ -2394,7 +2394,7 @@ tiqdio_check_chsc_availability(void) sprintf(dbf_text,"hydrati%1x", hydra_thinints); QDIO_DBF_TEXT0(0,setup,dbf_text); -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT /* Check for QEBSM support in general (bit 58). 
*/ is_passthrough = css_general_characteristics.qebsm; #endif diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 43b840af5300..fa385e761fe1 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -271,7 +271,7 @@ static inline int do_sqbs(unsigned long sch, unsigned char state, int queue, unsigned int *start, unsigned int *count) { -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT register unsigned long _ccq asm ("0") = *count; register unsigned long _sch asm ("1") = sch; unsigned long _queuestart = ((unsigned long)queue << 32) | *start; @@ -295,7 +295,7 @@ static inline int do_eqbs(unsigned long sch, unsigned char *state, int queue, unsigned int *start, unsigned int *count) { -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT register unsigned long _ccq asm ("0") = *count; register unsigned long _sch asm ("1") = sch; unsigned long _queuestart = ((unsigned long)queue << 32) | *start; @@ -323,7 +323,7 @@ do_siga_sync(struct subchannel_id schid, unsigned int mask1, unsigned int mask2) { int cc; -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT asm volatile ( "lhi 0,2 \n\t" "lr 1,%1 \n\t" @@ -336,7 +336,7 @@ do_siga_sync(struct subchannel_id schid, unsigned int mask1, unsigned int mask2) : "d" (schid), "d" (mask1), "d" (mask2) : "cc", "0", "1", "2", "3" ); -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ asm volatile ( "lghi 0,2 \n\t" "llgfr 1,%1 \n\t" @@ -349,7 +349,7 @@ do_siga_sync(struct subchannel_id schid, unsigned int mask1, unsigned int mask2) : "d" (schid), "d" (mask1), "d" (mask2) : "cc", "0", "1", "2", "3" ); -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ return cc; } @@ -358,7 +358,7 @@ do_siga_input(struct subchannel_id schid, unsigned int mask) { int cc; -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT asm volatile ( "lhi 0,1 \n\t" "lr 1,%1 \n\t" @@ -370,7 +370,7 @@ do_siga_input(struct subchannel_id schid, unsigned int mask) : "d" (schid), "d" (mask) : "cc", "0", "1", "2", "memory" ); -#else /* 
CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ asm volatile ( "lghi 0,1 \n\t" "llgfr 1,%1 \n\t" @@ -382,7 +382,7 @@ do_siga_input(struct subchannel_id schid, unsigned int mask) : "d" (schid), "d" (mask) : "cc", "0", "1", "2", "memory" ); -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ return cc; } @@ -394,7 +394,7 @@ do_siga_output(unsigned long schid, unsigned long mask, __u32 *bb, int cc; __u32 busy_bit; -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT asm volatile ( "lhi 0,0 \n\t" "lr 1,%2 \n\t" @@ -424,7 +424,7 @@ do_siga_output(unsigned long schid, unsigned long mask, __u32 *bb, "i" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION) : "cc", "0", "1", "2", "memory" ); -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ asm volatile ( "llgfr 0,%5 \n\t" "lgr 1,%2 \n\t" @@ -449,7 +449,7 @@ do_siga_output(unsigned long schid, unsigned long mask, __u32 *bb, "i" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION), "d" (fc) : "cc", "0", "1", "2", "memory" ); -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ (*bb) = busy_bit; return cc; @@ -461,21 +461,21 @@ do_clear_global_summary(void) unsigned long time; -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT asm volatile ( "lhi 1,3 \n\t" ".insn rre,0xb2650000,2,0 \n\t" "lr %0,3 \n\t" : "=d" (time) : : "cc", "1", "2", "3" ); -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ asm volatile ( "lghi 1,3 \n\t" ".insn rre,0xb2650000,2,0 \n\t" "lgr %0,3 \n\t" : "=d" (time) : : "cc", "1", "2", "3" ); -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ return time; } @@ -542,11 +542,11 @@ struct qdio_perf_stats { #define MY_MODULE_STRING(x) #x -#ifdef CONFIG_ARCH_S390X +#ifdef CONFIG_64BIT #define QDIO_GET_ADDR(x) ((__u32)(unsigned long)x) -#else /* CONFIG_ARCH_S390X */ +#else /* CONFIG_64BIT */ #define QDIO_GET_ADDR(x) ((__u32)(long)x) -#endif /* CONFIG_ARCH_S390X */ +#endif /* CONFIG_64BIT */ struct qdio_q { volatile struct slsb slsb; diff --git a/drivers/s390/crypto/z90hardware.c b/drivers/s390/crypto/z90hardware.c index 
7c3ed52e03e1..d7f7494a0cbe 100644 --- a/drivers/s390/crypto/z90hardware.c +++ b/drivers/s390/crypto/z90hardware.c @@ -785,7 +785,7 @@ testq(int q_nr, int *q_depth, int *dev_type, struct ap_status_word *stat) int ccode; asm volatile -#ifdef __s390x__ +#ifdef CONFIG_64BIT (" llgfr 0,%4 \n" " slgr 1,1 \n" " lgr 2,1 \n" @@ -855,7 +855,7 @@ resetq(int q_nr, struct ap_status_word *stat_p) int ccode; asm volatile -#ifdef __s390x__ +#ifdef CONFIG_64BIT (" llgfr 0,%2 \n" " lghi 1,1 \n" " sll 1,24 \n" @@ -921,7 +921,7 @@ sen(int msg_len, unsigned char *msg_ext, struct ap_status_word *stat) int ccode; asm volatile -#ifdef __s390x__ +#ifdef CONFIG_64BIT (" lgr 6,%3 \n" " llgfr 7,%2 \n" " llgt 0,0(6) \n" @@ -1000,7 +1000,7 @@ rec(int q_nr, int buff_l, unsigned char *rsp, unsigned char *id, int ccode; asm volatile -#ifdef __s390x__ +#ifdef CONFIG_64BIT (" llgfr 0,%2 \n" " lgr 3,%4 \n" " lgr 6,%3 \n" diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig index a7efc394515e..548854754921 100644 --- a/drivers/s390/net/Kconfig +++ b/drivers/s390/net/Kconfig @@ -1,5 +1,5 @@ menu "S/390 network device drivers" - depends on NETDEVICES && ARCH_S390 + depends on NETDEVICES && S390 config LCS tristate "Lan Channel Station Interface" diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c index 6b63d21612ec..e70af7f39946 100644 --- a/drivers/s390/net/claw.c +++ b/drivers/s390/net/claw.c @@ -1603,7 +1603,7 @@ dumpit(char* buf, int len) __u32 ct, sw, rm, dup; char *ptr, *rptr; char tbuf[82], tdup[82]; -#if (CONFIG_ARCH_S390X) +#if (CONFIG_64BIT) char addr[22]; #else char addr[12]; @@ -1619,7 +1619,7 @@ dumpit(char* buf, int len) dup = 0; for ( ct=0; ct < len; ct++, ptr++, rptr++ ) { if (sw == 0) { -#if (CONFIG_ARCH_S390X) +#if (CONFIG_64BIT) sprintf(addr, "%16.16lX",(unsigned long)rptr); #else sprintf(addr, "%8.8X",(__u32)rptr); @@ -1634,7 +1634,7 @@ dumpit(char* buf, int len) if (sw == 8) { strcat(bhex, " "); } -#if (CONFIG_ARCH_S390X) +#if (CONFIG_64BIT) 
sprintf(tbuf,"%2.2lX", (unsigned long)*ptr); #else sprintf(tbuf,"%2.2X", (__u32)*ptr); diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c index 7dad597ff86e..3bf466603512 100644 --- a/drivers/s390/s390mach.c +++ b/drivers/s390/s390mach.c @@ -246,7 +246,7 @@ s390_revalidate_registers(struct mci *mci) */ kill_task = 1; -#ifndef __s390x__ +#ifndef CONFIG_64BIT asm volatile("ld 0,0(%0)\n" "ld 2,8(%0)\n" "ld 4,16(%0)\n" @@ -255,7 +255,7 @@ s390_revalidate_registers(struct mci *mci) #endif if (MACHINE_HAS_IEEE) { -#ifdef __s390x__ +#ifdef CONFIG_64BIT fpt_save_area = &S390_lowcore.floating_pt_save_area; fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area; #else @@ -314,7 +314,7 @@ s390_revalidate_registers(struct mci *mci) */ s390_handle_damage("invalid control registers."); else -#ifdef __s390x__ +#ifdef CONFIG_64BIT asm volatile("lctlg 0,15,0(%0)" : : "a" (&S390_lowcore.cregs_save_area)); #else @@ -327,7 +327,7 @@ s390_revalidate_registers(struct mci *mci) * can't write something sensible into that register. */ -#ifdef __s390x__ +#ifdef CONFIG_64BIT /* * See if we can revalidate the TOD programmable register with its * old contents (should be zero) otherwise set it to zero. 
@@ -384,7 +384,7 @@ s390_do_machine_check(struct pt_regs *regs) if (mci->b) { /* Processing backup -> verify if we can survive this */ u64 z_mcic, o_mcic, t_mcic; -#ifdef __s390x__ +#ifdef CONFIG_64BIT z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29); o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 | 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | diff --git a/drivers/s390/sysinfo.c b/drivers/s390/sysinfo.c index 87c2db1bd4f5..66da840c9316 100644 --- a/drivers/s390/sysinfo.c +++ b/drivers/s390/sysinfo.c @@ -106,7 +106,7 @@ static inline int stsi (void *sysinfo, { int cc, retv; -#ifndef CONFIG_ARCH_S390X +#ifndef CONFIG_64BIT __asm__ __volatile__ ( "lr\t0,%2\n" "\tlr\t1,%3\n" "\tstsi\t0(%4)\n" diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 4c42065dea88..9e8254f0256c 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1815,7 +1815,7 @@ config SCSI_SUNESP config ZFCP tristate "FCP host bus adapter driver for IBM eServer zSeries" - depends on ARCH_S390 && QDIO && SCSI + depends on S390 && QDIO && SCSI select SCSI_FC_ATTRS help If you want to access SCSI devices attached to your IBM eServer diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig index 656bc43431b9..e227a04261ab 100644 --- a/fs/partitions/Kconfig +++ b/fs/partitions/Kconfig @@ -85,7 +85,7 @@ config ATARI_PARTITION config IBM_PARTITION bool "IBM disk label and partition support" - depends on PARTITION_ADVANCED && ARCH_S390 + depends on PARTITION_ADVANCED && S390 help Say Y here if you would like to be able to read the hard disk partition table format used by IBM DASD disks operating under CMS. 
diff --git a/fs/proc/array.c b/fs/proc/array.c index 3e1239e4b303..5e9251f65317 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -308,7 +308,7 @@ int proc_pid_status(struct task_struct *task, char * buffer) buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); buffer = cpuset_task_status_allowed(task, buffer); -#if defined(CONFIG_ARCH_S390) +#if defined(CONFIG_S390) buffer = task_show_regs(task, buffer); #endif return buffer - orig; diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h index f97d92691f17..2861cdc243ad 100644 --- a/include/asm-s390/unistd.h +++ b/include/asm-s390/unistd.h @@ -539,7 +539,7 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION -# ifdef CONFIG_ARCH_S390_31 +# ifndef CONFIG_64BIT # define __ARCH_WANT_STAT64 # define __ARCH_WANT_SYS_TIME # endif diff --git a/include/linux/irq.h b/include/linux/irq.h index 60f8bc78a35a..6c5d4c898ccb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -12,7 +12,7 @@ #include #include -#if !defined(CONFIG_ARCH_S390) +#if !defined(CONFIG_S390) #include #include diff --git a/init/Kconfig b/init/Kconfig index 24e0f7c756c0..ba42f3793a84 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -189,7 +189,7 @@ config AUDIT config AUDITSYSCALL bool "Enable system-call auditing support" - depends on AUDIT && (X86 || PPC || PPC64 || ARCH_S390 || IA64 || UML || SPARC64) + depends on AUDIT && (X86 || PPC || PPC64 || S390 || IA64 || UML || SPARC64) default y if SECURITY_SELINUX help Enable low-overhead system-call auditing infrastructure that diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index c10b08a80982..c2683fcd792d 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -145,7 +145,7 @@ int __init rd_load_image(char *from) int nblocks, i, disk; char *buf = NULL; unsigned short rotate = 0; -#if !defined(CONFIG_ARCH_S390) && !defined(CONFIG_PPC_ISERIES) 
+#if !defined(CONFIG_S390) && !defined(CONFIG_PPC_ISERIES) char rotator[4] = { '|' , '/' , '-' , '\\' }; #endif @@ -237,7 +237,7 @@ int __init rd_load_image(char *from) } sys_read(in_fd, buf, BLOCK_SIZE); sys_write(out_fd, buf, BLOCK_SIZE); -#if !defined(CONFIG_ARCH_S390) && !defined(CONFIG_PPC_ISERIES) +#if !defined(CONFIG_S390) && !defined(CONFIG_PPC_ISERIES) if (!(i % 16)) { printk("%c\b", rotator[rotate & 0x3]); rotate++; diff --git a/kernel/panic.c b/kernel/panic.c index aabc5f86fa3f..c5c4ab255834 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -60,7 +60,7 @@ NORET_TYPE void panic(const char * fmt, ...) long i; static char buf[1024]; va_list args; -#if defined(CONFIG_ARCH_S390) +#if defined(CONFIG_S390) unsigned long caller = (unsigned long) __builtin_return_address(0); #endif @@ -125,7 +125,7 @@ NORET_TYPE void panic(const char * fmt, ...) printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n"); } #endif -#if defined(CONFIG_ARCH_S390) +#if defined(CONFIG_S390) disabled_wait(caller); #endif local_irq_enable(); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 345f4a1d533f..a85047bb5739 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -108,7 +108,7 @@ extern int pwrsw_enabled; extern int unaligned_enabled; #endif -#ifdef CONFIG_ARCH_S390 +#ifdef CONFIG_S390 #ifdef CONFIG_MATHEMU extern int sysctl_ieee_emulation_warnings; #endif @@ -542,7 +542,7 @@ static ctl_table kern_table[] = { .extra1 = &minolduid, .extra2 = &maxolduid, }, -#ifdef CONFIG_ARCH_S390 +#ifdef CONFIG_S390 #ifdef CONFIG_MATHEMU { .ctl_name = KERN_IEEE_EMULATION_WARNINGS, @@ -644,7 +644,7 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, -#if defined(CONFIG_ARCH_S390) +#if defined(CONFIG_S390) { .ctl_name = KERN_SPIN_RETRY, .procname = "spin_retry", diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1cedc2356b78..80598cfd728c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -32,7 +32,7 @@ config MAGIC_SYSRQ config 
LOG_BUF_SHIFT int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL range 12 21 - default 17 if ARCH_S390 + default 17 if S390 default 16 if X86_NUMAQ || IA64 default 15 if SMP default 14 -- cgit v1.2.3-71-gd317 From a1a5ea70a6e9db6332b27fe2d96666e17aa1436b Mon Sep 17 00:00:00 2001 From: Markus Lidel Date: Fri, 6 Jan 2006 00:19:29 -0800 Subject: [PATCH] I2O: changed I2O API to create I2O messages in kernel memory Changed the I2O API to create I2O messages first in kernel memory and then transfer it at once over the PCI bus instead of sending each quad-word over the PCI bus. Signed-off-by: Markus Lidel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/message/i2o/bus-osm.c | 21 +- drivers/message/i2o/device.c | 51 +- drivers/message/i2o/exec-osm.c | 93 +- drivers/message/i2o/i2o_block.c | 157 +-- drivers/message/i2o/i2o_config.c | 169 ++-- drivers/message/i2o/i2o_scsi.c | 50 +- drivers/message/i2o/iop.c | 296 +++--- drivers/message/i2o/pci.c | 1 + include/linux/i2o.h | 1947 ++++++++++++++++++++------------------ 9 files changed, 1450 insertions(+), 1335 deletions(-) (limited to 'include/linux') diff --git a/drivers/message/i2o/bus-osm.c b/drivers/message/i2o/bus-osm.c index 151b228e1cb3..ce039d322fd0 100644 --- a/drivers/message/i2o/bus-osm.c +++ b/drivers/message/i2o/bus-osm.c @@ -39,18 +39,18 @@ static struct i2o_class_id i2o_bus_class_id[] = { */ static int i2o_bus_scan(struct i2o_device *dev) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; - m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) + msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) return -ETIMEDOUT; - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_BUS_SCAN << 24 | HOST_TID << 12 | dev->lct_data.tid, - &msg->u.head[1]); + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_BUS_SCAN << 24 | 
HOST_TID << 12 | dev->lct_data. + tid); - return i2o_msg_post_wait(dev->iop, m, 60); + return i2o_msg_post_wait(dev->iop, msg, 60); }; /** @@ -59,8 +59,9 @@ static int i2o_bus_scan(struct i2o_device *dev) * * Returns count. */ -static ssize_t i2o_bus_store_scan(struct device *d, struct device_attribute *attr, const char *buf, - size_t count) +static ssize_t i2o_bus_store_scan(struct device *d, + struct device_attribute *attr, + const char *buf, size_t count) { struct i2o_device *i2o_dev = to_i2o_device(d); int rc; diff --git a/drivers/message/i2o/device.c b/drivers/message/i2o/device.c index 8eb50cdb8ae1..002ae0ed8966 100644 --- a/drivers/message/i2o/device.c +++ b/drivers/message/i2o/device.c @@ -35,18 +35,18 @@ static inline int i2o_device_issue_claim(struct i2o_device *dev, u32 cmd, u32 type) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; - m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(cmd << 24 | HOST_TID << 12 | dev->lct_data.tid, &msg->u.head[1]); - writel(type, &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(cmd << 24 | HOST_TID << 12 | dev->lct_data.tid); + msg->body[0] = cpu_to_le32(type); - return i2o_msg_post_wait(dev->iop, m, 60); + return i2o_msg_post_wait(dev->iop, msg, 60); } /** @@ -419,10 +419,9 @@ int i2o_device_parse_lct(struct i2o_controller *c) * ResultCount, ErrorInfoSize, BlockStatus and BlockSize. 
*/ int i2o_parm_issue(struct i2o_device *i2o_dev, int cmd, void *oplist, - int oplen, void *reslist, int reslen) + int oplen, void *reslist, int reslen) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; u32 *res32 = (u32 *) reslist; u32 *restmp = (u32 *) reslist; int len = 0; @@ -437,26 +436,28 @@ int i2o_parm_issue(struct i2o_device *i2o_dev, int cmd, void *oplist, if (i2o_dma_alloc(dev, &res, reslen, GFP_KERNEL)) return -ENOMEM; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) { + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) { i2o_dma_free(dev, &res); - return -ETIMEDOUT; + return PTR_ERR(msg); } i = 0; - writel(cmd << 24 | HOST_TID << 12 | i2o_dev->lct_data.tid, - &msg->u.head[1]); - writel(0, &msg->body[i++]); - writel(0x4C000000 | oplen, &msg->body[i++]); /* OperationList */ - memcpy_toio(&msg->body[i], oplist, oplen); + msg->u.head[1] = + cpu_to_le32(cmd << 24 | HOST_TID << 12 | i2o_dev->lct_data.tid); + msg->body[i++] = cpu_to_le32(0x00000000); + msg->body[i++] = cpu_to_le32(0x4C000000 | oplen); /* OperationList */ + memcpy(&msg->body[i], oplist, oplen); + i += (oplen / 4 + (oplen % 4 ? 1 : 0)); - writel(0xD0000000 | res.len, &msg->body[i++]); /* ResultList */ - writel(res.phys, &msg->body[i++]); + msg->body[i++] = cpu_to_le32(0xD0000000 | res.len); /* ResultList */ + msg->body[i++] = cpu_to_le32(res.phys); - writel(I2O_MESSAGE_SIZE(i + sizeof(struct i2o_message) / 4) | - SGL_OFFSET_5, &msg->u.head[0]); + msg->u.head[0] = + cpu_to_le32(I2O_MESSAGE_SIZE(i + sizeof(struct i2o_message) / 4) | + SGL_OFFSET_5); - rc = i2o_msg_post_wait_mem(c, m, 10, &res); + rc = i2o_msg_post_wait_mem(c, msg, 10, &res); /* This only looks like a memory leak - don't "fix" it. 
*/ if (rc == -ETIMEDOUT) diff --git a/drivers/message/i2o/exec-osm.c b/drivers/message/i2o/exec-osm.c index 9c339a2505b0..71a09332e7c0 100644 --- a/drivers/message/i2o/exec-osm.c +++ b/drivers/message/i2o/exec-osm.c @@ -114,13 +114,12 @@ static void i2o_exec_wait_free(struct i2o_exec_wait *wait) * Returns 0 on success, negative error code on timeout or positive error * code from reply. */ -int i2o_msg_post_wait_mem(struct i2o_controller *c, u32 m, unsigned long - timeout, struct i2o_dma *dma) +int i2o_msg_post_wait_mem(struct i2o_controller *c, struct i2o_message *msg, + unsigned long timeout, struct i2o_dma *dma) { DECLARE_WAIT_QUEUE_HEAD(wq); struct i2o_exec_wait *wait; static u32 tcntxt = 0x80000000; - struct i2o_message __iomem *msg = i2o_msg_in_to_virt(c, m); int rc = 0; wait = i2o_exec_wait_alloc(); @@ -138,15 +137,15 @@ int i2o_msg_post_wait_mem(struct i2o_controller *c, u32 m, unsigned long * We will only use transaction contexts >= 0x80000000 for POST WAIT, * so we could find a POST WAIT reply easier in the reply handler. */ - writel(i2o_exec_driver.context, &msg->u.s.icntxt); + msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context); wait->tcntxt = tcntxt++; - writel(wait->tcntxt, &msg->u.s.tcntxt); + msg->u.s.tcntxt = cpu_to_le32(wait->tcntxt); /* * Post the message to the controller. At some point later it will * return. If we time out before it returns then complete will be zero. */ - i2o_msg_post(c, m); + i2o_msg_post(c, msg); if (!wait->complete) { wait->wq = &wq; @@ -266,7 +265,8 @@ static int i2o_msg_post_wait_complete(struct i2o_controller *c, u32 m, * * Returns number of bytes printed into buffer. 
*/ -static ssize_t i2o_exec_show_vendor_id(struct device *d, struct device_attribute *attr, char *buf) +static ssize_t i2o_exec_show_vendor_id(struct device *d, + struct device_attribute *attr, char *buf) { struct i2o_device *dev = to_i2o_device(d); u16 id; @@ -286,7 +286,9 @@ static ssize_t i2o_exec_show_vendor_id(struct device *d, struct device_attribute * * Returns number of bytes printed into buffer. */ -static ssize_t i2o_exec_show_product_id(struct device *d, struct device_attribute *attr, char *buf) +static ssize_t i2o_exec_show_product_id(struct device *d, + struct device_attribute *attr, + char *buf) { struct i2o_device *dev = to_i2o_device(d); u16 id; @@ -385,23 +387,22 @@ static int i2o_exec_reply(struct i2o_controller *c, u32 m, u32 context; if (le32_to_cpu(msg->u.head[0]) & MSG_FAIL) { + struct i2o_message __iomem *pmsg; + u32 pm; + /* * If Fail bit is set we must take the transaction context of * the preserved message to find the right request again. */ - struct i2o_message __iomem *pmsg; - u32 pm; pm = le32_to_cpu(msg->body[3]); - pmsg = i2o_msg_in_to_virt(c, pm); + context = readl(&pmsg->u.s.tcntxt); i2o_report_status(KERN_INFO, "i2o_core", msg); - context = readl(&pmsg->u.s.tcntxt); - /* Release the preserved msg */ - i2o_msg_nop(c, pm); + i2o_msg_nop_mfa(c, pm); } else context = le32_to_cpu(msg->u.s.tcntxt); @@ -462,25 +463,26 @@ static void i2o_exec_event(struct i2o_event *evt) */ int i2o_exec_lct_get(struct i2o_controller *c) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; int i = 0; int rc = -EAGAIN; for (i = 1; i <= I2O_LCT_GET_TRIES; i++) { - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; - - writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6, &msg->u.head[0]); - writel(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(0xffffffff, &msg->body[0]); - writel(0x00000000, &msg->body[1]); - writel(0xd0000000 | c->dlct.len, &msg->body[2]); - 
writel(c->dlct.phys, &msg->body[3]); - - rc = i2o_msg_post_wait(c, m, I2O_TIMEOUT_LCT_GET); + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); + + msg->u.head[0] = + cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 | + ADAPTER_TID); + msg->body[0] = cpu_to_le32(0xffffffff); + msg->body[1] = cpu_to_le32(0x00000000); + msg->body[2] = cpu_to_le32(0xd0000000 | c->dlct.len); + msg->body[3] = cpu_to_le32(c->dlct.phys); + + rc = i2o_msg_post_wait(c, msg, I2O_TIMEOUT_LCT_GET); if (rc < 0) break; @@ -506,29 +508,28 @@ static int i2o_exec_lct_notify(struct i2o_controller *c, u32 change_ind) { i2o_status_block *sb = c->status_block.virt; struct device *dev; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; dev = &c->pdev->dev; if (i2o_dma_realloc(dev, &c->dlct, sb->expected_lct_size, GFP_KERNEL)) return -ENOMEM; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; - - writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6, &msg->u.head[0]); - writel(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(i2o_exec_driver.context, &msg->u.s.icntxt); - writel(0, &msg->u.s.tcntxt); /* FIXME */ - writel(0xffffffff, &msg->body[0]); - writel(change_ind, &msg->body[1]); - writel(0xd0000000 | c->dlct.len, &msg->body[2]); - writel(c->dlct.phys, &msg->body[3]); - - i2o_msg_post(c, m); + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); + + msg->u.head[0] = cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6); + msg->u.head[1] = cpu_to_le32(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 | + ADAPTER_TID); + msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context); + msg->u.s.tcntxt = cpu_to_le32(0x00000000); + msg->body[0] = cpu_to_le32(0xffffffff); + msg->body[1] = cpu_to_le32(change_ind); + msg->body[2] = cpu_to_le32(0xd0000000 | c->dlct.len); + 
msg->body[3] = cpu_to_le32(c->dlct.phys); + + i2o_msg_post(c, msg); return 0; }; diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index f283b5bafdd3..2bd15c70773b 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -130,20 +130,20 @@ static int i2o_block_remove(struct device *dev) */ static int i2o_block_device_flush(struct i2o_device *dev) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; - m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_BLOCK_CFLUSH << 24 | HOST_TID << 12 | dev->lct_data.tid, - &msg->u.head[1]); - writel(60 << 16, &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_BLOCK_CFLUSH << 24 | HOST_TID << 12 | dev-> + lct_data.tid); + msg->body[0] = cpu_to_le32(60 << 16); osm_debug("Flushing...\n"); - return i2o_msg_post_wait(dev->iop, m, 60); + return i2o_msg_post_wait(dev->iop, msg, 60); }; /** @@ -181,21 +181,21 @@ static int i2o_block_issue_flush(request_queue_t * queue, struct gendisk *disk, */ static int i2o_block_device_mount(struct i2o_device *dev, u32 media_id) { - struct i2o_message __iomem *msg; - u32 m; - - m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; - - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_BLOCK_MMOUNT << 24 | HOST_TID << 12 | dev->lct_data.tid, - &msg->u.head[1]); - writel(-1, &msg->body[0]); - writel(0, &msg->body[1]); + struct i2o_message *msg; + + msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); + + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + 
cpu_to_le32(I2O_CMD_BLOCK_MMOUNT << 24 | HOST_TID << 12 | dev-> + lct_data.tid); + msg->body[0] = cpu_to_le32(-1); + msg->body[1] = cpu_to_le32(0x00000000); osm_debug("Mounting...\n"); - return i2o_msg_post_wait(dev->iop, m, 2); + return i2o_msg_post_wait(dev->iop, msg, 2); }; /** @@ -210,20 +210,20 @@ static int i2o_block_device_mount(struct i2o_device *dev, u32 media_id) */ static int i2o_block_device_lock(struct i2o_device *dev, u32 media_id) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; - m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg) == I2O_QUEUE_EMPTY) + return PTR_ERR(msg); - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_BLOCK_MLOCK << 24 | HOST_TID << 12 | dev->lct_data.tid, - &msg->u.head[1]); - writel(-1, &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_BLOCK_MLOCK << 24 | HOST_TID << 12 | dev-> + lct_data.tid); + msg->body[0] = cpu_to_le32(-1); osm_debug("Locking...\n"); - return i2o_msg_post_wait(dev->iop, m, 2); + return i2o_msg_post_wait(dev->iop, msg, 2); }; /** @@ -238,20 +238,20 @@ static int i2o_block_device_lock(struct i2o_device *dev, u32 media_id) */ static int i2o_block_device_unlock(struct i2o_device *dev, u32 media_id) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; - m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_BLOCK_MUNLOCK << 24 | HOST_TID << 12 | dev->lct_data.tid, - &msg->u.head[1]); - writel(media_id, &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + 
msg->u.head[1] = + cpu_to_le32(I2O_CMD_BLOCK_MUNLOCK << 24 | HOST_TID << 12 | dev-> + lct_data.tid); + msg->body[0] = cpu_to_le32(media_id); osm_debug("Unlocking...\n"); - return i2o_msg_post_wait(dev->iop, m, 2); + return i2o_msg_post_wait(dev->iop, msg, 2); }; /** @@ -267,21 +267,21 @@ static int i2o_block_device_power(struct i2o_block_device *dev, u8 op) { struct i2o_device *i2o_dev = dev->i2o_dev; struct i2o_controller *c = i2o_dev->iop; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; int rc; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_BLOCK_POWER << 24 | HOST_TID << 12 | i2o_dev->lct_data. - tid, &msg->u.head[1]); - writel(op << 24, &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_BLOCK_POWER << 24 | HOST_TID << 12 | i2o_dev-> + lct_data.tid); + msg->body[0] = cpu_to_le32(op << 24); osm_debug("Power...\n"); - rc = i2o_msg_post_wait(c, m, 60); + rc = i2o_msg_post_wait(c, msg, 60); if (!rc) dev->power = op; @@ -331,7 +331,7 @@ static inline void i2o_block_request_free(struct i2o_block_request *ireq) */ static inline int i2o_block_sglist_alloc(struct i2o_controller *c, struct i2o_block_request *ireq, - u32 __iomem ** mptr) + u32 ** mptr) { int nents; enum dma_data_direction direction; @@ -745,10 +745,9 @@ static int i2o_block_transfer(struct request *req) struct i2o_block_device *dev = req->rq_disk->private_data; struct i2o_controller *c; int tid = dev->i2o_dev->lct_data.tid; - struct i2o_message __iomem *msg; - u32 __iomem *mptr; + struct i2o_message *msg; + u32 *mptr; struct i2o_block_request *ireq = req->special; - u32 m; u32 tcntxt; u32 sgl_offset = SGL_OFFSET_8; u32 ctl_flags = 0x00000000; @@ -763,9 +762,9 @@ static int 
i2o_block_transfer(struct request *req) c = dev->i2o_dev->iop; - m = i2o_msg_get(c, &msg); - if (m == I2O_QUEUE_EMPTY) { - rc = -EBUSY; + msg = i2o_msg_get(c); + if (IS_ERR(msg)) { + rc = PTR_ERR(msg); goto exit; } @@ -775,8 +774,8 @@ static int i2o_block_transfer(struct request *req) goto nop_msg; } - writel(i2o_block_driver.context, &msg->u.s.icntxt); - writel(tcntxt, &msg->u.s.tcntxt); + msg->u.s.icntxt = cpu_to_le32(i2o_block_driver.context); + msg->u.s.tcntxt = cpu_to_le32(tcntxt); mptr = &msg->body[0]; @@ -834,11 +833,11 @@ static int i2o_block_transfer(struct request *req) sgl_offset = SGL_OFFSET_12; - writel(I2O_CMD_PRIVATE << 24 | HOST_TID << 12 | tid, - &msg->u.head[1]); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_PRIVATE << 24 | HOST_TID << 12 | tid); - writel(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC, mptr++); - writel(tid, mptr++); + *mptr++ = cpu_to_le32(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC); + *mptr++ = cpu_to_le32(tid); /* * ENABLE_DISCONNECT @@ -853,22 +852,24 @@ static int i2o_block_transfer(struct request *req) scsi_flags = 0xa0a0000a; } - writel(scsi_flags, mptr++); + *mptr++ = cpu_to_le32(scsi_flags); *((u32 *) & cmd[2]) = cpu_to_be32(req->sector * hwsec); *((u16 *) & cmd[7]) = cpu_to_be16(req->nr_sectors * hwsec); - memcpy_toio(mptr, cmd, 10); + memcpy(mptr, cmd, 10); mptr += 4; - writel(req->nr_sectors << KERNEL_SECTOR_SHIFT, mptr++); + *mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT); } else #endif { - writel(cmd | HOST_TID << 12 | tid, &msg->u.head[1]); - writel(ctl_flags, mptr++); - writel(req->nr_sectors << KERNEL_SECTOR_SHIFT, mptr++); - writel((u32) (req->sector << KERNEL_SECTOR_SHIFT), mptr++); - writel(req->sector >> (32 - KERNEL_SECTOR_SHIFT), mptr++); + msg->u.head[1] = cpu_to_le32(cmd | HOST_TID << 12 | tid); + *mptr++ = cpu_to_le32(ctl_flags); + *mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT); + *mptr++ = + cpu_to_le32((u32) (req->sector << KERNEL_SECTOR_SHIFT)); + *mptr++ = + cpu_to_le32(req->sector >> 
(32 - KERNEL_SECTOR_SHIFT)); } if (!i2o_block_sglist_alloc(c, ireq, &mptr)) { @@ -876,13 +877,13 @@ static int i2o_block_transfer(struct request *req) goto context_remove; } - writel(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) | - sgl_offset, &msg->u.head[0]); + msg->u.head[0] = + cpu_to_le32(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) | sgl_offset); list_add_tail(&ireq->queue, &dev->open_queue); dev->open_queue_depth++; - i2o_msg_post(c, m); + i2o_msg_post(c, msg); return 0; @@ -890,7 +891,7 @@ static int i2o_block_transfer(struct request *req) i2o_cntxt_list_remove(c, req); nop_msg: - i2o_msg_nop(c, m); + i2o_msg_nop(c, msg); exit: return rc; diff --git a/drivers/message/i2o/i2o_config.c b/drivers/message/i2o/i2o_config.c index 3c3a7abebb1b..4fe73d628c5b 100644 --- a/drivers/message/i2o/i2o_config.c +++ b/drivers/message/i2o/i2o_config.c @@ -230,8 +230,7 @@ static int i2o_cfg_swdl(unsigned long arg) struct i2o_sw_xfer __user *pxfer = (struct i2o_sw_xfer __user *)arg; unsigned char maxfrag = 0, curfrag = 1; struct i2o_dma buffer; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; unsigned int status = 0, swlen = 0, fragsize = 8192; struct i2o_controller *c; @@ -257,31 +256,34 @@ static int i2o_cfg_swdl(unsigned long arg) if (!c) return -ENXIO; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -EBUSY; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); if (i2o_dma_alloc(&c->pdev->dev, &buffer, fragsize, GFP_KERNEL)) { - i2o_msg_nop(c, m); + i2o_msg_nop(c, msg); return -ENOMEM; } __copy_from_user(buffer.virt, kxfer.buf, fragsize); - writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_7, &msg->u.head[0]); - writel(I2O_CMD_SW_DOWNLOAD << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(i2o_config_driver.context, &msg->u.head[2]); - writel(0, &msg->u.head[3]); - writel((((u32) kxfer.flags) << 24) | (((u32) kxfer.sw_type) << 16) | - (((u32) maxfrag) << 8) | (((u32) 
curfrag)), &msg->body[0]); - writel(swlen, &msg->body[1]); - writel(kxfer.sw_id, &msg->body[2]); - writel(0xD0000000 | fragsize, &msg->body[3]); - writel(buffer.phys, &msg->body[4]); + msg->u.head[0] = cpu_to_le32(NINE_WORD_MSG_SIZE | SGL_OFFSET_7); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_SW_DOWNLOAD << 24 | HOST_TID << 12 | + ADAPTER_TID); + msg->u.head[2] = cpu_to_le32(i2o_config_driver.context); + msg->u.head[3] = cpu_to_le32(0); + msg->body[0] = + cpu_to_le32((((u32) kxfer.flags) << 24) | (((u32) kxfer. + sw_type) << 16) | + (((u32) maxfrag) << 8) | (((u32) curfrag))); + msg->body[1] = cpu_to_le32(swlen); + msg->body[2] = cpu_to_le32(kxfer.sw_id); + msg->body[3] = cpu_to_le32(0xD0000000 | fragsize); + msg->body[4] = cpu_to_le32(buffer.phys); osm_debug("swdl frag %d/%d (size %d)\n", curfrag, maxfrag, fragsize); - status = i2o_msg_post_wait_mem(c, m, 60, &buffer); + status = i2o_msg_post_wait_mem(c, msg, 60, &buffer); if (status != -ETIMEDOUT) i2o_dma_free(&c->pdev->dev, &buffer); @@ -302,8 +304,7 @@ static int i2o_cfg_swul(unsigned long arg) struct i2o_sw_xfer __user *pxfer = (struct i2o_sw_xfer __user *)arg; unsigned char maxfrag = 0, curfrag = 1; struct i2o_dma buffer; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; unsigned int status = 0, swlen = 0, fragsize = 8192; struct i2o_controller *c; int ret = 0; @@ -330,30 +331,30 @@ static int i2o_cfg_swul(unsigned long arg) if (!c) return -ENXIO; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -EBUSY; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); if (i2o_dma_alloc(&c->pdev->dev, &buffer, fragsize, GFP_KERNEL)) { - i2o_msg_nop(c, m); + i2o_msg_nop(c, msg); return -ENOMEM; } - writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_7, &msg->u.head[0]); - writel(I2O_CMD_SW_UPLOAD << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(i2o_config_driver.context, &msg->u.head[2]); - writel(0, 
&msg->u.head[3]); - writel((u32) kxfer.flags << 24 | (u32) kxfer. - sw_type << 16 | (u32) maxfrag << 8 | (u32) curfrag, - &msg->body[0]); - writel(swlen, &msg->body[1]); - writel(kxfer.sw_id, &msg->body[2]); - writel(0xD0000000 | fragsize, &msg->body[3]); - writel(buffer.phys, &msg->body[4]); + msg->u.head[0] = cpu_to_le32(NINE_WORD_MSG_SIZE | SGL_OFFSET_7); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_SW_UPLOAD << 24 | HOST_TID << 12 | ADAPTER_TID); + msg->u.head[2] = cpu_to_le32(i2o_config_driver.context); + msg->u.head[3] = cpu_to_le32(0); + msg->body[0] = + cpu_to_le32((u32) kxfer.flags << 24 | (u32) kxfer. + sw_type << 16 | (u32) maxfrag << 8 | (u32) curfrag); + msg->body[1] = cpu_to_le32(swlen); + msg->body[2] = cpu_to_le32(kxfer.sw_id); + msg->body[3] = cpu_to_le32(0xD0000000 | fragsize); + msg->body[4] = cpu_to_le32(buffer.phys); osm_debug("swul frag %d/%d (size %d)\n", curfrag, maxfrag, fragsize); - status = i2o_msg_post_wait_mem(c, m, 60, &buffer); + status = i2o_msg_post_wait_mem(c, msg, 60, &buffer); if (status != I2O_POST_WAIT_OK) { if (status != -ETIMEDOUT) @@ -380,8 +381,7 @@ static int i2o_cfg_swdel(unsigned long arg) struct i2o_controller *c; struct i2o_sw_xfer kxfer; struct i2o_sw_xfer __user *pxfer = (struct i2o_sw_xfer __user *)arg; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; unsigned int swlen; int token; @@ -395,21 +395,21 @@ static int i2o_cfg_swdel(unsigned long arg) if (!c) return -ENXIO; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -EBUSY; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(SEVEN_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_SW_REMOVE << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(i2o_config_driver.context, &msg->u.head[2]); - writel(0, &msg->u.head[3]); - writel((u32) kxfer.flags << 24 | (u32) kxfer.sw_type << 16, - &msg->body[0]); - writel(swlen, 
&msg->body[1]); - writel(kxfer.sw_id, &msg->body[2]); + msg->u.head[0] = cpu_to_le32(SEVEN_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_SW_REMOVE << 24 | HOST_TID << 12 | ADAPTER_TID); + msg->u.head[2] = cpu_to_le32(i2o_config_driver.context); + msg->u.head[3] = cpu_to_le32(0); + msg->body[0] = + cpu_to_le32((u32) kxfer.flags << 24 | (u32) kxfer.sw_type << 16); + msg->body[1] = cpu_to_le32(swlen); + msg->body[2] = cpu_to_le32(kxfer.sw_id); - token = i2o_msg_post_wait(c, m, 10); + token = i2o_msg_post_wait(c, msg, 10); if (token != I2O_POST_WAIT_OK) { osm_info("swdel failed, DetailedStatus = %d\n", token); @@ -423,25 +423,24 @@ static int i2o_cfg_validate(unsigned long arg) { int token; int iop = (int)arg; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; struct i2o_controller *c; c = i2o_find_iop(iop); if (!c) return -ENXIO; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -EBUSY; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_CONFIG_VALIDATE << 24 | HOST_TID << 12 | iop, - &msg->u.head[1]); - writel(i2o_config_driver.context, &msg->u.head[2]); - writel(0, &msg->u.head[3]); + msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_CONFIG_VALIDATE << 24 | HOST_TID << 12 | iop); + msg->u.head[2] = cpu_to_le32(i2o_config_driver.context); + msg->u.head[3] = cpu_to_le32(0); - token = i2o_msg_post_wait(c, m, 10); + token = i2o_msg_post_wait(c, msg, 10); if (token != I2O_POST_WAIT_OK) { osm_info("Can't validate configuration, ErrorStatus = %d\n", @@ -454,8 +453,7 @@ static int i2o_cfg_validate(unsigned long arg) static int i2o_cfg_evt_reg(unsigned long arg, struct file *fp) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; struct i2o_evt_id __user *pdesc = (struct i2o_evt_id __user 
*)arg; struct i2o_evt_id kdesc; struct i2o_controller *c; @@ -474,18 +472,19 @@ static int i2o_cfg_evt_reg(unsigned long arg, struct file *fp) if (!d) return -ENODEV; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -EBUSY; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 | kdesc.tid, - &msg->u.head[1]); - writel(i2o_config_driver.context, &msg->u.head[2]); - writel(i2o_cntxt_list_add(c, fp->private_data), &msg->u.head[3]); - writel(kdesc.evt_mask, &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 | + kdesc.tid); + msg->u.head[2] = cpu_to_le32(i2o_config_driver.context); + msg->u.head[3] = cpu_to_le32(i2o_cntxt_list_add(c, fp->private_data)); + msg->body[0] = cpu_to_le32(kdesc.evt_mask); - i2o_msg_post(c, m); + i2o_msg_post(c, msg); return 0; } @@ -537,7 +536,6 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, u32 sg_index = 0; i2o_status_block *sb; struct i2o_message *msg; - u32 m; unsigned int iop; cmd = (struct i2o_cmd_passthru32 __user *)arg; @@ -553,7 +551,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, return -ENXIO; } - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); sb = c->status_block.virt; @@ -595,8 +593,8 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, sg_offset = (msg->u.head[0] >> 4) & 0x0f; - writel(i2o_config_driver.context, &msg->u.s.icntxt); - writel(i2o_cntxt_list_add(c, reply), &msg->u.s.tcntxt); + msg->u.s.icntxt = cpu_to_le32(i2o_config_driver.context); + msg->u.s.tcntxt = cpu_to_le32(i2o_cntxt_list_add(c, reply)); memset(sg_list, 0, sizeof(sg_list[0]) * SG_TABLESIZE); if (sg_offset) { @@ -662,7 +660,7 @@ 
static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, } } - rcode = i2o_msg_post_wait(c, m, 60); + rcode = i2o_msg_post_wait(c, msg, 60); if (rcode) goto sg_list_cleanup; @@ -780,8 +778,7 @@ static int i2o_cfg_passthru(unsigned long arg) u32 i = 0; void *p = NULL; i2o_status_block *sb; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; unsigned int iop; if (get_user(iop, &cmd->iop) || get_user(user_msg, &cmd->msg)) @@ -793,7 +790,7 @@ static int i2o_cfg_passthru(unsigned long arg) return -ENXIO; } - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); sb = c->status_block.virt; @@ -830,8 +827,8 @@ static int i2o_cfg_passthru(unsigned long arg) sg_offset = (msg->u.head[0] >> 4) & 0x0f; - writel(i2o_config_driver.context, &msg->u.s.icntxt); - writel(i2o_cntxt_list_add(c, reply), &msg->u.s.tcntxt); + msg->u.s.icntxt = cpu_to_le32(i2o_config_driver.context); + msg->u.s.tcntxt = cpu_to_le32(i2o_cntxt_list_add(c, reply)); memset(sg_list, 0, sizeof(sg_list[0]) * SG_TABLESIZE); if (sg_offset) { @@ -894,7 +891,7 @@ static int i2o_cfg_passthru(unsigned long arg) } } - rcode = i2o_msg_post_wait(c, m, 60); + rcode = i2o_msg_post_wait(c, msg, 60); if (rcode) goto sg_list_cleanup; diff --git a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c index 9f1744c3933b..7a784fd60804 100644 --- a/drivers/message/i2o/i2o_scsi.c +++ b/drivers/message/i2o/i2o_scsi.c @@ -510,8 +510,7 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt, struct i2o_controller *c; struct i2o_device *i2o_dev; int tid; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; /* * ENABLE_DISCONNECT * SIMPLE_TAG @@ -519,7 +518,7 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt, */ u32 scsi_flags = 0x20a00000; u32 sgl_offset; - u32 __iomem *mptr; + u32 *mptr; u32 cmd = I2O_CMD_SCSI_EXEC << 24; int rc = 0; @@ -576,8 +575,8 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt, * 
throw it back to the scsi layer */ - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) { + msg = i2o_msg_get(c); + if (IS_ERR(msg)) { rc = SCSI_MLQUEUE_HOST_BUSY; goto exit; } @@ -617,16 +616,16 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt, if (sgl_offset == SGL_OFFSET_10) sgl_offset = SGL_OFFSET_12; cmd = I2O_CMD_PRIVATE << 24; - writel(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC, mptr++); - writel(adpt_flags | tid, mptr++); + *mptr++ = cpu_to_le32(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC); + *mptr++ = cpu_to_le32(adpt_flags | tid); } #endif - writel(cmd | HOST_TID << 12 | tid, &msg->u.head[1]); - writel(i2o_scsi_driver.context, &msg->u.s.icntxt); + msg->u.head[1] = cpu_to_le32(cmd | HOST_TID << 12 | tid); + msg->u.s.icntxt = cpu_to_le32(i2o_scsi_driver.context); /* We want the SCSI control block back */ - writel(i2o_cntxt_list_add(c, SCpnt), &msg->u.s.tcntxt); + msg->u.s.tcntxt = cpu_to_le32(i2o_cntxt_list_add(c, SCpnt)); /* LSI_920_PCI_QUIRK * @@ -649,15 +648,15 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt, } */ - writel(scsi_flags | SCpnt->cmd_len, mptr++); + *mptr++ = cpu_to_le32(scsi_flags | SCpnt->cmd_len); /* Write SCSI command into the message - always 16 byte block */ - memcpy_toio(mptr, SCpnt->cmnd, 16); + memcpy(mptr, SCpnt->cmnd, 16); mptr += 4; if (sgl_offset != SGL_OFFSET_0) { /* write size of data addressed by SGL */ - writel(SCpnt->request_bufflen, mptr++); + *mptr++ = cpu_to_le32(SCpnt->request_bufflen); /* Now fill in the SGList and command */ if (SCpnt->use_sg) { @@ -676,11 +675,11 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt, } /* Stick the headers on */ - writel(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) | sgl_offset, - &msg->u.head[0]); + msg->u.head[0] = + cpu_to_le32(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) | sgl_offset); /* Queue the message */ - i2o_msg_post(c, m); + i2o_msg_post(c, msg); osm_debug("Issued %ld\n", SCpnt->serial_number); @@ -688,7 +687,7 @@ static 
int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt, nomem: rc = -ENOMEM; - i2o_msg_nop(c, m); + i2o_msg_nop(c, msg); exit: return rc; @@ -709,8 +708,7 @@ static int i2o_scsi_abort(struct scsi_cmnd *SCpnt) { struct i2o_device *i2o_dev; struct i2o_controller *c; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; int tid; int status = FAILED; @@ -720,16 +718,16 @@ static int i2o_scsi_abort(struct scsi_cmnd *SCpnt) c = i2o_dev->iop; tid = i2o_dev->lct_data.tid; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) return SCSI_MLQUEUE_HOST_BUSY; - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_SCSI_ABORT << 24 | HOST_TID << 12 | tid, - &msg->u.head[1]); - writel(i2o_cntxt_list_get_ptr(c, SCpnt), &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_SCSI_ABORT << 24 | HOST_TID << 12 | tid); + msg->body[0] = cpu_to_le32(i2o_cntxt_list_get_ptr(c, SCpnt)); - if (i2o_msg_post_wait(c, m, I2O_TIMEOUT_SCSI_SCB_ABORT)) + if (i2o_msg_post_wait(c, msg, I2O_TIMEOUT_SCSI_SCB_ABORT)) status = SUCCESS; return status; diff --git a/drivers/message/i2o/iop.c b/drivers/message/i2o/iop.c index 4eb53258842e..f86abb42bf89 100644 --- a/drivers/message/i2o/iop.c +++ b/drivers/message/i2o/iop.c @@ -46,27 +46,6 @@ static struct i2o_dma i2o_systab; static int i2o_hrt_get(struct i2o_controller *c); -/** - * i2o_msg_nop - Returns a message which is not used - * @c: I2O controller from which the message was created - * @m: message which should be returned - * - * If you fetch a message via i2o_msg_get, and can't use it, you must - * return the message with this function. Otherwise the message frame - * is lost. 
- */ -void i2o_msg_nop(struct i2o_controller *c, u32 m) -{ - struct i2o_message __iomem *msg = i2o_msg_in_to_virt(c, m); - - writel(THREE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_UTIL_NOP << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(0, &msg->u.head[2]); - writel(0, &msg->u.head[3]); - i2o_msg_post(c, m); -}; - /** * i2o_msg_get_wait - obtain an I2O message from the IOP * @c: I2O controller @@ -81,22 +60,21 @@ void i2o_msg_nop(struct i2o_controller *c, u32 m) * address from the read port (see the i2o spec). If no message is * available returns I2O_QUEUE_EMPTY and msg is leaved untouched. */ -u32 i2o_msg_get_wait(struct i2o_controller *c, - struct i2o_message __iomem ** msg, int wait) +struct i2o_message *i2o_msg_get_wait(struct i2o_controller *c, int wait) { unsigned long timeout = jiffies + wait * HZ; - u32 m; + struct i2o_message *msg; - while ((m = i2o_msg_get(c, msg)) == I2O_QUEUE_EMPTY) { + while (IS_ERR(msg = i2o_msg_get(c))) { if (time_after(jiffies, timeout)) { osm_debug("%s: Timeout waiting for message frame.\n", c->name); - return I2O_QUEUE_EMPTY; + return ERR_PTR(-ETIMEDOUT); } schedule_timeout_uninterruptible(1); } - return m; + return msg; }; #if BITS_PER_LONG == 64 @@ -301,8 +279,7 @@ struct i2o_device *i2o_iop_find_device(struct i2o_controller *c, u16 tid) */ static int i2o_iop_quiesce(struct i2o_controller *c) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; i2o_status_block *sb = c->status_block.virt; int rc; @@ -313,16 +290,17 @@ static int i2o_iop_quiesce(struct i2o_controller *c) (sb->iop_state != ADAPTER_STATE_OPERATIONAL)) return 0; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_SYS_QUIESCE << 24 | HOST_TID << 12 | ADAPTER_TID, - 
&msg->u.head[1]); + msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_SYS_QUIESCE << 24 | HOST_TID << 12 | + ADAPTER_TID); /* Long timeout needed for quiesce if lots of devices */ - if ((rc = i2o_msg_post_wait(c, m, 240))) + if ((rc = i2o_msg_post_wait(c, msg, 240))) osm_info("%s: Unable to quiesce (status=%#x).\n", c->name, -rc); else osm_debug("%s: Quiesced.\n", c->name); @@ -342,8 +320,7 @@ static int i2o_iop_quiesce(struct i2o_controller *c) */ static int i2o_iop_enable(struct i2o_controller *c) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; i2o_status_block *sb = c->status_block.virt; int rc; @@ -353,16 +330,17 @@ static int i2o_iop_enable(struct i2o_controller *c) if (sb->iop_state != ADAPTER_STATE_READY) return -EINVAL; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_SYS_ENABLE << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); + msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_SYS_ENABLE << 24 | HOST_TID << 12 | + ADAPTER_TID); /* How long of a timeout do we need? 
*/ - if ((rc = i2o_msg_post_wait(c, m, 240))) + if ((rc = i2o_msg_post_wait(c, msg, 240))) osm_err("%s: Could not enable (status=%#x).\n", c->name, -rc); else osm_debug("%s: Enabled.\n", c->name); @@ -413,22 +391,22 @@ static inline void i2o_iop_enable_all(void) */ static int i2o_iop_clear(struct i2o_controller *c) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; int rc; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); /* Quiesce all IOPs first */ i2o_iop_quiesce_all(); - writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_ADAPTER_CLEAR << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); + msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_ADAPTER_CLEAR << 24 | HOST_TID << 12 | + ADAPTER_TID); - if ((rc = i2o_msg_post_wait(c, m, 30))) + if ((rc = i2o_msg_post_wait(c, msg, 30))) osm_info("%s: Unable to clear (status=%#x).\n", c->name, -rc); else osm_debug("%s: Cleared.\n", c->name); @@ -446,13 +424,13 @@ static int i2o_iop_clear(struct i2o_controller *c) * Clear and (re)initialize IOP's outbound queue and post the message * frames to the IOP. * - * Returns 0 on success or a negative errno code on failure. + * Returns 0 on success or negative error code on failure. 
*/ static int i2o_iop_init_outbound_queue(struct i2o_controller *c) { - volatile u8 *status = c->status.virt; u32 m; - struct i2o_message __iomem *msg; + volatile u8 *status = c->status.virt; + struct i2o_message *msg; ulong timeout; int i; @@ -460,23 +438,24 @@ static int i2o_iop_init_outbound_queue(struct i2o_controller *c) memset(c->status.virt, 0, 4); - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; - - writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6, &msg->u.head[0]); - writel(I2O_CMD_OUTBOUND_INIT << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(i2o_exec_driver.context, &msg->u.s.icntxt); - writel(0x00000000, &msg->u.s.tcntxt); - writel(PAGE_SIZE, &msg->body[0]); + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); + + msg->u.head[0] = cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_OUTBOUND_INIT << 24 | HOST_TID << 12 | + ADAPTER_TID); + msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context); + msg->u.s.tcntxt = cpu_to_le32(0x00000000); + msg->body[0] = cpu_to_le32(PAGE_SIZE); /* Outbound msg frame size in words and Initcode */ - writel(I2O_OUTBOUND_MSG_FRAME_SIZE << 16 | 0x80, &msg->body[1]); - writel(0xd0000004, &msg->body[2]); - writel(i2o_dma_low(c->status.phys), &msg->body[3]); - writel(i2o_dma_high(c->status.phys), &msg->body[4]); + msg->body[1] = cpu_to_le32(I2O_OUTBOUND_MSG_FRAME_SIZE << 16 | 0x80); + msg->body[2] = cpu_to_le32(0xd0000004); + msg->body[3] = cpu_to_le32(i2o_dma_low(c->status.phys)); + msg->body[4] = cpu_to_le32(i2o_dma_high(c->status.phys)); - i2o_msg_post(c, m); + i2o_msg_post(c, msg); timeout = jiffies + I2O_TIMEOUT_INIT_OUTBOUND_QUEUE * HZ; while (*status <= I2O_CMD_IN_PROGRESS) { @@ -511,34 +490,34 @@ static int i2o_iop_init_outbound_queue(struct i2o_controller *c) static int i2o_iop_reset(struct i2o_controller *c) { volatile u8 *status = c->status.virt; - struct i2o_message 
__iomem *msg; - u32 m; + struct i2o_message *msg; unsigned long timeout; i2o_status_block *sb = c->status_block.virt; int rc = 0; osm_debug("%s: Resetting controller\n", c->name); - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); memset(c->status_block.virt, 0, 8); /* Quiesce all IOPs first */ i2o_iop_quiesce_all(); - writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_ADAPTER_RESET << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(i2o_exec_driver.context, &msg->u.s.icntxt); - writel(0, &msg->u.s.tcntxt); //FIXME: use reasonable transaction context - writel(0, &msg->body[0]); - writel(0, &msg->body[1]); - writel(i2o_dma_low(c->status.phys), &msg->body[2]); - writel(i2o_dma_high(c->status.phys), &msg->body[3]); + msg->u.head[0] = cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_ADAPTER_RESET << 24 | HOST_TID << 12 | + ADAPTER_TID); + msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context); + msg->u.s.tcntxt = cpu_to_le32(0x00000000); + msg->body[0] = cpu_to_le32(0x00000000); + msg->body[1] = cpu_to_le32(0x00000000); + msg->body[2] = cpu_to_le32(i2o_dma_low(c->status.phys)); + msg->body[3] = cpu_to_le32(i2o_dma_high(c->status.phys)); - i2o_msg_post(c, m); + i2o_msg_post(c, msg); /* Wait for a reply */ timeout = jiffies + I2O_TIMEOUT_RESET * HZ; @@ -567,18 +546,15 @@ static int i2o_iop_reset(struct i2o_controller *c) osm_debug("%s: Reset in progress, waiting for reboot...\n", c->name); - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_RESET); - while (m == I2O_QUEUE_EMPTY) { + while (IS_ERR(msg = i2o_msg_get_wait(c, I2O_TIMEOUT_RESET))) { if (time_after(jiffies, timeout)) { osm_err("%s: IOP reset timeout.\n", c->name); - rc = -ETIMEDOUT; + rc = PTR_ERR(msg); goto exit; } schedule_timeout_uninterruptible(1); - - m = i2o_msg_get_wait(c, &msg, 
I2O_TIMEOUT_RESET); } - i2o_msg_nop(c, m); + i2o_msg_nop(c, msg); /* from here all quiesce commands are safe */ c->no_quiesce = 0; @@ -686,8 +662,7 @@ static int i2o_iop_activate(struct i2o_controller *c) */ static int i2o_iop_systab_set(struct i2o_controller *c) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; i2o_status_block *sb = c->status_block.virt; struct device *dev = &c->pdev->dev; struct resource *root; @@ -735,20 +710,21 @@ static int i2o_iop_systab_set(struct i2o_controller *c) } } - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); i2o_systab.phys = dma_map_single(dev, i2o_systab.virt, i2o_systab.len, PCI_DMA_TODEVICE); if (!i2o_systab.phys) { - i2o_msg_nop(c, m); + i2o_msg_nop(c, msg); return -ENOMEM; } - writel(I2O_MESSAGE_SIZE(12) | SGL_OFFSET_6, &msg->u.head[0]); - writel(I2O_CMD_SYS_TAB_SET << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); + msg->u.head[0] = cpu_to_le32(I2O_MESSAGE_SIZE(12) | SGL_OFFSET_6); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_SYS_TAB_SET << 24 | HOST_TID << 12 | + ADAPTER_TID); /* * Provide three SGL-elements: @@ -760,16 +736,16 @@ static int i2o_iop_systab_set(struct i2o_controller *c) * same table to everyone. 
We have to go remap it for them all */ - writel(c->unit + 2, &msg->body[0]); - writel(0, &msg->body[1]); - writel(0x54000000 | i2o_systab.len, &msg->body[2]); - writel(i2o_systab.phys, &msg->body[3]); - writel(0x54000000 | sb->current_mem_size, &msg->body[4]); - writel(sb->current_mem_base, &msg->body[5]); - writel(0xd4000000 | sb->current_io_size, &msg->body[6]); - writel(sb->current_io_base, &msg->body[6]); + msg->body[0] = cpu_to_le32(c->unit + 2); + msg->body[1] = cpu_to_le32(0x00000000); + msg->body[2] = cpu_to_le32(0x54000000 | i2o_systab.len); + msg->body[3] = cpu_to_le32(i2o_systab.phys); + msg->body[4] = cpu_to_le32(0x54000000 | sb->current_mem_size); + msg->body[5] = cpu_to_le32(sb->current_mem_base); + msg->body[6] = cpu_to_le32(0xd4000000 | sb->current_io_size); + msg->body[6] = cpu_to_le32(sb->current_io_base); - rc = i2o_msg_post_wait(c, m, 120); + rc = i2o_msg_post_wait(c, msg, 120); dma_unmap_single(dev, i2o_systab.phys, i2o_systab.len, PCI_DMA_TODEVICE); @@ -952,30 +928,30 @@ static int i2o_parse_hrt(struct i2o_controller *c) */ int i2o_status_get(struct i2o_controller *c) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; volatile u8 *status_block; unsigned long timeout; status_block = (u8 *) c->status_block.virt; memset(c->status_block.virt, 0, sizeof(i2o_status_block)); - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_STATUS_GET << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(i2o_exec_driver.context, &msg->u.s.icntxt); - writel(0, &msg->u.s.tcntxt); // FIXME: use resonable transaction context - writel(0, &msg->body[0]); - writel(0, &msg->body[1]); - writel(i2o_dma_low(c->status_block.phys), &msg->body[2]); - writel(i2o_dma_high(c->status_block.phys), &msg->body[3]); - 
writel(sizeof(i2o_status_block), &msg->body[4]); /* always 88 bytes */ + msg->u.head[0] = cpu_to_le32(NINE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_STATUS_GET << 24 | HOST_TID << 12 | + ADAPTER_TID); + msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context); + msg->u.s.tcntxt = cpu_to_le32(0x00000000); + msg->body[0] = cpu_to_le32(0x00000000); + msg->body[1] = cpu_to_le32(0x00000000); + msg->body[2] = cpu_to_le32(i2o_dma_low(c->status_block.phys)); + msg->body[3] = cpu_to_le32(i2o_dma_high(c->status_block.phys)); + msg->body[4] = cpu_to_le32(sizeof(i2o_status_block)); /* always 88 bytes */ - i2o_msg_post(c, m); + i2o_msg_post(c, msg); /* Wait for a reply */ timeout = jiffies + I2O_TIMEOUT_STATUS_GET * HZ; @@ -1013,20 +989,20 @@ static int i2o_hrt_get(struct i2o_controller *c) struct device *dev = &c->pdev->dev; for (i = 0; i < I2O_HRT_GET_TRIES; i++) { - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(SIX_WORD_MSG_SIZE | SGL_OFFSET_4, &msg->u.head[0]); - writel(I2O_CMD_HRT_GET << 24 | HOST_TID << 12 | ADAPTER_TID, - &msg->u.head[1]); - writel(0xd0000000 | c->hrt.len, &msg->body[0]); - writel(c->hrt.phys, &msg->body[1]); + msg->u.head[0] = cpu_to_le32(SIX_WORD_MSG_SIZE | SGL_OFFSET_4); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_HRT_GET << 24 | HOST_TID << 12 | + ADAPTER_TID); + msg->body[0] = cpu_to_le32(0xd0000000 | c->hrt.len); + msg->body[1] = cpu_to_le32(c->hrt.phys); - rc = i2o_msg_post_wait_mem(c, m, 20, &c->hrt); + rc = i2o_msg_post_wait_mem(c, msg, 20, &c->hrt); if (rc < 0) { osm_err("%s: Unable to get HRT (status=%#x)\n", c->name, @@ -1056,6 +1032,7 @@ static int i2o_hrt_get(struct i2o_controller *c) */ void i2o_iop_free(struct i2o_controller *c) { + i2o_pool_free(&c->in_msg); kfree(c); }; @@ -1080,7 
+1057,7 @@ static struct class *i2o_controller_class; * i2o_iop_alloc - Allocate and initialize a i2o_controller struct * * Allocate the necessary memory for a i2o_controller struct and - * initialize the lists. + * initialize the lists and message mempool. * * Returns a pointer to the I2O controller or a negative error code on * failure. @@ -1089,6 +1066,7 @@ struct i2o_controller *i2o_iop_alloc(void) { static int unit = 0; /* 0 and 1 are NULL IOP and Local Host */ struct i2o_controller *c; + char poolname[32]; c = kmalloc(sizeof(*c), GFP_KERNEL); if (!c) { @@ -1098,11 +1076,20 @@ struct i2o_controller *i2o_iop_alloc(void) } memset(c, 0, sizeof(*c)); + c->unit = unit++; + sprintf(c->name, "iop%d", c->unit); + + snprintf(poolname, sizeof(poolname), "i2o_%s_msg_inpool", c->name); + if (i2o_pool_alloc + (&c->in_msg, poolname, I2O_INBOUND_MSG_FRAME_SIZE * 4, + I2O_MSG_INPOOL_MIN)) { + kfree(c); + return ERR_PTR(-ENOMEM); + }; + INIT_LIST_HEAD(&c->devices); spin_lock_init(&c->lock); init_MUTEX(&c->lct_lock); - c->unit = unit++; - sprintf(c->name, "iop%d", c->unit); device_initialize(&c->device); @@ -1199,28 +1186,27 @@ int i2o_iop_add(struct i2o_controller *c) * is waited for, or expected. If you do not want further notifications, * call the i2o_event_register again with a evt_mask of 0. * - * Returns 0 on success or -ETIMEDOUT if no message could be fetched for - * sending the request. + * Returns 0 on success or negative error code on failure. 
*/ int i2o_event_register(struct i2o_device *dev, struct i2o_driver *drv, int tcntxt, u32 evt_mask) { struct i2o_controller *c = dev->iop; - struct i2o_message __iomem *msg; - u32 m; + struct i2o_message *msg; - m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET); - if (m == I2O_QUEUE_EMPTY) - return -ETIMEDOUT; + msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET); + if (IS_ERR(msg)) + return PTR_ERR(msg); - writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]); - writel(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 | dev->lct_data. - tid, &msg->u.head[1]); - writel(drv->context, &msg->u.s.icntxt); - writel(tcntxt, &msg->u.s.tcntxt); - writel(evt_mask, &msg->body[0]); + msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0); + msg->u.head[1] = + cpu_to_le32(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 | dev-> + lct_data.tid); + msg->u.s.icntxt = cpu_to_le32(drv->context); + msg->u.s.tcntxt = cpu_to_le32(tcntxt); + msg->body[0] = cpu_to_le32(evt_mask); - i2o_msg_post(c, m); + i2o_msg_post(c, msg); return 0; }; diff --git a/drivers/message/i2o/pci.c b/drivers/message/i2o/pci.c index ee7075fa1ec3..329d482eee81 100644 --- a/drivers/message/i2o/pci.c +++ b/drivers/message/i2o/pci.c @@ -483,4 +483,5 @@ void __exit i2o_pci_exit(void) { pci_unregister_driver(&i2o_pci_driver); }; + MODULE_DEVICE_TABLE(pci, i2o_pci_ids); diff --git a/include/linux/i2o.h b/include/linux/i2o.h index d79c8a4bc4f8..9e359a981221 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -30,6 +30,7 @@ #include #include #include /* work_struct */ +#include #include #include /* Needed for MUTEX init macros */ @@ -38,1091 +39,1219 @@ #define I2O_QUEUE_EMPTY 0xffffffff /* - * Message structures + * Cache strategies */ -struct i2o_message { - union { - struct { - u8 version_offset; - u8 flags; - u16 size; - u32 target_tid:12; - u32 init_tid:12; - u32 function:8; - u32 icntxt; /* initiator context */ - u32 tcntxt; /* transaction context */ - } s; - u32 head[4]; - } u; - /* 
List follows */ - u32 body[0]; -}; -/* - * Each I2O device entity has one of these. There is one per device. +/* The NULL strategy leaves everything up to the controller. This tends to be a + * pessimal but functional choice. */ -struct i2o_device { - i2o_lct_entry lct_data; /* Device LCT information */ - - struct i2o_controller *iop; /* Controlling IOP */ - struct list_head list; /* node in IOP devices list */ - - struct device device; - - struct semaphore lock; /* device lock */ -}; +#define CACHE_NULL 0 +/* Prefetch data when reading. We continually attempt to load the next 32 sectors + * into the controller cache. + */ +#define CACHE_PREFETCH 1 +/* Prefetch data when reading. We sometimes attempt to load the next 32 sectors + * into the controller cache. When an I/O is less <= 8K we assume its probably + * not sequential and don't prefetch (default) + */ +#define CACHE_SMARTFETCH 2 +/* Data is written to the cache and then out on to the disk. The I/O must be + * physically on the medium before the write is acknowledged (default without + * NVRAM) + */ +#define CACHE_WRITETHROUGH 17 +/* Data is written to the cache and then out on to the disk. The controller + * is permitted to write back the cache any way it wants. (default if battery + * backed NVRAM is present). It can be useful to set this for swap regardless of + * battery state. + */ +#define CACHE_WRITEBACK 18 +/* Optimise for under powered controllers, especially on RAID1 and RAID0. We + * write large I/O's directly to disk bypassing the cache to avoid the extra + * memory copy hits. Small writes are writeback cached + */ +#define CACHE_SMARTBACK 19 +/* Optimise for under powered controllers, especially on RAID1 and RAID0. We + * write large I/O's directly to disk bypassing the cache to avoid the extra + * memory copy hits. Small writes are writethrough cached. 
Suitable for devices + * lacking battery backup + */ +#define CACHE_SMARTTHROUGH 20 /* - * Event structure provided to the event handling function + * Ioctl structures */ -struct i2o_event { - struct work_struct work; - struct i2o_device *i2o_dev; /* I2O device pointer from which the - event reply was initiated */ - u16 size; /* Size of data in 32-bit words */ - u32 tcntxt; /* Transaction context used at - registration */ - u32 event_indicator; /* Event indicator from reply */ - u32 data[0]; /* Event data from reply */ -}; + +#define BLKI2OGRSTRAT _IOR('2', 1, int) +#define BLKI2OGWSTRAT _IOR('2', 2, int) +#define BLKI2OSRSTRAT _IOW('2', 3, int) +#define BLKI2OSWSTRAT _IOW('2', 4, int) /* - * I2O classes which could be handled by the OSM + * I2O Function codes */ -struct i2o_class_id { - u16 class_id:12; -}; /* - * I2O driver structure for OSMs + * Executive Class */ -struct i2o_driver { - char *name; /* OSM name */ - int context; /* Low 8 bits of the transaction info */ - struct i2o_class_id *classes; /* I2O classes that this OSM handles */ - - /* Message reply handler */ - int (*reply) (struct i2o_controller *, u32, struct i2o_message *); - - /* Event handler */ - void (*event) (struct i2o_event *); - - struct workqueue_struct *event_queue; /* Event queue */ - - struct device_driver driver; - - /* notification of changes */ - void (*notify_controller_add) (struct i2o_controller *); - void (*notify_controller_remove) (struct i2o_controller *); - void (*notify_device_add) (struct i2o_device *); - void (*notify_device_remove) (struct i2o_device *); - - struct semaphore lock; -}; +#define I2O_CMD_ADAPTER_ASSIGN 0xB3 +#define I2O_CMD_ADAPTER_READ 0xB2 +#define I2O_CMD_ADAPTER_RELEASE 0xB5 +#define I2O_CMD_BIOS_INFO_SET 0xA5 +#define I2O_CMD_BOOT_DEVICE_SET 0xA7 +#define I2O_CMD_CONFIG_VALIDATE 0xBB +#define I2O_CMD_CONN_SETUP 0xCA +#define I2O_CMD_DDM_DESTROY 0xB1 +#define I2O_CMD_DDM_ENABLE 0xD5 +#define I2O_CMD_DDM_QUIESCE 0xC7 +#define I2O_CMD_DDM_RESET 0xD9 
+#define I2O_CMD_DDM_SUSPEND 0xAF +#define I2O_CMD_DEVICE_ASSIGN 0xB7 +#define I2O_CMD_DEVICE_RELEASE 0xB9 +#define I2O_CMD_HRT_GET 0xA8 +#define I2O_CMD_ADAPTER_CLEAR 0xBE +#define I2O_CMD_ADAPTER_CONNECT 0xC9 +#define I2O_CMD_ADAPTER_RESET 0xBD +#define I2O_CMD_LCT_NOTIFY 0xA2 +#define I2O_CMD_OUTBOUND_INIT 0xA1 +#define I2O_CMD_PATH_ENABLE 0xD3 +#define I2O_CMD_PATH_QUIESCE 0xC5 +#define I2O_CMD_PATH_RESET 0xD7 +#define I2O_CMD_STATIC_MF_CREATE 0xDD +#define I2O_CMD_STATIC_MF_RELEASE 0xDF +#define I2O_CMD_STATUS_GET 0xA0 +#define I2O_CMD_SW_DOWNLOAD 0xA9 +#define I2O_CMD_SW_UPLOAD 0xAB +#define I2O_CMD_SW_REMOVE 0xAD +#define I2O_CMD_SYS_ENABLE 0xD1 +#define I2O_CMD_SYS_MODIFY 0xC1 +#define I2O_CMD_SYS_QUIESCE 0xC3 +#define I2O_CMD_SYS_TAB_SET 0xA3 /* - * Contains DMA mapped address information + * Utility Class */ -struct i2o_dma { - void *virt; - dma_addr_t phys; - size_t len; -}; +#define I2O_CMD_UTIL_NOP 0x00 +#define I2O_CMD_UTIL_ABORT 0x01 +#define I2O_CMD_UTIL_CLAIM 0x09 +#define I2O_CMD_UTIL_RELEASE 0x0B +#define I2O_CMD_UTIL_PARAMS_GET 0x06 +#define I2O_CMD_UTIL_PARAMS_SET 0x05 +#define I2O_CMD_UTIL_EVT_REGISTER 0x13 +#define I2O_CMD_UTIL_EVT_ACK 0x14 +#define I2O_CMD_UTIL_CONFIG_DIALOG 0x10 +#define I2O_CMD_UTIL_DEVICE_RESERVE 0x0D +#define I2O_CMD_UTIL_DEVICE_RELEASE 0x0F +#define I2O_CMD_UTIL_LOCK 0x17 +#define I2O_CMD_UTIL_LOCK_RELEASE 0x19 +#define I2O_CMD_UTIL_REPLY_FAULT_NOTIFY 0x15 /* - * Contains IO mapped address information + * SCSI Host Bus Adapter Class */ -struct i2o_io { - void __iomem *virt; - unsigned long phys; - unsigned long len; -}; +#define I2O_CMD_SCSI_EXEC 0x81 +#define I2O_CMD_SCSI_ABORT 0x83 +#define I2O_CMD_SCSI_BUSRESET 0x27 /* - * Context queue entry, used for 32-bit context on 64-bit systems + * Bus Adapter Class */ -struct i2o_context_list_element { - struct list_head list; - u32 context; - void *ptr; - unsigned long timestamp; -}; +#define I2O_CMD_BUS_ADAPTER_RESET 0x85 +#define I2O_CMD_BUS_RESET 0x87 +#define 
I2O_CMD_BUS_SCAN 0x89 +#define I2O_CMD_BUS_QUIESCE 0x8b /* - * Each I2O controller has one of these objects + * Random Block Storage Class */ -struct i2o_controller { - char name[16]; - int unit; - int type; +#define I2O_CMD_BLOCK_READ 0x30 +#define I2O_CMD_BLOCK_WRITE 0x31 +#define I2O_CMD_BLOCK_CFLUSH 0x37 +#define I2O_CMD_BLOCK_MLOCK 0x49 +#define I2O_CMD_BLOCK_MUNLOCK 0x4B +#define I2O_CMD_BLOCK_MMOUNT 0x41 +#define I2O_CMD_BLOCK_MEJECT 0x43 +#define I2O_CMD_BLOCK_POWER 0x70 - struct pci_dev *pdev; /* PCI device */ +#define I2O_CMD_PRIVATE 0xFF - unsigned int promise:1; /* Promise controller */ - unsigned int adaptec:1; /* DPT / Adaptec controller */ - unsigned int raptor:1; /* split bar */ - unsigned int no_quiesce:1; /* dont quiesce before reset */ - unsigned int short_req:1; /* use small block sizes */ - unsigned int limit_sectors:1; /* limit number of sectors / request */ - unsigned int pae_support:1; /* controller has 64-bit SGL support */ +/* Command status values */ - struct list_head devices; /* list of I2O devices */ - struct list_head list; /* Controller list */ +#define I2O_CMD_IN_PROGRESS 0x01 +#define I2O_CMD_REJECTED 0x02 +#define I2O_CMD_FAILED 0x03 +#define I2O_CMD_COMPLETED 0x04 - void __iomem *in_port; /* Inbout port address */ - void __iomem *out_port; /* Outbound port address */ - void __iomem *irq_status; /* Interrupt status register address */ - void __iomem *irq_mask; /* Interrupt mask register address */ +/* I2O API function return values */ - /* Dynamic LCT related data */ +#define I2O_RTN_NO_ERROR 0 +#define I2O_RTN_NOT_INIT 1 +#define I2O_RTN_FREE_Q_EMPTY 2 +#define I2O_RTN_TCB_ERROR 3 +#define I2O_RTN_TRANSACTION_ERROR 4 +#define I2O_RTN_ADAPTER_ALREADY_INIT 5 +#define I2O_RTN_MALLOC_ERROR 6 +#define I2O_RTN_ADPTR_NOT_REGISTERED 7 +#define I2O_RTN_MSG_REPLY_TIMEOUT 8 +#define I2O_RTN_NO_STATUS 9 +#define I2O_RTN_NO_FIRM_VER 10 +#define I2O_RTN_NO_LINK_SPEED 11 - struct i2o_dma status; /* IOP status block */ +/* Reply message status 
defines for all messages */ - struct i2o_dma hrt; /* HW Resource Table */ - i2o_lct *lct; /* Logical Config Table */ - struct i2o_dma dlct; /* Temp LCT */ - struct semaphore lct_lock; /* Lock for LCT updates */ - struct i2o_dma status_block; /* IOP status block */ +#define I2O_REPLY_STATUS_SUCCESS 0x00 +#define I2O_REPLY_STATUS_ABORT_DIRTY 0x01 +#define I2O_REPLY_STATUS_ABORT_NO_DATA_TRANSFER 0x02 +#define I2O_REPLY_STATUS_ABORT_PARTIAL_TRANSFER 0x03 +#define I2O_REPLY_STATUS_ERROR_DIRTY 0x04 +#define I2O_REPLY_STATUS_ERROR_NO_DATA_TRANSFER 0x05 +#define I2O_REPLY_STATUS_ERROR_PARTIAL_TRANSFER 0x06 +#define I2O_REPLY_STATUS_PROCESS_ABORT_DIRTY 0x08 +#define I2O_REPLY_STATUS_PROCESS_ABORT_NO_DATA_TRANSFER 0x09 +#define I2O_REPLY_STATUS_PROCESS_ABORT_PARTIAL_TRANSFER 0x0A +#define I2O_REPLY_STATUS_TRANSACTION_ERROR 0x0B +#define I2O_REPLY_STATUS_PROGRESS_REPORT 0x80 - struct i2o_io base; /* controller messaging unit */ - struct i2o_io in_queue; /* inbound message queue Host->IOP */ - struct i2o_dma out_queue; /* outbound message queue IOP->Host */ +/* Status codes and Error Information for Parameter functions */ - unsigned int battery:1; /* Has a battery backup */ - unsigned int io_alloc:1; /* An I/O resource was allocated */ - unsigned int mem_alloc:1; /* A memory resource was allocated */ +#define I2O_PARAMS_STATUS_SUCCESS 0x00 +#define I2O_PARAMS_STATUS_BAD_KEY_ABORT 0x01 +#define I2O_PARAMS_STATUS_BAD_KEY_CONTINUE 0x02 +#define I2O_PARAMS_STATUS_BUFFER_FULL 0x03 +#define I2O_PARAMS_STATUS_BUFFER_TOO_SMALL 0x04 +#define I2O_PARAMS_STATUS_FIELD_UNREADABLE 0x05 +#define I2O_PARAMS_STATUS_FIELD_UNWRITEABLE 0x06 +#define I2O_PARAMS_STATUS_INSUFFICIENT_FIELDS 0x07 +#define I2O_PARAMS_STATUS_INVALID_GROUP_ID 0x08 +#define I2O_PARAMS_STATUS_INVALID_OPERATION 0x09 +#define I2O_PARAMS_STATUS_NO_KEY_FIELD 0x0A +#define I2O_PARAMS_STATUS_NO_SUCH_FIELD 0x0B +#define I2O_PARAMS_STATUS_NON_DYNAMIC_GROUP 0x0C +#define I2O_PARAMS_STATUS_OPERATION_ERROR 0x0D +#define 
I2O_PARAMS_STATUS_SCALAR_ERROR 0x0E +#define I2O_PARAMS_STATUS_TABLE_ERROR 0x0F +#define I2O_PARAMS_STATUS_WRONG_GROUP_TYPE 0x10 - struct resource io_resource; /* I/O resource allocated to the IOP */ - struct resource mem_resource; /* Mem resource allocated to the IOP */ +/* DetailedStatusCode defines for Executive, DDM, Util and Transaction error + * messages: Table 3-2 Detailed Status Codes.*/ - struct device device; - struct class_device *classdev; /* I2O controller class device */ - struct i2o_device *exec; /* Executive */ -#if BITS_PER_LONG == 64 - spinlock_t context_list_lock; /* lock for context_list */ - atomic_t context_list_counter; /* needed for unique contexts */ - struct list_head context_list; /* list of context id's - and pointers */ -#endif - spinlock_t lock; /* lock for controller - configuration */ +#define I2O_DSC_SUCCESS 0x0000 +#define I2O_DSC_BAD_KEY 0x0002 +#define I2O_DSC_TCL_ERROR 0x0003 +#define I2O_DSC_REPLY_BUFFER_FULL 0x0004 +#define I2O_DSC_NO_SUCH_PAGE 0x0005 +#define I2O_DSC_INSUFFICIENT_RESOURCE_SOFT 0x0006 +#define I2O_DSC_INSUFFICIENT_RESOURCE_HARD 0x0007 +#define I2O_DSC_CHAIN_BUFFER_TOO_LARGE 0x0009 +#define I2O_DSC_UNSUPPORTED_FUNCTION 0x000A +#define I2O_DSC_DEVICE_LOCKED 0x000B +#define I2O_DSC_DEVICE_RESET 0x000C +#define I2O_DSC_INAPPROPRIATE_FUNCTION 0x000D +#define I2O_DSC_INVALID_INITIATOR_ADDRESS 0x000E +#define I2O_DSC_INVALID_MESSAGE_FLAGS 0x000F +#define I2O_DSC_INVALID_OFFSET 0x0010 +#define I2O_DSC_INVALID_PARAMETER 0x0011 +#define I2O_DSC_INVALID_REQUEST 0x0012 +#define I2O_DSC_INVALID_TARGET_ADDRESS 0x0013 +#define I2O_DSC_MESSAGE_TOO_LARGE 0x0014 +#define I2O_DSC_MESSAGE_TOO_SMALL 0x0015 +#define I2O_DSC_MISSING_PARAMETER 0x0016 +#define I2O_DSC_TIMEOUT 0x0017 +#define I2O_DSC_UNKNOWN_ERROR 0x0018 +#define I2O_DSC_UNKNOWN_FUNCTION 0x0019 +#define I2O_DSC_UNSUPPORTED_VERSION 0x001A +#define I2O_DSC_DEVICE_BUSY 0x001B +#define I2O_DSC_DEVICE_NOT_AVAILABLE 0x001C - void *driver_data[I2O_MAX_DRIVERS]; /* storage for 
drivers */ -}; +/* DetailedStatusCode defines for Block Storage Operation: Table 6-7 Detailed + Status Codes.*/ -/* - * I2O System table entry - * - * The system table contains information about all the IOPs in the - * system. It is sent to all IOPs so that they can create peer2peer - * connections between them. - */ -struct i2o_sys_tbl_entry { - u16 org_id; - u16 reserved1; - u32 iop_id:12; - u32 reserved2:20; - u16 seg_num:12; - u16 i2o_version:4; - u8 iop_state; - u8 msg_type; - u16 frame_size; - u16 reserved3; - u32 last_changed; - u32 iop_capabilities; - u32 inbound_low; - u32 inbound_high; -}; +#define I2O_BSA_DSC_SUCCESS 0x0000 +#define I2O_BSA_DSC_MEDIA_ERROR 0x0001 +#define I2O_BSA_DSC_ACCESS_ERROR 0x0002 +#define I2O_BSA_DSC_DEVICE_FAILURE 0x0003 +#define I2O_BSA_DSC_DEVICE_NOT_READY 0x0004 +#define I2O_BSA_DSC_MEDIA_NOT_PRESENT 0x0005 +#define I2O_BSA_DSC_MEDIA_LOCKED 0x0006 +#define I2O_BSA_DSC_MEDIA_FAILURE 0x0007 +#define I2O_BSA_DSC_PROTOCOL_FAILURE 0x0008 +#define I2O_BSA_DSC_BUS_FAILURE 0x0009 +#define I2O_BSA_DSC_ACCESS_VIOLATION 0x000A +#define I2O_BSA_DSC_WRITE_PROTECTED 0x000B +#define I2O_BSA_DSC_DEVICE_RESET 0x000C +#define I2O_BSA_DSC_VOLUME_CHANGED 0x000D +#define I2O_BSA_DSC_TIMEOUT 0x000E -struct i2o_sys_tbl { - u8 num_entries; - u8 version; - u16 reserved1; - u32 change_ind; - u32 reserved2; - u32 reserved3; - struct i2o_sys_tbl_entry iops[0]; -}; +/* FailureStatusCodes, Table 3-3 Message Failure Codes */ -extern struct list_head i2o_controllers; +#define I2O_FSC_TRANSPORT_SERVICE_SUSPENDED 0x81 +#define I2O_FSC_TRANSPORT_SERVICE_TERMINATED 0x82 +#define I2O_FSC_TRANSPORT_CONGESTION 0x83 +#define I2O_FSC_TRANSPORT_FAILURE 0x84 +#define I2O_FSC_TRANSPORT_STATE_ERROR 0x85 +#define I2O_FSC_TRANSPORT_TIME_OUT 0x86 +#define I2O_FSC_TRANSPORT_ROUTING_FAILURE 0x87 +#define I2O_FSC_TRANSPORT_INVALID_VERSION 0x88 +#define I2O_FSC_TRANSPORT_INVALID_OFFSET 0x89 +#define I2O_FSC_TRANSPORT_INVALID_MSG_FLAGS 0x8A +#define 
I2O_FSC_TRANSPORT_FRAME_TOO_SMALL 0x8B +#define I2O_FSC_TRANSPORT_FRAME_TOO_LARGE 0x8C +#define I2O_FSC_TRANSPORT_INVALID_TARGET_ID 0x8D +#define I2O_FSC_TRANSPORT_INVALID_INITIATOR_ID 0x8E +#define I2O_FSC_TRANSPORT_INVALID_INITIATOR_CONTEXT 0x8F +#define I2O_FSC_TRANSPORT_UNKNOWN_FAILURE 0xFF -/* Message functions */ -static inline u32 i2o_msg_get(struct i2o_controller *, - struct i2o_message __iomem **); -extern u32 i2o_msg_get_wait(struct i2o_controller *, - struct i2o_message __iomem **, int); -static inline void i2o_msg_post(struct i2o_controller *, u32); -static inline int i2o_msg_post_wait(struct i2o_controller *, u32, - unsigned long); -extern int i2o_msg_post_wait_mem(struct i2o_controller *, u32, unsigned long, - struct i2o_dma *); -extern void i2o_msg_nop(struct i2o_controller *, u32); -static inline void i2o_flush_reply(struct i2o_controller *, u32); +/* Device Claim Types */ +#define I2O_CLAIM_PRIMARY 0x01000000 +#define I2O_CLAIM_MANAGEMENT 0x02000000 +#define I2O_CLAIM_AUTHORIZED 0x03000000 +#define I2O_CLAIM_SECONDARY 0x04000000 -/* IOP functions */ -extern int i2o_status_get(struct i2o_controller *); +/* Message header defines for VersionOffset */ +#define I2OVER15 0x0001 +#define I2OVER20 0x0002 -extern int i2o_event_register(struct i2o_device *, struct i2o_driver *, int, - u32); -extern struct i2o_device *i2o_iop_find_device(struct i2o_controller *, u16); -extern struct i2o_controller *i2o_find_iop(int); +/* Default is 1.5 */ +#define I2OVERSION I2OVER15 -/* Functions needed for handling 64-bit pointers in 32-bit context */ -#if BITS_PER_LONG == 64 -extern u32 i2o_cntxt_list_add(struct i2o_controller *, void *); -extern void *i2o_cntxt_list_get(struct i2o_controller *, u32); -extern u32 i2o_cntxt_list_remove(struct i2o_controller *, void *); -extern u32 i2o_cntxt_list_get_ptr(struct i2o_controller *, void *); +#define SGL_OFFSET_0 I2OVERSION +#define SGL_OFFSET_4 (0x0040 | I2OVERSION) +#define SGL_OFFSET_5 (0x0050 | I2OVERSION) +#define 
SGL_OFFSET_6 (0x0060 | I2OVERSION) +#define SGL_OFFSET_7 (0x0070 | I2OVERSION) +#define SGL_OFFSET_8 (0x0080 | I2OVERSION) +#define SGL_OFFSET_9 (0x0090 | I2OVERSION) +#define SGL_OFFSET_10 (0x00A0 | I2OVERSION) +#define SGL_OFFSET_11 (0x00B0 | I2OVERSION) +#define SGL_OFFSET_12 (0x00C0 | I2OVERSION) +#define SGL_OFFSET(x) (((x)<<4) | I2OVERSION) -static inline u32 i2o_ptr_low(void *ptr) -{ - return (u32) (u64) ptr; -}; +/* Transaction Reply Lists (TRL) Control Word structure */ +#define TRL_SINGLE_FIXED_LENGTH 0x00 +#define TRL_SINGLE_VARIABLE_LENGTH 0x40 +#define TRL_MULTIPLE_FIXED_LENGTH 0x80 -static inline u32 i2o_ptr_high(void *ptr) -{ - return (u32) ((u64) ptr >> 32); -}; + /* msg header defines for MsgFlags */ +#define MSG_STATIC 0x0100 +#define MSG_64BIT_CNTXT 0x0200 +#define MSG_MULTI_TRANS 0x1000 +#define MSG_FAIL 0x2000 +#define MSG_FINAL 0x4000 +#define MSG_REPLY 0x8000 -static inline u32 i2o_dma_low(dma_addr_t dma_addr) -{ - return (u32) (u64) dma_addr; -}; + /* minimum size msg */ +#define THREE_WORD_MSG_SIZE 0x00030000 +#define FOUR_WORD_MSG_SIZE 0x00040000 +#define FIVE_WORD_MSG_SIZE 0x00050000 +#define SIX_WORD_MSG_SIZE 0x00060000 +#define SEVEN_WORD_MSG_SIZE 0x00070000 +#define EIGHT_WORD_MSG_SIZE 0x00080000 +#define NINE_WORD_MSG_SIZE 0x00090000 +#define TEN_WORD_MSG_SIZE 0x000A0000 +#define ELEVEN_WORD_MSG_SIZE 0x000B0000 +#define I2O_MESSAGE_SIZE(x) ((x)<<16) -static inline u32 i2o_dma_high(dma_addr_t dma_addr) -{ - return (u32) ((u64) dma_addr >> 32); -}; -#else -static inline u32 i2o_cntxt_list_add(struct i2o_controller *c, void *ptr) -{ - return (u32) ptr; -}; +/* special TID assignments */ +#define ADAPTER_TID 0 +#define HOST_TID 1 -static inline void *i2o_cntxt_list_get(struct i2o_controller *c, u32 context) -{ - return (void *)context; -}; +/* outbound queue defines */ +#define I2O_MAX_OUTBOUND_MSG_FRAMES 128 +#define I2O_OUTBOUND_MSG_FRAME_SIZE 128 /* in 32-bit words */ -static inline u32 i2o_cntxt_list_remove(struct i2o_controller *c, 
void *ptr) -{ - return (u32) ptr; -}; +/* inbound queue definitions */ +#define I2O_MSG_INPOOL_MIN 32 +#define I2O_INBOUND_MSG_FRAME_SIZE 128 /* in 32-bit words */ -static inline u32 i2o_cntxt_list_get_ptr(struct i2o_controller *c, void *ptr) -{ - return (u32) ptr; -}; +#define I2O_POST_WAIT_OK 0 +#define I2O_POST_WAIT_TIMEOUT -ETIMEDOUT -static inline u32 i2o_ptr_low(void *ptr) -{ - return (u32) ptr; -}; +#define I2O_CONTEXT_LIST_MIN_LENGTH 15 +#define I2O_CONTEXT_LIST_USED 0x01 +#define I2O_CONTEXT_LIST_DELETED 0x02 -static inline u32 i2o_ptr_high(void *ptr) -{ - return 0; -}; +/* timeouts */ +#define I2O_TIMEOUT_INIT_OUTBOUND_QUEUE 15 +#define I2O_TIMEOUT_MESSAGE_GET 5 +#define I2O_TIMEOUT_RESET 30 +#define I2O_TIMEOUT_STATUS_GET 5 +#define I2O_TIMEOUT_LCT_GET 360 +#define I2O_TIMEOUT_SCSI_SCB_ABORT 240 -static inline u32 i2o_dma_low(dma_addr_t dma_addr) -{ - return (u32) dma_addr; -}; +/* retries */ +#define I2O_HRT_GET_TRIES 3 +#define I2O_LCT_GET_TRIES 3 -static inline u32 i2o_dma_high(dma_addr_t dma_addr) -{ - return 0; -}; -#endif +/* defines for max_sectors and max_phys_segments */ +#define I2O_MAX_SECTORS 1024 +#define I2O_MAX_SECTORS_LIMITED 256 +#define I2O_MAX_PHYS_SEGMENTS MAX_PHYS_SEGMENTS -/** - * i2o_sg_tablesize - Calculate the maximum number of elements in a SGL - * @c: I2O controller for which the calculation should be done - * @body_size: maximum body size used for message in 32-bit words. - * - * Return the maximum number of SG elements in a SG list. +/* + * Message structures */ -static inline u16 i2o_sg_tablesize(struct i2o_controller *c, u16 body_size) -{ - i2o_status_block *sb = c->status_block.virt; - u16 sg_count = - (sb->inbound_frame_size - sizeof(struct i2o_message) / 4) - - body_size; - - if (c->pae_support) { - /* - * for 64-bit a SG attribute element must be added and each - * SG element needs 12 bytes instead of 8. 
- */ - sg_count -= 2; - sg_count /= 3; - } else - sg_count /= 2; - - if (c->short_req && (sg_count > 8)) - sg_count = 8; +struct i2o_message { + union { + struct { + u8 version_offset; + u8 flags; + u16 size; + u32 target_tid:12; + u32 init_tid:12; + u32 function:8; + u32 icntxt; /* initiator context */ + u32 tcntxt; /* transaction context */ + } s; + u32 head[4]; + } u; + /* List follows */ + u32 body[0]; +}; - return sg_count; +/* MFA and I2O message used by mempool */ +struct i2o_msg_mfa { + u32 mfa; /* MFA returned by the controller */ + struct i2o_message msg; /* I2O message */ }; -/** - * i2o_dma_map_single - Map pointer to controller and fill in I2O message. - * @c: I2O controller - * @ptr: pointer to the data which should be mapped - * @size: size of data in bytes - * @direction: DMA_TO_DEVICE / DMA_FROM_DEVICE - * @sg_ptr: pointer to the SG list inside the I2O message - * - * This function does all necessary DMA handling and also writes the I2O - * SGL elements into the I2O message. For details on DMA handling see also - * dma_map_single(). The pointer sg_ptr will only be set to the end of the - * SG list if the allocation was successful. - * - * Returns DMA address which must be checked for failures using - * dma_mapping_error(). +/* + * Each I2O device entity has one of these. There is one per device. 
*/ -static inline dma_addr_t i2o_dma_map_single(struct i2o_controller *c, void *ptr, - size_t size, - enum dma_data_direction direction, - u32 __iomem ** sg_ptr) -{ - u32 sg_flags; - u32 __iomem *mptr = *sg_ptr; - dma_addr_t dma_addr; +struct i2o_device { + i2o_lct_entry lct_data; /* Device LCT information */ - switch (direction) { - case DMA_TO_DEVICE: - sg_flags = 0xd4000000; - break; - case DMA_FROM_DEVICE: - sg_flags = 0xd0000000; - break; - default: - return 0; - } + struct i2o_controller *iop; /* Controlling IOP */ + struct list_head list; /* node in IOP devices list */ - dma_addr = dma_map_single(&c->pdev->dev, ptr, size, direction); - if (!dma_mapping_error(dma_addr)) { -#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64 - if ((sizeof(dma_addr_t) > 4) && c->pae_support) { - writel(0x7C020002, mptr++); - writel(PAGE_SIZE, mptr++); - } -#endif + struct device device; - writel(sg_flags | size, mptr++); - writel(i2o_dma_low(dma_addr), mptr++); -#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64 - if ((sizeof(dma_addr_t) > 4) && c->pae_support) - writel(i2o_dma_high(dma_addr), mptr++); -#endif - *sg_ptr = mptr; - } - return dma_addr; + struct semaphore lock; /* device lock */ }; -/** - * i2o_dma_map_sg - Map a SG List to controller and fill in I2O message. - * @c: I2O controller - * @sg: SG list to be mapped - * @sg_count: number of elements in the SG list - * @direction: DMA_TO_DEVICE / DMA_FROM_DEVICE - * @sg_ptr: pointer to the SG list inside the I2O message - * - * This function does all necessary DMA handling and also writes the I2O - * SGL elements into the I2O message. For details on DMA handling see also - * dma_map_sg(). The pointer sg_ptr will only be set to the end of the SG - * list if the allocation was successful. - * - * Returns 0 on failure or 1 on success. 
+/* + * Event structure provided to the event handling function */ -static inline int i2o_dma_map_sg(struct i2o_controller *c, - struct scatterlist *sg, int sg_count, - enum dma_data_direction direction, - u32 __iomem ** sg_ptr) -{ - u32 sg_flags; - u32 __iomem *mptr = *sg_ptr; - - switch (direction) { - case DMA_TO_DEVICE: - sg_flags = 0x14000000; - break; - case DMA_FROM_DEVICE: - sg_flags = 0x10000000; - break; - default: - return 0; - } - - sg_count = dma_map_sg(&c->pdev->dev, sg, sg_count, direction); - if (!sg_count) - return 0; - -#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64 - if ((sizeof(dma_addr_t) > 4) && c->pae_support) { - writel(0x7C020002, mptr++); - writel(PAGE_SIZE, mptr++); - } -#endif - - while (sg_count-- > 0) { - if (!sg_count) - sg_flags |= 0xC0000000; - writel(sg_flags | sg_dma_len(sg), mptr++); - writel(i2o_dma_low(sg_dma_address(sg)), mptr++); -#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64 - if ((sizeof(dma_addr_t) > 4) && c->pae_support) - writel(i2o_dma_high(sg_dma_address(sg)), mptr++); -#endif - sg++; - } - *sg_ptr = mptr; +struct i2o_event { + struct work_struct work; + struct i2o_device *i2o_dev; /* I2O device pointer from which the + event reply was initiated */ + u16 size; /* Size of data in 32-bit words */ + u32 tcntxt; /* Transaction context used at + registration */ + u32 event_indicator; /* Event indicator from reply */ + u32 data[0]; /* Event data from reply */ +}; - return 1; +/* + * I2O classes which could be handled by the OSM + */ +struct i2o_class_id { + u16 class_id:12; }; -/** - * i2o_dma_alloc - Allocate DMA memory - * @dev: struct device pointer to the PCI device of the I2O controller - * @addr: i2o_dma struct which should get the DMA buffer - * @len: length of the new DMA memory - * @gfp_mask: GFP mask - * - * Allocate a coherent DMA memory and write the pointers into addr. - * - * Returns 0 on success or -ENOMEM on failure. 
+/* + * I2O driver structure for OSMs */ -static inline int i2o_dma_alloc(struct device *dev, struct i2o_dma *addr, - size_t len, gfp_t gfp_mask) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int dma_64 = 0; +struct i2o_driver { + char *name; /* OSM name */ + int context; /* Low 8 bits of the transaction info */ + struct i2o_class_id *classes; /* I2O classes that this OSM handles */ - if ((sizeof(dma_addr_t) > 4) && (pdev->dma_mask == DMA_64BIT_MASK)) { - dma_64 = 1; - if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) - return -ENOMEM; - } + /* Message reply handler */ + int (*reply) (struct i2o_controller *, u32, struct i2o_message *); - addr->virt = dma_alloc_coherent(dev, len, &addr->phys, gfp_mask); + /* Event handler */ + void (*event) (struct i2o_event *); - if ((sizeof(dma_addr_t) > 4) && dma_64) - if (pci_set_dma_mask(pdev, DMA_64BIT_MASK)) - printk(KERN_WARNING "i2o: unable to set 64-bit DMA"); + struct workqueue_struct *event_queue; /* Event queue */ - if (!addr->virt) - return -ENOMEM; + struct device_driver driver; - memset(addr->virt, 0, len); - addr->len = len; + /* notification of changes */ + void (*notify_controller_add) (struct i2o_controller *); + void (*notify_controller_remove) (struct i2o_controller *); + void (*notify_device_add) (struct i2o_device *); + void (*notify_device_remove) (struct i2o_device *); - return 0; + struct semaphore lock; }; -/** - * i2o_dma_free - Free DMA memory - * @dev: struct device pointer to the PCI device of the I2O controller - * @addr: i2o_dma struct which contains the DMA buffer - * - * Free a coherent DMA memory and set virtual address of addr to NULL. 
+/* + * Contains DMA mapped address information */ -static inline void i2o_dma_free(struct device *dev, struct i2o_dma *addr) -{ - if (addr->virt) { - if (addr->phys) - dma_free_coherent(dev, addr->len, addr->virt, - addr->phys); - else - kfree(addr->virt); - addr->virt = NULL; - } +struct i2o_dma { + void *virt; + dma_addr_t phys; + size_t len; }; -/** - * i2o_dma_realloc - Realloc DMA memory - * @dev: struct device pointer to the PCI device of the I2O controller - * @addr: pointer to a i2o_dma struct DMA buffer - * @len: new length of memory - * @gfp_mask: GFP mask - * - * If there was something allocated in the addr, free it first. If len > 0 - * than try to allocate it and write the addresses back to the addr - * structure. If len == 0 set the virtual address to NULL. - * - * Returns the 0 on success or negative error code on failure. +/* + * Contains slab cache and mempool information */ -static inline int i2o_dma_realloc(struct device *dev, struct i2o_dma *addr, - size_t len, gfp_t gfp_mask) -{ - i2o_dma_free(dev, addr); - - if (len) - return i2o_dma_alloc(dev, addr, len, gfp_mask); - - return 0; +struct i2o_pool { + char *name; + kmem_cache_t *slab; + mempool_t *mempool; }; -/* I2O driver (OSM) functions */ -extern int i2o_driver_register(struct i2o_driver *); -extern void i2o_driver_unregister(struct i2o_driver *); - -/** - * i2o_driver_notify_controller_add - Send notification of added controller - * to a single I2O driver - * - * Send notification of added controller to a single registered driver. 
+/* + * Contains IO mapped address information */ -static inline void i2o_driver_notify_controller_add(struct i2o_driver *drv, - struct i2o_controller *c) -{ - if (drv->notify_controller_add) - drv->notify_controller_add(c); +struct i2o_io { + void __iomem *virt; + unsigned long phys; + unsigned long len; }; -/** - * i2o_driver_notify_controller_remove - Send notification of removed - * controller to a single I2O driver - * - * Send notification of removed controller to a single registered driver. +/* + * Context queue entry, used for 32-bit context on 64-bit systems */ -static inline void i2o_driver_notify_controller_remove(struct i2o_driver *drv, - struct i2o_controller *c) -{ - if (drv->notify_controller_remove) - drv->notify_controller_remove(c); +struct i2o_context_list_element { + struct list_head list; + u32 context; + void *ptr; + unsigned long timestamp; }; -/** - * i2o_driver_notify_device_add - Send notification of added device to a - * single I2O driver - * - * Send notification of added device to a single registered driver. 
+/* + * Each I2O controller has one of these objects */ -static inline void i2o_driver_notify_device_add(struct i2o_driver *drv, - struct i2o_device *i2o_dev) -{ - if (drv->notify_device_add) - drv->notify_device_add(i2o_dev); +struct i2o_controller { + char name[16]; + int unit; + int type; + + struct pci_dev *pdev; /* PCI device */ + + unsigned int promise:1; /* Promise controller */ + unsigned int adaptec:1; /* DPT / Adaptec controller */ + unsigned int raptor:1; /* split bar */ + unsigned int no_quiesce:1; /* dont quiesce before reset */ + unsigned int short_req:1; /* use small block sizes */ + unsigned int limit_sectors:1; /* limit number of sectors / request */ + unsigned int pae_support:1; /* controller has 64-bit SGL support */ + + struct list_head devices; /* list of I2O devices */ + struct list_head list; /* Controller list */ + + void __iomem *in_port; /* Inbout port address */ + void __iomem *out_port; /* Outbound port address */ + void __iomem *irq_status; /* Interrupt status register address */ + void __iomem *irq_mask; /* Interrupt mask register address */ + + struct i2o_dma status; /* IOP status block */ + + struct i2o_dma hrt; /* HW Resource Table */ + i2o_lct *lct; /* Logical Config Table */ + struct i2o_dma dlct; /* Temp LCT */ + struct semaphore lct_lock; /* Lock for LCT updates */ + struct i2o_dma status_block; /* IOP status block */ + + struct i2o_io base; /* controller messaging unit */ + struct i2o_io in_queue; /* inbound message queue Host->IOP */ + struct i2o_dma out_queue; /* outbound message queue IOP->Host */ + + struct i2o_pool in_msg; /* mempool for inbound messages */ + + unsigned int battery:1; /* Has a battery backup */ + unsigned int io_alloc:1; /* An I/O resource was allocated */ + unsigned int mem_alloc:1; /* A memory resource was allocated */ + + struct resource io_resource; /* I/O resource allocated to the IOP */ + struct resource mem_resource; /* Mem resource allocated to the IOP */ + + struct device device; + struct 
class_device *classdev; /* I2O controller class device */ + struct i2o_device *exec; /* Executive */ +#if BITS_PER_LONG == 64 + spinlock_t context_list_lock; /* lock for context_list */ + atomic_t context_list_counter; /* needed for unique contexts */ + struct list_head context_list; /* list of context id's + and pointers */ +#endif + spinlock_t lock; /* lock for controller + configuration */ + + void *driver_data[I2O_MAX_DRIVERS]; /* storage for drivers */ }; -/** - * i2o_driver_notify_device_remove - Send notification of removed device - * to a single I2O driver +/* + * I2O System table entry * - * Send notification of removed device to a single registered driver. + * The system table contains information about all the IOPs in the + * system. It is sent to all IOPs so that they can create peer2peer + * connections between them. */ -static inline void i2o_driver_notify_device_remove(struct i2o_driver *drv, - struct i2o_device *i2o_dev) -{ - if (drv->notify_device_remove) - drv->notify_device_remove(i2o_dev); +struct i2o_sys_tbl_entry { + u16 org_id; + u16 reserved1; + u32 iop_id:12; + u32 reserved2:20; + u16 seg_num:12; + u16 i2o_version:4; + u8 iop_state; + u8 msg_type; + u16 frame_size; + u16 reserved3; + u32 last_changed; + u32 iop_capabilities; + u32 inbound_low; + u32 inbound_high; }; -extern void i2o_driver_notify_controller_add_all(struct i2o_controller *); -extern void i2o_driver_notify_controller_remove_all(struct i2o_controller *); -extern void i2o_driver_notify_device_add_all(struct i2o_device *); -extern void i2o_driver_notify_device_remove_all(struct i2o_device *); +struct i2o_sys_tbl { + u8 num_entries; + u8 version; + u16 reserved1; + u32 change_ind; + u32 reserved2; + u32 reserved3; + struct i2o_sys_tbl_entry iops[0]; +}; -/* I2O device functions */ -extern int i2o_device_claim(struct i2o_device *); -extern int i2o_device_claim_release(struct i2o_device *); +extern struct list_head i2o_controllers; -/* Exec OSM functions */ -extern int 
i2o_exec_lct_get(struct i2o_controller *); +/* Message functions */ +static inline struct i2o_message *i2o_msg_get(struct i2o_controller *); +extern struct i2o_message *i2o_msg_get_wait(struct i2o_controller *, int); +static inline void i2o_msg_post(struct i2o_controller *, struct i2o_message *); +static inline int i2o_msg_post_wait(struct i2o_controller *, + struct i2o_message *, unsigned long); +extern int i2o_msg_post_wait_mem(struct i2o_controller *, struct i2o_message *, + unsigned long, struct i2o_dma *); +static inline void i2o_flush_reply(struct i2o_controller *, u32); -/* device / driver / kobject conversion functions */ -#define to_i2o_driver(drv) container_of(drv,struct i2o_driver, driver) -#define to_i2o_device(dev) container_of(dev, struct i2o_device, device) -#define to_i2o_controller(dev) container_of(dev, struct i2o_controller, device) -#define kobj_to_i2o_device(kobj) to_i2o_device(container_of(kobj, struct device, kobj)) +/* IOP functions */ +extern int i2o_status_get(struct i2o_controller *); -/** - * i2o_msg_get - obtain an I2O message from the IOP - * @c: I2O controller - * @msg: pointer to a I2O message pointer - * - * This function tries to get a message slot. If no message slot is - * available do not wait until one is availabe (see also i2o_msg_get_wait). - * - * On a success the message is returned and the pointer to the message is - * set in msg. The returned message is the physical page frame offset - * address from the read port (see the i2o spec). If no message is - * available returns I2O_QUEUE_EMPTY and msg is leaved untouched. 
- */ -static inline u32 i2o_msg_get(struct i2o_controller *c, - struct i2o_message __iomem ** msg) -{ - u32 m = readl(c->in_port); +extern int i2o_event_register(struct i2o_device *, struct i2o_driver *, int, + u32); +extern struct i2o_device *i2o_iop_find_device(struct i2o_controller *, u16); +extern struct i2o_controller *i2o_find_iop(int); - if (m != I2O_QUEUE_EMPTY) - *msg = c->in_queue.virt + m; +/* Functions needed for handling 64-bit pointers in 32-bit context */ +#if BITS_PER_LONG == 64 +extern u32 i2o_cntxt_list_add(struct i2o_controller *, void *); +extern void *i2o_cntxt_list_get(struct i2o_controller *, u32); +extern u32 i2o_cntxt_list_remove(struct i2o_controller *, void *); +extern u32 i2o_cntxt_list_get_ptr(struct i2o_controller *, void *); - return m; +static inline u32 i2o_ptr_low(void *ptr) +{ + return (u32) (u64) ptr; }; -/** - * i2o_msg_post - Post I2O message to I2O controller - * @c: I2O controller to which the message should be send - * @m: the message identifier - * - * Post the message to the I2O controller. - */ -static inline void i2o_msg_post(struct i2o_controller *c, u32 m) +static inline u32 i2o_ptr_high(void *ptr) { - writel(m, c->in_port); + return (u32) ((u64) ptr >> 32); }; -/** - * i2o_msg_post_wait - Post and wait a message and wait until return - * @c: controller - * @m: message to post - * @timeout: time in seconds to wait - * - * This API allows an OSM to post a message and then be told whether or - * not the system received a successful reply. If the message times out - * then the value '-ETIMEDOUT' is returned. - * - * Returns 0 on success or negative error code on failure. 
- */ -static inline int i2o_msg_post_wait(struct i2o_controller *c, u32 m, - unsigned long timeout) +static inline u32 i2o_dma_low(dma_addr_t dma_addr) { - return i2o_msg_post_wait_mem(c, m, timeout, NULL); + return (u32) (u64) dma_addr; }; -/** - * i2o_flush_reply - Flush reply from I2O controller - * @c: I2O controller - * @m: the message identifier - * - * The I2O controller must be informed that the reply message is not needed - * anymore. If you forget to flush the reply, the message frame can't be - * used by the controller anymore and is therefore lost. - */ -static inline void i2o_flush_reply(struct i2o_controller *c, u32 m) +static inline u32 i2o_dma_high(dma_addr_t dma_addr) { - writel(m, c->out_port); + return (u32) ((u64) dma_addr >> 32); +}; +#else +static inline u32 i2o_cntxt_list_add(struct i2o_controller *c, void *ptr) +{ + return (u32) ptr; }; -/** - * i2o_out_to_virt - Turn an I2O message to a virtual address - * @c: controller - * @m: message engine value - * - * Turn a receive message from an I2O controller bus address into - * a Linux virtual address. The shared page frame is a linear block - * so we simply have to shift the offset. This function does not - * work for sender side messages as they are ioremap objects - * provided by the I2O controller. 
- */ -static inline struct i2o_message *i2o_msg_out_to_virt(struct i2o_controller *c, - u32 m) +static inline void *i2o_cntxt_list_get(struct i2o_controller *c, u32 context) { - BUG_ON(m < c->out_queue.phys - || m >= c->out_queue.phys + c->out_queue.len); + return (void *)context; +}; - return c->out_queue.virt + (m - c->out_queue.phys); +static inline u32 i2o_cntxt_list_remove(struct i2o_controller *c, void *ptr) +{ + return (u32) ptr; }; -/** - * i2o_msg_in_to_virt - Turn an I2O message to a virtual address - * @c: controller - * @m: message engine value +static inline u32 i2o_cntxt_list_get_ptr(struct i2o_controller *c, void *ptr) +{ + return (u32) ptr; +}; + +static inline u32 i2o_ptr_low(void *ptr) +{ + return (u32) ptr; +}; + +static inline u32 i2o_ptr_high(void *ptr) +{ + return 0; +}; + +static inline u32 i2o_dma_low(dma_addr_t dma_addr) +{ + return (u32) dma_addr; +}; + +static inline u32 i2o_dma_high(dma_addr_t dma_addr) +{ + return 0; +}; +#endif + +/** + * i2o_sg_tablesize - Calculate the maximum number of elements in a SGL + * @c: I2O controller for which the calculation should be done + * @body_size: maximum body size used for message in 32-bit words. * - * Turn a send message from an I2O controller bus address into - * a Linux virtual address. The shared page frame is a linear block - * so we simply have to shift the offset. This function does not - * work for receive side messages as they are kmalloc objects - * in a different pool. + * Return the maximum number of SG elements in a SG list. 
*/ -static inline struct i2o_message __iomem *i2o_msg_in_to_virt(struct - i2o_controller *c, - u32 m) +static inline u16 i2o_sg_tablesize(struct i2o_controller *c, u16 body_size) { - return c->in_queue.virt + m; + i2o_status_block *sb = c->status_block.virt; + u16 sg_count = + (sb->inbound_frame_size - sizeof(struct i2o_message) / 4) - + body_size; + + if (c->pae_support) { + /* + * for 64-bit a SG attribute element must be added and each + * SG element needs 12 bytes instead of 8. + */ + sg_count -= 2; + sg_count /= 3; + } else + sg_count /= 2; + + if (c->short_req && (sg_count > 8)) + sg_count = 8; + + return sg_count; }; -/* - * Endian handling wrapped into the macro - keeps the core code - * cleaner. +/** + * i2o_dma_map_single - Map pointer to controller and fill in I2O message. + * @c: I2O controller + * @ptr: pointer to the data which should be mapped + * @size: size of data in bytes + * @direction: DMA_TO_DEVICE / DMA_FROM_DEVICE + * @sg_ptr: pointer to the SG list inside the I2O message + * + * This function does all necessary DMA handling and also writes the I2O + * SGL elements into the I2O message. For details on DMA handling see also + * dma_map_single(). The pointer sg_ptr will only be set to the end of the + * SG list if the allocation was successful. + * + * Returns DMA address which must be checked for failures using + * dma_mapping_error(). 
*/ +static inline dma_addr_t i2o_dma_map_single(struct i2o_controller *c, void *ptr, + size_t size, + enum dma_data_direction direction, + u32 ** sg_ptr) +{ + u32 sg_flags; + u32 *mptr = *sg_ptr; + dma_addr_t dma_addr; -#define i2o_raw_writel(val, mem) __raw_writel(cpu_to_le32(val), mem) + switch (direction) { + case DMA_TO_DEVICE: + sg_flags = 0xd4000000; + break; + case DMA_FROM_DEVICE: + sg_flags = 0xd0000000; + break; + default: + return 0; + } -extern int i2o_parm_field_get(struct i2o_device *, int, int, void *, int); -extern int i2o_parm_table_get(struct i2o_device *, int, int, int, void *, int, - void *, int); + dma_addr = dma_map_single(&c->pdev->dev, ptr, size, direction); + if (!dma_mapping_error(dma_addr)) { +#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64 + if ((sizeof(dma_addr_t) > 4) && c->pae_support) { + *mptr++ = cpu_to_le32(0x7C020002); + *mptr++ = cpu_to_le32(PAGE_SIZE); + } +#endif -/* debugging and troubleshooting/diagnostic helpers. */ -#define osm_printk(level, format, arg...) \ - printk(level "%s: " format, OSM_NAME , ## arg) + *mptr++ = cpu_to_le32(sg_flags | size); + *mptr++ = cpu_to_le32(i2o_dma_low(dma_addr)); +#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64 + if ((sizeof(dma_addr_t) > 4) && c->pae_support) + *mptr++ = cpu_to_le32(i2o_dma_high(dma_addr)); +#endif + *sg_ptr = mptr; + } + return dma_addr; +}; -#ifdef DEBUG -#define osm_debug(format, arg...) \ - osm_printk(KERN_DEBUG, format , ## arg) -#else -#define osm_debug(format, arg...) \ - do { } while (0) +/** + * i2o_dma_map_sg - Map a SG List to controller and fill in I2O message. + * @c: I2O controller + * @sg: SG list to be mapped + * @sg_count: number of elements in the SG list + * @direction: DMA_TO_DEVICE / DMA_FROM_DEVICE + * @sg_ptr: pointer to the SG list inside the I2O message + * + * This function does all necessary DMA handling and also writes the I2O + * SGL elements into the I2O message. For details on DMA handling see also + * dma_map_sg(). 
The pointer sg_ptr will only be set to the end of the SG + * list if the allocation was successful. + * + * Returns 0 on failure or 1 on success. + */ +static inline int i2o_dma_map_sg(struct i2o_controller *c, + struct scatterlist *sg, int sg_count, + enum dma_data_direction direction, + u32 ** sg_ptr) +{ + u32 sg_flags; + u32 *mptr = *sg_ptr; + + switch (direction) { + case DMA_TO_DEVICE: + sg_flags = 0x14000000; + break; + case DMA_FROM_DEVICE: + sg_flags = 0x10000000; + break; + default: + return 0; + } + + sg_count = dma_map_sg(&c->pdev->dev, sg, sg_count, direction); + if (!sg_count) + return 0; + +#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64 + if ((sizeof(dma_addr_t) > 4) && c->pae_support) { + *mptr++ = cpu_to_le32(0x7C020002); + *mptr++ = cpu_to_le32(PAGE_SIZE); + } #endif -#define osm_err(format, arg...) \ - osm_printk(KERN_ERR, format , ## arg) -#define osm_info(format, arg...) \ - osm_printk(KERN_INFO, format , ## arg) -#define osm_warn(format, arg...) \ - osm_printk(KERN_WARNING, format , ## arg) + while (sg_count-- > 0) { + if (!sg_count) + sg_flags |= 0xC0000000; + *mptr++ = cpu_to_le32(sg_flags | sg_dma_len(sg)); + *mptr++ = cpu_to_le32(i2o_dma_low(sg_dma_address(sg))); +#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64 + if ((sizeof(dma_addr_t) > 4) && c->pae_support) + *mptr++ = cpu_to_le32(i2o_dma_high(sg_dma_address(sg))); +#endif + sg++; + } + *sg_ptr = mptr; -/* debugging functions */ -extern void i2o_report_status(const char *, const char *, struct i2o_message *); -extern void i2o_dump_message(struct i2o_message *); -extern void i2o_dump_hrt(struct i2o_controller *c); -extern void i2o_debug_state(struct i2o_controller *c); + return 1; +}; -/* - * Cache strategies +/** + * i2o_dma_alloc - Allocate DMA memory + * @dev: struct device pointer to the PCI device of the I2O controller + * @addr: i2o_dma struct which should get the DMA buffer + * @len: length of the new DMA memory + * @gfp_mask: GFP mask + * + * Allocate a coherent DMA memory and write the pointers into 
addr. + * + * Returns 0 on success or -ENOMEM on failure. */ +static inline int i2o_dma_alloc(struct device *dev, struct i2o_dma *addr, + size_t len, gfp_t gfp_mask) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int dma_64 = 0; -/* The NULL strategy leaves everything up to the controller. This tends to be a - * pessimal but functional choice. - */ -#define CACHE_NULL 0 -/* Prefetch data when reading. We continually attempt to load the next 32 sectors - * into the controller cache. - */ -#define CACHE_PREFETCH 1 -/* Prefetch data when reading. We sometimes attempt to load the next 32 sectors - * into the controller cache. When an I/O is less <= 8K we assume its probably - * not sequential and don't prefetch (default) - */ -#define CACHE_SMARTFETCH 2 -/* Data is written to the cache and then out on to the disk. The I/O must be - * physically on the medium before the write is acknowledged (default without - * NVRAM) - */ -#define CACHE_WRITETHROUGH 17 -/* Data is written to the cache and then out on to the disk. The controller - * is permitted to write back the cache any way it wants. (default if battery - * backed NVRAM is present). It can be useful to set this for swap regardless of - * battery state. - */ -#define CACHE_WRITEBACK 18 -/* Optimise for under powered controllers, especially on RAID1 and RAID0. We - * write large I/O's directly to disk bypassing the cache to avoid the extra - * memory copy hits. Small writes are writeback cached - */ -#define CACHE_SMARTBACK 19 -/* Optimise for under powered controllers, especially on RAID1 and RAID0. We - * write large I/O's directly to disk bypassing the cache to avoid the extra - * memory copy hits. Small writes are writethrough cached. 
Suitable for devices - * lacking battery backup - */ -#define CACHE_SMARTTHROUGH 20 + if ((sizeof(dma_addr_t) > 4) && (pdev->dma_mask == DMA_64BIT_MASK)) { + dma_64 = 1; + if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) + return -ENOMEM; + } -/* - * Ioctl structures - */ + addr->virt = dma_alloc_coherent(dev, len, &addr->phys, gfp_mask); -#define BLKI2OGRSTRAT _IOR('2', 1, int) -#define BLKI2OGWSTRAT _IOR('2', 2, int) -#define BLKI2OSRSTRAT _IOW('2', 3, int) -#define BLKI2OSWSTRAT _IOW('2', 4, int) + if ((sizeof(dma_addr_t) > 4) && dma_64) + if (pci_set_dma_mask(pdev, DMA_64BIT_MASK)) + printk(KERN_WARNING "i2o: unable to set 64-bit DMA"); -/* - * I2O Function codes - */ + if (!addr->virt) + return -ENOMEM; -/* - * Executive Class - */ -#define I2O_CMD_ADAPTER_ASSIGN 0xB3 -#define I2O_CMD_ADAPTER_READ 0xB2 -#define I2O_CMD_ADAPTER_RELEASE 0xB5 -#define I2O_CMD_BIOS_INFO_SET 0xA5 -#define I2O_CMD_BOOT_DEVICE_SET 0xA7 -#define I2O_CMD_CONFIG_VALIDATE 0xBB -#define I2O_CMD_CONN_SETUP 0xCA -#define I2O_CMD_DDM_DESTROY 0xB1 -#define I2O_CMD_DDM_ENABLE 0xD5 -#define I2O_CMD_DDM_QUIESCE 0xC7 -#define I2O_CMD_DDM_RESET 0xD9 -#define I2O_CMD_DDM_SUSPEND 0xAF -#define I2O_CMD_DEVICE_ASSIGN 0xB7 -#define I2O_CMD_DEVICE_RELEASE 0xB9 -#define I2O_CMD_HRT_GET 0xA8 -#define I2O_CMD_ADAPTER_CLEAR 0xBE -#define I2O_CMD_ADAPTER_CONNECT 0xC9 -#define I2O_CMD_ADAPTER_RESET 0xBD -#define I2O_CMD_LCT_NOTIFY 0xA2 -#define I2O_CMD_OUTBOUND_INIT 0xA1 -#define I2O_CMD_PATH_ENABLE 0xD3 -#define I2O_CMD_PATH_QUIESCE 0xC5 -#define I2O_CMD_PATH_RESET 0xD7 -#define I2O_CMD_STATIC_MF_CREATE 0xDD -#define I2O_CMD_STATIC_MF_RELEASE 0xDF -#define I2O_CMD_STATUS_GET 0xA0 -#define I2O_CMD_SW_DOWNLOAD 0xA9 -#define I2O_CMD_SW_UPLOAD 0xAB -#define I2O_CMD_SW_REMOVE 0xAD -#define I2O_CMD_SYS_ENABLE 0xD1 -#define I2O_CMD_SYS_MODIFY 0xC1 -#define I2O_CMD_SYS_QUIESCE 0xC3 -#define I2O_CMD_SYS_TAB_SET 0xA3 + memset(addr->virt, 0, len); + addr->len = len; -/* - * Utility Class + return 0; +}; + +/** + * 
i2o_dma_free - Free DMA memory + * @dev: struct device pointer to the PCI device of the I2O controller + * @addr: i2o_dma struct which contains the DMA buffer + * + * Free a coherent DMA memory and set virtual address of addr to NULL. */ -#define I2O_CMD_UTIL_NOP 0x00 -#define I2O_CMD_UTIL_ABORT 0x01 -#define I2O_CMD_UTIL_CLAIM 0x09 -#define I2O_CMD_UTIL_RELEASE 0x0B -#define I2O_CMD_UTIL_PARAMS_GET 0x06 -#define I2O_CMD_UTIL_PARAMS_SET 0x05 -#define I2O_CMD_UTIL_EVT_REGISTER 0x13 -#define I2O_CMD_UTIL_EVT_ACK 0x14 -#define I2O_CMD_UTIL_CONFIG_DIALOG 0x10 -#define I2O_CMD_UTIL_DEVICE_RESERVE 0x0D -#define I2O_CMD_UTIL_DEVICE_RELEASE 0x0F -#define I2O_CMD_UTIL_LOCK 0x17 -#define I2O_CMD_UTIL_LOCK_RELEASE 0x19 -#define I2O_CMD_UTIL_REPLY_FAULT_NOTIFY 0x15 +static inline void i2o_dma_free(struct device *dev, struct i2o_dma *addr) +{ + if (addr->virt) { + if (addr->phys) + dma_free_coherent(dev, addr->len, addr->virt, + addr->phys); + else + kfree(addr->virt); + addr->virt = NULL; + } +}; -/* - * SCSI Host Bus Adapter Class +/** + * i2o_dma_realloc - Realloc DMA memory + * @dev: struct device pointer to the PCI device of the I2O controller + * @addr: pointer to a i2o_dma struct DMA buffer + * @len: new length of memory + * @gfp_mask: GFP mask + * + * If there was something allocated in the addr, free it first. If len > 0 + * then try to allocate it and write the addresses back to the addr + * structure. If len == 0 set the virtual address to NULL. + * + * Returns 0 on success or negative error code on failure. */ -#define I2O_CMD_SCSI_EXEC 0x81 -#define I2O_CMD_SCSI_ABORT 0x83 -#define I2O_CMD_SCSI_BUSRESET 0x27 +static inline int i2o_dma_realloc(struct device *dev, struct i2o_dma *addr, + size_t len, gfp_t gfp_mask) +{ + i2o_dma_free(dev, addr); + + if (len) + return i2o_dma_alloc(dev, addr, len, gfp_mask); + + return 0; +}; /* - * Bus Adapter Class + * i2o_pool_alloc - Allocate a slab cache and mempool + * @pool: pointer to struct i2o_pool to write data into.
+ * @name: name which is used to identify cache + * @size: size of each object + * @min_nr: minimum number of objects + * + * First allocates a slab cache with name and size. Then allocates a + * mempool which uses the slab cache for allocation and freeing. + * + * Returns 0 on success or negative error code on failure. */ -#define I2O_CMD_BUS_ADAPTER_RESET 0x85 -#define I2O_CMD_BUS_RESET 0x87 -#define I2O_CMD_BUS_SCAN 0x89 -#define I2O_CMD_BUS_QUIESCE 0x8b +static inline int i2o_pool_alloc(struct i2o_pool *pool, const char *name, + size_t size, int min_nr) +{ + pool->name = kmalloc(strlen(name) + 1, GFP_KERNEL); + if (!pool->name) + goto exit; + strcpy(pool->name, name); + + pool->slab = + kmem_cache_create(pool->name, size, 0, SLAB_HWCACHE_ALIGN, NULL, + NULL); + if (!pool->slab) + goto free_name; + + pool->mempool = + mempool_create(min_nr, mempool_alloc_slab, mempool_free_slab, + pool->slab); + if (!pool->mempool) + goto free_slab; + + return 0; + + free_slab: + kmem_cache_destroy(pool->slab); + + free_name: + kfree(pool->name); + + exit: + return -ENOMEM; +}; /* - * Random Block Storage Class + * i2o_pool_free - Free slab cache and mempool again + * @mempool: pointer to struct i2o_pool which should be freed + * + * Note that you have to return all objects to the mempool again before + * calling i2o_pool_free(). 
*/ -#define I2O_CMD_BLOCK_READ 0x30 -#define I2O_CMD_BLOCK_WRITE 0x31 -#define I2O_CMD_BLOCK_CFLUSH 0x37 -#define I2O_CMD_BLOCK_MLOCK 0x49 -#define I2O_CMD_BLOCK_MUNLOCK 0x4B -#define I2O_CMD_BLOCK_MMOUNT 0x41 -#define I2O_CMD_BLOCK_MEJECT 0x43 -#define I2O_CMD_BLOCK_POWER 0x70 - -#define I2O_CMD_PRIVATE 0xFF +static inline void i2o_pool_free(struct i2o_pool *pool) +{ + mempool_destroy(pool->mempool); + kmem_cache_destroy(pool->slab); + kfree(pool->name); +}; -/* Command status values */ +/* I2O driver (OSM) functions */ +extern int i2o_driver_register(struct i2o_driver *); +extern void i2o_driver_unregister(struct i2o_driver *); -#define I2O_CMD_IN_PROGRESS 0x01 -#define I2O_CMD_REJECTED 0x02 -#define I2O_CMD_FAILED 0x03 -#define I2O_CMD_COMPLETED 0x04 +/** + * i2o_driver_notify_controller_add - Send notification of added controller + * to a single I2O driver + * + * Send notification of added controller to a single registered driver. + */ +static inline void i2o_driver_notify_controller_add(struct i2o_driver *drv, + struct i2o_controller *c) +{ + if (drv->notify_controller_add) + drv->notify_controller_add(c); +}; -/* I2O API function return values */ +/** + * i2o_driver_notify_controller_remove - Send notification of removed + * controller to a single I2O driver + * + * Send notification of removed controller to a single registered driver. 
+ */ +static inline void i2o_driver_notify_controller_remove(struct i2o_driver *drv, + struct i2o_controller *c) +{ + if (drv->notify_controller_remove) + drv->notify_controller_remove(c); +}; -#define I2O_RTN_NO_ERROR 0 -#define I2O_RTN_NOT_INIT 1 -#define I2O_RTN_FREE_Q_EMPTY 2 -#define I2O_RTN_TCB_ERROR 3 -#define I2O_RTN_TRANSACTION_ERROR 4 -#define I2O_RTN_ADAPTER_ALREADY_INIT 5 -#define I2O_RTN_MALLOC_ERROR 6 -#define I2O_RTN_ADPTR_NOT_REGISTERED 7 -#define I2O_RTN_MSG_REPLY_TIMEOUT 8 -#define I2O_RTN_NO_STATUS 9 -#define I2O_RTN_NO_FIRM_VER 10 -#define I2O_RTN_NO_LINK_SPEED 11 +/** + * i2o_driver_notify_device_add - Send notification of added device to a + * single I2O driver + * + * Send notification of added device to a single registered driver. + */ +static inline void i2o_driver_notify_device_add(struct i2o_driver *drv, + struct i2o_device *i2o_dev) +{ + if (drv->notify_device_add) + drv->notify_device_add(i2o_dev); +}; -/* Reply message status defines for all messages */ +/** + * i2o_driver_notify_device_remove - Send notification of removed device + * to a single I2O driver + * + * Send notification of removed device to a single registered driver. 
+ */ +static inline void i2o_driver_notify_device_remove(struct i2o_driver *drv, + struct i2o_device *i2o_dev) +{ + if (drv->notify_device_remove) + drv->notify_device_remove(i2o_dev); +}; -#define I2O_REPLY_STATUS_SUCCESS 0x00 -#define I2O_REPLY_STATUS_ABORT_DIRTY 0x01 -#define I2O_REPLY_STATUS_ABORT_NO_DATA_TRANSFER 0x02 -#define I2O_REPLY_STATUS_ABORT_PARTIAL_TRANSFER 0x03 -#define I2O_REPLY_STATUS_ERROR_DIRTY 0x04 -#define I2O_REPLY_STATUS_ERROR_NO_DATA_TRANSFER 0x05 -#define I2O_REPLY_STATUS_ERROR_PARTIAL_TRANSFER 0x06 -#define I2O_REPLY_STATUS_PROCESS_ABORT_DIRTY 0x08 -#define I2O_REPLY_STATUS_PROCESS_ABORT_NO_DATA_TRANSFER 0x09 -#define I2O_REPLY_STATUS_PROCESS_ABORT_PARTIAL_TRANSFER 0x0A -#define I2O_REPLY_STATUS_TRANSACTION_ERROR 0x0B -#define I2O_REPLY_STATUS_PROGRESS_REPORT 0x80 +extern void i2o_driver_notify_controller_add_all(struct i2o_controller *); +extern void i2o_driver_notify_controller_remove_all(struct i2o_controller *); +extern void i2o_driver_notify_device_add_all(struct i2o_device *); +extern void i2o_driver_notify_device_remove_all(struct i2o_device *); -/* Status codes and Error Information for Parameter functions */ +/* I2O device functions */ +extern int i2o_device_claim(struct i2o_device *); +extern int i2o_device_claim_release(struct i2o_device *); -#define I2O_PARAMS_STATUS_SUCCESS 0x00 -#define I2O_PARAMS_STATUS_BAD_KEY_ABORT 0x01 -#define I2O_PARAMS_STATUS_BAD_KEY_CONTINUE 0x02 -#define I2O_PARAMS_STATUS_BUFFER_FULL 0x03 -#define I2O_PARAMS_STATUS_BUFFER_TOO_SMALL 0x04 -#define I2O_PARAMS_STATUS_FIELD_UNREADABLE 0x05 -#define I2O_PARAMS_STATUS_FIELD_UNWRITEABLE 0x06 -#define I2O_PARAMS_STATUS_INSUFFICIENT_FIELDS 0x07 -#define I2O_PARAMS_STATUS_INVALID_GROUP_ID 0x08 -#define I2O_PARAMS_STATUS_INVALID_OPERATION 0x09 -#define I2O_PARAMS_STATUS_NO_KEY_FIELD 0x0A -#define I2O_PARAMS_STATUS_NO_SUCH_FIELD 0x0B -#define I2O_PARAMS_STATUS_NON_DYNAMIC_GROUP 0x0C -#define I2O_PARAMS_STATUS_OPERATION_ERROR 0x0D -#define 
I2O_PARAMS_STATUS_SCALAR_ERROR 0x0E -#define I2O_PARAMS_STATUS_TABLE_ERROR 0x0F -#define I2O_PARAMS_STATUS_WRONG_GROUP_TYPE 0x10 +/* Exec OSM functions */ +extern int i2o_exec_lct_get(struct i2o_controller *); -/* DetailedStatusCode defines for Executive, DDM, Util and Transaction error - * messages: Table 3-2 Detailed Status Codes.*/ +/* device / driver / kobject conversion functions */ +#define to_i2o_driver(drv) container_of(drv,struct i2o_driver, driver) +#define to_i2o_device(dev) container_of(dev, struct i2o_device, device) +#define to_i2o_controller(dev) container_of(dev, struct i2o_controller, device) +#define kobj_to_i2o_device(kobj) to_i2o_device(container_of(kobj, struct device, kobj)) -#define I2O_DSC_SUCCESS 0x0000 -#define I2O_DSC_BAD_KEY 0x0002 -#define I2O_DSC_TCL_ERROR 0x0003 -#define I2O_DSC_REPLY_BUFFER_FULL 0x0004 -#define I2O_DSC_NO_SUCH_PAGE 0x0005 -#define I2O_DSC_INSUFFICIENT_RESOURCE_SOFT 0x0006 -#define I2O_DSC_INSUFFICIENT_RESOURCE_HARD 0x0007 -#define I2O_DSC_CHAIN_BUFFER_TOO_LARGE 0x0009 -#define I2O_DSC_UNSUPPORTED_FUNCTION 0x000A -#define I2O_DSC_DEVICE_LOCKED 0x000B -#define I2O_DSC_DEVICE_RESET 0x000C -#define I2O_DSC_INAPPROPRIATE_FUNCTION 0x000D -#define I2O_DSC_INVALID_INITIATOR_ADDRESS 0x000E -#define I2O_DSC_INVALID_MESSAGE_FLAGS 0x000F -#define I2O_DSC_INVALID_OFFSET 0x0010 -#define I2O_DSC_INVALID_PARAMETER 0x0011 -#define I2O_DSC_INVALID_REQUEST 0x0012 -#define I2O_DSC_INVALID_TARGET_ADDRESS 0x0013 -#define I2O_DSC_MESSAGE_TOO_LARGE 0x0014 -#define I2O_DSC_MESSAGE_TOO_SMALL 0x0015 -#define I2O_DSC_MISSING_PARAMETER 0x0016 -#define I2O_DSC_TIMEOUT 0x0017 -#define I2O_DSC_UNKNOWN_ERROR 0x0018 -#define I2O_DSC_UNKNOWN_FUNCTION 0x0019 -#define I2O_DSC_UNSUPPORTED_VERSION 0x001A -#define I2O_DSC_DEVICE_BUSY 0x001B -#define I2O_DSC_DEVICE_NOT_AVAILABLE 0x001C +/** + * i2o_out_to_virt - Turn an I2O message to a virtual address + * @c: controller + * @m: message engine value + * + * Turn a receive message from an I2O controller bus 
address into + * a Linux virtual address. The shared page frame is a linear block + * so we simply have to shift the offset. This function does not + * work for sender side messages as they are ioremap objects + * provided by the I2O controller. + */ +static inline struct i2o_message *i2o_msg_out_to_virt(struct i2o_controller *c, + u32 m) +{ + BUG_ON(m < c->out_queue.phys + || m >= c->out_queue.phys + c->out_queue.len); -/* DetailedStatusCode defines for Block Storage Operation: Table 6-7 Detailed - Status Codes.*/ + return c->out_queue.virt + (m - c->out_queue.phys); +}; -#define I2O_BSA_DSC_SUCCESS 0x0000 -#define I2O_BSA_DSC_MEDIA_ERROR 0x0001 -#define I2O_BSA_DSC_ACCESS_ERROR 0x0002 -#define I2O_BSA_DSC_DEVICE_FAILURE 0x0003 -#define I2O_BSA_DSC_DEVICE_NOT_READY 0x0004 -#define I2O_BSA_DSC_MEDIA_NOT_PRESENT 0x0005 -#define I2O_BSA_DSC_MEDIA_LOCKED 0x0006 -#define I2O_BSA_DSC_MEDIA_FAILURE 0x0007 -#define I2O_BSA_DSC_PROTOCOL_FAILURE 0x0008 -#define I2O_BSA_DSC_BUS_FAILURE 0x0009 -#define I2O_BSA_DSC_ACCESS_VIOLATION 0x000A -#define I2O_BSA_DSC_WRITE_PROTECTED 0x000B -#define I2O_BSA_DSC_DEVICE_RESET 0x000C -#define I2O_BSA_DSC_VOLUME_CHANGED 0x000D -#define I2O_BSA_DSC_TIMEOUT 0x000E +/** + * i2o_msg_in_to_virt - Turn an I2O message to a virtual address + * @c: controller + * @m: message engine value + * + * Turn a send message from an I2O controller bus address into + * a Linux virtual address. The shared page frame is a linear block + * so we simply have to shift the offset. This function does not + * work for receive side messages as they are kmalloc objects + * in a different pool. + */ +static inline struct i2o_message __iomem *i2o_msg_in_to_virt(struct + i2o_controller *c, + u32 m) +{ + return c->in_queue.virt + m; +}; -/* FailureStatusCodes, Table 3-3 Message Failure Codes */ +/** + * i2o_msg_get - obtain an I2O message from the IOP + * @c: I2O controller + * + * This function tries to get a message frame. 
If no message frame is + * available do not wait until one is available (see also i2o_msg_get_wait). + * The returned pointer to the message frame is not in I/O memory, it is + * allocated from a mempool. But because a MFA is allocated from the + * controller too it is guaranteed that i2o_msg_post() will never fail. + * + * On a success a pointer to the message frame is returned. If the message + * queue is empty -EBUSY is returned and if no memory is available -ENOMEM + * is returned. + */ +static inline struct i2o_message *i2o_msg_get(struct i2o_controller *c) +{ + struct i2o_msg_mfa *mmsg = mempool_alloc(c->in_msg.mempool, GFP_ATOMIC); + if (!mmsg) + return ERR_PTR(-ENOMEM); + + mmsg->mfa = readl(c->in_port); + if (mmsg->mfa == I2O_QUEUE_EMPTY) { + mempool_free(mmsg, c->in_msg.mempool); + return ERR_PTR(-EBUSY); + } -#define I2O_FSC_TRANSPORT_SERVICE_SUSPENDED 0x81 -#define I2O_FSC_TRANSPORT_SERVICE_TERMINATED 0x82 -#define I2O_FSC_TRANSPORT_CONGESTION 0x83 -#define I2O_FSC_TRANSPORT_FAILURE 0x84 -#define I2O_FSC_TRANSPORT_STATE_ERROR 0x85 -#define I2O_FSC_TRANSPORT_TIME_OUT 0x86 -#define I2O_FSC_TRANSPORT_ROUTING_FAILURE 0x87 -#define I2O_FSC_TRANSPORT_INVALID_VERSION 0x88 -#define I2O_FSC_TRANSPORT_INVALID_OFFSET 0x89 -#define I2O_FSC_TRANSPORT_INVALID_MSG_FLAGS 0x8A -#define I2O_FSC_TRANSPORT_FRAME_TOO_SMALL 0x8B -#define I2O_FSC_TRANSPORT_FRAME_TOO_LARGE 0x8C -#define I2O_FSC_TRANSPORT_INVALID_TARGET_ID 0x8D -#define I2O_FSC_TRANSPORT_INVALID_INITIATOR_ID 0x8E -#define I2O_FSC_TRANSPORT_INVALID_INITIATOR_CONTEXT 0x8F -#define I2O_FSC_TRANSPORT_UNKNOWN_FAILURE 0xFF + return &mmsg->msg; +}; -/* Device Claim Types */ -#define I2O_CLAIM_PRIMARY 0x01000000 -#define I2O_CLAIM_MANAGEMENT 0x02000000 -#define I2O_CLAIM_AUTHORIZED 0x03000000 -#define I2O_CLAIM_SECONDARY 0x04000000 +/** + * i2o_msg_post - Post I2O message to I2O controller + * @c: I2O controller to which the message should be sent + * @msg: message returned by i2o_msg_get() + * + * Post the message to
the I2O controller and return immediately. + */ +static inline void i2o_msg_post(struct i2o_controller *c, + struct i2o_message *msg) +{ + struct i2o_msg_mfa *mmsg; -/* Message header defines for VersionOffset */ -#define I2OVER15 0x0001 -#define I2OVER20 0x0002 + mmsg = container_of(msg, struct i2o_msg_mfa, msg); + memcpy_toio(i2o_msg_in_to_virt(c, mmsg->mfa), msg, + (le32_to_cpu(msg->u.head[0]) >> 16) << 2); + writel(mmsg->mfa, c->in_port); + mempool_free(mmsg, c->in_msg.mempool); +}; -/* Default is 1.5 */ -#define I2OVERSION I2OVER15 +/** + * i2o_msg_post_wait - Post and wait a message and wait until return + * @c: controller + * @m: message to post + * @timeout: time in seconds to wait + * + * This API allows an OSM to post a message and then be told whether or + * not the system received a successful reply. If the message times out + * then the value '-ETIMEDOUT' is returned. + * + * Returns 0 on success or negative error code on failure. + */ +static inline int i2o_msg_post_wait(struct i2o_controller *c, + struct i2o_message *msg, + unsigned long timeout) +{ + return i2o_msg_post_wait_mem(c, msg, timeout, NULL); +}; -#define SGL_OFFSET_0 I2OVERSION -#define SGL_OFFSET_4 (0x0040 | I2OVERSION) -#define SGL_OFFSET_5 (0x0050 | I2OVERSION) -#define SGL_OFFSET_6 (0x0060 | I2OVERSION) -#define SGL_OFFSET_7 (0x0070 | I2OVERSION) -#define SGL_OFFSET_8 (0x0080 | I2OVERSION) -#define SGL_OFFSET_9 (0x0090 | I2OVERSION) -#define SGL_OFFSET_10 (0x00A0 | I2OVERSION) -#define SGL_OFFSET_11 (0x00B0 | I2OVERSION) -#define SGL_OFFSET_12 (0x00C0 | I2OVERSION) -#define SGL_OFFSET(x) (((x)<<4) | I2OVERSION) +/** + * i2o_msg_nop_mfa - Returns a fetched MFA back to the controller + * @c: I2O controller from which the MFA was fetched + * @mfa: MFA which should be returned + * + * This function must be used for preserved messages, because i2o_msg_nop() + * also returns the allocated memory back to the msg_pool mempool. 
+ */ +static inline void i2o_msg_nop_mfa(struct i2o_controller *c, u32 mfa) +{ + struct i2o_message __iomem *msg; + u32 nop[3] = { + THREE_WORD_MSG_SIZE | SGL_OFFSET_0, + I2O_CMD_UTIL_NOP << 24 | HOST_TID << 12 | ADAPTER_TID, + 0x00000000 + }; + + msg = i2o_msg_in_to_virt(c, mfa); + memcpy_toio(msg, nop, sizeof(nop)); + writel(mfa, c->in_port); +}; -/* Transaction Reply Lists (TRL) Control Word structure */ -#define TRL_SINGLE_FIXED_LENGTH 0x00 -#define TRL_SINGLE_VARIABLE_LENGTH 0x40 -#define TRL_MULTIPLE_FIXED_LENGTH 0x80 +/** + * i2o_msg_nop - Returns a message which is not used + * @c: I2O controller from which the message was created + * @msg: message which should be returned + * + * If you fetch a message via i2o_msg_get, and can't use it, you must + * return the message with this function. Otherwise the MFA is lost as well + * as the allocated memory from the mempool. + */ +static inline void i2o_msg_nop(struct i2o_controller *c, + struct i2o_message *msg) +{ + struct i2o_msg_mfa *mmsg; + mmsg = container_of(msg, struct i2o_msg_mfa, msg); - /* msg header defines for MsgFlags */ -#define MSG_STATIC 0x0100 -#define MSG_64BIT_CNTXT 0x0200 -#define MSG_MULTI_TRANS 0x1000 -#define MSG_FAIL 0x2000 -#define MSG_FINAL 0x4000 -#define MSG_REPLY 0x8000 + i2o_msg_nop_mfa(c, mmsg->mfa); + mempool_free(mmsg, c->in_msg.mempool); +}; - /* minimum size msg */ -#define THREE_WORD_MSG_SIZE 0x00030000 -#define FOUR_WORD_MSG_SIZE 0x00040000 -#define FIVE_WORD_MSG_SIZE 0x00050000 -#define SIX_WORD_MSG_SIZE 0x00060000 -#define SEVEN_WORD_MSG_SIZE 0x00070000 -#define EIGHT_WORD_MSG_SIZE 0x00080000 -#define NINE_WORD_MSG_SIZE 0x00090000 -#define TEN_WORD_MSG_SIZE 0x000A0000 -#define ELEVEN_WORD_MSG_SIZE 0x000B0000 -#define I2O_MESSAGE_SIZE(x) ((x)<<16) +/** + * i2o_flush_reply - Flush reply from I2O controller + * @c: I2O controller + * @m: the message identifier + * + * The I2O controller must be informed that the reply message is not needed + * anymore. 
If you forget to flush the reply, the message frame can't be + * used by the controller anymore and is therefore lost. + */ +static inline void i2o_flush_reply(struct i2o_controller *c, u32 m) +{ + writel(m, c->out_port); +}; -/* special TID assignments */ -#define ADAPTER_TID 0 -#define HOST_TID 1 +/* + * Endian handling wrapped into the macro - keeps the core code + * cleaner. + */ -/* outbound queue defines */ -#define I2O_MAX_OUTBOUND_MSG_FRAMES 128 -#define I2O_OUTBOUND_MSG_FRAME_SIZE 128 /* in 32-bit words */ +#define i2o_raw_writel(val, mem) __raw_writel(cpu_to_le32(val), mem) -#define I2O_POST_WAIT_OK 0 -#define I2O_POST_WAIT_TIMEOUT -ETIMEDOUT +extern int i2o_parm_field_get(struct i2o_device *, int, int, void *, int); +extern int i2o_parm_table_get(struct i2o_device *, int, int, int, void *, int, + void *, int); -#define I2O_CONTEXT_LIST_MIN_LENGTH 15 -#define I2O_CONTEXT_LIST_USED 0x01 -#define I2O_CONTEXT_LIST_DELETED 0x02 +/* debugging and troubleshooting/diagnostic helpers. */ +#define osm_printk(level, format, arg...) \ + printk(level "%s: " format, OSM_NAME , ## arg) -/* timeouts */ -#define I2O_TIMEOUT_INIT_OUTBOUND_QUEUE 15 -#define I2O_TIMEOUT_MESSAGE_GET 5 -#define I2O_TIMEOUT_RESET 30 -#define I2O_TIMEOUT_STATUS_GET 5 -#define I2O_TIMEOUT_LCT_GET 360 -#define I2O_TIMEOUT_SCSI_SCB_ABORT 240 +#ifdef DEBUG +#define osm_debug(format, arg...) \ + osm_printk(KERN_DEBUG, format , ## arg) +#else +#define osm_debug(format, arg...) \ + do { } while (0) +#endif -/* retries */ -#define I2O_HRT_GET_TRIES 3 -#define I2O_LCT_GET_TRIES 3 +#define osm_err(format, arg...) \ + osm_printk(KERN_ERR, format , ## arg) +#define osm_info(format, arg...) \ + osm_printk(KERN_INFO, format , ## arg) +#define osm_warn(format, arg...) 
\ + osm_printk(KERN_WARNING, format , ## arg) -/* defines for max_sectors and max_phys_segments */ -#define I2O_MAX_SECTORS 1024 -#define I2O_MAX_SECTORS_LIMITED 256 -#define I2O_MAX_PHYS_SEGMENTS MAX_PHYS_SEGMENTS +/* debugging functions */ +extern void i2o_report_status(const char *, const char *, struct i2o_message *); +extern void i2o_dump_message(struct i2o_message *); +extern void i2o_dump_hrt(struct i2o_controller *c); +extern void i2o_debug_state(struct i2o_controller *c); #endif /* __KERNEL__ */ #endif /* _I2O_H */ -- cgit v1.2.3-71-gd317 From 24791bd48f643194d806654b587251b0f92233e8 Mon Sep 17 00:00:00 2001 From: Markus Lidel Date: Fri, 6 Jan 2006 00:19:31 -0800 Subject: [PATCH] I2O: Remove wrong I2O device class Removed wrong I2O device class, which was only needed to add sysfs attributes. Signed-off-by: Markus Lidel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/message/i2o/core.h | 2 + drivers/message/i2o/device.c | 144 ++++++++++++++++++------------------------- drivers/message/i2o/driver.c | 2 - drivers/message/i2o/iop.c | 34 ++-------- include/linux/i2o.h | 1 - 5 files changed, 68 insertions(+), 115 deletions(-) (limited to 'include/linux') diff --git a/drivers/message/i2o/core.h b/drivers/message/i2o/core.h index 9eefedb16211..9aa9b91170b2 100644 --- a/drivers/message/i2o/core.h +++ b/drivers/message/i2o/core.h @@ -33,6 +33,8 @@ extern int __init i2o_pci_init(void); extern void __exit i2o_pci_exit(void); /* device */ +extern struct device_attribute i2o_device_attrs[]; + extern void i2o_device_remove(struct i2o_device *); extern int i2o_device_parse_lct(struct i2o_controller *); diff --git a/drivers/message/i2o/device.c b/drivers/message/i2o/device.c index 1db26215937f..a5e260b7a3b6 100644 --- a/drivers/message/i2o/device.c +++ b/drivers/message/i2o/device.c @@ -142,8 +142,9 @@ static void i2o_device_release(struct device *dev) /** - * i2o_device_class_show_class_id - Displays class id of I2O device - * @cd: class device of 
which the class id should be displayed + * i2o_device_show_class_id - Displays class id of I2O device + * @dev: device of which the class id should be displayed + * @attr: pointer to device attribute * @buf: buffer into which the class id should be printed * * Returns the number of bytes which are printed into the buffer. @@ -159,15 +160,15 @@ static ssize_t i2o_device_show_class_id(struct device *dev, } /** - * i2o_device_class_show_tid - Displays TID of I2O device - * @cd: class device of which the TID should be displayed - * @buf: buffer into which the class id should be printed + * i2o_device_show_tid - Displays TID of I2O device + * @dev: device of which the TID should be displayed + * @attr: pointer to device attribute + * @buf: buffer into which the TID should be printed * * Returns the number of bytes which are printed into the buffer. */ static ssize_t i2o_device_show_tid(struct device *dev, - struct device_attribute *attr, - char *buf) + struct device_attribute *attr, char *buf) { struct i2o_device *i2o_dev = to_i2o_device(dev); @@ -208,66 +209,6 @@ static struct i2o_device *i2o_device_alloc(void) return dev; } -/** - * i2o_setup_sysfs_links - Adds attributes to the I2O device - * @cd: I2O class device which is added to the I2O device class - * - * This function get called when a I2O device is added to the class. It - * creates the attributes for each device and creates user/parent symlink - * if necessary. - * - * Returns 0 on success or negative error code on failure. 
- */ -static void i2o_setup_sysfs_links(struct i2o_device *i2o_dev) -{ - struct i2o_controller *c = i2o_dev->iop; - struct i2o_device *tmp; - - /* create user entries for this device */ - tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.user_tid); - if (tmp && tmp != i2o_dev) - sysfs_create_link(&i2o_dev->device.kobj, - &tmp->device.kobj, "user"); - - /* create user entries refering to this device */ - list_for_each_entry(tmp, &c->devices, list) - if (tmp->lct_data.user_tid == i2o_dev->lct_data.tid && - tmp != i2o_dev) - sysfs_create_link(&tmp->device.kobj, - &i2o_dev->device.kobj, "user"); - - /* create parent entries for this device */ - tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.parent_tid); - if (tmp && tmp != i2o_dev) - sysfs_create_link(&i2o_dev->device.kobj, - &tmp->device.kobj, "parent"); - - /* create parent entries refering to this device */ - list_for_each_entry(tmp, &c->devices, list) - if (tmp->lct_data.parent_tid == i2o_dev->lct_data.tid && - tmp != i2o_dev) - sysfs_create_link(&tmp->device.kobj, - &i2o_dev->device.kobj, "parent"); -} - -static void i2o_remove_sysfs_links(struct i2o_device *i2o_dev) -{ - struct i2o_controller *c = i2o_dev->iop; - struct i2o_device *tmp; - - sysfs_remove_link(&i2o_dev->device.kobj, "parent"); - sysfs_remove_link(&i2o_dev->device.kobj, "user"); - - list_for_each_entry(tmp, &c->devices, list) { - if (tmp->lct_data.parent_tid == i2o_dev->lct_data.tid) - sysfs_remove_link(&tmp->device.kobj, "parent"); - if (tmp->lct_data.user_tid == i2o_dev->lct_data.tid) - sysfs_remove_link(&tmp->device.kobj, "user"); - } -} - - - /** * i2o_device_add - allocate a new I2O device and add it to the IOP * @iop: I2O controller where the device is on @@ -282,33 +223,57 @@ static void i2o_remove_sysfs_links(struct i2o_device *i2o_dev) static struct i2o_device *i2o_device_add(struct i2o_controller *c, i2o_lct_entry * entry) { - struct i2o_device *dev; + struct i2o_device *i2o_dev, *tmp; - dev = i2o_device_alloc(); - if 
(IS_ERR(dev)) { + i2o_dev = i2o_device_alloc(); + if (IS_ERR(i2o_dev)) { printk(KERN_ERR "i2o: unable to allocate i2o device\n"); - return dev; + return i2o_dev; } - dev->lct_data = *entry; - dev->iop = c; + i2o_dev->lct_data = *entry; - snprintf(dev->device.bus_id, BUS_ID_SIZE, "%d:%03x", c->unit, - dev->lct_data.tid); + snprintf(i2o_dev->device.bus_id, BUS_ID_SIZE, "%d:%03x", c->unit, + i2o_dev->lct_data.tid); - dev->device.parent = &c->device; + i2o_dev->iop = c; + i2o_dev->device.parent = &c->device; - device_register(&dev->device); + device_register(&i2o_dev->device); - list_add_tail(&dev->list, &c->devices); + list_add_tail(&i2o_dev->list, &c->devices); - i2o_setup_sysfs_links(dev); + /* create user entries for this device */ + tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.user_tid); + if (tmp && (tmp != i2o_dev)) + sysfs_create_link(&i2o_dev->device.kobj, &tmp->device.kobj, + "user"); - i2o_driver_notify_device_add_all(dev); + /* create user entries referring to this device */ + list_for_each_entry(tmp, &c->devices, list) + if ((tmp->lct_data.user_tid == i2o_dev->lct_data.tid) + && (tmp != i2o_dev)) + sysfs_create_link(&tmp->device.kobj, + &i2o_dev->device.kobj, "user"); - pr_debug("i2o: device %s added\n", dev->device.bus_id); + /* create parent entries for this device */ + tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.parent_tid); + if (tmp && (tmp != i2o_dev)) + sysfs_create_link(&i2o_dev->device.kobj, &tmp->device.kobj, + "parent"); - return dev; + /* create parent entries referring to this device */ + list_for_each_entry(tmp, &c->devices, list) + if ((tmp->lct_data.parent_tid == i2o_dev->lct_data.tid) + && (tmp != i2o_dev)) + sysfs_create_link(&tmp->device.kobj, + &i2o_dev->device.kobj, "parent"); + + i2o_driver_notify_device_add_all(i2o_dev); + + pr_debug("i2o: device %s added\n", i2o_dev->device.bus_id); + + return i2o_dev; } /** @@ -321,9 +286,22 @@ static struct i2o_device *i2o_device_add(struct i2o_controller *c, */ void
i2o_device_remove(struct i2o_device *i2o_dev) { + struct i2o_device *tmp; + struct i2o_controller *c = i2o_dev->iop; + i2o_driver_notify_device_remove_all(i2o_dev); - i2o_remove_sysfs_links(i2o_dev); + + sysfs_remove_link(&i2o_dev->device.kobj, "parent"); + sysfs_remove_link(&i2o_dev->device.kobj, "user"); + + list_for_each_entry(tmp, &c->devices, list) { + if (tmp->lct_data.parent_tid == i2o_dev->lct_data.tid) + sysfs_remove_link(&tmp->device.kobj, "parent"); + if (tmp->lct_data.user_tid == i2o_dev->lct_data.tid) + sysfs_remove_link(&tmp->device.kobj, "user"); + } list_del(&i2o_dev->list); + device_unregister(&i2o_dev->device); } diff --git a/drivers/message/i2o/driver.c b/drivers/message/i2o/driver.c index 0fb9c4e2ad4c..25292b36e2d9 100644 --- a/drivers/message/i2o/driver.c +++ b/drivers/message/i2o/driver.c @@ -61,8 +61,6 @@ static int i2o_bus_match(struct device *dev, struct device_driver *drv) }; /* I2O bus type */ -extern struct device_attribute i2o_device_attrs[]; - struct bus_type i2o_bus_type = { .name = "i2o", .match = i2o_bus_match, diff --git a/drivers/message/i2o/iop.c b/drivers/message/i2o/iop.c index f86abb42bf89..7411a0504bf2 100644 --- a/drivers/message/i2o/iop.c +++ b/drivers/message/i2o/iop.c @@ -806,7 +806,6 @@ void i2o_iop_remove(struct i2o_controller *c) list_for_each_entry_safe(dev, tmp, &c->devices, list) i2o_device_remove(dev); - class_device_unregister(c->classdev); device_del(&c->device); /* Ask the IOP to switch to RESET state */ @@ -1050,9 +1049,6 @@ static void i2o_iop_release(struct device *dev) i2o_iop_free(c); }; -/* I2O controller class */ -static struct class *i2o_controller_class; - /** * i2o_iop_alloc - Allocate and initialize a i2o_controller struct * @@ -1124,36 +1120,29 @@ int i2o_iop_add(struct i2o_controller *c) goto iop_reset; } - c->classdev = class_device_create(i2o_controller_class, NULL, MKDEV(0,0), - &c->device, "iop%d", c->unit); - if (IS_ERR(c->classdev)) { - osm_err("%s: could not add controller class\n", c->name); 
- goto device_del; - } - osm_info("%s: Activating I2O controller...\n", c->name); osm_info("%s: This may take a few minutes if there are many devices\n", c->name); if ((rc = i2o_iop_activate(c))) { osm_err("%s: could not activate controller\n", c->name); - goto class_del; + goto device_del; } osm_debug("%s: building sys table...\n", c->name); if ((rc = i2o_systab_build())) - goto class_del; + goto device_del; osm_debug("%s: online controller...\n", c->name); if ((rc = i2o_iop_online(c))) - goto class_del; + goto device_del; osm_debug("%s: getting LCT...\n", c->name); if ((rc = i2o_exec_lct_get(c))) - goto class_del; + goto device_del; list_add(&c->list, &i2o_controllers); @@ -1163,9 +1152,6 @@ int i2o_iop_add(struct i2o_controller *c) return 0; - class_del: - class_device_unregister(c->classdev); - device_del: device_del(&c->device); @@ -1225,14 +1211,8 @@ static int __init i2o_iop_init(void) printk(KERN_INFO OSM_DESCRIPTION " v" OSM_VERSION "\n"); - i2o_controller_class = class_create(THIS_MODULE, "i2o_controller"); - if (IS_ERR(i2o_controller_class)) { - osm_err("can't register class i2o_controller\n"); - goto exit; - } - if ((rc = i2o_driver_init())) - goto class_exit; + goto exit; if ((rc = i2o_exec_init())) goto driver_exit; @@ -1248,9 +1228,6 @@ static int __init i2o_iop_init(void) driver_exit: i2o_driver_exit(); - class_exit: - class_destroy(i2o_controller_class); - exit: return rc; } @@ -1265,7 +1242,6 @@ static void __exit i2o_iop_exit(void) i2o_pci_exit(); i2o_exec_exit(); i2o_driver_exit(); - class_destroy(i2o_controller_class); }; module_init(i2o_iop_init); diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 9e359a981221..4c18b7711bd9 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -561,7 +561,6 @@ struct i2o_controller { struct resource mem_resource; /* Mem resource allocated to the IOP */ struct device device; - struct class_device *classdev; /* I2O controller class device */ struct i2o_device *exec; /* Executive */ #if 
BITS_PER_LONG == 64 spinlock_t context_list_lock; /* lock for context_list */ -- cgit v1.2.3-71-gd317 From dcceafe25a5f47cf69e5b46b4da6f15186ec8386 Mon Sep 17 00:00:00 2001 From: Markus Lidel Date: Fri, 6 Jan 2006 00:19:32 -0800 Subject: [PATCH] I2O: Bugfixes - Removed some kmalloc's with __GFP_ZERO and replace it with memset() because it didn't work properly. - Fixed returned message frame in i2o_cfg_passthru() which caused raidutils to display wrong error message in case a disk was missing. - Fixed size of printk() in i2o_scsi.c. - Fixed get_device() and put_device() in probing of the I2O controller. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/message/i2o/driver.c | 5 +++-- drivers/message/i2o/i2o_config.c | 29 ++++++++++++++--------------- drivers/message/i2o/i2o_scsi.c | 4 ++-- drivers/message/i2o/pci.c | 6 +----- include/linux/i2o.h | 2 +- 5 files changed, 21 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/message/i2o/driver.c b/drivers/message/i2o/driver.c index 25292b36e2d9..9c631c873dc6 100644 --- a/drivers/message/i2o/driver.c +++ b/drivers/message/i2o/driver.c @@ -217,14 +217,15 @@ int i2o_driver_dispatch(struct i2o_controller *c, u32 m) /* cut of header from message size (in 32-bit words) */ size = (le32_to_cpu(msg->u.head[0]) >> 16) - 5; - evt = kmalloc(size * 4 + sizeof(*evt), GFP_ATOMIC | __GFP_ZERO); + evt = kmalloc(size * 4 + sizeof(*evt), GFP_ATOMIC); if (!evt) return -ENOMEM; + memset(evt, 0, size * 4 + sizeof(*evt)); evt->size = size; evt->tcntxt = le32_to_cpu(msg->u.s.tcntxt); evt->event_indicator = le32_to_cpu(msg->body[0]); - memcpy(&evt->tcntxt, &msg->u.s.tcntxt, size * 4); + memcpy(&evt->data, &msg->body[1], size * 4); list_for_each_entry_safe(dev, tmp, &c->devices, list) if (dev->lct_data.tid == tid) { diff --git a/drivers/message/i2o/i2o_config.c b/drivers/message/i2o/i2o_config.c index 4fe73d628c5b..286fef3240c4 100644 --- a/drivers/message/i2o/i2o_config.c +++ 
b/drivers/message/i2o/i2o_config.c @@ -36,12 +36,12 @@ #include -#include "core.h" - #define SG_TABLESIZE 30 -static int i2o_cfg_ioctl(struct inode *inode, struct file *fp, unsigned int cmd, - unsigned long arg); +extern int i2o_parm_issue(struct i2o_device *, int, void *, int, void *, int); + +static int i2o_cfg_ioctl(struct inode *, struct file *, unsigned int, + unsigned long); static spinlock_t i2o_config_lock; @@ -593,9 +593,6 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, sg_offset = (msg->u.head[0] >> 4) & 0x0f; - msg->u.s.icntxt = cpu_to_le32(i2o_config_driver.context); - msg->u.s.tcntxt = cpu_to_le32(i2o_cntxt_list_add(c, reply)); - memset(sg_list, 0, sizeof(sg_list[0]) * SG_TABLESIZE); if (sg_offset) { struct sg_simple_element *sg; @@ -629,7 +626,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, goto cleanup; } sg_size = sg[i].flag_count & 0xffffff; - p = &(sg_list[sg_index++]); + p = &(sg_list[sg_index]); /* Allocate memory for the transfer */ if (i2o_dma_alloc (&c->pdev->dev, p, sg_size, @@ -640,6 +637,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, rcode = -ENOMEM; goto sg_list_cleanup; } + sg_index++; /* Copy in the user's SG buffer if necessary */ if (sg[i]. 
flag_count & 0x04000000 /*I2O_SGL_FLAGS_DIR */ ) { @@ -661,8 +659,10 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, } rcode = i2o_msg_post_wait(c, msg, 60); - if (rcode) + if (rcode) { + reply[4] = ((u32) rcode) << 24; goto sg_list_cleanup; + } if (sg_offset) { u32 msg[I2O_OUTBOUND_MSG_FRAME_SIZE]; @@ -712,6 +712,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, } } + sg_list_cleanup: /* Copy back the reply to user space */ if (reply_size) { // we wrote our own values for context - now restore the user supplied ones @@ -729,7 +730,6 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd, } } - sg_list_cleanup: for (i = 0; i < sg_index; i++) i2o_dma_free(&c->pdev->dev, &sg_list[i]); @@ -827,9 +827,6 @@ static int i2o_cfg_passthru(unsigned long arg) sg_offset = (msg->u.head[0] >> 4) & 0x0f; - msg->u.s.icntxt = cpu_to_le32(i2o_config_driver.context); - msg->u.s.tcntxt = cpu_to_le32(i2o_cntxt_list_add(c, reply)); - memset(sg_list, 0, sizeof(sg_list[0]) * SG_TABLESIZE); if (sg_offset) { struct sg_simple_element *sg; @@ -892,8 +889,10 @@ static int i2o_cfg_passthru(unsigned long arg) } rcode = i2o_msg_post_wait(c, msg, 60); - if (rcode) + if (rcode) { + reply[4] = ((u32) rcode) << 24; goto sg_list_cleanup; + } if (sg_offset) { u32 msg[128]; @@ -943,6 +942,7 @@ static int i2o_cfg_passthru(unsigned long arg) } } + sg_list_cleanup: /* Copy back the reply to user space */ if (reply_size) { // we wrote our own values for context - now restore the user supplied ones @@ -959,7 +959,6 @@ static int i2o_cfg_passthru(unsigned long arg) } } - sg_list_cleanup: for (i = 0; i < sg_index; i++) kfree(sg_list[i]); diff --git a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c index 24061dfd46e4..76b9516b1934 100644 --- a/drivers/message/i2o/i2o_scsi.c +++ b/drivers/message/i2o/i2o_scsi.c @@ -309,9 +309,9 @@ static int i2o_scsi_probe(struct device *dev) sysfs_create_link(&i2o_dev->device.kobj, &scsi_dev->sdev_gendev.kobj, 
"scsi"); - osm_info("device added (TID: %03x) channel: %d, id: %d, lun: %d\n", + osm_info("device added (TID: %03x) channel: %d, id: %d, lun: %ld\n", i2o_dev->lct_data.tid, channel, le32_to_cpu(id), - (unsigned int)le64_to_cpu(lun)); + (long unsigned int)le64_to_cpu(lun)); return 0; }; diff --git a/drivers/message/i2o/pci.c b/drivers/message/i2o/pci.c index 329d482eee81..c5b656cdea7c 100644 --- a/drivers/message/i2o/pci.c +++ b/drivers/message/i2o/pci.c @@ -339,7 +339,7 @@ static int __devinit i2o_pci_probe(struct pci_dev *pdev, pci_name(pdev)); c->pdev = pdev; - c->device.parent = get_device(&pdev->dev); + c->device.parent = &pdev->dev; /* Cards that fall apart if you hit them with large I/O loads... */ if (pdev->vendor == PCI_VENDOR_ID_NCR && pdev->device == 0x0630) { @@ -410,8 +410,6 @@ static int __devinit i2o_pci_probe(struct pci_dev *pdev, if ((rc = i2o_iop_add(c))) goto uninstall; - get_device(&c->device); - if (i960) pci_write_config_word(i960, 0x42, 0x03ff); @@ -424,7 +422,6 @@ static int __devinit i2o_pci_probe(struct pci_dev *pdev, i2o_pci_free(c); free_controller: - put_device(c->device.parent); i2o_iop_free(c); disable: @@ -454,7 +451,6 @@ static void __devexit i2o_pci_remove(struct pci_dev *pdev) printk(KERN_INFO "%s: Controller removed.\n", c->name); - put_device(c->device.parent); put_device(&c->device); }; diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 4c18b7711bd9..9ba806796667 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -384,7 +384,7 @@ /* defines for max_sectors and max_phys_segments */ #define I2O_MAX_SECTORS 1024 -#define I2O_MAX_SECTORS_LIMITED 256 +#define I2O_MAX_SECTORS_LIMITED 128 #define I2O_MAX_PHYS_SEGMENTS MAX_PHYS_SEGMENTS /* -- cgit v1.2.3-71-gd317 From 45714d65618407bce1fd0271bc58303ce14b0785 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 6 Jan 2006 00:19:36 -0800 Subject: [PATCH] fuse: bump interface version Change interface version to 7.4. 
Following changes will need backward compatibility support, so store the minor version returned by userspace. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/dev.c | 2 ++ fs/fuse/fuse_i.h | 3 +++ include/linux/fuse.h | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8f873e621f41..e5bc3f8eebd0 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -178,6 +178,8 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION) fc->conn_error = 1; + fc->minor = req->misc.init_in_out.minor; + /* After INIT reply is received other requests can go out. So do (FUSE_MAX_OUTSTANDING - 1) number of up()s on outstanding_sem. The last up() is done in diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0ea5301f86be..2d4835e54c90 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -272,6 +272,9 @@ struct fuse_conn { /** Is create not implemented by fs? */ unsigned no_create : 1; + /** Negotiated minor version */ + unsigned minor; + /** Backing dev info */ struct backing_dev_info bdi; }; diff --git a/include/linux/fuse.h b/include/linux/fuse.h index b76b558b03d4..3c85f1a422cc 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -14,7 +14,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 3 +#define FUSE_KERNEL_MINOR_VERSION 4 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 -- cgit v1.2.3-71-gd317 From de5f12025572ef8fcffa4be5453061725acfb754 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 6 Jan 2006 00:19:37 -0800 Subject: [PATCH] fuse: add frsize to statfs reply Add 'frsize' member to the statfs reply. I'm not sure if sending f_fsid will ever be needed, but just in case leave some space at the end of the structure, so less compatibility mess would be required. 
Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/inode.c | 5 ++++- include/linux/fuse.h | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e69a546844d0..3b928a02af04 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -218,6 +218,7 @@ static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr { stbuf->f_type = FUSE_SUPER_MAGIC; stbuf->f_bsize = attr->bsize; + stbuf->f_frsize = attr->frsize; stbuf->f_blocks = attr->blocks; stbuf->f_bfree = attr->bfree; stbuf->f_bavail = attr->bavail; @@ -238,10 +239,12 @@ static int fuse_statfs(struct super_block *sb, struct kstatfs *buf) if (!req) return -EINTR; + memset(&outarg, 0, sizeof(outarg)); req->in.numargs = 0; req->in.h.opcode = FUSE_STATFS; req->out.numargs = 1; - req->out.args[0].size = sizeof(outarg); + req->out.args[0].size = + fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg); req->out.args[0].value = &outarg; request_send(fc, req); err = req->out.h.error; diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 3c85f1a422cc..9d5177c356cc 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -53,6 +53,9 @@ struct fuse_kstatfs { __u64 ffree; __u32 bsize; __u32 namelen; + __u32 frsize; + __u32 padding; + __u32 spare[6]; }; #define FATTR_MODE (1 << 0) @@ -213,6 +216,8 @@ struct fuse_write_out { __u32 padding; }; +#define FUSE_COMPAT_STATFS_SIZE 48 + struct fuse_statfs_out { struct fuse_kstatfs st; }; -- cgit v1.2.3-71-gd317 From 1d3d752b471d2a3a1d5e4fe177e5e7d52abb4e4c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 6 Jan 2006 00:19:40 -0800 Subject: [PATCH] fuse: clean up request size limit checking Change the way a too large request is handled. Until now in this case the device read returned -EINVAL and the operation returned -EIO. Make it more flexible by not returning -EINVAL from the read, but restarting it instead.
Also remove the fixed limit on setxattr data and let the filesystem provide as large a read buffer as it needs to handle the extended attribute data. The symbolic link length is already checked by VFS to be less than PATH_MAX, so the extra check against FUSE_SYMLINK_MAX is not needed. The check in fuse_create_open() against FUSE_NAME_MAX is not needed, since the dentry has already been looked up, and hence the name already checked. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/dev.c | 26 ++++++++++++++++---------- fs/fuse/dir.c | 14 +------------- fs/fuse/fuse_i.h | 9 ++++++--- fs/fuse/inode.c | 2 +- include/linux/fuse.h | 8 ++------ 5 files changed, 26 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index e5bc3f8eebd0..1afdffdf80db 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -617,6 +617,7 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, struct fuse_copy_state cs; unsigned reqsize; + restart: spin_lock(&fuse_lock); fc = file->private_data; err = -EPERM; @@ -632,20 +633,25 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, req = list_entry(fc->pending.next, struct fuse_req, list); list_del_init(&req->list); - spin_unlock(&fuse_lock); in = &req->in; - reqsize = req->in.h.len; - fuse_copy_init(&cs, 1, req, iov, nr_segs); - err = -EINVAL; - if (iov_length(iov, nr_segs) >= reqsize) { - err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); - if (!err) - err = fuse_copy_args(&cs, in->numargs, in->argpages, - (struct fuse_arg *) in->args, 0); + reqsize = in->h.len; + /* If request is too large, reply with an error and restart the read */ + if (iov_length(iov, nr_segs) < reqsize) { + req->out.h.error = -EIO; + /* SETXATTR is special, since it may contain too large data */ + if (in->h.opcode == FUSE_SETXATTR) + req->out.h.error = -E2BIG; + request_end(fc, req); + goto restart; } + spin_unlock(&fuse_lock); + 
fuse_copy_init(&cs, 1, req, iov, nr_segs); + err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); + if (!err) + err = fuse_copy_args(&cs, in->numargs, in->argpages, + (struct fuse_arg *) in->args, 0); fuse_copy_finish(&cs); - spin_lock(&fuse_lock); req->locked = 0; if (!err && req->interrupted) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 9a6075de961f..f156392d019e 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -236,10 +236,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (fc->no_create) goto out; - err = -ENAMETOOLONG; - if (entry->d_name.len > FUSE_NAME_MAX) - goto out; - err = -EINTR; req = fuse_get_request(fc); if (!req) @@ -413,12 +409,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, { struct fuse_conn *fc = get_fuse_conn(dir); unsigned len = strlen(link) + 1; - struct fuse_req *req; - - if (len > FUSE_SYMLINK_MAX) - return -ENAMETOOLONG; - - req = fuse_get_request(fc); + struct fuse_req *req = fuse_get_request(fc); if (!req) return -EINTR; @@ -988,9 +979,6 @@ static int fuse_setxattr(struct dentry *entry, const char *name, struct fuse_setxattr_in inarg; int err; - if (size > FUSE_XATTR_SIZE_MAX) - return -E2BIG; - if (fc->no_setxattr) return -EOPNOTSUPP; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 2d4835e54c90..17fd368559cd 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -21,6 +21,12 @@ /** If more requests are outstanding, then the operation will block */ #define FUSE_MAX_OUTSTANDING 10 +/** Maximum size of data in a write request */ +#define FUSE_MAX_WRITE 4096 + +/** It could be as large as PATH_MAX, but would that have any uses? */ +#define FUSE_NAME_MAX 1024 + /** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem module will check permissions based on the file mode. 
Otherwise no permission checking is done in the kernel */ @@ -108,9 +114,6 @@ struct fuse_out { struct fuse_arg args[3]; }; -struct fuse_req; -struct fuse_conn; - /** * A request to the client */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 3b928a02af04..3580b9e12345 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -485,7 +485,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->max_read = d.max_read; if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages) fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE; - fc->max_write = FUSE_MAX_IN / 2; + fc->max_write = FUSE_MAX_WRITE; err = -ENOMEM; root = get_root_inode(sb, d.rootmode); diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 9d5177c356cc..8f64cc2205b0 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -108,12 +108,8 @@ enum fuse_opcode { FUSE_CREATE = 35 }; -/* Conservative buffer size for the client */ -#define FUSE_MAX_IN 8192 - -#define FUSE_NAME_MAX 1024 -#define FUSE_SYMLINK_MAX 4096 -#define FUSE_XATTR_SIZE_MAX 4096 +/* The read buffer is required to be at least 8k, but may be much larger */ +#define FUSE_MIN_READ_BUFFER 8192 struct fuse_entry_out { __u64 nodeid; /* Inode ID */ -- cgit v1.2.3-71-gd317 From 3ec870d524c9150add120475c8ddcfa50574f98e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 6 Jan 2006 00:19:41 -0800 Subject: [PATCH] fuse: make maximum write data configurable Make the maximum size of write data configurable by the filesystem. The previous fixed 4096 limit only worked on architectures where the page size is less than or equal to this. This change makes writing work on other architectures too, and also lets the filesystem receive bigger write requests in direct_io mode. Normal writes which go through the page cache are still limited to a page-sized chunk per request.
Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/dev.c | 48 ++++++++++++++++++++++++++++++------------------ fs/fuse/fuse_i.h | 6 ++---- fs/fuse/inode.c | 1 - include/linux/fuse.h | 11 +++++++++-- 4 files changed, 41 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 1afdffdf80db..e08ab4702d97 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -148,6 +148,26 @@ void fuse_release_background(struct fuse_req *req) spin_unlock(&fuse_lock); } +static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) +{ + int i; + struct fuse_init_out *arg = &req->misc.init_out; + + if (arg->major != FUSE_KERNEL_VERSION) + fc->conn_error = 1; + else { + fc->minor = arg->minor; + fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; + } + + /* After INIT reply is received other requests can go + out. So do (FUSE_MAX_OUTSTANDING - 1) number of + up()s on outstanding_sem. The last up() is done in + fuse_putback_request() */ + for (i = 1; i < FUSE_MAX_OUTSTANDING; i++) + up(&fc->outstanding_sem); +} + /* * This function is called when a request is finished. Either a reply * has arrived or it was interrupted (and not yet sent) or some error @@ -172,21 +192,9 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) up_read(&fc->sbput_sem); } wake_up(&req->waitq); - if (req->in.h.opcode == FUSE_INIT) { - int i; - - if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION) - fc->conn_error = 1; - - fc->minor = req->misc.init_in_out.minor; - - /* After INIT reply is received other requests can go - out. So do (FUSE_MAX_OUTSTANDING - 1) number of - up()s on outstanding_sem. 
The last up() is done in - fuse_putback_request() */ - for (i = 1; i < FUSE_MAX_OUTSTANDING; i++) - up(&fc->outstanding_sem); - } else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) { + if (req->in.h.opcode == FUSE_INIT) + process_init_reply(fc, req); + else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) { /* Special case for failed iget in CREATE */ u64 nodeid = req->in.h.nodeid; __fuse_get_request(req); @@ -359,7 +367,7 @@ void fuse_send_init(struct fuse_conn *fc) /* This is called from fuse_read_super() so there's guaranteed to be a request available */ struct fuse_req *req = do_get_request(fc); - struct fuse_init_in_out *arg = &req->misc.init_in_out; + struct fuse_init_in *arg = &req->misc.init_in; arg->major = FUSE_KERNEL_VERSION; arg->minor = FUSE_KERNEL_MINOR_VERSION; req->in.h.opcode = FUSE_INIT; @@ -367,8 +375,12 @@ void fuse_send_init(struct fuse_conn *fc) req->in.args[0].size = sizeof(*arg); req->in.args[0].value = arg; req->out.numargs = 1; - req->out.args[0].size = sizeof(*arg); - req->out.args[0].value = arg; + /* Variable length arguement used for backward compatibility + with interface version < 7.5. Rest of init_out is zeroed + by do_get_request(), so a short reply is not a problem */ + req->out.argvar = 1; + req->out.args[0].size = sizeof(struct fuse_init_out); + req->out.args[0].value = &req->misc.init_out; request_send_background(fc, req); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 17fd368559cd..74c8d098a14a 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -21,9 +21,6 @@ /** If more requests are outstanding, then the operation will block */ #define FUSE_MAX_OUTSTANDING 10 -/** Maximum size of data in a write request */ -#define FUSE_MAX_WRITE 4096 - /** It could be as large as PATH_MAX, but would that have any uses? 
*/ #define FUSE_NAME_MAX 1024 @@ -162,7 +159,8 @@ struct fuse_req { union { struct fuse_forget_in forget_in; struct fuse_release_in release_in; - struct fuse_init_in_out init_in_out; + struct fuse_init_in init_in; + struct fuse_init_out init_out; } misc; /** page vector */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 3580b9e12345..e4541869831e 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -485,7 +485,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->max_read = d.max_read; if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages) fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE; - fc->max_write = FUSE_MAX_WRITE; err = -ENOMEM; root = get_root_inode(sb, d.rootmode); diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 8f64cc2205b0..528959c52f1b 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -14,7 +14,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 4 +#define FUSE_KERNEL_MINOR_VERSION 5 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -244,11 +244,18 @@ struct fuse_access_in { __u32 padding; }; -struct fuse_init_in_out { +struct fuse_init_in { __u32 major; __u32 minor; }; +struct fuse_init_out { + __u32 major; + __u32 minor; + __u32 unused[3]; + __u32 max_write; +}; + struct fuse_in_header { __u32 len; __u32 opcode; -- cgit v1.2.3-71-gd317 From 742ec650e9b63ea61891455bb6f76bac37025c78 Mon Sep 17 00:00:00 2001 From: Marko Kohtala Date: Fri, 6 Jan 2006 00:19:44 -0800 Subject: [PATCH] parport: phase fixes Did not move the parport interface properly into IEEE1284_PH_REV_IDLE phase at end of data due to comparing bytes with nibbles. Internal phase IEEE1284_PH_HBUSY_DNA became unused, so remove it. 
Signed-off-by: Marko Kohtala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/media/video/cpia_pp.c | 30 +++++++++----------- drivers/parport/ieee1284_ops.c | 62 ++++++++++++++++++++---------------------- include/linux/parport.h | 1 - 3 files changed, 42 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/video/cpia_pp.c b/drivers/media/video/cpia_pp.c index ddf184f95d80..6861d408f1b3 100644 --- a/drivers/media/video/cpia_pp.c +++ b/drivers/media/video/cpia_pp.c @@ -170,16 +170,9 @@ static size_t cpia_read_nibble (struct parport *port, /* Does the error line indicate end of data? */ if (((i /*& 1*/) == 0) && (parport_read_status(port) & PARPORT_STATUS_ERROR)) { - port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DNA; - DBG("%s: No more nibble data (%d bytes)\n", - port->name, i/2); - - /* Go to reverse idle phase. */ - parport_frob_control (port, - PARPORT_CONTROL_AUTOFD, - PARPORT_CONTROL_AUTOFD); - port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE; - break; + DBG("%s: No more nibble data (%d bytes)\n", + port->name, i/2); + goto end_of_data; } /* Event 7: Set nAutoFd low. */ @@ -227,18 +220,21 @@ static size_t cpia_read_nibble (struct parport *port, byte = nibble; } - i /= 2; /* i is now in bytes */ - if (i == len) { /* Read the last nibble without checking data avail. */ - port = port->physport; - if (parport_read_status (port) & PARPORT_STATUS_ERROR) - port->ieee1284.phase = IEEE1284_PH_HBUSY_DNA; + if (parport_read_status (port) & PARPORT_STATUS_ERROR) { + end_of_data: + /* Go to reverse idle phase. 
*/ + parport_frob_control (port, + PARPORT_CONTROL_AUTOFD, + PARPORT_CONTROL_AUTOFD); + port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE; + } else - port->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL; + port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL; } - return i; + return i/2; } /* CPiA nonstandard "Nibble Stream" mode (2 nibbles per cycle, instead of 1) diff --git a/drivers/parport/ieee1284_ops.c b/drivers/parport/ieee1284_ops.c index ce1e2aad8b10..d6c77658231e 100644 --- a/drivers/parport/ieee1284_ops.c +++ b/drivers/parport/ieee1284_ops.c @@ -165,17 +165,7 @@ size_t parport_ieee1284_read_nibble (struct parport *port, /* Does the error line indicate end of data? */ if (((i & 1) == 0) && (parport_read_status(port) & PARPORT_STATUS_ERROR)) { - port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DNA; - DPRINTK (KERN_DEBUG - "%s: No more nibble data (%d bytes)\n", - port->name, i/2); - - /* Go to reverse idle phase. */ - parport_frob_control (port, - PARPORT_CONTROL_AUTOFD, - PARPORT_CONTROL_AUTOFD); - port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE; - break; + goto end_of_data; } /* Event 7: Set nAutoFd low. */ @@ -225,18 +215,25 @@ size_t parport_ieee1284_read_nibble (struct parport *port, byte = nibble; } - i /= 2; /* i is now in bytes */ - if (i == len) { /* Read the last nibble without checking data avail. */ - port = port->physport; - if (parport_read_status (port) & PARPORT_STATUS_ERROR) - port->ieee1284.phase = IEEE1284_PH_HBUSY_DNA; + if (parport_read_status (port) & PARPORT_STATUS_ERROR) { + end_of_data: + DPRINTK (KERN_DEBUG + "%s: No more nibble data (%d bytes)\n", + port->name, i/2); + + /* Go to reverse idle phase. 
*/ + parport_frob_control (port, + PARPORT_CONTROL_AUTOFD, + PARPORT_CONTROL_AUTOFD); + port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE; + } else - port->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL; + port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL; } - return i; + return i/2; #endif /* IEEE1284 support */ } @@ -256,17 +253,7 @@ size_t parport_ieee1284_read_byte (struct parport *port, /* Data available? */ if (parport_read_status (port) & PARPORT_STATUS_ERROR) { - port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DNA; - DPRINTK (KERN_DEBUG - "%s: No more byte data (%Zd bytes)\n", - port->name, count); - - /* Go to reverse idle phase. */ - parport_frob_control (port, - PARPORT_CONTROL_AUTOFD, - PARPORT_CONTROL_AUTOFD); - port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE; - break; + goto end_of_data; } /* Event 14: Place data bus in high impedance state. */ @@ -318,11 +305,20 @@ size_t parport_ieee1284_read_byte (struct parport *port, if (count == len) { /* Read the last byte without checking data avail. */ - port = port->physport; - if (parport_read_status (port) & PARPORT_STATUS_ERROR) - port->ieee1284.phase = IEEE1284_PH_HBUSY_DNA; + if (parport_read_status (port) & PARPORT_STATUS_ERROR) { + end_of_data: + DPRINTK (KERN_DEBUG + "%s: No more byte data (%Zd bytes)\n", + port->name, count); + + /* Go to reverse idle phase. 
*/ + parport_frob_control (port, + PARPORT_CONTROL_AUTOFD, + PARPORT_CONTROL_AUTOFD); + port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE; + } else - port->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL; + port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL; } return count; diff --git a/include/linux/parport.h b/include/linux/parport.h index d2a4d9e1e6d1..f7ff0b0c4031 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -242,7 +242,6 @@ enum ieee1284_phase { IEEE1284_PH_FWD_IDLE, IEEE1284_PH_TERMINATE, IEEE1284_PH_NEGOTIATION, - IEEE1284_PH_HBUSY_DNA, IEEE1284_PH_REV_IDLE, IEEE1284_PH_HBUSY_DAVAIL, IEEE1284_PH_REV_DATA, -- cgit v1.2.3-71-gd317 From 110bee75d2e03d3b4bcc74743dee5a21fe7b43bd Mon Sep 17 00:00:00 2001 From: Marko Kohtala Date: Fri, 6 Jan 2006 00:19:49 -0800 Subject: [PATCH] parport: DEBUG_PARPORT build fix Add missing "struct" keyword preventing compilation with DEBUG_PARPORT defined. Also add some "const". Signed-off-by: Marko Kohtala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/parport_pc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/parport_pc.h b/include/linux/parport_pc.h index c6f762470879..7e62b3429cdd 100644 --- a/include/linux/parport_pc.h +++ b/include/linux/parport_pc.h @@ -85,7 +85,7 @@ extern __inline__ void dump_parport_state (char *str, struct parport *p) unsigned char ecr = inb (ECONTROL (p)); unsigned char dcr = inb (CONTROL (p)); unsigned char dsr = inb (STATUS (p)); - static char *ecr_modes[] = {"SPP", "PS2", "PPFIFO", "ECP", "xXx", "yYy", "TST", "CFG"}; + static const char *const ecr_modes[] = {"SPP", "PS2", "PPFIFO", "ECP", "xXx", "yYy", "TST", "CFG"}; const struct parport_pc_private *priv = p->physport->private_data; int i; -- cgit v1.2.3-71-gd317 From 81684ee645e15601ec935461d9069a3086179c06 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 6 Jan 2006 00:19:53 -0800 Subject: [PATCH] include/linux/parport_pc.h: 
"extern inline" -> "static inline" "extern inline" doesn't make much sense. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/parport_pc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/parport_pc.h b/include/linux/parport_pc.h index 7e62b3429cdd..1cc0f6b1a49a 100644 --- a/include/linux/parport_pc.h +++ b/include/linux/parport_pc.h @@ -79,7 +79,7 @@ static __inline__ unsigned char parport_pc_read_data(struct parport *p) } #ifdef DEBUG_PARPORT -extern __inline__ void dump_parport_state (char *str, struct parport *p) +static inline void dump_parport_state (char *str, struct parport *p) { /* here's hoping that reading these ports won't side-effect anything underneath */ unsigned char ecr = inb (ECONTROL (p)); -- cgit v1.2.3-71-gd317 From f93ea411b73594f7d144855fd34278bcf34a9afc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 6 Jan 2006 00:19:55 -0800 Subject: [PATCH] jbd: split checkpoint lists Split the checkpoint list of the transaction into two lists. In the first list we keep the buffers that need to be submitted for IO. In the second list are kept buffers that were already submitted and we just have to wait for the IO to complete. This should simplify a handling of checkpoint lists a bit and can eventually be also a performance gain. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/checkpoint.c | 418 ++++++++++++++++++++++++++++++---------------------- include/linux/jbd.h | 8 +- 2 files changed, 248 insertions(+), 178 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 014a51fd00d7..cb3cef525c3b 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -24,29 +24,75 @@ #include /* - * Unlink a buffer from a transaction. + * Unlink a buffer from a transaction checkpoint list. * * Called with j_list_lock held. 
*/ -static inline void __buffer_unlink(struct journal_head *jh) +static void __buffer_unlink_first(struct journal_head *jh) { transaction_t *transaction; transaction = jh->b_cp_transaction; - jh->b_cp_transaction = NULL; jh->b_cpnext->b_cpprev = jh->b_cpprev; jh->b_cpprev->b_cpnext = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) + if (transaction->t_checkpoint_list == jh) { transaction->t_checkpoint_list = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) - transaction->t_checkpoint_list = NULL; + if (transaction->t_checkpoint_list == jh) + transaction->t_checkpoint_list = NULL; + } +} + +/* + * Unlink a buffer from a transaction checkpoint(io) list. + * + * Called with j_list_lock held. + */ + +static inline void __buffer_unlink(struct journal_head *jh) +{ + transaction_t *transaction; + + transaction = jh->b_cp_transaction; + + __buffer_unlink_first(jh); + if (transaction->t_checkpoint_io_list == jh) { + transaction->t_checkpoint_io_list = jh->b_cpnext; + if (transaction->t_checkpoint_io_list == jh) + transaction->t_checkpoint_io_list = NULL; + } +} + +/* + * Move a buffer from the checkpoint list to the checkpoint io list + * + * Called with j_list_lock held + */ + +static inline void __buffer_relink_io(struct journal_head *jh) +{ + transaction_t *transaction; + + transaction = jh->b_cp_transaction; + __buffer_unlink_first(jh); + + if (!transaction->t_checkpoint_io_list) { + jh->b_cpnext = jh->b_cpprev = jh; + } else { + jh->b_cpnext = transaction->t_checkpoint_io_list; + jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev; + jh->b_cpprev->b_cpnext = jh; + jh->b_cpnext->b_cpprev = jh; + } + transaction->t_checkpoint_io_list = jh; } /* * Try to release a checkpointed buffer from its transaction. - * Returns 1 if we released it. + * Returns 1 if we released it and 2 if we also released the + * whole transaction. 
+ * * Requires j_list_lock * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it */ @@ -57,12 +103,11 @@ static int __try_to_free_cp_buf(struct journal_head *jh) if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { JBUFFER_TRACE(jh, "remove from checkpoint list"); - __journal_remove_checkpoint(jh); + ret = __journal_remove_checkpoint(jh) + 1; jbd_unlock_bh_state(bh); journal_remove_journal_head(bh); BUFFER_TRACE(bh, "release"); __brelse(bh); - ret = 1; } else { jbd_unlock_bh_state(bh); } @@ -117,83 +162,53 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) } /* - * Clean up a transaction's checkpoint list. - * - * We wait for any pending IO to complete and make sure any clean - * buffers are removed from the transaction. - * - * Return 1 if we performed any actions which might have destroyed the - * checkpoint. (journal_remove_checkpoint() deletes the transaction when - * the last checkpoint buffer is cleansed) + * Clean up transaction's list of buffers submitted for io. + * We wait for any pending IO to complete and remove any clean + * buffers. Note that we take the buffers in the opposite ordering + * from the one in which they were submitted for IO. * * Called with j_list_lock held. 
*/ -static int __cleanup_transaction(journal_t *journal, transaction_t *transaction) + +static void __wait_cp_io(journal_t *journal, transaction_t *transaction) { - struct journal_head *jh, *next_jh, *last_jh; + struct journal_head *jh; struct buffer_head *bh; - int ret = 0; - - assert_spin_locked(&journal->j_list_lock); - jh = transaction->t_checkpoint_list; - if (!jh) - return 0; - - last_jh = jh->b_cpprev; - next_jh = jh; - do { - jh = next_jh; + tid_t this_tid; + int released = 0; + + this_tid = transaction->t_tid; +restart: + /* Didn't somebody clean up the transaction in the meanwhile */ + if (journal->j_checkpoint_transactions != transaction || + transaction->t_tid != this_tid) + return; + while (!released && transaction->t_checkpoint_io_list) { + jh = transaction->t_checkpoint_io_list; bh = jh2bh(jh); + if (!jbd_trylock_bh_state(bh)) { + jbd_sync_bh(journal, bh); + spin_lock(&journal->j_list_lock); + goto restart; + } if (buffer_locked(bh)) { atomic_inc(&bh->b_count); spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); __brelse(bh); - goto out_return_1; - } - - /* - * This is foul - */ - if (!jbd_trylock_bh_state(bh)) { - jbd_sync_bh(journal, bh); - goto out_return_1; + spin_lock(&journal->j_list_lock); + goto restart; } - - if (jh->b_transaction != NULL) { - transaction_t *t = jh->b_transaction; - tid_t tid = t->t_tid; - - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - log_start_commit(journal, tid); - log_wait_commit(journal, tid); - goto out_return_1; - } - /* - * AKPM: I think the buffer_jbddirty test is redundant - it - * shouldn't have NULL b_transaction? 
+ * Now in whatever state the buffer currently is, we know that + * it has been written out and so we can drop it from the list */ - next_jh = jh->b_cpnext; - if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) { - BUFFER_TRACE(bh, "remove from checkpoint"); - __journal_remove_checkpoint(jh); - jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); - __brelse(bh); - ret = 1; - } else { - jbd_unlock_bh_state(bh); - } - } while (jh != last_jh); - - return ret; -out_return_1: - spin_lock(&journal->j_list_lock); - return 1; + released = __journal_remove_checkpoint(jh); + jbd_unlock_bh_state(bh); + } } #define NR_BATCH 64 @@ -203,9 +218,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; - spin_unlock(&journal->j_list_lock); ll_rw_block(SWRITE, *batch_count, bhs); - spin_lock(&journal->j_list_lock); for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; clear_buffer_jwrite(bh); @@ -221,19 +234,46 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) * Return 1 if something happened which requires us to abort the current * scan of the checkpoint list. * - * Called with j_list_lock held. 
+ * Called with j_list_lock held and drops it if 1 is returned * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it */ -static int __flush_buffer(journal_t *journal, struct journal_head *jh, - struct buffer_head **bhs, int *batch_count, - int *drop_count) +static int __process_buffer(journal_t *journal, struct journal_head *jh, + struct buffer_head **bhs, int *batch_count) { struct buffer_head *bh = jh2bh(jh); int ret = 0; - if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) { - J_ASSERT_JH(jh, jh->b_transaction == NULL); + if (buffer_locked(bh)) { + get_bh(bh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + wait_on_buffer(bh); + /* the journal_head may have gone by now */ + BUFFER_TRACE(bh, "brelse"); + put_bh(bh); + ret = 1; + } + else if (jh->b_transaction != NULL) { + transaction_t *t = jh->b_transaction; + tid_t tid = t->t_tid; + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + log_start_commit(journal, tid); + log_wait_commit(journal, tid); + ret = 1; + } + else if (!buffer_dirty(bh)) { + J_ASSERT_JH(jh, !buffer_jbddirty(bh)); + BUFFER_TRACE(bh, "remove from checkpoint"); + __journal_remove_checkpoint(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + journal_remove_journal_head(bh); + put_bh(bh); + ret = 1; + } + else { /* * Important: we are about to write the buffer, and * possibly block, while still holding the journal lock. @@ -246,45 +286,30 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh, J_ASSERT_BH(bh, !buffer_jwrite(bh)); set_buffer_jwrite(bh); bhs[*batch_count] = bh; + __buffer_relink_io(jh); jbd_unlock_bh_state(bh); (*batch_count)++; if (*batch_count == NR_BATCH) { + spin_unlock(&journal->j_list_lock); __flush_batch(journal, bhs, batch_count); ret = 1; } - } else { - int last_buffer = 0; - if (jh->b_cpnext == jh) { - /* We may be about to drop the transaction. Tell the - * caller that the lists have changed. 
- */ - last_buffer = 1; - } - if (__try_to_free_cp_buf(jh)) { - (*drop_count)++; - ret = last_buffer; - } } return ret; } /* - * Perform an actual checkpoint. We don't write out only enough to - * satisfy the current blocked requests: rather we submit a reasonably - * sized chunk of the outstanding data to disk at once for - * efficiency. __log_wait_for_space() will retry if we didn't free enough. + * Perform an actual checkpoint. We take the first transaction on the + * list of transactions to be checkpointed and send all its buffers + * to disk. We submit larger chunks of data at once. * - * However, we _do_ take into account the amount requested so that once - * the IO has been queued, we can return as soon as enough of it has - * completed to disk. - * * The journal should be locked before calling this function. */ int log_do_checkpoint(journal_t *journal) { + transaction_t *transaction; + tid_t this_tid; int result; - int batch_count = 0; - struct buffer_head *bhs[NR_BATCH]; jbd_debug(1, "Start checkpoint\n"); @@ -299,79 +324,70 @@ int log_do_checkpoint(journal_t *journal) return result; /* - * OK, we need to start writing disk blocks. Try to free up a - * quarter of the log in a single checkpoint if we can. + * OK, we need to start writing disk blocks. Take one transaction + * and write it. */ + spin_lock(&journal->j_list_lock); + if (!journal->j_checkpoint_transactions) + goto out; + transaction = journal->j_checkpoint_transactions; + this_tid = transaction->t_tid; +restart: /* - * AKPM: check this code. I had a feeling a while back that it - * degenerates into a busy loop at unmount time. + * If someone cleaned up this transaction while we slept, we're + * done (maybe it's a new transaction, but it fell at the same + * address). 
*/ - spin_lock(&journal->j_list_lock); - while (journal->j_checkpoint_transactions) { - transaction_t *transaction; - struct journal_head *jh, *last_jh, *next_jh; - int drop_count = 0; - int cleanup_ret, retry = 0; - tid_t this_tid; - - transaction = journal->j_checkpoint_transactions; - this_tid = transaction->t_tid; - jh = transaction->t_checkpoint_list; - last_jh = jh->b_cpprev; - next_jh = jh; - do { + if (journal->j_checkpoint_transactions == transaction || + transaction->t_tid == this_tid) { + int batch_count = 0; + struct buffer_head *bhs[NR_BATCH]; + struct journal_head *jh; + int retry = 0; + + while (!retry && transaction->t_checkpoint_list) { struct buffer_head *bh; - jh = next_jh; - next_jh = jh->b_cpnext; + jh = transaction->t_checkpoint_list; bh = jh2bh(jh); if (!jbd_trylock_bh_state(bh)) { jbd_sync_bh(journal, bh); - spin_lock(&journal->j_list_lock); retry = 1; break; } - retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); - if (cond_resched_lock(&journal->j_list_lock)) { + retry = __process_buffer(journal, jh, bhs, + &batch_count); + if (!retry && + lock_need_resched(&journal->j_list_lock)) { + spin_unlock(&journal->j_list_lock); retry = 1; break; } - } while (jh != last_jh && !retry); + } if (batch_count) { + if (!retry) { + spin_unlock(&journal->j_list_lock); + retry = 1; + } __flush_batch(journal, bhs, &batch_count); - retry = 1; } + if (retry) { + spin_lock(&journal->j_list_lock); + goto restart; + } /* - * If someone cleaned up this transaction while we slept, we're - * done - */ - if (journal->j_checkpoint_transactions != transaction) - break; - if (retry) - continue; - /* - * Maybe it's a new transaction, but it fell at the same - * address - */ - if (transaction->t_tid != this_tid) - continue; - /* - * We have walked the whole transaction list without - * finding anything to write to disk. We had better be - * able to make some progress or we are in trouble. 
+ * Now we have cleaned up the first transaction's checkpoint + * list. Let's clean up the second one. */ - cleanup_ret = __cleanup_transaction(journal, transaction); - J_ASSERT(drop_count != 0 || cleanup_ret != 0); - if (journal->j_checkpoint_transactions != transaction) - break; + __wait_cp_io(journal, transaction); } +out: spin_unlock(&journal->j_list_lock); result = cleanup_journal_tail(journal); if (result < 0) return result; - return 0; } @@ -455,6 +471,53 @@ int cleanup_journal_tail(journal_t *journal) /* Checkpoint list management */ +/* + * journal_clean_one_cp_list + * + * Find all the written-back checkpoint buffers in the given list and release them. + * + * Called with the journal locked. + * Called with j_list_lock held. + * Returns number of bufers reaped (for debug) + */ + +static int journal_clean_one_cp_list(struct journal_head *jh, int *released) +{ + struct journal_head *last_jh; + struct journal_head *next_jh = jh; + int ret, freed = 0; + + *released = 0; + if (!jh) + return 0; + + last_jh = jh->b_cpprev; + do { + jh = next_jh; + next_jh = jh->b_cpnext; + /* Use trylock because of the ranking */ + if (jbd_trylock_bh_state(jh2bh(jh))) { + ret = __try_to_free_cp_buf(jh); + if (ret) { + freed++; + if (ret == 2) { + *released = 1; + return freed; + } + } + } + /* + * This function only frees up some memory if possible so we + * dont have an obligation to finish processing. Bail out if + * preemption requested: + */ + if (need_resched()) + return freed; + } while (jh != last_jh); + + return freed; +} + /* * journal_clean_checkpoint_list * @@ -462,46 +525,38 @@ int cleanup_journal_tail(journal_t *journal) * * Called with the journal locked. * Called with j_list_lock held. 
- * Returns number of bufers reaped (for debug) + * Returns number of buffers reaped (for debug) */ int __journal_clean_checkpoint_list(journal_t *journal) { transaction_t *transaction, *last_transaction, *next_transaction; - int ret = 0; + int ret = 0, released; transaction = journal->j_checkpoint_transactions; - if (transaction == 0) + if (!transaction) goto out; last_transaction = transaction->t_cpprev; next_transaction = transaction; do { - struct journal_head *jh; - transaction = next_transaction; next_transaction = transaction->t_cpnext; - jh = transaction->t_checkpoint_list; - if (jh) { - struct journal_head *last_jh = jh->b_cpprev; - struct journal_head *next_jh = jh; - - do { - jh = next_jh; - next_jh = jh->b_cpnext; - /* Use trylock because of the ranknig */ - if (jbd_trylock_bh_state(jh2bh(jh))) - ret += __try_to_free_cp_buf(jh); - /* - * This function only frees up some memory - * if possible so we dont have an obligation - * to finish processing. Bail out if preemption - * requested: - */ - if (need_resched()) - goto out; - } while (jh != last_jh); - } + ret += journal_clean_one_cp_list(transaction-> + t_checkpoint_list, &released); + if (need_resched()) + goto out; + if (released) + continue; + /* + * It is essential that we are as careful as in the case of + * t_checkpoint_list with removing the buffer from the list as + * we can possibly see not yet submitted buffers on io_list + */ + ret += journal_clean_one_cp_list(transaction-> + t_checkpoint_io_list, &released); + if (need_resched()) + goto out; } while (transaction != last_transaction); out: return ret; @@ -516,18 +571,22 @@ out: * buffer updates committed in that transaction have safely been stored * elsewhere on disk. 
To achieve this, all of the buffers in a * transaction need to be maintained on the transaction's checkpoint - * list until they have been rewritten, at which point this function is + * lists until they have been rewritten, at which point this function is * called to remove the buffer from the existing transaction's - * checkpoint list. + * checkpoint lists. + * + * The function returns 1 if it frees the transaction, 0 otherwise. * * This function is called with the journal locked. * This function is called with j_list_lock held. + * This function is called with jbd_lock_bh_state(jh2bh(jh)) */ -void __journal_remove_checkpoint(struct journal_head *jh) +int __journal_remove_checkpoint(struct journal_head *jh) { transaction_t *transaction; journal_t *journal; + int ret = 0; JBUFFER_TRACE(jh, "entry"); @@ -538,8 +597,10 @@ void __journal_remove_checkpoint(struct journal_head *jh) journal = transaction->t_journal; __buffer_unlink(jh); + jh->b_cp_transaction = NULL; - if (transaction->t_checkpoint_list != NULL) + if (transaction->t_checkpoint_list != NULL || + transaction->t_checkpoint_io_list != NULL) goto out; JBUFFER_TRACE(jh, "transaction has no more buffers"); @@ -565,8 +626,10 @@ void __journal_remove_checkpoint(struct journal_head *jh) /* Just in case anybody was waiting for more transactions to be checkpointed... 
*/ wake_up(&journal->j_wait_logspace); + ret = 1; out: JBUFFER_TRACE(jh, "exit"); + return ret; } /* @@ -628,6 +691,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) J_ASSERT(transaction->t_shadow_list == NULL); J_ASSERT(transaction->t_log_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL); + J_ASSERT(transaction->t_checkpoint_io_list == NULL); J_ASSERT(transaction->t_updates == 0); J_ASSERT(journal->j_committing_transaction != transaction); J_ASSERT(journal->j_running_transaction != transaction); diff --git a/include/linux/jbd.h b/include/linux/jbd.h index dcde7adfdce5..558cb4c26ec9 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -497,6 +497,12 @@ struct transaction_s */ struct journal_head *t_checkpoint_list; + /* + * Doubly-linked circular list of all buffers submitted for IO while + * checkpointing. [j_list_lock] + */ + struct journal_head *t_checkpoint_io_list; + /* * Doubly-linked circular list of temporary buffers currently undergoing * IO in the log [j_list_lock] @@ -843,7 +849,7 @@ extern void journal_commit_transaction(journal_t *); /* Checkpoint list management */ int __journal_clean_checkpoint_list(journal_t *journal); -void __journal_remove_checkpoint(struct journal_head *); +int __journal_remove_checkpoint(struct journal_head *); void __journal_insert_checkpoint(struct journal_head *, transaction_t *); /* Buffer IO */ -- cgit v1.2.3-71-gd317 From a334de28665b14f0a33df82699fa9a78cfeedf31 Mon Sep 17 00:00:00 2001 From: David Shaw Date: Fri, 6 Jan 2006 00:19:58 -0800 Subject: [PATCH] knfsd: check error status from vfs_getattr and i_op->fsync Both vfs_getattr and i_op->fsync return error statuses which nfsd was largely ignoring. This was noticed when exporting directories using fuse. This patch cleans up most of the offences, which involves moving the call to vfs_getattr out of the xdr encoding routines (where it is too late to report an error) into the main NFS procedure handling routines.
There is still a call to vfs_getattr (related to the ACL code) where the status is ignored, and calls to nfsd_sync_dir don't check the return status either. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/nfs3proc.c | 11 +++++++++-- fs/nfsd/nfs3xdr.c | 47 ++++++++++++++++++++++++---------------------- fs/nfsd/nfsxdr.c | 48 +++++++++++++++++++++++------------------------ fs/nfsd/vfs.c | 20 +++++++++++++------- include/linux/nfsd/xdr.h | 3 +++ include/linux/nfsd/xdr3.h | 1 + 6 files changed, 75 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 041380fe667b..6d2dfed1de08 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -56,13 +56,20 @@ static int nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, struct nfsd3_attrstat *resp) { - int nfserr; + int err, nfserr; dprintk("nfsd: GETATTR(3) %s\n", - SVCFH_fmt(&argp->fh)); + SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + if (nfserr) + RETURN_STATUS(nfserr); + + err = vfs_getattr(resp->fh.fh_export->ex_mnt, + resp->fh.fh_dentry, &resp->stat); + nfserr = nfserrno(err); + RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 9147b8524d05..243d94b9653a 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -154,37 +154,34 @@ decode_sattr3(u32 *p, struct iattr *iap) } static inline u32 * -encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp, + struct kstat *stat) { - struct vfsmount *mnt = fhp->fh_export->ex_mnt; struct dentry *dentry = fhp->fh_dentry; - struct kstat stat; struct timespec time; - vfs_getattr(mnt, dentry, &stat); - - *p++ = htonl(nfs3_ftypes[(stat.mode & S_IFMT) >> 12]); - *p++ = htonl((u32) stat.mode); - *p++ = htonl((u32) stat.nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid)); - *p++ =
htonl((u32) nfsd_rgid(rqstp, stat.gid)); - if (S_ISLNK(stat.mode) && stat.size > NFS3_MAXPATHLEN) { + *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); + *p++ = htonl((u32) stat->mode); + *p++ = htonl((u32) stat->nlink); + *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); + *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); } else { - p = xdr_encode_hyper(p, (u64) stat.size); + p = xdr_encode_hyper(p, (u64) stat->size); } - p = xdr_encode_hyper(p, ((u64)stat.blocks) << 9); - *p++ = htonl((u32) MAJOR(stat.rdev)); - *p++ = htonl((u32) MINOR(stat.rdev)); + p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9); + *p++ = htonl((u32) MAJOR(stat->rdev)); + *p++ = htonl((u32) MINOR(stat->rdev)); if (is_fsid(fhp, rqstp->rq_reffh)) p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid); else - p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat.dev)); - p = xdr_encode_hyper(p, (u64) stat.ino); - p = encode_time3(p, &stat.atime); + p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat->dev)); + p = xdr_encode_hyper(p, (u64) stat->ino); + p = encode_time3(p, &stat->atime); lease_get_mtime(dentry->d_inode, &time); p = encode_time3(p, &time); - p = encode_time3(p, &stat.ctime); + p = encode_time3(p, &stat->ctime); return p; } @@ -232,8 +229,14 @@ encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) { struct dentry *dentry = fhp->fh_dentry; if (dentry && dentry->d_inode != NULL) { - *p++ = xdr_one; /* attributes follow */ - return encode_fattr3(rqstp, p, fhp); + int err; + struct kstat stat; + + err = vfs_getattr(fhp->fh_export->ex_mnt, dentry, &stat); + if (!err) { + *p++ = xdr_one; /* attributes follow */ + return encode_fattr3(rqstp, p, fhp, &stat); + } } *p++ = xdr_zero; return p; @@ -616,7 +619,7 @@ nfs3svc_encode_attrstat(struct svc_rqst *rqstp, u32 *p, struct nfsd3_attrstat *resp) { if (resp->status == 0) - p = encode_fattr3(rqstp, p, &resp->fh); + p = 
encode_fattr3(rqstp, p, &resp->fh, &resp->stat); return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index b45999ff33e6..aa7bb41b293d 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -152,46 +152,44 @@ decode_sattr(u32 *p, struct iattr *iap) } static inline u32 * -encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp, + struct kstat *stat) { - struct vfsmount *mnt = fhp->fh_export->ex_mnt; struct dentry *dentry = fhp->fh_dentry; - struct kstat stat; int type; struct timespec time; - vfs_getattr(mnt, dentry, &stat); - type = (stat.mode & S_IFMT); + type = (stat->mode & S_IFMT); *p++ = htonl(nfs_ftypes[type >> 12]); - *p++ = htonl((u32) stat.mode); - *p++ = htonl((u32) stat.nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid)); + *p++ = htonl((u32) stat->mode); + *p++ = htonl((u32) stat->nlink); + *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); + *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); - if (S_ISLNK(type) && stat.size > NFS_MAXPATHLEN) { + if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { *p++ = htonl(NFS_MAXPATHLEN); } else { - *p++ = htonl((u32) stat.size); + *p++ = htonl((u32) stat->size); } - *p++ = htonl((u32) stat.blksize); + *p++ = htonl((u32) stat->blksize); if (S_ISCHR(type) || S_ISBLK(type)) - *p++ = htonl(new_encode_dev(stat.rdev)); + *p++ = htonl(new_encode_dev(stat->rdev)); else *p++ = htonl(0xffffffff); - *p++ = htonl((u32) stat.blocks); + *p++ = htonl((u32) stat->blocks); if (is_fsid(fhp, rqstp->rq_reffh)) *p++ = htonl((u32) fhp->fh_export->ex_fsid); else - *p++ = htonl(new_encode_dev(stat.dev)); - *p++ = htonl((u32) stat.ino); - *p++ = htonl((u32) stat.atime.tv_sec); - *p++ = htonl(stat.atime.tv_nsec ? 
stat.atime.tv_nsec / 1000 : 0); + *p++ = htonl(new_encode_dev(stat->dev)); + *p++ = htonl((u32) stat->ino); + *p++ = htonl((u32) stat->atime.tv_sec); + *p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0); lease_get_mtime(dentry->d_inode, &time); *p++ = htonl((u32) time.tv_sec); *p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); - *p++ = htonl((u32) stat.ctime.tv_sec); - *p++ = htonl(stat.ctime.tv_nsec ? stat.ctime.tv_nsec / 1000 : 0); + *p++ = htonl((u32) stat->ctime.tv_sec); + *p++ = htonl(stat->ctime.tv_nsec ? stat->ctime.tv_nsec / 1000 : 0); return p; } @@ -199,7 +197,9 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) /* Helper function for NFSv2 ACL code */ u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) { - return encode_fattr(rqstp, p, fhp); + struct kstat stat; + vfs_getattr(fhp->fh_export->ex_mnt, fhp->fh_dentry, &stat); + return encode_fattr(rqstp, p, fhp, &stat); } /* @@ -394,7 +394,7 @@ int nfssvc_encode_attrstat(struct svc_rqst *rqstp, u32 *p, struct nfsd_attrstat *resp) { - p = encode_fattr(rqstp, p, &resp->fh); + p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); return xdr_ressize_check(rqstp, p); } @@ -403,7 +403,7 @@ nfssvc_encode_diropres(struct svc_rqst *rqstp, u32 *p, struct nfsd_diropres *resp) { p = encode_fh(p, &resp->fh); - p = encode_fattr(rqstp, p, &resp->fh); + p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); return xdr_ressize_check(rqstp, p); } @@ -428,7 +428,7 @@ int nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p, struct nfsd_readres *resp) { - p = encode_fattr(rqstp, p, &resp->fh); + p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); *p++ = htonl(resp->count); xdr_ressize_check(rqstp, p); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index af7c3c3074b0..f83ab4cf4265 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -717,27 +717,33 @@ nfsd_close(struct file *filp) * As this calls fsync (not fdatasync) there is no need for a write_inode * after it. 
*/ -static inline void nfsd_dosync(struct file *filp, struct dentry *dp, - struct file_operations *fop) +static inline int nfsd_dosync(struct file *filp, struct dentry *dp, + struct file_operations *fop) { struct inode *inode = dp->d_inode; int (*fsync) (struct file *, struct dentry *, int); + int err = nfs_ok; filemap_fdatawrite(inode->i_mapping); if (fop && (fsync = fop->fsync)) - fsync(filp, dp, 0); + err=fsync(filp, dp, 0); filemap_fdatawait(inode->i_mapping); + + return nfserrno(err); } -static void +static int nfsd_sync(struct file *filp) { + int err; struct inode *inode = filp->f_dentry->d_inode; dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name); down(&inode->i_sem); - nfsd_dosync(filp, filp->f_dentry, filp->f_op); + err=nfsd_dosync(filp, filp->f_dentry, filp->f_op); up(&inode->i_sem); + + return err; } void @@ -962,7 +968,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (inode->i_state & I_DIRTY) { dprintk("nfsd: write sync %d\n", current->pid); - nfsd_sync(file); + err=nfsd_sync(file); } #if 0 wake_up(&inode->i_wait); @@ -1066,7 +1072,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; if (EX_ISSYNC(fhp->fh_export)) { if (file->f_op && file->f_op->fsync) { - nfsd_sync(file); + err = nfsd_sync(file); } else { err = nfserr_notsupp; } diff --git a/include/linux/nfsd/xdr.h b/include/linux/nfsd/xdr.h index 130d4f588a37..3f4f7142bbe3 100644 --- a/include/linux/nfsd/xdr.h +++ b/include/linux/nfsd/xdr.h @@ -88,10 +88,12 @@ struct nfsd_readdirargs { struct nfsd_attrstat { struct svc_fh fh; + struct kstat stat; }; struct nfsd_diropres { struct svc_fh fh; + struct kstat stat; }; struct nfsd_readlinkres { @@ -101,6 +103,7 @@ struct nfsd_readlinkres { struct nfsd_readres { struct svc_fh fh; unsigned long count; + struct kstat stat; }; struct nfsd_readdirres { diff --git a/include/linux/nfsd/xdr3.h b/include/linux/nfsd/xdr3.h index 3c2a71b43bac..a4322741f8b9 100644 --- a/include/linux/nfsd/xdr3.h +++ 
b/include/linux/nfsd/xdr3.h @@ -126,6 +126,7 @@ struct nfsd3_setaclargs { struct nfsd3_attrstat { __u32 status; struct svc_fh fh; + struct kstat stat; }; /* LOOKUP, CREATE, MKDIR, SYMLINK, MKNOD */ -- cgit v1.2.3-71-gd317 From 6da487dcc0c6f4c827779687a20016efeffc4d60 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 6 Jan 2006 00:20:07 -0800 Subject: [PATCH] device-mapper ioctl: add skip lock_fs flag Add ioctl DM_SKIP_LOCKFS_FLAG for userspace to request that lock_fs is bypassed when suspending a device. There's no change to the behaviour of existing code that doesn't know about the new flag. Signed-off-by: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/dm-ioctl.c | 11 +++++++++-- include/linux/dm-ioctl.h | 11 ++++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index dbc07afd4462..561bda5011e0 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -693,14 +693,18 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size) static int do_suspend(struct dm_ioctl *param) { int r = 0; + int do_lockfs = 1; struct mapped_device *md; md = find_device(param); if (!md) return -ENXIO; + if (param->flags & DM_SKIP_LOCKFS_FLAG) + do_lockfs = 0; + if (!dm_suspended(md)) - r = dm_suspend(md, 1); + r = dm_suspend(md, do_lockfs); if (!r) r = __dev_status(md, param); @@ -712,6 +716,7 @@ static int do_suspend(struct dm_ioctl *param) static int do_resume(struct dm_ioctl *param) { int r = 0; + int do_lockfs = 1; struct hash_cell *hc; struct mapped_device *md; struct dm_table *new_map; @@ -737,8 +742,10 @@ static int do_resume(struct dm_ioctl *param) /* Do we need to load a new map ? 
*/ if (new_map) { /* Suspend if it isn't already suspended */ + if (param->flags & DM_SKIP_LOCKFS_FLAG) + do_lockfs = 0; if (!dm_suspended(md)) - dm_suspend(md, 1); + dm_suspend(md, do_lockfs); r = dm_swap_table(md, new_map); if (r) { diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index f5eb6b6cd109..fa75ba0d635e 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -272,9 +272,9 @@ typedef char ioctl_struct[308]; #define DM_TARGET_MSG _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 4 +#define DM_VERSION_MINOR 5 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2005-01-12)" +#define DM_VERSION_EXTRA "-ioctl (2005-10-04)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ @@ -301,8 +301,13 @@ typedef char ioctl_struct[308]; #define DM_BUFFER_FULL_FLAG (1 << 8) /* Out */ /* - * Set this to improve performance when you aren't going to use open_count + * Set this to improve performance when you aren't going to use open_count. */ #define DM_SKIP_BDGET_FLAG (1 << 9) /* In */ +/* + * Set this to avoid attempting to freeze any filesystem when suspending. + */ +#define DM_SKIP_LOCKFS_FLAG (1 << 10) /* In */ + #endif /* _LINUX_DM_IOCTL_H */ -- cgit v1.2.3-71-gd317 From 17999be4aa408e7ff3b9d32c735649676567a3cd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:12 -0800 Subject: [PATCH] md: improve raid1 "IO Barrier" concept raid1 needs to put up a barrier to new requests while it does resync or other background recovery. The code for this is currently open-coded, slightly obscured by its use of two waitqueues, and not documented. This patch gathers all the related code into 4 functions, and includes a comment which (hopefully) explains what is happening.
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid1.c | 167 ++++++++++++++++++++++++--------------------- include/linux/raid/raid1.h | 4 +- 2 files changed, 91 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 229d7b204297..f5204149ab65 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -51,6 +51,8 @@ static mdk_personality_t raid1_personality; static void unplug_slaves(mddev_t *mddev); +static void allow_barrier(conf_t *conf); +static void lower_barrier(conf_t *conf); static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) { @@ -160,20 +162,13 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio) static inline void free_r1bio(r1bio_t *r1_bio) { - unsigned long flags; - conf_t *conf = mddev_to_conf(r1_bio->mddev); /* * Wake up any possible resync thread that waits for the device * to go idle. */ - spin_lock_irqsave(&conf->resync_lock, flags); - if (!--conf->nr_pending) { - wake_up(&conf->wait_idle); - wake_up(&conf->wait_resume); - } - spin_unlock_irqrestore(&conf->resync_lock, flags); + allow_barrier(conf); put_all_bios(conf, r1_bio); mempool_free(r1_bio, conf->r1bio_pool); @@ -182,22 +177,10 @@ static inline void free_r1bio(r1bio_t *r1_bio) static inline void put_buf(r1bio_t *r1_bio) { conf_t *conf = mddev_to_conf(r1_bio->mddev); - unsigned long flags; mempool_free(r1_bio, conf->r1buf_pool); - spin_lock_irqsave(&conf->resync_lock, flags); - if (!conf->barrier) - BUG(); - --conf->barrier; - wake_up(&conf->wait_resume); - wake_up(&conf->wait_idle); - - if (!--conf->nr_pending) { - wake_up(&conf->wait_idle); - wake_up(&conf->wait_resume); - } - spin_unlock_irqrestore(&conf->resync_lock, flags); + lower_barrier(conf); } static void reschedule_retry(r1bio_t *r1_bio) @@ -210,6 +193,7 @@ static void reschedule_retry(r1bio_t *r1_bio) list_add(&r1_bio->retry_list, &conf->retry_list); spin_unlock_irqrestore(&conf->device_lock, flags); 
+ wake_up(&conf->wait_barrier); md_wakeup_thread(mddev->thread); } @@ -593,30 +577,83 @@ static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk, return ret; } -/* - * Throttle resync depth, so that we can both get proper overlapping of - * requests, but are still able to handle normal requests quickly. +/* Barriers.... + * Sometimes we need to suspend IO while we do something else, + * either some resync/recovery, or reconfigure the array. + * To do this we raise a 'barrier'. + * The 'barrier' is a counter that can be raised multiple times + * to count how many activities are happening which preclude + * normal IO. + * We can only raise the barrier if there is no pending IO. + * i.e. if nr_pending == 0. + * We choose only to raise the barrier if no-one is waiting for the + * barrier to go down. This means that as soon as an IO request + * is ready, no other operations which require a barrier will start + * until the IO request has had a chance. + * + * So: regular IO calls 'wait_barrier'. When that returns there + * is no backgroup IO happening, It must arrange to call + * allow_barrier when it has finished its IO. + * backgroup IO calls must call raise_barrier. Once that returns + * there is no normal IO happeing. It must arrange to call + * lower_barrier when the particular background IO completes. 
*/ #define RESYNC_DEPTH 32 -static void device_barrier(conf_t *conf, sector_t sect) +static void raise_barrier(conf_t *conf) { spin_lock_irq(&conf->resync_lock); - wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), - conf->resync_lock, raid1_unplug(conf->mddev->queue)); - - if (!conf->barrier++) { - wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, - conf->resync_lock, raid1_unplug(conf->mddev->queue)); - if (conf->nr_pending) - BUG(); + + /* Wait until no block IO is waiting */ + wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, + conf->resync_lock, + raid1_unplug(conf->mddev->queue)); + + /* block any new IO from starting */ + conf->barrier++; + + /* No wait for all pending IO to complete */ + wait_event_lock_irq(conf->wait_barrier, + !conf->nr_pending && conf->barrier < RESYNC_DEPTH, + conf->resync_lock, + raid1_unplug(conf->mddev->queue)); + + spin_unlock_irq(&conf->resync_lock); +} + +static void lower_barrier(conf_t *conf) +{ + unsigned long flags; + spin_lock_irqsave(&conf->resync_lock, flags); + conf->barrier--; + spin_unlock_irqrestore(&conf->resync_lock, flags); + wake_up(&conf->wait_barrier); +} + +static void wait_barrier(conf_t *conf) +{ + spin_lock_irq(&conf->resync_lock); + if (conf->barrier) { + conf->nr_waiting++; + wait_event_lock_irq(conf->wait_barrier, !conf->barrier, + conf->resync_lock, + raid1_unplug(conf->mddev->queue)); + conf->nr_waiting--; } - wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH, - conf->resync_lock, raid1_unplug(conf->mddev->queue)); - conf->next_resync = sect; + conf->nr_pending++; spin_unlock_irq(&conf->resync_lock); } +static void allow_barrier(conf_t *conf) +{ + unsigned long flags; + spin_lock_irqsave(&conf->resync_lock, flags); + conf->nr_pending--; + spin_unlock_irqrestore(&conf->resync_lock, flags); + wake_up(&conf->wait_barrier); +} + + /* duplicate the data pages for behind I/O */ static struct page **alloc_behind_pages(struct bio *bio) { @@ -678,10 +715,7 
@@ static int make_request(request_queue_t *q, struct bio * bio) */ md_write_start(mddev, bio); /* wait on superblock update early */ - spin_lock_irq(&conf->resync_lock); - wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, ); - conf->nr_pending++; - spin_unlock_irq(&conf->resync_lock); + wait_barrier(conf); disk_stat_inc(mddev->gendisk, ios[rw]); disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); @@ -909,13 +943,8 @@ static void print_conf(conf_t *conf) static void close_sync(conf_t *conf) { - spin_lock_irq(&conf->resync_lock); - wait_event_lock_irq(conf->wait_resume, !conf->barrier, - conf->resync_lock, raid1_unplug(conf->mddev->queue)); - spin_unlock_irq(&conf->resync_lock); - - if (conf->barrier) BUG(); - if (waitqueue_active(&conf->wait_idle)) BUG(); + wait_barrier(conf); + allow_barrier(conf); mempool_destroy(conf->r1buf_pool); conf->r1buf_pool = NULL; @@ -1317,12 +1346,16 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i return sync_blocks; } /* - * If there is non-resync activity waiting for us then - * put in a delay to throttle resync. + * If there is non-resync activity waiting for a turn, + * and resync is going fast enough, + * then let it through before starting on this new sync request. 
*/ - if (!go_faster && waitqueue_active(&conf->wait_resume)) + if (!go_faster && conf->nr_waiting) msleep_interruptible(1000); - device_barrier(conf, sector_nr + RESYNC_SECTORS); + + raise_barrier(conf); + + conf->next_resync = sector_nr; /* * If reconstructing, and >1 working disc, @@ -1355,10 +1388,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); - spin_lock_irq(&conf->resync_lock); - conf->nr_pending++; - spin_unlock_irq(&conf->resync_lock); - r1_bio->mddev = mddev; r1_bio->sector = sector_nr; r1_bio->state = 0; @@ -1542,8 +1571,7 @@ static int run(mddev_t *mddev) mddev->recovery_cp = MaxSector; spin_lock_init(&conf->resync_lock); - init_waitqueue_head(&conf->wait_idle); - init_waitqueue_head(&conf->wait_resume); + init_waitqueue_head(&conf->wait_barrier); bio_list_init(&conf->pending_bio_list); bio_list_init(&conf->flushing_bio_list); @@ -1714,11 +1742,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks) } memset(newmirrors, 0, sizeof(struct mirror_info)*raid_disks); - spin_lock_irq(&conf->resync_lock); - conf->barrier++; - wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, - conf->resync_lock, raid1_unplug(mddev->queue)); - spin_unlock_irq(&conf->resync_lock); + raise_barrier(conf); /* ok, everything is stopped */ oldpool = conf->r1bio_pool; @@ -1738,12 +1762,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks) conf->raid_disks = mddev->raid_disks = raid_disks; conf->last_used = 0; /* just make sure it is in-range */ - spin_lock_irq(&conf->resync_lock); - conf->barrier--; - spin_unlock_irq(&conf->resync_lock); - wake_up(&conf->wait_resume); - wake_up(&conf->wait_idle); - + lower_barrier(conf); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); @@ -1758,18 +1777,10 @@ static void raid1_quiesce(mddev_t *mddev, int state) switch(state) { case 1: - spin_lock_irq(&conf->resync_lock); - conf->barrier++; - 
wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, - conf->resync_lock, raid1_unplug(mddev->queue)); - spin_unlock_irq(&conf->resync_lock); + raise_barrier(conf); break; case 0: - spin_lock_irq(&conf->resync_lock); - conf->barrier--; - spin_unlock_irq(&conf->resync_lock); - wake_up(&conf->wait_resume); - wake_up(&conf->wait_idle); + lower_barrier(conf); break; } if (mddev->thread) { diff --git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h index 292b98f2b408..c55674252533 100644 --- a/include/linux/raid/raid1.h +++ b/include/linux/raid/raid1.h @@ -45,6 +45,7 @@ struct r1_private_data_s { spinlock_t resync_lock; int nr_pending; + int nr_waiting; int barrier; sector_t next_resync; int fullsync; /* set to 1 if a full sync is needed, @@ -52,8 +53,7 @@ struct r1_private_data_s { * Cleared when a sync completes. */ - wait_queue_head_t wait_idle; - wait_queue_head_t wait_resume; + wait_queue_head_t wait_barrier; struct pool_info *poolinfo; -- cgit v1.2.3-71-gd317 From 0a27ec96b6fb1abf867e36d7b0b681d67588767a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:13 -0800 Subject: [PATCH] md: improve raid10 "IO Barrier" concept raid10 needs to put up a barrier to new requests while it does resync or other background recovery. The code for this is currently open-coded, slightly obscured by its use of two waitqueues, and not documented. This patch gathers all the related code into 4 functions, and includes a comment which (hopefully) explains what is happening. 
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid10.c | 135 ++++++++++++++++++++++++++------------------ include/linux/raid/raid10.h | 4 +- 2 files changed, 81 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 713dc9c2c730..50bd7b152f28 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -47,6 +47,9 @@ static void unplug_slaves(mddev_t *mddev); +static void allow_barrier(conf_t *conf); +static void lower_barrier(conf_t *conf); + static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) { conf_t *conf = data; @@ -175,20 +178,13 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio) static inline void free_r10bio(r10bio_t *r10_bio) { - unsigned long flags; - conf_t *conf = mddev_to_conf(r10_bio->mddev); /* * Wake up any possible resync thread that waits for the device * to go idle. */ - spin_lock_irqsave(&conf->resync_lock, flags); - if (!--conf->nr_pending) { - wake_up(&conf->wait_idle); - wake_up(&conf->wait_resume); - } - spin_unlock_irqrestore(&conf->resync_lock, flags); + allow_barrier(conf); put_all_bios(conf, r10_bio); mempool_free(r10_bio, conf->r10bio_pool); @@ -197,22 +193,10 @@ static inline void free_r10bio(r10bio_t *r10_bio) static inline void put_buf(r10bio_t *r10_bio) { conf_t *conf = mddev_to_conf(r10_bio->mddev); - unsigned long flags; mempool_free(r10_bio, conf->r10buf_pool); - spin_lock_irqsave(&conf->resync_lock, flags); - if (!conf->barrier) - BUG(); - --conf->barrier; - wake_up(&conf->wait_resume); - wake_up(&conf->wait_idle); - - if (!--conf->nr_pending) { - wake_up(&conf->wait_idle); - wake_up(&conf->wait_resume); - } - spin_unlock_irqrestore(&conf->resync_lock, flags); + lower_barrier(conf); } static void reschedule_retry(r10bio_t *r10_bio) @@ -640,30 +624,82 @@ static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk, return ret; } -/* - * Throttle resync depth, so that we can both get 
proper overlapping of - * requests, but are still able to handle normal requests quickly. +/* Barriers.... + * Sometimes we need to suspend IO while we do something else, + * either some resync/recovery, or reconfigure the array. + * To do this we raise a 'barrier'. + * The 'barrier' is a counter that can be raised multiple times + * to count how many activities are happening which preclude + * normal IO. + * We can only raise the barrier if there is no pending IO. + * i.e. if nr_pending == 0. + * We choose only to raise the barrier if no-one is waiting for the + * barrier to go down. This means that as soon as an IO request + * is ready, no other operations which require a barrier will start + * until the IO request has had a chance. + * + * So: regular IO calls 'wait_barrier'. When that returns there + * is no background IO happening. It must arrange to call + * allow_barrier when it has finished its IO. + * background IO calls must call raise_barrier. Once that returns + * there is no normal IO happening. It must arrange to call + * lower_barrier when the particular background IO completes. 
*/ #define RESYNC_DEPTH 32 -static void device_barrier(conf_t *conf, sector_t sect) +static void raise_barrier(conf_t *conf) { spin_lock_irq(&conf->resync_lock); - wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), - conf->resync_lock, unplug_slaves(conf->mddev)); - - if (!conf->barrier++) { - wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, - conf->resync_lock, unplug_slaves(conf->mddev)); - if (conf->nr_pending) - BUG(); + + /* Wait until no block IO is waiting */ + wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, + conf->resync_lock, + raid10_unplug(conf->mddev->queue)); + + /* block any new IO from starting */ + conf->barrier++; + + /* No wait for all pending IO to complete */ + wait_event_lock_irq(conf->wait_barrier, + !conf->nr_pending && conf->barrier < RESYNC_DEPTH, + conf->resync_lock, + raid10_unplug(conf->mddev->queue)); + + spin_unlock_irq(&conf->resync_lock); +} + +static void lower_barrier(conf_t *conf) +{ + unsigned long flags; + spin_lock_irqsave(&conf->resync_lock, flags); + conf->barrier--; + spin_unlock_irqrestore(&conf->resync_lock, flags); + wake_up(&conf->wait_barrier); +} + +static void wait_barrier(conf_t *conf) +{ + spin_lock_irq(&conf->resync_lock); + if (conf->barrier) { + conf->nr_waiting++; + wait_event_lock_irq(conf->wait_barrier, !conf->barrier, + conf->resync_lock, + raid10_unplug(conf->mddev->queue)); + conf->nr_waiting--; } - wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH, - conf->resync_lock, unplug_slaves(conf->mddev)); - conf->next_resync = sect; + conf->nr_pending++; spin_unlock_irq(&conf->resync_lock); } +static void allow_barrier(conf_t *conf) +{ + unsigned long flags; + spin_lock_irqsave(&conf->resync_lock, flags); + conf->nr_pending--; + spin_unlock_irqrestore(&conf->resync_lock, flags); + wake_up(&conf->wait_barrier); +} + static int make_request(request_queue_t *q, struct bio * bio) { mddev_t *mddev = q->queuedata; @@ -719,10 +755,7 @@ static int 
make_request(request_queue_t *q, struct bio * bio) * thread has put up a bar for new requests. * Continue immediately if no resync is active currently. */ - spin_lock_irq(&conf->resync_lock); - wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, ); - conf->nr_pending++; - spin_unlock_irq(&conf->resync_lock); + wait_barrier(conf); disk_stat_inc(mddev->gendisk, ios[rw]); disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); @@ -897,13 +930,8 @@ static void print_conf(conf_t *conf) static void close_sync(conf_t *conf) { - spin_lock_irq(&conf->resync_lock); - wait_event_lock_irq(conf->wait_resume, !conf->barrier, - conf->resync_lock, unplug_slaves(conf->mddev)); - spin_unlock_irq(&conf->resync_lock); - - if (conf->barrier) BUG(); - if (waitqueue_active(&conf->wait_idle)) BUG(); + wait_barrier(conf); + allow_barrier(conf); mempool_destroy(conf->r10buf_pool); conf->r10buf_pool = NULL; @@ -1395,9 +1423,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i * If there is non-resync activity waiting for us then * put in a delay to throttle resync. */ - if (!go_faster && waitqueue_active(&conf->wait_resume)) + if (!go_faster && conf->nr_waiting) msleep_interruptible(1000); - device_barrier(conf, sector_nr + RESYNC_SECTORS); + raise_barrier(conf); + conf->next_resync = sector_nr; /* Again, very different code for resync and recovery. 
* Both must result in an r10bio with a list of bios that @@ -1427,7 +1456,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); spin_lock_irq(&conf->resync_lock); - conf->nr_pending++; if (rb2) conf->barrier++; spin_unlock_irq(&conf->resync_lock); atomic_set(&r10_bio->remaining, 0); @@ -1500,10 +1528,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i int count = 0; r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); - spin_lock_irq(&conf->resync_lock); - conf->nr_pending++; - spin_unlock_irq(&conf->resync_lock); - r10_bio->mddev = mddev; atomic_set(&r10_bio->remaining, 0); @@ -1713,8 +1737,7 @@ static int run(mddev_t *mddev) INIT_LIST_HEAD(&conf->retry_list); spin_lock_init(&conf->resync_lock); - init_waitqueue_head(&conf->wait_idle); - init_waitqueue_head(&conf->wait_resume); + init_waitqueue_head(&conf->wait_barrier); /* need to check that every block has at least one working mirror */ if (!enough(conf)) { diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h index 60708789c8f9..08317b77802b 100644 --- a/include/linux/raid/raid10.h +++ b/include/linux/raid/raid10.h @@ -39,11 +39,11 @@ struct r10_private_data_s { spinlock_t resync_lock; int nr_pending; + int nr_waiting; int barrier; sector_t next_resync; - wait_queue_head_t wait_idle; - wait_queue_head_t wait_resume; + wait_queue_head_t wait_barrier; mempool_t *r10bio_pool; mempool_t *r10buf_pool; -- cgit v1.2.3-71-gd317 From 6ff8d8ec06690f4011a6c3ad9e0759b9094f0601 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:15 -0800 Subject: [PATCH] md: allow dirty raid[456] arrays to be started at boot See patch to md.txt for more details Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/md.txt | 24 ++++++++++++++++++++++++ drivers/md/md.c | 4 ++++ drivers/md/raid5.c | 15 +++++++++++---- drivers/md/raid6main.c | 
13 +++++++++---- include/linux/raid/md_k.h | 1 + 5 files changed, 49 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/Documentation/md.txt b/Documentation/md.txt index 23e6cce40f9c..1dd0fb6021cf 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt @@ -51,6 +51,30 @@ superblock can be autodetected and run at boot time. The kernel parameter "raid=partitionable" (or "raid=part") means that all auto-detected arrays are assembled as partitionable. +Boot time assembly of degraded/dirty arrays +------------------------------------------- + +If a raid5 or raid6 array is both dirty and degraded, it could have +undetectable data corruption. This is because the fact that it is +'dirty' means that the parity cannot be trusted, and the fact that it +is degraded means that some datablocks are missing and cannot reliably +be reconstructed (due to no parity). + +For this reason, md will normally refuse to start such an array. This +requires the sysadmin to take action to explicitly start the array +despite possible corruption. This is normally done with + mdadm --assemble --force .... + +This option is not really available if the array has the root +filesystem on it. In order to support this booting from such an +array, md supports a module parameter "start_dirty_degraded" which, +when set to 1, bypasses the checks and allows dirty degraded +arrays to be started. 
+ +So, to boot with a root filesystem of a dirty degraded raid[56], use + + md-mod.start_dirty_degraded=1 + Superblock formats ------------------ diff --git a/drivers/md/md.c b/drivers/md/md.c index 8175a2a222da..b4fb7247b3ed 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1937,6 +1937,7 @@ static void md_safemode_timeout(unsigned long data) md_wakeup_thread(mddev->thread); } +static int start_dirty_degraded; static int do_md_run(mddev_t * mddev) { @@ -2048,6 +2049,7 @@ static int do_md_run(mddev_t * mddev) mddev->recovery = 0; mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ mddev->barriers_work = 1; + mddev->ok_start_degraded = start_dirty_degraded; if (start_readonly) mddev->ro = 2; /* read-only, but switch on first write */ @@ -4509,6 +4511,8 @@ static int set_ro(const char *val, struct kernel_param *kp) } module_param_call(start_ro, set_ro, get_ro, NULL, 0600); +module_param(start_dirty_degraded, int, 0644); + EXPORT_SYMBOL(register_md_personality); EXPORT_SYMBOL(unregister_md_personality); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 334ff7a07283..53a0f2ce76c8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1904,10 +1904,17 @@ static int run(mddev_t *mddev) if (mddev->degraded == 1 && mddev->recovery_cp != MaxSector) { - printk(KERN_ERR - "raid5: cannot start dirty degraded array for %s\n", - mdname(mddev)); - goto abort; + if (mddev->ok_start_degraded) + printk(KERN_WARNING + "raid5: starting dirty degraded array: %s" + "- data corruption possible.\n", + mdname(mddev)); + else { + printk(KERN_ERR + "raid5: cannot start dirty degraded array for %s\n", + mdname(mddev)); + goto abort; + } } { diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index 0000d162d198..9ac6dcd55127 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c @@ -1929,13 +1929,18 @@ static int run(mddev_t *mddev) goto abort; } -#if 0 /* FIX: For now */ if (mddev->degraded > 0 && mddev->recovery_cp != 
MaxSector) { - printk(KERN_ERR "raid6: cannot start dirty degraded array for %s\n", mdname(mddev)); - goto abort; + if (mddev->ok_start_degraded) + printk(KERN_WARNING "raid6: starting dirty degraded array:%s" + "- data corruption possible.\n", + mdname(mddev)); + else { + printk(KERN_ERR "raid6: cannot start dirty degraded array" + " for %s\n", mdname(mddev)); + goto abort; + } } -#endif { mddev->thread = md_register_thread(raid6d, mddev, "%s_raid6"); diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 46629a275ba9..1dd587b5975a 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -183,6 +183,7 @@ struct mddev_s sector_t resync_mismatches; /* count of sectors where * parity/replica mismatch found */ + int ok_start_degraded; /* recovery/resync flags * NEEDED: we might need to start a resync/recover * RUNNING: a thread is running, or about to be started -- cgit v1.2.3-71-gd317 From 6cce3b23f6f8e974c00af7a9b88f1d413ba368a8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:16 -0800 Subject: [PATCH] md: write intent bitmap support for raid10 Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 10 ++- drivers/md/raid10.c | 178 ++++++++++++++++++++++++++++++++++++++------ include/linux/raid/raid10.h | 9 ++- 3 files changed, 171 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/md.c b/drivers/md/md.c index ee199d462520..64e7da3701a5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -714,9 +714,10 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) if (sb->state & (1<bitmap_file == NULL) { - if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6) { + if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6 + && mddev->level != 10) { /* FIXME use a better test */ - printk(KERN_WARNING "md: bitmaps only support for raid1\n"); + printk(KERN_WARNING "md: bitmaps not supported for this level.\n"); 
return -EINVAL; } mddev->bitmap_offset = mddev->default_bitmap_offset; @@ -1037,8 +1038,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) && mddev->bitmap_file == NULL ) { - if (mddev->level != 1) { - printk(KERN_WARNING "md: bitmaps only supported for raid1\n"); + if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6 + && mddev->level != 10) { + printk(KERN_WARNING "md: bitmaps not supported for this level.\n"); return -EINVAL; } mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 50bd7b152f28..8f58a447d9f0 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -18,7 +18,9 @@ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include "dm-bio-list.h" #include +#include /* * RAID10 provides a combination of RAID0 and RAID1 functionality. @@ -306,9 +308,11 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in /* * this branch is our 'one mirror IO has finished' event handler: */ - if (!uptodate) + if (!uptodate) { md_error(r10_bio->mddev, conf->mirrors[dev].rdev); - else + /* an I/O failed, we can't clear the bitmap */ + set_bit(R10BIO_Degraded, &r10_bio->state); + } else /* * Set R10BIO_Uptodate in our master bio, so that * we will return a good error code for to the higher @@ -328,6 +332,11 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in * already. */ if (atomic_dec_and_test(&r10_bio->remaining)) { + /* clear the bitmap if all writes complete successfully */ + bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector, + r10_bio->sectors, + !test_bit(R10BIO_Degraded, &r10_bio->state), + 0); md_write_end(r10_bio->mddev); raid_end_bio_io(r10_bio); } @@ -486,8 +495,9 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) rcu_read_lock(); /* * Check if we can balance. 
We can balance on the whole - * device if no resync is going on, or below the resync window. - * We take the first readable disk when above the resync window. + * device if no resync is going on (recovery is ok), or below + * the resync window. We take the first readable disk when + * above the resync window. */ if (conf->mddev->recovery_cp < MaxSector && (this_sector + sectors >= conf->next_resync)) { @@ -591,7 +601,10 @@ static void unplug_slaves(mddev_t *mddev) static void raid10_unplug(request_queue_t *q) { + mddev_t *mddev = q->queuedata; + unplug_slaves(q->queuedata); + md_wakeup_thread(mddev->thread); } static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk, @@ -647,12 +660,13 @@ static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk, */ #define RESYNC_DEPTH 32 -static void raise_barrier(conf_t *conf) +static void raise_barrier(conf_t *conf, int force) { + BUG_ON(force && !conf->barrier); spin_lock_irq(&conf->resync_lock); - /* Wait until no block IO is waiting */ - wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, + /* Wait until no block IO is waiting (unless 'force') */ + wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, conf->resync_lock, raid10_unplug(conf->mddev->queue)); @@ -710,6 +724,8 @@ static int make_request(request_queue_t *q, struct bio * bio) int i; int chunk_sects = conf->chunk_mask + 1; const int rw = bio_data_dir(bio); + struct bio_list bl; + unsigned long flags; if (unlikely(bio_barrier(bio))) { bio_endio(bio, bio->bi_size, -EOPNOTSUPP); @@ -767,6 +783,7 @@ static int make_request(request_queue_t *q, struct bio * bio) r10_bio->mddev = mddev; r10_bio->sector = bio->bi_sector; + r10_bio->state = 0; if (rw == READ) { /* @@ -811,13 +828,16 @@ static int make_request(request_queue_t *q, struct bio * bio) !test_bit(Faulty, &rdev->flags)) { atomic_inc(&rdev->nr_pending); r10_bio->devs[i].bio = bio; - } else + } else { r10_bio->devs[i].bio = NULL; + set_bit(R10BIO_Degraded, 
&r10_bio->state); + } } rcu_read_unlock(); - atomic_set(&r10_bio->remaining, 1); + atomic_set(&r10_bio->remaining, 0); + bio_list_init(&bl); for (i = 0; i < conf->copies; i++) { struct bio *mbio; int d = r10_bio->devs[i].devnum; @@ -835,13 +855,14 @@ static int make_request(request_queue_t *q, struct bio * bio) mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); - generic_make_request(mbio); + bio_list_add(&bl, mbio); } - if (atomic_dec_and_test(&r10_bio->remaining)) { - md_write_end(mddev); - raid_end_bio_io(r10_bio); - } + bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0); + spin_lock_irqsave(&conf->device_lock, flags); + bio_list_merge(&conf->pending_bio_list, &bl); + blk_plug_device(mddev->queue); + spin_unlock_irqrestore(&conf->device_lock, flags); return 0; } @@ -999,7 +1020,12 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) if (!enough(conf)) return 0; - for (mirror=0; mirror < mddev->raid_disks; mirror++) + if (rdev->saved_raid_disk >= 0 && + conf->mirrors[rdev->saved_raid_disk].rdev == NULL) + mirror = rdev->saved_raid_disk; + else + mirror = 0; + for ( ; mirror < mddev->raid_disks; mirror++) if ( !(p=conf->mirrors+mirror)->rdev) { blk_queue_stack_limits(mddev->queue, @@ -1015,6 +1041,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) p->head_position = 0; rdev->raid_disk = mirror; found = 1; + if (rdev->saved_raid_disk != mirror) + conf->fullsync = 1; rcu_assign_pointer(p->rdev, rdev); break; } @@ -1282,6 +1310,26 @@ static void raid10d(mddev_t *mddev) for (;;) { char b[BDEVNAME_SIZE]; spin_lock_irqsave(&conf->device_lock, flags); + + if (conf->pending_bio_list.head) { + bio = bio_list_get(&conf->pending_bio_list); + blk_remove_plug(mddev->queue); + spin_unlock_irqrestore(&conf->device_lock, flags); + /* flush any pending bitmap writes to disk before proceeding w/ I/O */ + if (bitmap_unplug(mddev->bitmap) != 0) + printk("%s: bitmap file write failed!\n", mdname(mddev)); + + while (bio) { /* 
submit pending writes */ + struct bio *next = bio->bi_next; + bio->bi_next = NULL; + generic_make_request(bio); + bio = next; + } + unplug = 1; + + continue; + } + if (list_empty(head)) break; r10_bio = list_entry(head->prev, r10bio_t, retry_list); @@ -1388,6 +1436,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i sector_t max_sector, nr_sectors; int disk; int i; + int max_sync; + int sync_blocks; sector_t sectors_skipped = 0; int chunks_skipped = 0; @@ -1401,6 +1451,29 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) max_sector = mddev->resync_max_sectors; if (sector_nr >= max_sector) { + /* If we aborted, we need to abort the + * sync on the 'current' bitmap chucks (there can + * be several when recovering multiple devices). + * as we may have started syncing it but not finished. + * We can find the current address in + * mddev->curr_resync, but for recovery, + * we need to convert that to several + * virtual addresses. + */ + if (mddev->curr_resync < max_sector) { /* aborted */ + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) + bitmap_end_sync(mddev->bitmap, mddev->curr_resync, + &sync_blocks, 1); + else for (i=0; iraid_disks; i++) { + sector_t sect = + raid10_find_virt(conf, mddev->curr_resync, i); + bitmap_end_sync(mddev->bitmap, sect, + &sync_blocks, 1); + } + } else /* completed sync */ + conf->fullsync = 0; + + bitmap_close_sync(mddev->bitmap); close_sync(conf); *skipped = 1; return sectors_skipped; @@ -1425,8 +1498,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i */ if (!go_faster && conf->nr_waiting) msleep_interruptible(1000); - raise_barrier(conf); - conf->next_resync = sector_nr; /* Again, very different code for resync and recovery. 
* Both must result in an r10bio with a list of bios that @@ -1443,6 +1514,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i * end_sync_write if we will want to write. */ + max_sync = RESYNC_PAGES << (PAGE_SHIFT-9); if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { /* recovery... the complicated one */ int i, j, k; @@ -1451,13 +1523,29 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i for (i=0 ; iraid_disks; i++) if (conf->mirrors[i].rdev && !test_bit(In_sync, &conf->mirrors[i].rdev->flags)) { + int still_degraded = 0; /* want to reconstruct this device */ r10bio_t *rb2 = r10_bio; + sector_t sect = raid10_find_virt(conf, sector_nr, i); + int must_sync; + /* Unless we are doing a full sync, we only need + * to recover the block if it is set in the bitmap + */ + must_sync = bitmap_start_sync(mddev->bitmap, sect, + &sync_blocks, 1); + if (sync_blocks < max_sync) + max_sync = sync_blocks; + if (!must_sync && + !conf->fullsync) { + /* yep, skip the sync_blocks here, but don't assume + * that there will never be anything to do here + */ + chunks_skipped = -1; + continue; + } r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); - spin_lock_irq(&conf->resync_lock); - if (rb2) conf->barrier++; - spin_unlock_irq(&conf->resync_lock); + raise_barrier(conf, rb2 != NULL); atomic_set(&r10_bio->remaining, 0); r10_bio->master_bio = (struct bio*)rb2; @@ -1465,8 +1553,21 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i atomic_inc(&rb2->remaining); r10_bio->mddev = mddev; set_bit(R10BIO_IsRecover, &r10_bio->state); - r10_bio->sector = raid10_find_virt(conf, sector_nr, i); + r10_bio->sector = sect; + raid10_find_phys(conf, r10_bio); + /* Need to check if this section will still be + * degraded + */ + for (j=0; jcopies;j++) { + int d = r10_bio->devs[j].devnum; + if (conf->mirrors[d].rdev == NULL || + test_bit(Faulty, &conf->mirrors[d].rdev->flags)) + still_degraded = 1; + } + 
must_sync = bitmap_start_sync(mddev->bitmap, sect, + &sync_blocks, still_degraded); + for (j=0; jcopies;j++) { int d = r10_bio->devs[j].devnum; if (conf->mirrors[d].rdev && @@ -1526,10 +1627,22 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i } else { /* resync. Schedule a read for every block at this virt offset */ int count = 0; + + if (!bitmap_start_sync(mddev->bitmap, sector_nr, + &sync_blocks, mddev->degraded) && + !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { + /* We can skip this block */ + *skipped = 1; + return sync_blocks + sectors_skipped; + } + if (sync_blocks < max_sync) + max_sync = sync_blocks; r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); r10_bio->mddev = mddev; atomic_set(&r10_bio->remaining, 0); + raise_barrier(conf, 0); + conf->next_resync = sector_nr; r10_bio->master_bio = NULL; r10_bio->sector = sector_nr; @@ -1582,6 +1695,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i } nr_sectors = 0; + if (sector_nr + max_sync < max_sector) + max_sector = sector_nr + max_sync; do { struct page *page; int len = PAGE_SIZE; @@ -1821,6 +1936,26 @@ static int stop(mddev_t *mddev) return 0; } +static void raid10_quiesce(mddev_t *mddev, int state) +{ + conf_t *conf = mddev_to_conf(mddev); + + switch(state) { + case 1: + raise_barrier(conf, 0); + break; + case 0: + lower_barrier(conf); + break; + } + if (mddev->thread) { + if (mddev->bitmap) + mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; + else + mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; + md_wakeup_thread(mddev->thread); + } +} static mdk_personality_t raid10_personality = { @@ -1835,6 +1970,7 @@ static mdk_personality_t raid10_personality = .hot_remove_disk= raid10_remove_disk, .spare_active = raid10_spare_active, .sync_request = sync_request, + .quiesce = raid10_quiesce, }; static int __init raid_init(void) diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h index 
08317b77802b..b660cbf628d8 100644 --- a/include/linux/raid/raid10.h +++ b/include/linux/raid/raid10.h @@ -35,13 +35,19 @@ struct r10_private_data_s { sector_t chunk_mask; struct list_head retry_list; - /* for use when syncing mirrors: */ + /* queue pending writes and submit them on unplug */ + struct bio_list pending_bio_list; + spinlock_t resync_lock; int nr_pending; int nr_waiting; int barrier; sector_t next_resync; + int fullsync; /* set to 1 if a full sync is needed, + * (fresh device added). + * Cleared when a sync completes. + */ wait_queue_head_t wait_barrier; @@ -100,4 +106,5 @@ struct r10bio_s { #define R10BIO_Uptodate 0 #define R10BIO_IsSync 1 #define R10BIO_IsRecover 2 +#define R10BIO_Degraded 3 #endif -- cgit v1.2.3-71-gd317 From ca65b73bd9c301d243df93780f7b26579e6c9204 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:17 -0800 Subject: [PATCH] md: fix raid6 resync check/repair code raid6 currently does not check the P/Q syndromes when doing a resync, it just calculates the correct value and writes it. Doing the check can reduce writes (often to 0) for a resync, and it is needed to properly implement the echo check > sync_action operation. This patch implements the appropriate checks and tidies up some related code. It also allows raid6 user-requested resync to bypass the intent bitmap. 
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid6main.c | 182 ++++++++++++++++++++++++++------------------- include/linux/raid/raid5.h | 2 + 2 files changed, 108 insertions(+), 76 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index 304455d236f9..52e8796bb8ac 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c @@ -805,7 +805,7 @@ static void compute_parity(struct stripe_head *sh, int method) } /* Compute one missing block */ -static void compute_block_1(struct stripe_head *sh, int dd_idx) +static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) { raid6_conf_t *conf = sh->raid_conf; int i, count, disks = conf->raid_disks; @@ -821,7 +821,7 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx) compute_parity(sh, UPDATE_PARITY); } else { ptr[0] = page_address(sh->dev[dd_idx].page); - memset(ptr[0], 0, STRIPE_SIZE); + if (!nozero) memset(ptr[0], 0, STRIPE_SIZE); count = 1; for (i = disks ; i--; ) { if (i == dd_idx || i == qd_idx) @@ -838,7 +838,8 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx) } if (count != 1) xor_block(count, STRIPE_SIZE, ptr); - set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); + if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); + else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); } } @@ -871,7 +872,7 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) return; } else { /* We're missing D+Q; recompute D from P */ - compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1); + compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1, 0); compute_parity(sh, UPDATE_PARITY); /* Is this necessary? 
*/ return; } @@ -982,6 +983,12 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in } +static int page_is_zero(struct page *p) +{ + char *a = page_address(p); + return ((*(u32*)a) == 0 && + memcmp(a, a+4, STRIPE_SIZE-4)==0); +} /* * handle_stripe - do things to a stripe. * @@ -1000,7 +1007,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in * */ -static void handle_stripe(struct stripe_head *sh) +static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) { raid6_conf_t *conf = sh->raid_conf; int disks = conf->raid_disks; @@ -1228,7 +1235,7 @@ static void handle_stripe(struct stripe_head *sh) if (uptodate == disks-1) { PRINTK("Computing stripe %llu block %d\n", (unsigned long long)sh->sector, i); - compute_block_1(sh, i); + compute_block_1(sh, i, 0); uptodate++; } else if ( uptodate == disks-2 && failed >= 2 ) { /* Computing 2-failure is *very* expensive; only do it if failed >= 2 */ @@ -1323,7 +1330,7 @@ static void handle_stripe(struct stripe_head *sh) /* We have failed blocks and need to compute them */ switch ( failed ) { case 0: BUG(); - case 1: compute_block_1(sh, failed_num[0]); break; + case 1: compute_block_1(sh, failed_num[0], 0); break; case 2: compute_block_2(sh, failed_num[0], failed_num[1]); break; default: BUG(); /* This request should have been failed? */ } @@ -1338,12 +1345,10 @@ static void handle_stripe(struct stripe_head *sh) (unsigned long long)sh->sector, i); locked++; set_bit(R5_Wantwrite, &sh->dev[i].flags); -#if 0 /**** FIX: I don't understand the logic here... ****/ - if (!test_bit(R5_Insync, &sh->dev[i].flags) - || ((i==pd_idx || i==qd_idx) && failed == 0)) /* FIX? 
*/ - set_bit(STRIPE_INSYNC, &sh->state); -#endif } + /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */ + set_bit(STRIPE_INSYNC, &sh->state); + if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { atomic_dec(&conf->preread_active_stripes); if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) @@ -1356,79 +1361,97 @@ static void handle_stripe(struct stripe_head *sh) * Any reads will already have been scheduled, so we just see if enough data * is available */ - if (syncing && locked == 0 && - !test_bit(STRIPE_INSYNC, &sh->state) && failed <= 2) { - set_bit(STRIPE_HANDLE, &sh->state); -#if 0 /* RAID-6: Don't support CHECK PARITY yet */ - if (failed == 0) { - char *pagea; - if (uptodate != disks) - BUG(); - compute_parity(sh, CHECK_PARITY); - uptodate--; - pagea = page_address(sh->dev[pd_idx].page); - if ((*(u32*)pagea) == 0 && - !memcmp(pagea, pagea+4, STRIPE_SIZE-4)) { - /* parity is correct (on disc, not in buffer any more) */ - set_bit(STRIPE_INSYNC, &sh->state); - } - } -#endif - if (!test_bit(STRIPE_INSYNC, &sh->state)) { - int failed_needupdate[2]; - struct r5dev *adev, *bdev; - - if ( failed < 1 ) - failed_num[0] = pd_idx; - if ( failed < 2 ) - failed_num[1] = (failed_num[0] == qd_idx) ? pd_idx : qd_idx; + if (syncing && locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) { + int update_p = 0, update_q = 0; + struct r5dev *dev; - failed_needupdate[0] = !test_bit(R5_UPTODATE, &sh->dev[failed_num[0]].flags); - failed_needupdate[1] = !test_bit(R5_UPTODATE, &sh->dev[failed_num[1]].flags); + set_bit(STRIPE_HANDLE, &sh->state); - PRINTK("sync: failed=%d num=%d,%d fnu=%u%u\n", - failed, failed_num[0], failed_num[1], failed_needupdate[0], failed_needupdate[1]); + BUG_ON(failed>2); + BUG_ON(uptodate < disks); + /* Want to check and possibly repair P and Q. 
+ * However there could be one 'failed' device, in which + * case we can only check one of them, possibly using the + * other to generate missing data + */ -#if 0 /* RAID-6: This code seems to require that CHECK_PARITY destroys the uptodateness of the parity */ - /* should be able to compute the missing block(s) and write to spare */ - if ( failed_needupdate[0] ^ failed_needupdate[1] ) { - if (uptodate+1 != disks) - BUG(); - compute_block_1(sh, failed_needupdate[0] ? failed_num[0] : failed_num[1]); - uptodate++; - } else if ( failed_needupdate[0] & failed_needupdate[1] ) { - if (uptodate+2 != disks) - BUG(); - compute_block_2(sh, failed_num[0], failed_num[1]); - uptodate += 2; + /* If !tmp_page, we cannot do the calculations, + * but as we have set STRIPE_HANDLE, we will soon be called + * by stripe_handle with a tmp_page - just wait until then. + */ + if (tmp_page) { + if (failed == q_failed) { + /* The only possible failed device holds 'Q', so it makes + * sense to check P (If anything else were failed, we would + * have used P to recreate it). + */ + compute_block_1(sh, pd_idx, 1); + if (!page_is_zero(sh->dev[pd_idx].page)) { + compute_block_1(sh,pd_idx,0); + update_p = 1; + } + } + if (!q_failed && failed < 2) { + /* q is not failed, and we didn't use it to generate + * anything, so it makes sense to check it + */ + memcpy(page_address(tmp_page), + page_address(sh->dev[qd_idx].page), + STRIPE_SIZE); + compute_parity(sh, UPDATE_PARITY); + if (memcmp(page_address(tmp_page), + page_address(sh->dev[qd_idx].page), + STRIPE_SIZE)!= 0) { + clear_bit(STRIPE_INSYNC, &sh->state); + update_q = 1; + } + } + if (update_p || update_q) { + conf->mddev->resync_mismatches += STRIPE_SECTORS; + if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) + /* don't try to repair!! 
*/ + update_p = update_q = 0; } -#else - compute_block_2(sh, failed_num[0], failed_num[1]); - uptodate += failed_needupdate[0] + failed_needupdate[1]; -#endif - if (uptodate != disks) - BUG(); + /* now write out any block on a failed drive, + * or P or Q if they need it + */ - PRINTK("Marking for sync stripe %llu blocks %d,%d\n", - (unsigned long long)sh->sector, failed_num[0], failed_num[1]); + if (failed == 2) { + dev = &sh->dev[failed_num[1]]; + locked++; + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantwrite, &dev->flags); + set_bit(R5_Syncio, &dev->flags); + } + if (failed >= 1) { + dev = &sh->dev[failed_num[0]]; + locked++; + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantwrite, &dev->flags); + set_bit(R5_Syncio, &dev->flags); + } - /**** FIX: Should we really do both of these unconditionally? ****/ - adev = &sh->dev[failed_num[0]]; - locked += !test_bit(R5_LOCKED, &adev->flags); - set_bit(R5_LOCKED, &adev->flags); - set_bit(R5_Wantwrite, &adev->flags); - bdev = &sh->dev[failed_num[1]]; - locked += !test_bit(R5_LOCKED, &bdev->flags); - set_bit(R5_LOCKED, &bdev->flags); + if (update_p) { + dev = &sh->dev[pd_idx]; + locked ++; + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantwrite, &dev->flags); + set_bit(R5_Syncio, &dev->flags); + } + if (update_q) { + dev = &sh->dev[qd_idx]; + locked++; + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantwrite, &dev->flags); + set_bit(R5_Syncio, &dev->flags); + } clear_bit(STRIPE_DEGRADED, &sh->state); - set_bit(R5_Wantwrite, &bdev->flags); set_bit(STRIPE_INSYNC, &sh->state); - set_bit(R5_Syncio, &adev->flags); - set_bit(R5_Syncio, &bdev->flags); } } + if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { md_done_sync(conf->mddev, STRIPE_SECTORS,1); clear_bit(STRIPE_SYNCING, &sh->state); @@ -1664,7 +1687,7 @@ static int make_request (request_queue_t *q, struct bio * bi) } finish_wait(&conf->wait_for_overlap, &w); raid6_plug_device(conf); - handle_stripe(sh); + handle_stripe(sh, NULL); 
release_stripe(sh); } else { /* cannot get stripe for read-ahead, just give-up */ @@ -1728,6 +1751,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i return rv; } if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && + !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && !conf->fullsync && sync_blocks >= STRIPE_SECTORS) { /* we can skip this block, and probably more */ sync_blocks /= STRIPE_SECTORS; @@ -1765,7 +1789,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i clear_bit(STRIPE_INSYNC, &sh->state); spin_unlock(&sh->lock); - handle_stripe(sh); + handle_stripe(sh, NULL); release_stripe(sh); return STRIPE_SECTORS; @@ -1821,7 +1845,7 @@ static void raid6d (mddev_t *mddev) spin_unlock_irq(&conf->device_lock); handled++; - handle_stripe(sh); + handle_stripe(sh, conf->spare_page); release_stripe(sh); spin_lock_irq(&conf->device_lock); @@ -1860,6 +1884,10 @@ static int run(mddev_t *mddev) goto abort; memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE); + conf->spare_page = alloc_page(GFP_KERNEL); + if (!conf->spare_page) + goto abort; + spin_lock_init(&conf->device_lock); init_waitqueue_head(&conf->wait_for_stripe); init_waitqueue_head(&conf->wait_for_overlap); @@ -1996,6 +2024,8 @@ static int run(mddev_t *mddev) abort: if (conf) { print_raid6_conf(conf); + if (conf->spare_page) + page_cache_release(conf->spare_page); if (conf->stripe_hashtbl) free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index f025ba6fb14c..e9c1c0d4f90b 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -228,6 +228,8 @@ struct raid5_private_data { * Cleared when a sync completes. 
*/ + struct page *spare_page; /* Used when checking P/Q in raid6 */ + /* * Free stripes pool */ -- cgit v1.2.3-71-gd317 From ddaf22abaa831763e75775e6d4c7693504237997 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:19 -0800 Subject: [PATCH] md: attempt to auto-correct read errors in raid1 On a read-error we suspend the array, then synchronously read the block from other arrays until we find one where we can read it. Then we try writing the good data back everywhere and make sure it works. If any write or subsequent read fails, only then do we fail the device out of the array. To be able to suspend the array, we need to also keep track of how many requests are queued for handling by raid1d. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 1 + drivers/md/raid1.c | 115 +++++++++++++++++++++++++++++++++++++++++---- include/linux/raid/raid1.h | 3 ++ 3 files changed, 109 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/md.c b/drivers/md/md.c index 64e7da3701a5..1364a1c97e6f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -461,6 +461,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size, bio_put(bio); return ret; } +EXPORT_SYMBOL(sync_page_io); static int read_disk_sb(mdk_rdev_t * rdev, int size) { diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c618015f07f6..b3856db8d6c2 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -191,6 +191,7 @@ static void reschedule_retry(r1bio_t *r1_bio) spin_lock_irqsave(&conf->device_lock, flags); list_add(&r1_bio->retry_list, &conf->retry_list); + conf->nr_queued ++; spin_unlock_irqrestore(&conf->device_lock, flags); wake_up(&conf->wait_barrier); @@ -245,9 +246,9 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int /* * this branch is our 'one mirror IO has finished' event handler: */ - if (!uptodate) - md_error(r1_bio->mddev, 
conf->mirrors[mirror].rdev); - else + update_head_pos(mirror, r1_bio); + + if (uptodate || conf->working_disks <= 1) { /* * Set R1BIO_Uptodate in our master bio, so that * we will return a good error code for to the higher @@ -259,14 +260,8 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int */ set_bit(R1BIO_Uptodate, &r1_bio->state); - update_head_pos(mirror, r1_bio); - - /* - * we have only one bio on the read side - */ - if (uptodate) raid_end_bio_io(r1_bio); - else { + } else { /* * oops, read error: */ @@ -653,6 +648,32 @@ static void allow_barrier(conf_t *conf) wake_up(&conf->wait_barrier); } +static void freeze_array(conf_t *conf) +{ + /* stop syncio and normal IO and wait for everything to + * go quite. + * We increment barrier and nr_waiting, and then + * wait until barrier+nr_pending match nr_queued+2 + */ + spin_lock_irq(&conf->resync_lock); + conf->barrier++; + conf->nr_waiting++; + wait_event_lock_irq(conf->wait_barrier, + conf->barrier+conf->nr_pending == conf->nr_queued+2, + conf->resync_lock, + raid1_unplug(conf->mddev->queue)); + spin_unlock_irq(&conf->resync_lock); +} +static void unfreeze_array(conf_t *conf) +{ + /* reverse the effect of the freeze */ + spin_lock_irq(&conf->resync_lock); + conf->barrier--; + conf->nr_waiting--; + wake_up(&conf->wait_barrier); + spin_unlock_irq(&conf->resync_lock); +} + /* duplicate the data pages for behind I/O */ static struct page **alloc_behind_pages(struct bio *bio) @@ -1196,6 +1217,7 @@ static void raid1d(mddev_t *mddev) break; r1_bio = list_entry(head->prev, r1bio_t, retry_list); list_del(head->prev); + conf->nr_queued--; spin_unlock_irqrestore(&conf->device_lock, flags); mddev = r1_bio->mddev; @@ -1235,6 +1257,74 @@ static void raid1d(mddev_t *mddev) } } else { int disk; + + /* we got a read error. Maybe the drive is bad. Maybe just + * the block and we can fix it. + * We freeze all other IO, and try reading the block from + * other devices. 
When we find one, we re-write + * and check it that fixes the read error. + * This is all done synchronously while the array is + * frozen + */ + sector_t sect = r1_bio->sector; + int sectors = r1_bio->sectors; + freeze_array(conf); + while(sectors) { + int s = sectors; + int d = r1_bio->read_disk; + int success = 0; + + if (s > (PAGE_SIZE>>9)) + s = PAGE_SIZE >> 9; + + do { + rdev = conf->mirrors[d].rdev; + if (rdev && + test_bit(In_sync, &rdev->flags) && + sync_page_io(rdev->bdev, + sect + rdev->data_offset, + s<<9, + conf->tmppage, READ)) + success = 1; + else { + d++; + if (d == conf->raid_disks) + d = 0; + } + } while (!success && d != r1_bio->read_disk); + + if (success) { + /* write it back and re-read */ + while (d != r1_bio->read_disk) { + if (d==0) + d = conf->raid_disks; + d--; + rdev = conf->mirrors[d].rdev; + if (rdev && + test_bit(In_sync, &rdev->flags)) { + if (sync_page_io(rdev->bdev, + sect + rdev->data_offset, + s<<9, conf->tmppage, WRITE) == 0 || + sync_page_io(rdev->bdev, + sect + rdev->data_offset, + s<<9, conf->tmppage, READ) == 0) { + /* Well, this device is dead */ + md_error(mddev, rdev); + } + } + } + } else { + /* Cannot read from anywhere -- bye bye array */ + md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); + break; + } + sectors -= s; + sect += s; + } + + + unfreeze_array(conf); + bio = r1_bio->bios[r1_bio->read_disk]; if ((disk=read_balance(conf, r1_bio)) == -1) { printk(KERN_ALERT "raid1: %s: unrecoverable I/O" @@ -1529,6 +1619,10 @@ static int run(mddev_t *mddev) memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks); + conf->tmppage = alloc_page(GFP_KERNEL); + if (!conf->tmppage) + goto out_no_mem; + conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL); if (!conf->poolinfo) goto out_no_mem; @@ -1635,6 +1729,7 @@ out_free_conf: if (conf->r1bio_pool) mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); + __free_page(conf->tmppage); kfree(conf->poolinfo); kfree(conf); mddev->private = NULL; diff 
--git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h index c55674252533..cbe4238d3f9f 100644 --- a/include/linux/raid/raid1.h +++ b/include/linux/raid/raid1.h @@ -46,6 +46,7 @@ struct r1_private_data_s { spinlock_t resync_lock; int nr_pending; int nr_waiting; + int nr_queued; int barrier; sector_t next_resync; int fullsync; /* set to 1 if a full sync is needed, @@ -57,6 +58,8 @@ struct r1_private_data_s { struct pool_info *poolinfo; + struct page *tmppage; + mempool_t *r1bio_pool; mempool_t *r1buf_pool; }; -- cgit v1.2.3-71-gd317 From cf30a473a02901fe4db37abc0b0fa26dd5ba3f72 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:23 -0800 Subject: [PATCH] md: handle errors when read-only Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid1.c | 18 +++++++++++------- include/linux/raid/raid1.h | 7 +++++++ 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 14a8fe0349c7..a8bc93d6ff63 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -154,7 +154,7 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio) for (i = 0; i < conf->raid_disks; i++) { struct bio **bio = r1_bio->bios + i; - if (*bio) + if (*bio && *bio != IO_BLOCKED) bio_put(*bio); *bio = NULL; } @@ -419,11 +419,13 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) new_disk = 0; for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev); + r1_bio->bios[new_disk] == IO_BLOCKED || !rdev || !test_bit(In_sync, &rdev->flags) || test_bit(WriteMostly, &rdev->flags); rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) { - if (rdev && test_bit(In_sync, &rdev->flags)) + if (rdev && test_bit(In_sync, &rdev->flags) && + r1_bio->bios[new_disk] != IO_BLOCKED) wonly_disk = new_disk; if (new_disk == conf->raid_disks - 1) { @@ -437,11 +439,13 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) /* make sure the disk is operational */ for 
(rdev = rcu_dereference(conf->mirrors[new_disk].rdev); + r1_bio->bios[new_disk] == IO_BLOCKED || !rdev || !test_bit(In_sync, &rdev->flags) || test_bit(WriteMostly, &rdev->flags); rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) { - if (rdev && test_bit(In_sync, &rdev->flags)) + if (rdev && test_bit(In_sync, &rdev->flags) && + r1_bio->bios[new_disk] != IO_BLOCKED) wonly_disk = new_disk; if (new_disk <= 0) @@ -478,7 +482,7 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) rdev = rcu_dereference(conf->mirrors[disk].rdev); - if (!rdev || + if (!rdev || r1_bio->bios[disk] == IO_BLOCKED || !test_bit(In_sync, &rdev->flags) || test_bit(WriteMostly, &rdev->flags)) continue; @@ -1335,7 +1339,7 @@ static void raid1d(mddev_t *mddev) sector_t sect = r1_bio->sector; int sectors = r1_bio->sectors; freeze_array(conf); - while(sectors) { + if (mddev->ro == 0) while(sectors) { int s = sectors; int d = r1_bio->read_disk; int success = 0; @@ -1388,7 +1392,6 @@ static void raid1d(mddev_t *mddev) sect += s; } - unfreeze_array(conf); bio = r1_bio->bios[r1_bio->read_disk]; @@ -1399,7 +1402,8 @@ static void raid1d(mddev_t *mddev) (unsigned long long)r1_bio->sector); raid_end_bio_io(r1_bio); } else { - r1_bio->bios[r1_bio->read_disk] = NULL; + r1_bio->bios[r1_bio->read_disk] = + mddev->ro ? IO_BLOCKED : NULL; r1_bio->read_disk = disk; bio_put(bio); bio = bio_clone(r1_bio->master_bio, GFP_NOIO); diff --git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h index cbe4238d3f9f..9d5494aaac0f 100644 --- a/include/linux/raid/raid1.h +++ b/include/linux/raid/raid1.h @@ -109,6 +109,13 @@ struct r1bio_s { /* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/ }; +/* when we get a read error on a read-only array, we redirect to another + * device without failing the first device, or trying to over-write to + * correct the read error. 
To keep track of bad blocks on a per-bio + * level, we store IO_BLOCKED in the appropriate 'bios' pointer + */ +#define IO_BLOCKED ((struct bio*)1) + /* bits for r1bio.state */ #define R1BIO_Uptodate 0 #define R1BIO_IsSync 1 -- cgit v1.2.3-71-gd317 From 9910f16af35419a5382fa7850eecc220103036fa Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:24 -0800 Subject: [PATCH] md: fix up some rdev rcu locking in raid5/6 There is this "FIXME" comment with a typo in it!! that has been annoying me for days, so I just had to remove it. conf->disks[i].rdev should only be accessed if - we know we hold a reference or - the mddev->reconfig_sem is down or - we have a rcu_readlock handle_stripe was referencing rdev in three places without any of these. For the first two, get an rcu_readlock. For the last, the same access (md_sync_acct call) is made a little later after the rdev has been claimed under an rcu_readlock, if R5_Syncio is set. So just use that access... However R5_Syncio isn't really needed as the 'syncing' variable contains the same information. So use that instead. Issues, comment, and fix are identical in raid5 and raid6. 
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid5.c | 16 ++++++++-------- drivers/md/raid6main.c | 19 ++++++++----------- include/linux/raid/raid5.h | 1 - 3 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0d016a844ec6..0222ba1a6d35 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -961,11 +961,11 @@ static void handle_stripe(struct stripe_head *sh) syncing = test_bit(STRIPE_SYNCING, &sh->state); /* Now to look around and see what can be done */ + rcu_read_lock(); for (i=disks; i--; ) { mdk_rdev_t *rdev; dev = &sh->dev[i]; clear_bit(R5_Insync, &dev->flags); - clear_bit(R5_Syncio, &dev->flags); PRINTK("check %d: state 0x%lx read %p write %p written %p\n", i, dev->flags, dev->toread, dev->towrite, dev->written); @@ -1004,7 +1004,7 @@ static void handle_stripe(struct stripe_head *sh) non_overwrite++; } if (dev->written) written++; - rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */ + rdev = rcu_dereference(conf->disks[i].rdev); if (!rdev || !test_bit(In_sync, &rdev->flags)) { /* The ReadError flag will just be confusing now */ clear_bit(R5_ReadError, &dev->flags); @@ -1017,6 +1017,7 @@ static void handle_stripe(struct stripe_head *sh) } else set_bit(R5_Insync, &dev->flags); } + rcu_read_unlock(); PRINTK("locked=%d uptodate=%d to_read=%d" " to_write=%d failed=%d failed_num=%d\n", locked, uptodate, to_read, to_write, failed, failed_num); @@ -1028,10 +1029,13 @@ static void handle_stripe(struct stripe_head *sh) int bitmap_end = 0; if (test_bit(R5_ReadError, &sh->dev[i].flags)) { - mdk_rdev_t *rdev = conf->disks[i].rdev; + mdk_rdev_t *rdev; + rcu_read_lock(); + rdev = rcu_dereference(conf->disks[i].rdev); if (rdev && test_bit(In_sync, &rdev->flags)) /* multiple read failures in one stripe */ md_error(conf->mddev, rdev); + rcu_read_unlock(); } spin_lock_irq(&conf->device_lock); @@ -1180,9 +1184,6 @@ static 
void handle_stripe(struct stripe_head *sh) locked++; PRINTK("Reading block %d (sync=%d)\n", i, syncing); - if (syncing) - md_sync_acct(conf->disks[i].rdev->bdev, - STRIPE_SECTORS); } } } @@ -1326,7 +1327,6 @@ static void handle_stripe(struct stripe_head *sh) clear_bit(STRIPE_DEGRADED, &sh->state); locked++; set_bit(STRIPE_INSYNC, &sh->state); - set_bit(R5_Syncio, &dev->flags); } } if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { @@ -1392,7 +1392,7 @@ static void handle_stripe(struct stripe_head *sh) rcu_read_unlock(); if (rdev) { - if (test_bit(R5_Syncio, &sh->dev[i].flags)) + if (syncing) md_sync_acct(rdev->bdev, STRIPE_SECTORS); bi->bi_bdev = rdev->bdev; diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index 7a51553d8be5..b5b7a8d0b165 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c @@ -1060,11 +1060,11 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) syncing = test_bit(STRIPE_SYNCING, &sh->state); /* Now to look around and see what can be done */ + rcu_read_lock(); for (i=disks; i--; ) { mdk_rdev_t *rdev; dev = &sh->dev[i]; clear_bit(R5_Insync, &dev->flags); - clear_bit(R5_Syncio, &dev->flags); PRINTK("check %d: state 0x%lx read %p write %p written %p\n", i, dev->flags, dev->toread, dev->towrite, dev->written); @@ -1103,7 +1103,7 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) non_overwrite++; } if (dev->written) written++; - rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */ + rdev = rcu_dereference(conf->disks[i].rdev); if (!rdev || !test_bit(In_sync, &rdev->flags)) { /* The ReadError flag will just be confusing now */ clear_bit(R5_ReadError, &dev->flags); @@ -1117,6 +1117,7 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) } else set_bit(R5_Insync, &dev->flags); } + rcu_read_unlock(); PRINTK("locked=%d uptodate=%d to_read=%d" " to_write=%d failed=%d failed_num=%d,%d\n", locked, uptodate, to_read, to_write, failed, @@ 
-1129,10 +1130,13 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) int bitmap_end = 0; if (test_bit(R5_ReadError, &sh->dev[i].flags)) { - mdk_rdev_t *rdev = conf->disks[i].rdev; + mdk_rdev_t *rdev; + rcu_read_lock(); + rdev = rcu_dereference(conf->disks[i].rdev); if (rdev && test_bit(In_sync, &rdev->flags)) /* multiple read failures in one stripe */ md_error(conf->mddev, rdev); + rcu_read_unlock(); } spin_lock_irq(&conf->device_lock); @@ -1307,9 +1311,6 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) locked++; PRINTK("Reading block %d (sync=%d)\n", i, syncing); - if (syncing) - md_sync_acct(conf->disks[i].rdev->bdev, - STRIPE_SECTORS); } } } @@ -1463,14 +1464,12 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) locked++; set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); - set_bit(R5_Syncio, &dev->flags); } if (failed >= 1) { dev = &sh->dev[failed_num[0]]; locked++; set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); - set_bit(R5_Syncio, &dev->flags); } if (update_p) { @@ -1478,14 +1477,12 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) locked ++; set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); - set_bit(R5_Syncio, &dev->flags); } if (update_q) { dev = &sh->dev[qd_idx]; locked++; set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); - set_bit(R5_Syncio, &dev->flags); } clear_bit(STRIPE_DEGRADED, &sh->state); @@ -1557,7 +1554,7 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) rcu_read_unlock(); if (rdev) { - if (test_bit(R5_Syncio, &sh->dev[i].flags)) + if (syncing) md_sync_acct(rdev->bdev, STRIPE_SECTORS); bi->bi_bdev = rdev->bdev; diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index e9c1c0d4f90b..28fcd7533ac4 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -152,7 +152,6 @@ struct stripe_head { #define R5_Insync 
3 /* rdev && rdev->in_sync at start */ #define R5_Wantread 4 /* want to schedule a read */ #define R5_Wantwrite 5 -#define R5_Syncio 6 /* this io need to be accounted as resync io */ #define R5_Overlap 7 /* There is a pending overlapping request on this block */ #define R5_ReadError 8 /* seen a read error here recently */ #define R5_ReWrite 9 /* have tried to over-write the readerror */ -- cgit v1.2.3-71-gd317 From 4443ae10ca15d07922ceda622f03db8865fa3d13 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:28 -0800 Subject: [PATCH] md: auto-correct correctable read errors in raid10 Largely just a cross-port from raid1. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid10.c | 127 ++++++++++++++++++++++++++++++++++++++------ include/linux/raid/raid10.h | 2 + 2 files changed, 114 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1fa70c34b7d2..64bb4ddc6798 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -209,6 +209,7 @@ static void reschedule_retry(r10bio_t *r10_bio) spin_lock_irqsave(&conf->device_lock, flags); list_add(&r10_bio->retry_list, &conf->retry_list); + conf->nr_queued ++; spin_unlock_irqrestore(&conf->device_lock, flags); md_wakeup_thread(mddev->thread); @@ -254,9 +255,9 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int /* * this branch is our 'one mirror IO has finished' event handler: */ - if (!uptodate) - md_error(r10_bio->mddev, conf->mirrors[dev].rdev); - else + update_head_pos(slot, r10_bio); + + if (uptodate) { /* * Set R10BIO_Uptodate in our master bio, so that * we will return a good error code to the higher @@ -267,15 +268,8 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int * wait for the 'master' bio. 
*/ set_bit(R10BIO_Uptodate, &r10_bio->state); - - update_head_pos(slot, r10_bio); - - /* - * we have only one bio on the read side - */ - if (uptodate) raid_end_bio_io(r10_bio); - else { + } else { /* * oops, read error: */ @@ -714,6 +708,33 @@ static void allow_barrier(conf_t *conf) wake_up(&conf->wait_barrier); } +static void freeze_array(conf_t *conf) +{ + /* stop syncio and normal IO and wait for everything to + * go quite. + * We increment barrier and nr_waiting, and then + * wait until barrier+nr_pending match nr_queued+2 + */ + spin_lock_irq(&conf->resync_lock); + conf->barrier++; + conf->nr_waiting++; + wait_event_lock_irq(conf->wait_barrier, + conf->barrier+conf->nr_pending == conf->nr_queued+2, + conf->resync_lock, + raid10_unplug(conf->mddev->queue)); + spin_unlock_irq(&conf->resync_lock); +} + +static void unfreeze_array(conf_t *conf) +{ + /* reverse the effect of the freeze */ + spin_lock_irq(&conf->resync_lock); + conf->barrier--; + conf->nr_waiting--; + wake_up(&conf->wait_barrier); + spin_unlock_irq(&conf->resync_lock); +} + static int make_request(request_queue_t *q, struct bio * bio) { mddev_t *mddev = q->queuedata; @@ -1338,6 +1359,7 @@ static void raid10d(mddev_t *mddev) break; r10_bio = list_entry(head->prev, r10bio_t, retry_list); list_del(head->prev); + conf->nr_queued--; spin_unlock_irqrestore(&conf->device_lock, flags); mddev = r10_bio->mddev; @@ -1350,6 +1372,78 @@ static void raid10d(mddev_t *mddev) unplug = 1; } else { int mirror; + /* we got a read error. Maybe the drive is bad. Maybe just + * the block and we can fix it. + * We freeze all other IO, and try reading the block from + * other devices. When we find one, we re-write + * and check it that fixes the read error. + * This is all done synchronously while the array is + * frozen. 
+ */ + int sect = 0; /* Offset from r10_bio->sector */ + int sectors = r10_bio->sectors; + freeze_array(conf); + if (mddev->ro == 0) while(sectors) { + int s = sectors; + int sl = r10_bio->read_slot; + int success = 0; + + if (s > (PAGE_SIZE>>9)) + s = PAGE_SIZE >> 9; + + do { + int d = r10_bio->devs[sl].devnum; + rdev = conf->mirrors[d].rdev; + if (rdev && + test_bit(In_sync, &rdev->flags) && + sync_page_io(rdev->bdev, + r10_bio->devs[sl].addr + + sect + rdev->data_offset, + s<<9, + conf->tmppage, READ)) + success = 1; + else { + sl++; + if (sl == conf->copies) + sl = 0; + } + } while (!success && sl != r10_bio->read_slot); + + if (success) { + /* write it back and re-read */ + while (sl != r10_bio->read_slot) { + int d; + if (sl==0) + sl = conf->copies; + sl--; + d = r10_bio->devs[sl].devnum; + rdev = conf->mirrors[d].rdev; + if (rdev && + test_bit(In_sync, &rdev->flags)) { + if (sync_page_io(rdev->bdev, + r10_bio->devs[sl].addr + + sect + rdev->data_offset, + s<<9, conf->tmppage, WRITE) == 0 || + sync_page_io(rdev->bdev, + r10_bio->devs[sl].addr + + sect + rdev->data_offset, + s<<9, conf->tmppage, READ) == 0) { + /* Well, this device is dead */ + md_error(mddev, rdev); + } + } + } + } else { + /* Cannot read from anywhere -- bye bye array */ + md_error(mddev, conf->mirrors[r10_bio->devs[r10_bio->read_slot].devnum].rdev); + break; + } + sectors -= s; + sect += s; + } + + unfreeze_array(conf); + bio = r10_bio->devs[r10_bio->read_slot].bio; r10_bio->devs[r10_bio->read_slot].bio = NULL; bio_put(bio); @@ -1793,22 +1887,24 @@ static int run(mddev_t *mddev) * bookkeeping area. 
[whatever we allocate in run(), * should be freed in stop()] */ - conf = kmalloc(sizeof(conf_t), GFP_KERNEL); + conf = kzalloc(sizeof(conf_t), GFP_KERNEL); mddev->private = conf; if (!conf) { printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", mdname(mddev)); goto out; } - memset(conf, 0, sizeof(*conf)); - conf->mirrors = kmalloc(sizeof(struct mirror_info)*mddev->raid_disks, + conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, GFP_KERNEL); if (!conf->mirrors) { printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", mdname(mddev)); goto out_free_conf; } - memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks); + + conf->tmppage = alloc_page(GFP_KERNEL); + if (!conf->tmppage) + goto out_free_conf; conf->near_copies = nc; conf->far_copies = fc; @@ -1918,6 +2014,7 @@ static int run(mddev_t *mddev) out_free_conf: if (conf->r10bio_pool) mempool_destroy(conf->r10bio_pool); + put_page(conf->tmppage); kfree(conf->mirrors); kfree(conf); mddev->private = NULL; diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h index b660cbf628d8..dfa528385e3f 100644 --- a/include/linux/raid/raid10.h +++ b/include/linux/raid/raid10.h @@ -42,6 +42,7 @@ struct r10_private_data_s { spinlock_t resync_lock; int nr_pending; int nr_waiting; + int nr_queued; int barrier; sector_t next_resync; int fullsync; /* set to 1 if a full sync is needed, @@ -53,6 +54,7 @@ struct r10_private_data_s { mempool_t *r10bio_pool; mempool_t *r10buf_pool; + struct page *tmppage; }; typedef struct r10_private_data_s conf_t; -- cgit v1.2.3-71-gd317 From 0eb3ff12aa8a12538ef681dc83f4361636a0699f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:29 -0800 Subject: [PATCH] md: raid10 read-error handling - resync and read-only Add in correct read-error handling for resync and read-only situations. When read-only, we don't over-write, so we need to mark the failed drive in the r10_bio so we don't re-try it. 
During resync, we always read all blocks, so if there is a read error, we simply over-write it with the good block that we found (assuming we found one). Note that the recovery case still isn't handled in an interesting way. There is nothing useful to do for the 2-copies case. If there are 3 or more copies, then we could try reading from one of the non-missing copies, but this is a bit complicated and very rarely would be used, so I'm leaving it for now. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid10.c | 56 ++++++++++++++++++++++++++++----------------- include/linux/raid/raid10.h | 7 ++++++ 2 files changed, 42 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 64bb4ddc6798..3f8df2ecbae3 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -172,7 +172,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio) for (i = 0; i < conf->copies; i++) { struct bio **bio = & r10_bio->devs[i].bio; - if (*bio) + if (*bio && *bio != IO_BLOCKED) bio_put(*bio); *bio = NULL; } @@ -500,6 +500,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) disk = r10_bio->devs[slot].devnum; while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL || + r10_bio->devs[slot].bio == IO_BLOCKED || !test_bit(In_sync, &rdev->flags)) { slot++; if (slot == conf->copies) { @@ -517,6 +518,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) slot = 0; disk = r10_bio->devs[slot].devnum; while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL || + r10_bio->devs[slot].bio == IO_BLOCKED || !test_bit(In_sync, &rdev->flags)) { slot ++; if (slot == conf->copies) { @@ -537,6 +539,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL || + r10_bio->devs[nslot].bio == IO_BLOCKED || !test_bit(In_sync, &rdev->flags)) continue; @@ -1104,7 +1107,6 @@ abort: static int 
end_sync_read(struct bio *bio, unsigned int bytes_done, int error) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); conf_t *conf = mddev_to_conf(r10_bio->mddev); int i,d; @@ -1119,7 +1121,10 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) BUG(); update_head_pos(i, r10_bio); d = r10_bio->devs[i].devnum; - if (!uptodate) + + if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + set_bit(R10BIO_Uptodate, &r10_bio->state); + else if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery)) md_error(r10_bio->mddev, conf->mirrors[d].rdev); @@ -1209,25 +1214,30 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) fbio = r10_bio->devs[i].bio; /* now find blocks with errors */ - for (i=first+1 ; i < conf->copies ; i++) { - int vcnt, j, d; + for (i=0 ; i < conf->copies ; i++) { + int j, d; + int vcnt = r10_bio->sectors >> (PAGE_SHIFT-9); - if (!test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) - continue; - /* We know that the bi_io_vec layout is the same for - * both 'first' and 'i', so we just compare them. - * All vec entries are PAGE_SIZE; - */ tbio = r10_bio->devs[i].bio; - vcnt = r10_bio->sectors >> (PAGE_SHIFT-9); - for (j = 0; j < vcnt; j++) - if (memcmp(page_address(fbio->bi_io_vec[j].bv_page), - page_address(tbio->bi_io_vec[j].bv_page), - PAGE_SIZE)) - break; - if (j == vcnt) + + if (tbio->bi_end_io != end_sync_read) + continue; + if (i == first) continue; - mddev->resync_mismatches += r10_bio->sectors; + if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) { + /* We know that the bi_io_vec layout is the same for + * both 'first' and 'i', so we just compare them. 
+ * All vec entries are PAGE_SIZE; + */ + for (j = 0; j < vcnt; j++) + if (memcmp(page_address(fbio->bi_io_vec[j].bv_page), + page_address(tbio->bi_io_vec[j].bv_page), + PAGE_SIZE)) + break; + if (j == vcnt) + continue; + mddev->resync_mismatches += r10_bio->sectors; + } if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) /* Don't fix anything. */ continue; @@ -1308,7 +1318,10 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) atomic_inc(&conf->mirrors[d].rdev->nr_pending); md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9); - generic_make_request(wbio); + if (test_bit(R10BIO_Uptodate, &r10_bio->state)) + generic_make_request(wbio); + else + bio_endio(wbio, wbio->bi_size, -EIO); } @@ -1445,7 +1458,8 @@ static void raid10d(mddev_t *mddev) unfreeze_array(conf); bio = r10_bio->devs[r10_bio->read_slot].bio; - r10_bio->devs[r10_bio->read_slot].bio = NULL; + r10_bio->devs[r10_bio->read_slot].bio = + mddev->ro ? IO_BLOCKED : NULL; bio_put(bio); mirror = read_balance(conf, r10_bio); if (mirror == -1) { diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h index dfa528385e3f..b1103298a8c2 100644 --- a/include/linux/raid/raid10.h +++ b/include/linux/raid/raid10.h @@ -104,6 +104,13 @@ struct r10bio_s { } devs[0]; }; +/* when we get a read error on a read-only array, we redirect to another + * device without failing the first device, or trying to over-write to + * correct the read error. 
To keep track of bad blocks on a per-bio + * level, we store IO_BLOCKED in the appropriate 'bios' pointer + */ +#define IO_BLOCKED ((struct bio*)1) + /* bits for r10bio.state */ #define R10BIO_Uptodate 0 #define R10BIO_IsSync 1 -- cgit v1.2.3-71-gd317 From fccddba060f2b4916a30aa27acc3d03b01bb981e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:33 -0800 Subject: [PATCH] md: tidy up raid5/6 hash table code - replace open-coded hash chain with hlist macros - Fix hash-table size at one page - it is already quite generous, so there will never be a need to use multiple pages, so no need for __get_free_pages No functional change. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid5.c | 40 ++++++++++++++-------------------------- drivers/md/raid6main.c | 46 +++++++++++++++++----------------------------- include/linux/raid/raid5.h | 4 ++-- 3 files changed, 33 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 9fc50487e2ed..6e4db95cebb1 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -35,12 +35,10 @@ #define STRIPE_SHIFT (PAGE_SHIFT - 9) #define STRIPE_SECTORS (STRIPE_SIZE>>9) #define IO_THRESHOLD 1 -#define HASH_PAGES 1 -#define HASH_PAGES_ORDER 0 -#define NR_HASH (HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *)) +#define NR_HASH (PAGE_SIZE / sizeof(struct hlist_head)) #define HASH_MASK (NR_HASH - 1) -#define stripe_hash(conf, sect) ((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK]) +#define stripe_hash(conf, sect) (&((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK])) /* bio's attached to a stripe+device for I/O are linked together in bi_sector * order without overlap. 
There may be several bio's per stripe+device, and @@ -113,29 +111,21 @@ static void release_stripe(struct stripe_head *sh) spin_unlock_irqrestore(&conf->device_lock, flags); } -static void remove_hash(struct stripe_head *sh) +static inline void remove_hash(struct stripe_head *sh) { PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector); - if (sh->hash_pprev) { - if (sh->hash_next) - sh->hash_next->hash_pprev = sh->hash_pprev; - *sh->hash_pprev = sh->hash_next; - sh->hash_pprev = NULL; - } + hlist_del_init(&sh->hash); } -static __inline__ void insert_hash(raid5_conf_t *conf, struct stripe_head *sh) +static inline void insert_hash(raid5_conf_t *conf, struct stripe_head *sh) { - struct stripe_head **shp = &stripe_hash(conf, sh->sector); + struct hlist_head *hp = stripe_hash(conf, sh->sector); PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector); CHECK_DEVLOCK(); - if ((sh->hash_next = *shp) != NULL) - (*shp)->hash_pprev = &sh->hash_next; - *shp = sh; - sh->hash_pprev = shp; + hlist_add_head(&sh->hash, hp); } @@ -228,10 +218,11 @@ static inline void init_stripe(struct stripe_head *sh, sector_t sector, int pd_i static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector) { struct stripe_head *sh; + struct hlist_node *hn; CHECK_DEVLOCK(); PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector); - for (sh = stripe_hash(conf, sector); sh; sh = sh->hash_next) + hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash) if (sh->sector == sector) return sh; PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector); @@ -1835,9 +1826,8 @@ static int run(mddev_t *mddev) conf->mddev = mddev; - if ((conf->stripe_hashtbl = (struct stripe_head **) __get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL) + if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) goto abort; - memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE); spin_lock_init(&conf->device_lock); 
init_waitqueue_head(&conf->wait_for_stripe); @@ -1972,9 +1962,7 @@ static int run(mddev_t *mddev) abort: if (conf) { print_raid5_conf(conf); - if (conf->stripe_hashtbl) - free_pages((unsigned long) conf->stripe_hashtbl, - HASH_PAGES_ORDER); + kfree(conf->stripe_hashtbl); kfree(conf); } mddev->private = NULL; @@ -1991,7 +1979,7 @@ static int stop(mddev_t *mddev) md_unregister_thread(mddev->thread); mddev->thread = NULL; shrink_stripes(conf); - free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); + kfree(conf->stripe_hashtbl); blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ sysfs_remove_group(&mddev->kobj, &raid5_attrs_group); kfree(conf); @@ -2019,12 +2007,12 @@ static void print_sh (struct stripe_head *sh) static void printall (raid5_conf_t *conf) { struct stripe_head *sh; + struct hlist_node *hn; int i; spin_lock_irq(&conf->device_lock); for (i = 0; i < NR_HASH; i++) { - sh = conf->stripe_hashtbl[i]; - for (; sh; sh = sh->hash_next) { + hlist_for_each_entry(sh, hn, &conf->stripe_hashtbl[i], hash) { if (sh->raid_conf != conf) continue; print_sh(sh); diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index 4062fc16ac2b..79b5244f44f4 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c @@ -40,12 +40,10 @@ #define STRIPE_SHIFT (PAGE_SHIFT - 9) #define STRIPE_SECTORS (STRIPE_SIZE>>9) #define IO_THRESHOLD 1 -#define HASH_PAGES 1 -#define HASH_PAGES_ORDER 0 -#define NR_HASH (HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *)) +#define NR_HASH (PAGE_SIZE / sizeof(struct hlist_head)) #define HASH_MASK (NR_HASH - 1) -#define stripe_hash(conf, sect) ((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK]) +#define stripe_hash(conf, sect) (&((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK])) /* bio's attached to a stripe+device for I/O are linked together in bi_sector * order without overlap. 
There may be several bio's per stripe+device, and @@ -132,29 +130,21 @@ static void release_stripe(struct stripe_head *sh) spin_unlock_irqrestore(&conf->device_lock, flags); } -static void remove_hash(struct stripe_head *sh) +static inline void remove_hash(struct stripe_head *sh) { PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector); - if (sh->hash_pprev) { - if (sh->hash_next) - sh->hash_next->hash_pprev = sh->hash_pprev; - *sh->hash_pprev = sh->hash_next; - sh->hash_pprev = NULL; - } + hlist_del_init(&sh->hash); } -static __inline__ void insert_hash(raid6_conf_t *conf, struct stripe_head *sh) +static inline void insert_hash(raid6_conf_t *conf, struct stripe_head *sh) { - struct stripe_head **shp = &stripe_hash(conf, sh->sector); + struct hlist_head *hp = stripe_hash(conf, sh->sector); PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector); CHECK_DEVLOCK(); - if ((sh->hash_next = *shp) != NULL) - (*shp)->hash_pprev = &sh->hash_next; - *shp = sh; - sh->hash_pprev = shp; + hlist_add_head(&sh->hash, hp); } @@ -247,10 +237,11 @@ static inline void init_stripe(struct stripe_head *sh, sector_t sector, int pd_i static struct stripe_head *__find_stripe(raid6_conf_t *conf, sector_t sector) { struct stripe_head *sh; + struct hlist_node *hn; CHECK_DEVLOCK(); PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector); - for (sh = stripe_hash(conf, sector); sh; sh = sh->hash_next) + hlist_for_each_entry (sh, hn, stripe_hash(conf, sector), hash) if (sh->sector == sector) return sh; PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector); @@ -1931,17 +1922,15 @@ static int run(mddev_t *mddev) return -EIO; } - mddev->private = kmalloc (sizeof (raid6_conf_t) - + mddev->raid_disks * sizeof(struct disk_info), - GFP_KERNEL); + mddev->private = kzalloc(sizeof (raid6_conf_t) + + mddev->raid_disks * sizeof(struct disk_info), + GFP_KERNEL); if ((conf = mddev->private) == NULL) goto abort; - memset (conf, 0, sizeof (*conf) + 
mddev->raid_disks * sizeof(struct disk_info) ); conf->mddev = mddev; - if ((conf->stripe_hashtbl = (struct stripe_head **) __get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL) + if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) goto abort; - memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE); conf->spare_page = alloc_page(GFP_KERNEL); if (!conf->spare_page) @@ -2085,9 +2074,7 @@ abort: print_raid6_conf(conf); if (conf->spare_page) put_page(conf->spare_page); - if (conf->stripe_hashtbl) - free_pages((unsigned long) conf->stripe_hashtbl, - HASH_PAGES_ORDER); + kfree(conf->stripe_hashtbl); kfree(conf); } mddev->private = NULL; @@ -2104,7 +2091,7 @@ static int stop (mddev_t *mddev) md_unregister_thread(mddev->thread); mddev->thread = NULL; shrink_stripes(conf); - free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); + kfree(conf->stripe_hashtbl); blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ kfree(conf); mddev->private = NULL; @@ -2131,12 +2118,13 @@ static void print_sh (struct seq_file *seq, struct stripe_head *sh) static void printall (struct seq_file *seq, raid6_conf_t *conf) { struct stripe_head *sh; + struct hlist_node *hn; int i; spin_lock_irq(&conf->device_lock); for (i = 0; i < NR_HASH; i++) { sh = conf->stripe_hashtbl[i]; - for (; sh; sh = sh->hash_next) { + hlist_for_each_entry(sh, hn, &conf->stripe_hashtbl[i], hash) { if (sh->raid_conf != conf) continue; print_sh(seq, sh); diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 28fcd7533ac4..394da8207b34 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -126,7 +126,7 @@ */ struct stripe_head { - struct stripe_head *hash_next, **hash_pprev; /* hash pointers */ + struct hlist_node hash; struct list_head lru; /* inactive_list or handle_list */ struct raid5_private_data *raid_conf; sector_t sector; /* sector of this row */ @@ -204,7 +204,7 @@ struct disk_info { }; struct raid5_private_data { - struct 
stripe_head **stripe_hashtbl; + struct hlist_head *stripe_hashtbl; mddev_t *mddev; struct disk_info *spare; int chunk_size, level, algorithm; -- cgit v1.2.3-71-gd317 From 2604b703b6b3db80e3c75ce472a54dfd0b7bf9f4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:36 -0800 Subject: [PATCH] md: remove personality numbering from md md supports multiple different RAID levels, each being implemented by a 'personality' (which is often in a separate module). These personalities have fairly artificial 'numbers'. The numbers are used to: 1- provide an index into an array where the various personalities are recorded 2- identify the module (via an alias) which implements a particular personality. Neither of these uses really justifies the existence of personality numbers. The array can be replaced by a linked list which is searched (array lookup only happens very rarely). Module identification can be done using an alias based on level rather than 'personality' number. The current 'raid5' module supports two levels (4 and 5) but only one personality. This slight awkwardness (which was handled in the mapping from level to personality) can be better handled by allowing raid5 to register 2 personalities. With this change in place, the core md module does not need to have an exhaustive list of all possible personalities, so other personalities can be added independently. This patch also moves the check for chunksize being non-zero into the ->run routines for the personalities that need it, rather than having it in core-md. This has a side effect of allowing 'faulty' and 'linear' not to have a chunk-size set.
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/faulty.c | 8 +++-- drivers/md/linear.c | 10 +++--- drivers/md/md.c | 79 +++++++++++++++++------------------------------ drivers/md/multipath.c | 11 +++---- drivers/md/raid0.c | 14 ++++++--- drivers/md/raid1.c | 9 +++--- drivers/md/raid10.c | 16 +++++----- drivers/md/raid5.c | 34 +++++++++++++++++--- drivers/md/raid6main.c | 10 +++--- include/linux/raid/md.h | 4 +-- include/linux/raid/md_k.h | 63 ++++++------------------------------- init/do_mounts_md.c | 22 ++++++------- 12 files changed, 125 insertions(+), 155 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 0248f8e7eac0..f12e83086897 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -316,9 +316,10 @@ static int stop(mddev_t *mddev) return 0; } -static mdk_personality_t faulty_personality = +static struct mdk_personality faulty_personality = { .name = "faulty", + .level = LEVEL_FAULTY, .owner = THIS_MODULE, .make_request = make_request, .run = run, @@ -329,15 +330,16 @@ static mdk_personality_t faulty_personality = static int __init raid_init(void) { - return register_md_personality(FAULTY, &faulty_personality); + return register_md_personality(&faulty_personality); } static void raid_exit(void) { - unregister_md_personality(FAULTY); + unregister_md_personality(&faulty_personality); } module_init(raid_init); module_exit(raid_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-10"); /* faulty */ +MODULE_ALIAS("md-level--5"); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index f46c98d05b44..79dee8159217 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -351,9 +351,10 @@ static void linear_status (struct seq_file *seq, mddev_t *mddev) } -static mdk_personality_t linear_personality= +static struct mdk_personality linear_personality = { .name = "linear", + .level = LEVEL_LINEAR, .owner = THIS_MODULE, .make_request = 
linear_make_request, .run = linear_run, @@ -363,16 +364,17 @@ static mdk_personality_t linear_personality= static int __init linear_init (void) { - return register_md_personality (LINEAR, &linear_personality); + return register_md_personality (&linear_personality); } static void linear_exit (void) { - unregister_md_personality (LINEAR); + unregister_md_personality (&linear_personality); } module_init(linear_init); module_exit(linear_exit); MODULE_LICENSE("GPL"); -MODULE_ALIAS("md-personality-1"); /* LINEAR */ +MODULE_ALIAS("md-personality-1"); /* LINEAR - degrecated*/ +MODULE_ALIAS("md-level--1"); diff --git a/drivers/md/md.c b/drivers/md/md.c index a6a066fc92e3..07f180f95b47 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -68,7 +68,7 @@ static void autostart_arrays (int part); #endif -static mdk_personality_t *pers[MAX_PERSONALITY]; +static LIST_HEAD(pers_list); static DEFINE_SPINLOCK(pers_lock); /* @@ -303,6 +303,15 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev) return NULL; } +static struct mdk_personality *find_pers(int level) +{ + struct mdk_personality *pers; + list_for_each_entry(pers, &pers_list, list) + if (pers->level == level) + return pers; + return NULL; +} + static inline sector_t calc_dev_sboffset(struct block_device *bdev) { sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; @@ -1744,7 +1753,7 @@ static void analyze_sbs(mddev_t * mddev) static ssize_t level_show(mddev_t *mddev, char *page) { - mdk_personality_t *p = mddev->pers; + struct mdk_personality *p = mddev->pers; if (p == NULL && mddev->raid_disks == 0) return 0; if (mddev->level >= 0) @@ -1960,11 +1969,12 @@ static int start_dirty_degraded; static int do_md_run(mddev_t * mddev) { - int pnum, err; + int err; int chunk_size; struct list_head *tmp; mdk_rdev_t *rdev; struct gendisk *disk; + struct mdk_personality *pers; char b[BDEVNAME_SIZE]; if (list_empty(&mddev->disks)) @@ -1981,20 +1991,8 @@ static int do_md_run(mddev_t * mddev) analyze_sbs(mddev); chunk_size = 
mddev->chunk_size; - pnum = level_to_pers(mddev->level); - if ((pnum != MULTIPATH) && (pnum != RAID1)) { - if (!chunk_size) { - /* - * 'default chunksize' in the old md code used to - * be PAGE_SIZE, baaad. - * we abort here to be on the safe side. We don't - * want to continue the bad practice. - */ - printk(KERN_ERR - "no chunksize specified, see 'man raidtab'\n"); - return -EINVAL; - } + if (chunk_size) { if (chunk_size > MAX_CHUNK_SIZE) { printk(KERN_ERR "too big chunk_size: %d > %d\n", chunk_size, MAX_CHUNK_SIZE); @@ -2030,10 +2028,7 @@ static int do_md_run(mddev_t * mddev) } #ifdef CONFIG_KMOD - if (!pers[pnum]) - { - request_module("md-personality-%d", pnum); - } + request_module("md-level-%d", mddev->level); #endif /* @@ -2055,14 +2050,14 @@ static int do_md_run(mddev_t * mddev) return -ENOMEM; spin_lock(&pers_lock); - if (!pers[pnum] || !try_module_get(pers[pnum]->owner)) { + pers = find_pers(mddev->level); + if (!pers || !try_module_get(pers->owner)) { spin_unlock(&pers_lock); - printk(KERN_WARNING "md: personality %d is not loaded!\n", - pnum); + printk(KERN_WARNING "md: personality for level %d is not loaded!\n", + mddev->level); return -EINVAL; } - - mddev->pers = pers[pnum]; + mddev->pers = pers; spin_unlock(&pers_lock); mddev->recovery = 0; @@ -3701,15 +3696,14 @@ static int md_seq_show(struct seq_file *seq, void *v) struct list_head *tmp2; mdk_rdev_t *rdev; struct mdstat_info *mi = seq->private; - int i; struct bitmap *bitmap; if (v == (void*)1) { + struct mdk_personality *pers; seq_printf(seq, "Personalities : "); spin_lock(&pers_lock); - for (i = 0; i < MAX_PERSONALITY; i++) - if (pers[i]) - seq_printf(seq, "[%s] ", pers[i]->name); + list_for_each_entry(pers, &pers_list, list) + seq_printf(seq, "[%s] ", pers->name); spin_unlock(&pers_lock); seq_printf(seq, "\n"); @@ -3870,35 +3864,20 @@ static struct file_operations md_seq_fops = { .poll = mdstat_poll, }; -int register_md_personality(int pnum, mdk_personality_t *p) +int 
register_md_personality(struct mdk_personality *p) { - if (pnum >= MAX_PERSONALITY) { - printk(KERN_ERR - "md: tried to install personality %s as nr %d, but max is %lu\n", - p->name, pnum, MAX_PERSONALITY-1); - return -EINVAL; - } - spin_lock(&pers_lock); - if (pers[pnum]) { - spin_unlock(&pers_lock); - return -EBUSY; - } - - pers[pnum] = p; - printk(KERN_INFO "md: %s personality registered as nr %d\n", p->name, pnum); + list_add_tail(&p->list, &pers_list); + printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level); spin_unlock(&pers_lock); return 0; } -int unregister_md_personality(int pnum) +int unregister_md_personality(struct mdk_personality *p) { - if (pnum >= MAX_PERSONALITY) - return -EINVAL; - - printk(KERN_INFO "md: %s personality unregistered\n", pers[pnum]->name); + printk(KERN_INFO "md: %s personality unregistered\n", p->name); spin_lock(&pers_lock); - pers[pnum] = NULL; + list_del_init(&p->list); spin_unlock(&pers_lock); return 0; } diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 97a56aaaef6d..d4d838e3f9f8 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -35,9 +35,6 @@ #define NR_RESERVED_BUFS 32 -static mdk_personality_t multipath_personality; - - static void *mp_pool_alloc(gfp_t gfp_flags, void *data) { struct multipath_bh *mpb; @@ -553,9 +550,10 @@ static int multipath_stop (mddev_t *mddev) return 0; } -static mdk_personality_t multipath_personality= +static struct mdk_personality multipath_personality = { .name = "multipath", + .level = LEVEL_MULTIPATH, .owner = THIS_MODULE, .make_request = multipath_make_request, .run = multipath_run, @@ -568,15 +566,16 @@ static mdk_personality_t multipath_personality= static int __init multipath_init (void) { - return register_md_personality (MULTIPATH, &multipath_personality); + return register_md_personality (&multipath_personality); } static void __exit multipath_exit (void) { - unregister_md_personality (MULTIPATH); + unregister_md_personality 
(&multipath_personality); } module_init(multipath_init); module_exit(multipath_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-7"); /* MULTIPATH */ +MODULE_ALIAS("md-level--4"); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index b4eaa67fabde..7fb69e29391b 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -275,7 +275,11 @@ static int raid0_run (mddev_t *mddev) mdk_rdev_t *rdev; struct list_head *tmp; - printk("%s: setting max_sectors to %d, segment boundary to %d\n", + if (mddev->chunk_size == 0) { + printk(KERN_ERR "md/raid0: non-zero chunk size required.\n"); + return -EINVAL; + } + printk(KERN_INFO "%s: setting max_sectors to %d, segment boundary to %d\n", mdname(mddev), mddev->chunk_size >> 9, (mddev->chunk_size>>1)-1); @@ -507,9 +511,10 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev) return; } -static mdk_personality_t raid0_personality= +static struct mdk_personality raid0_personality= { .name = "raid0", + .level = 0, .owner = THIS_MODULE, .make_request = raid0_make_request, .run = raid0_run, @@ -519,15 +524,16 @@ static mdk_personality_t raid0_personality= static int __init raid0_init (void) { - return register_md_personality (RAID0, &raid0_personality); + return register_md_personality (&raid0_personality); } static void raid0_exit (void) { - unregister_md_personality (RAID0); + unregister_md_personality (&raid0_personality); } module_init(raid0_init); module_exit(raid0_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-2"); /* RAID0 */ +MODULE_ALIAS("md-level-0"); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c42ef1c99fa0..6e0f59ed3d80 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -47,7 +47,6 @@ */ #define NR_RAID1_BIOS 256 -static mdk_personality_t raid1_personality; static void unplug_slaves(mddev_t *mddev); @@ -2036,9 +2035,10 @@ static void raid1_quiesce(mddev_t *mddev, int state) } -static mdk_personality_t raid1_personality = +static struct mdk_personality 
raid1_personality = { .name = "raid1", + .level = 1, .owner = THIS_MODULE, .make_request = make_request, .run = run, @@ -2056,15 +2056,16 @@ static mdk_personality_t raid1_personality = static int __init raid_init(void) { - return register_md_personality(RAID1, &raid1_personality); + return register_md_personality(&raid1_personality); } static void raid_exit(void) { - unregister_md_personality(RAID1); + unregister_md_personality(&raid1_personality); } module_init(raid_init); module_exit(raid_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-3"); /* RAID1 */ +MODULE_ALIAS("md-level-1"); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 253322ae9195..f23d52c5df94 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1883,11 +1883,11 @@ static int run(mddev_t *mddev) int nc, fc; sector_t stride, size; - if (mddev->level != 10) { - printk(KERN_ERR "raid10: %s: raid level not set correctly... (%d)\n", - mdname(mddev), mddev->level); - goto out; + if (mddev->chunk_size == 0) { + printk(KERN_ERR "md/raid10: non-zero chunk size required.\n"); + return -EINVAL; } + nc = mddev->layout & 255; fc = (mddev->layout >> 8) & 255; if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || @@ -2072,9 +2072,10 @@ static void raid10_quiesce(mddev_t *mddev, int state) } } -static mdk_personality_t raid10_personality = +static struct mdk_personality raid10_personality = { .name = "raid10", + .level = 10, .owner = THIS_MODULE, .make_request = make_request, .run = run, @@ -2090,15 +2091,16 @@ static mdk_personality_t raid10_personality = static int __init raid_init(void) { - return register_md_personality(RAID10, &raid10_personality); + return register_md_personality(&raid10_personality); } static void raid_exit(void) { - unregister_md_personality(RAID10); + unregister_md_personality(&raid10_personality); } module_init(raid_init); module_exit(raid_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-9"); /* RAID10 */ +MODULE_ALIAS("md-level-10"); diff --git 
a/drivers/md/raid5.c b/drivers/md/raid5.c index 6e4db95cebb1..b0cfd3ca9ca0 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2187,9 +2187,10 @@ static void raid5_quiesce(mddev_t *mddev, int state) } } -static mdk_personality_t raid5_personality= +static struct mdk_personality raid5_personality = { .name = "raid5", + .level = 5, .owner = THIS_MODULE, .make_request = make_request, .run = run, @@ -2204,17 +2205,40 @@ static mdk_personality_t raid5_personality= .quiesce = raid5_quiesce, }; -static int __init raid5_init (void) +static struct mdk_personality raid4_personality = { - return register_md_personality (RAID5, &raid5_personality); + .name = "raid4", + .level = 4, + .owner = THIS_MODULE, + .make_request = make_request, + .run = run, + .stop = stop, + .status = status, + .error_handler = error, + .hot_add_disk = raid5_add_disk, + .hot_remove_disk= raid5_remove_disk, + .spare_active = raid5_spare_active, + .sync_request = sync_request, + .resize = raid5_resize, + .quiesce = raid5_quiesce, +}; + +static int __init raid5_init(void) +{ + register_md_personality(&raid5_personality); + register_md_personality(&raid4_personality); + return 0; } -static void raid5_exit (void) +static void raid5_exit(void) { - unregister_md_personality (RAID5); + unregister_md_personality(&raid5_personality); + unregister_md_personality(&raid4_personality); } module_init(raid5_init); module_exit(raid5_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-4"); /* RAID5 */ +MODULE_ALIAS("md-level-5"); +MODULE_ALIAS("md-level-4"); diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index 79b5244f44f4..950e5fa6e1f2 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c @@ -2304,9 +2304,10 @@ static void raid6_quiesce(mddev_t *mddev, int state) } } -static mdk_personality_t raid6_personality= +static struct mdk_personality raid6_personality = { .name = "raid6", + .level = 6, .owner = THIS_MODULE, .make_request = make_request, .run = run, @@ -2321,7 +2322,7 @@ 
static mdk_personality_t raid6_personality= .quiesce = raid6_quiesce, }; -static int __init raid6_init (void) +static int __init raid6_init(void) { int e; @@ -2329,15 +2330,16 @@ static int __init raid6_init (void) if ( e ) return e; - return register_md_personality (RAID6, &raid6_personality); + return register_md_personality(&raid6_personality); } static void raid6_exit (void) { - unregister_md_personality (RAID6); + unregister_md_personality(&raid6_personality); } module_init(raid6_init); module_exit(raid6_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-8"); /* RAID6 */ +MODULE_ALIAS("md-level-6"); diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index 13e7c4b62367..b6e0bcad84e1 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -71,8 +71,8 @@ */ #define MD_PATCHLEVEL_VERSION 3 -extern int register_md_personality (int p_num, mdk_personality_t *p); -extern int unregister_md_personality (int p_num); +extern int register_md_personality (struct mdk_personality *p); +extern int unregister_md_personality (struct mdk_personality *p); extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev), mddev_t *mddev, const char *name); extern void md_unregister_thread (mdk_thread_t *thread); diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 1dd587b5975a..e559fb701aa1 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -18,62 +18,19 @@ /* and dm-bio-list.h is not under include/linux because.... ??? */ #include "../../../drivers/md/dm-bio-list.h" -#define MD_RESERVED 0UL -#define LINEAR 1UL -#define RAID0 2UL -#define RAID1 3UL -#define RAID5 4UL -#define TRANSLUCENT 5UL -#define HSM 6UL -#define MULTIPATH 7UL -#define RAID6 8UL -#define RAID10 9UL -#define FAULTY 10UL -#define MAX_PERSONALITY 11UL - #define LEVEL_MULTIPATH (-4) #define LEVEL_LINEAR (-1) #define LEVEL_FAULTY (-5) +/* we need a value for 'no level specified' and 0 + * means 'raid0', so we need something else. 
This is + * for internal use only + */ +#define LEVEL_NONE (-1000000) + #define MaxSector (~(sector_t)0) #define MD_THREAD_NAME_MAX 14 -static inline int pers_to_level (int pers) -{ - switch (pers) { - case FAULTY: return LEVEL_FAULTY; - case MULTIPATH: return LEVEL_MULTIPATH; - case HSM: return -3; - case TRANSLUCENT: return -2; - case LINEAR: return LEVEL_LINEAR; - case RAID0: return 0; - case RAID1: return 1; - case RAID5: return 5; - case RAID6: return 6; - case RAID10: return 10; - } - BUG(); - return MD_RESERVED; -} - -static inline int level_to_pers (int level) -{ - switch (level) { - case LEVEL_FAULTY: return FAULTY; - case LEVEL_MULTIPATH: return MULTIPATH; - case -3: return HSM; - case -2: return TRANSLUCENT; - case LEVEL_LINEAR: return LINEAR; - case 0: return RAID0; - case 1: return RAID1; - case 4: - case 5: return RAID5; - case 6: return RAID6; - case 10: return RAID10; - } - return MD_RESERVED; -} - typedef struct mddev_s mddev_t; typedef struct mdk_rdev_s mdk_rdev_t; @@ -140,12 +97,10 @@ struct mdk_rdev_s */ }; -typedef struct mdk_personality_s mdk_personality_t; - struct mddev_s { void *private; - mdk_personality_t *pers; + struct mdk_personality *pers; dev_t unit; int md_minor; struct list_head disks; @@ -266,9 +221,11 @@ static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sect atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); } -struct mdk_personality_s +struct mdk_personality { char *name; + int level; + struct list_head list; struct module *owner; int (*make_request)(request_queue_t *q, struct bio *bio); int (*run)(mddev_t *mddev); diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c index 3fbc3555ce96..f6f36806f84a 100644 --- a/init/do_mounts_md.c +++ b/init/do_mounts_md.c @@ -17,7 +17,7 @@ static int __initdata raid_noautodetect, raid_autopart; static struct { int minor; int partitioned; - int pers; + int level; int chunk; char *device_names; } md_setup_args[MAX_MD_DEVS] __initdata; @@ -47,7 +47,7 @@ 
extern int mdp_major; */ static int __init md_setup(char *str) { - int minor, level, factor, fault, pers, partitioned = 0; + int minor, level, factor, fault, partitioned = 0; char *pername = ""; char *str1; int ent; @@ -78,7 +78,7 @@ static int __init md_setup(char *str) } if (ent >= md_setup_ents) md_setup_ents++; - switch (get_option(&str, &level)) { /* RAID Personality */ + switch (get_option(&str, &level)) { /* RAID level */ case 2: /* could be 0 or -1.. */ if (level == 0 || level == LEVEL_LINEAR) { if (get_option(&str, &factor) != 2 || /* Chunk Size */ @@ -86,16 +86,12 @@ static int __init md_setup(char *str) printk(KERN_WARNING "md: Too few arguments supplied to md=.\n"); return 0; } - md_setup_args[ent].pers = level; + md_setup_args[ent].level = level; md_setup_args[ent].chunk = 1 << (factor+12); - if (level == LEVEL_LINEAR) { - pers = LINEAR; + if (level == LEVEL_LINEAR) pername = "linear"; - } else { - pers = RAID0; + else pername = "raid0"; - } - md_setup_args[ent].pers = pers; break; } /* FALL THROUGH */ @@ -103,7 +99,7 @@ static int __init md_setup(char *str) str = str1; /* FALL THROUGH */ case 0: - md_setup_args[ent].pers = 0; + md_setup_args[ent].level = LEVEL_NONE; pername="super-block"; } @@ -190,10 +186,10 @@ static void __init md_setup_drive(void) continue; } - if (md_setup_args[ent].pers) { + if (md_setup_args[ent].level != LEVEL_NONE) { /* non-persistent */ mdu_array_info_t ainfo; - ainfo.level = pers_to_level(md_setup_args[ent].pers); + ainfo.level = md_setup_args[ent].level; ainfo.size = 0; ainfo.nr_disks =0; ainfo.raid_disks =0; -- cgit v1.2.3-71-gd317 From 1345b1d8adbdeceb1c871d9a4af5e2a700b341c6 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:40 -0800 Subject: [PATCH] md: define and use safe_put_page for md md sometimes call put_page on NULL pointers (treating it like kfree). This is not safe, so define and use a 'safe_put_page' which checks for NULL. 
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/bitmap.c | 3 +-- drivers/md/raid1.c | 8 ++++---- drivers/md/raid10.c | 8 ++++---- drivers/md/raid6main.c | 3 +-- include/linux/raid/md_k.h | 5 +++++ 5 files changed, 15 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index abe415f0c039..ee4a3424a8a3 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -626,8 +626,7 @@ static void bitmap_file_unmap(struct bitmap *bitmap) kfree(map); kfree(attr); - if (sb_page) - put_page(sb_page); + safe_put_page(sb_page); } static void bitmap_stop_daemon(struct bitmap *bitmap); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 39c10a65683d..feea4eeca1d9 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -136,7 +136,7 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) out_free_pages: for (i=0; i < RESYNC_PAGES ; i++) for (j=0 ; j < pi->raid_disks; j++) - put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page); + safe_put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page); j = -1; out_free_bio: while ( ++j < pi->raid_disks ) @@ -156,7 +156,7 @@ static void r1buf_pool_free(void *__r1_bio, void *data) if (j == 0 || r1bio->bios[j]->bi_io_vec[i].bv_page != r1bio->bios[0]->bi_io_vec[i].bv_page) - put_page(r1bio->bios[j]->bi_io_vec[i].bv_page); + safe_put_page(r1bio->bios[j]->bi_io_vec[i].bv_page); } for (i=0 ; i < pi->raid_disks; i++) bio_put(r1bio->bios[i]); @@ -381,7 +381,7 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int /* free extra copy of the data pages */ int i = bio->bi_vcnt; while (i--) - put_page(bio->bi_io_vec[i].bv_page); + safe_put_page(bio->bi_io_vec[i].bv_page); } /* clear the bitmap if all writes complete successfully */ bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, @@ -1907,7 +1907,7 @@ out_free_conf: if (conf->r1bio_pool) mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); - 
put_page(conf->tmppage); + safe_put_page(conf->tmppage); kfree(conf->poolinfo); kfree(conf); mddev->private = NULL; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 9647ebb0983a..fb952000fae2 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -132,10 +132,10 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) out_free_pages: for ( ; i > 0 ; i--) - put_page(bio->bi_io_vec[i-1].bv_page); + safe_put_page(bio->bi_io_vec[i-1].bv_page); while (j--) for (i = 0; i < RESYNC_PAGES ; i++) - put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page); + safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page); j = -1; out_free_bio: while ( ++j < nalloc ) @@ -155,7 +155,7 @@ static void r10buf_pool_free(void *__r10_bio, void *data) struct bio *bio = r10bio->devs[j].bio; if (bio) { for (i = 0; i < RESYNC_PAGES; i++) { - put_page(bio->bi_io_vec[i].bv_page); + safe_put_page(bio->bi_io_vec[i].bv_page); bio->bi_io_vec[i].bv_page = NULL; } bio_put(bio); @@ -2042,7 +2042,7 @@ static int run(mddev_t *mddev) out_free_conf: if (conf->r10bio_pool) mempool_destroy(conf->r10bio_pool); - put_page(conf->tmppage); + safe_put_page(conf->tmppage); kfree(conf->mirrors); kfree(conf); mddev->private = NULL; diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index 950e5fa6e1f2..06b32bd671a3 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c @@ -2072,8 +2072,7 @@ static int run(mddev_t *mddev) abort: if (conf) { print_raid6_conf(conf); - if (conf->spare_page) - put_page(conf->spare_page); + safe_put_page(conf->spare_page); kfree(conf->stripe_hashtbl); kfree(conf); } diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index e559fb701aa1..12b3203e3419 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -324,5 +324,10 @@ do { \ __wait_event_lock_irq(wq, condition, lock, cmd); \ } while (0) +static inline void safe_put_page(struct page *p) +{ + if (p) put_page(p); +} + #endif -- cgit v1.2.3-71-gd317 From 
2989ddbd6e1d9638a188311b896362c4bf7b7c25 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:43 -0800 Subject: [PATCH] md: make a couple of names in md.c static .. because they aren't used outside md.c Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 4 ++-- include/linux/raid/md_k.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/md.c b/drivers/md/md.c index c1613854f38d..3cf089349e77 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -144,7 +144,7 @@ static int start_readonly; * start array, stop array, error, add device, remove device, * start build, activate spare */ -DECLARE_WAIT_QUEUE_HEAD(md_event_waiters); +static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters); static atomic_t md_event_count; void md_new_event(mddev_t *mddev) { @@ -279,7 +279,7 @@ static inline void mddev_unlock(mddev_t * mddev) md_wakeup_thread(mddev->thread); } -mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) +static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) { mdk_rdev_t * rdev; struct list_head *tmp; diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 12b3203e3419..0fb5af6d622d 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -263,8 +263,6 @@ static inline char * mdname (mddev_t * mddev) return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; } -extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr); - /* * iterates through some rdev ringlist. It's safe to remove the * current 'rdev'. Dont touch 'tmp' though. 
-- cgit v1.2.3-71-gd317 From d9d166c2a9d5d01af34396793950aa695883eed4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:51 -0800 Subject: [PATCH] md: allow array level to be set textually via sysfs Signed-off-by: Neil Brown Acked-by: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/md.txt | 8 +++++++ drivers/md/faulty.c | 1 + drivers/md/linear.c | 3 ++- drivers/md/md.c | 61 +++++++++++++++++++++++++++++++++++++---------- drivers/md/multipath.c | 1 + drivers/md/raid0.c | 1 + drivers/md/raid1.c | 1 + drivers/md/raid10.c | 1 + drivers/md/raid5.c | 2 ++ drivers/md/raid6main.c | 1 + include/linux/raid/md_k.h | 1 + 11 files changed, 67 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/Documentation/md.txt b/Documentation/md.txt index c5512afd5917..fd43fd2cad2f 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt @@ -189,6 +189,14 @@ All md devices contain: 1.2 (newer format in varying locations) or "none" indicating that the kernel isn't managing metadata at all. + level + The raid 'level' for this array. The name will often (but not + always) be the same as the name of the module that implements the + level. To be auto-loaded the module must have an alias + md-$LEVEL e.g. md-raid5 + This can be written only while the array is being assembled, not + after it is started. 
+ As component devices are added to an md array, they appear in the 'md' directory as new directories named dev-XXX diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index f12e83086897..a7a5ab554338 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -342,4 +342,5 @@ module_init(raid_init); module_exit(raid_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-10"); /* faulty */ +MODULE_ALIAS("md-faulty"); MODULE_ALIAS("md-level--5"); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 79dee8159217..777585458c85 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -376,5 +376,6 @@ static void linear_exit (void) module_init(linear_init); module_exit(linear_exit); MODULE_LICENSE("GPL"); -MODULE_ALIAS("md-personality-1"); /* LINEAR - degrecated*/ +MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/ +MODULE_ALIAS("md-linear"); MODULE_ALIAS("md-level--1"); diff --git a/drivers/md/md.c b/drivers/md/md.c index ecc0166ba779..594d8c312e6a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -303,12 +303,15 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev) return NULL; } -static struct mdk_personality *find_pers(int level) +static struct mdk_personality *find_pers(int level, char *clevel) { struct mdk_personality *pers; - list_for_each_entry(pers, &pers_list, list) - if (pers->level == level) + list_for_each_entry(pers, &pers_list, list) { + if (level != LEVEL_NONE && pers->level == level) return pers; + if (strcmp(pers->name, clevel)==0) + return pers; + } return NULL; } @@ -715,6 +718,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->ctime = sb->ctime; mddev->utime = sb->utime; mddev->level = sb->level; + mddev->clevel[0] = 0; mddev->layout = sb->layout; mddev->raid_disks = sb->raid_disks; mddev->size = sb->size; @@ -1051,6 +1055,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1); mddev->utime = le64_to_cpu(sb->utime) & 
((1ULL << 32)-1); mddev->level = le32_to_cpu(sb->level); + mddev->clevel[0] = 0; mddev->layout = le32_to_cpu(sb->layout); mddev->raid_disks = le32_to_cpu(sb->raid_disks); mddev->size = le64_to_cpu(sb->size)/2; @@ -1774,15 +1779,36 @@ static ssize_t level_show(mddev_t *mddev, char *page) { struct mdk_personality *p = mddev->pers; - if (p == NULL && mddev->raid_disks == 0) - return 0; - if (mddev->level >= 0) - return sprintf(page, "raid%d\n", mddev->level); - else + if (p) return sprintf(page, "%s\n", p->name); + else if (mddev->clevel[0]) + return sprintf(page, "%s\n", mddev->clevel); + else if (mddev->level != LEVEL_NONE) + return sprintf(page, "%d\n", mddev->level); + else + return 0; +} + +static ssize_t +level_store(mddev_t *mddev, const char *buf, size_t len) +{ + int rv = len; + if (mddev->pers) + return -EBUSY; + if (len == 0) + return 0; + if (len >= sizeof(mddev->clevel)) + return -ENOSPC; + strncpy(mddev->clevel, buf, len); + if (mddev->clevel[len-1] == '\n') + len--; + mddev->clevel[len] = 0; + mddev->level = LEVEL_NONE; + return rv; } -static struct md_sysfs_entry md_level = __ATTR_RO(level); +static struct md_sysfs_entry md_level = +__ATTR(level, 0644, level_show, level_store); static ssize_t raid_disks_show(mddev_t *mddev, char *page) @@ -2158,7 +2184,10 @@ static int do_md_run(mddev_t * mddev) } #ifdef CONFIG_KMOD - request_module("md-level-%d", mddev->level); + if (mddev->level != LEVEL_NONE) + request_module("md-level-%d", mddev->level); + else if (mddev->clevel[0]) + request_module("md-%s", mddev->clevel); #endif /* @@ -2180,15 +2209,21 @@ static int do_md_run(mddev_t * mddev) return -ENOMEM; spin_lock(&pers_lock); - pers = find_pers(mddev->level); + pers = find_pers(mddev->level, mddev->clevel); if (!pers || !try_module_get(pers->owner)) { spin_unlock(&pers_lock); - printk(KERN_WARNING "md: personality for level %d is not loaded!\n", - mddev->level); + if (mddev->level != LEVEL_NONE) + printk(KERN_WARNING "md: personality for level %d is not 
loaded!\n", + mddev->level); + else + printk(KERN_WARNING "md: personality for level %s is not loaded!\n", + mddev->clevel); return -EINVAL; } mddev->pers = pers; spin_unlock(&pers_lock); + mddev->level = pers->level; + strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); mddev->recovery = 0; mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index d4d838e3f9f8..e6aa309a66d7 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -578,4 +578,5 @@ module_init(multipath_init); module_exit(multipath_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-7"); /* MULTIPATH */ +MODULE_ALIAS("md-multipath"); MODULE_ALIAS("md-level--4"); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 7fb69e29391b..abbca150202b 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -536,4 +536,5 @@ module_init(raid0_init); module_exit(raid0_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-2"); /* RAID0 */ +MODULE_ALIAS("md-raid0"); MODULE_ALIAS("md-level-0"); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7d4465f93064..181c9616d5f1 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2092,4 +2092,5 @@ module_init(raid_init); module_exit(raid_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-3"); /* RAID1 */ +MODULE_ALIAS("md-raid1"); MODULE_ALIAS("md-level-1"); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 8e6f6dfddb2b..201dc7168a5f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2117,4 +2117,5 @@ module_init(raid_init); module_exit(raid_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-9"); /* RAID10 */ +MODULE_ALIAS("md-raid10"); MODULE_ALIAS("md-level-10"); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b0cfd3ca9ca0..9cc844f455bf 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2240,5 +2240,7 @@ module_init(raid5_init); module_exit(raid5_exit); 
MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-4"); /* RAID5 */ +MODULE_ALIAS("md-raid5"); +MODULE_ALIAS("md-raid4"); MODULE_ALIAS("md-level-5"); MODULE_ALIAS("md-level-4"); diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index 06b32bd671a3..84dd875bb2f6 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c @@ -2341,4 +2341,5 @@ module_init(raid6_init); module_exit(raid6_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-8"); /* RAID6 */ +MODULE_ALIAS("md-raid6"); MODULE_ALIAS("md-level-6"); diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 0fb5af6d622d..686463115438 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -119,6 +119,7 @@ struct mddev_s int chunk_size; time_t ctime, utime; int level, layout; + char clevel[16]; int raid_disks; int max_disks; sector_t size; /* used size of component devices */ -- cgit v1.2.3-71-gd317 From 4dbcdc751cb25ffca3a8374cbc5ab6de961cc545 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:20:52 -0800 Subject: [PATCH] md: count corrected read errors per drive Store this total in superblock (As appropriate), and make it available to userspace via sysfs. Signed-off-by: Neil Brown Acked-by: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/md.txt | 11 +++++++++++ drivers/md/md.c | 27 ++++++++++++++++++++++++++- drivers/md/raid1.c | 2 ++ drivers/md/raid10.c | 11 ++++++++--- drivers/md/raid5.c | 3 +++ drivers/md/raid6main.c | 3 +++ include/linux/raid/md_k.h | 4 ++++ 7 files changed, 57 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/Documentation/md.txt b/Documentation/md.txt index fd43fd2cad2f..a3eadf8e1701 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt @@ -222,6 +222,17 @@ Each directory contains: of being recoverred to This list make grow in future. 
+ errors + An approximate count of read errors that have been detected on + this device but have not caused the device to be evicted from + the array (either because they were corrected or because they + happened while the array was read-only). When using version-1 + metadata, this value persists across restarts of the array. + + This value can be written while assembling an array thus + providing an ongoing count for arrays with metadata managed by + userspace. + An active md device will also contain and entry for each active device in the array. These are named diff --git a/drivers/md/md.c b/drivers/md/md.c index 594d8c312e6a..32a4e2311e43 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1000,6 +1000,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) } rdev->preferred_minor = 0xffff; rdev->data_offset = le64_to_cpu(sb->data_offset); + atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read)); rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1; @@ -1139,6 +1140,8 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) else sb->resync_offset = cpu_to_le64(0); + sb->cnt_corrected_read = atomic_read(&rdev->corrected_errors); + if (mddev->bitmap && mddev->bitmap_file == NULL) { sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); @@ -1592,9 +1595,30 @@ super_show(mdk_rdev_t *rdev, char *page) } static struct rdev_sysfs_entry rdev_super = __ATTR_RO(super); +static ssize_t +errors_show(mdk_rdev_t *rdev, char *page) +{ + return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors)); +} + +static ssize_t +errors_store(mdk_rdev_t *rdev, const char *buf, size_t len) +{ + char *e; + unsigned long n = simple_strtoul(buf, &e, 10); + if (*buf && (*e == 0 || *e == '\n')) { + atomic_set(&rdev->corrected_errors, n); + return len; + } + return -EINVAL; +} +static struct rdev_sysfs_entry 
rdev_errors = +__ATTR(errors, 0644, errors_show, errors_store); + static struct attribute *rdev_default_attrs[] = { &rdev_state.attr, &rdev_super.attr, + &rdev_errors.attr, NULL, }; static ssize_t @@ -1674,6 +1698,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi rdev->data_offset = 0; atomic_set(&rdev->nr_pending, 0); atomic_set(&rdev->read_errors, 0); + atomic_set(&rdev->corrected_errors, 0); size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; if (!size) { @@ -4729,7 +4754,7 @@ static int set_ro(const char *val, struct kernel_param *kp) int num = simple_strtoul(val, &e, 10); if (*val && (*e == '\0' || *e == '\n')) { start_readonly = num; - return 0;; + return 0; } return -EINVAL; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 181c9616d5f1..a06ff91f27e2 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1265,6 +1265,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) if (r1_bio->bios[d]->bi_end_io != end_sync_read) continue; rdev = conf->mirrors[d].rdev; + atomic_add(s, &rdev->corrected_errors); if (sync_page_io(rdev->bdev, sect + rdev->data_offset, s<<9, @@ -1463,6 +1464,7 @@ static void raid1d(mddev_t *mddev) d = conf->raid_disks; d--; rdev = conf->mirrors[d].rdev; + atomic_add(s, &rdev->corrected_errors); if (rdev && test_bit(In_sync, &rdev->flags)) { if (sync_page_io(rdev->bdev, diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 201dc7168a5f..9e658e519a27 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1122,9 +1122,13 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) if (test_bit(BIO_UPTODATE, &bio->bi_flags)) set_bit(R10BIO_Uptodate, &r10_bio->state); - else if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery)) - md_error(r10_bio->mddev, - conf->mirrors[d].rdev); + else { + atomic_add(r10_bio->sectors, + &conf->mirrors[d].rdev->corrected_errors); + if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery)) + 
md_error(r10_bio->mddev, + conf->mirrors[d].rdev); + } /* for reconstruct, we always reschedule after a read. * for resync, only after all reads @@ -1430,6 +1434,7 @@ static void raid10d(mddev_t *mddev) sl--; d = r10_bio->devs[sl].devnum; rdev = conf->mirrors[d].rdev; + atomic_add(s, &rdev->corrected_errors); if (rdev && test_bit(In_sync, &rdev->flags)) { if (sync_page_io(rdev->bdev, diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 9cc844f455bf..54f4a9847e38 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1400,6 +1400,9 @@ static void handle_stripe(struct stripe_head *sh) bi->bi_io_vec[0].bv_offset = 0; bi->bi_size = STRIPE_SIZE; bi->bi_next = NULL; + if (rw == WRITE && + test_bit(R5_ReWrite, &sh->dev[i].flags)) + atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); generic_make_request(bi); } else { if (rw == 1) diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index 84dd875bb2f6..8c823d686a60 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c @@ -1562,6 +1562,9 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) bi->bi_io_vec[0].bv_offset = 0; bi->bi_size = STRIPE_SIZE; bi->bi_next = NULL; + if (rw == WRITE && + test_bit(R5_ReWrite, &sh->dev[i].flags)) + atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); generic_make_request(bi); } else { if (rw == 1) diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 686463115438..68b929c079ab 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -95,6 +95,10 @@ struct mdk_rdev_s atomic_t read_errors; /* number of consecutive read errors that * we have tried to ignore. */ + atomic_t corrected_errors; /* number of corrected read errors, + * for reporting to userspace and storing + * in superblock. 
+ */ }; struct mddev_s -- cgit v1.2.3-71-gd317 From 88202a0c84e1951d6630d1d557d4801a8cc5b5ef Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2006 00:21:36 -0800 Subject: [PATCH] md: allow sync-speed to be controlled per-device Also export current (average) speed and status in sysfs. Signed-off-by: Neil Brown Acked-by: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/md.txt | 22 ++++++++++ drivers/md/md.c | 110 +++++++++++++++++++++++++++++++++++++++++++--- include/linux/raid/md_k.h | 4 ++ 3 files changed, 131 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/md.txt b/Documentation/md.txt index b8d172b254f7..03a13c462cf2 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt @@ -207,6 +207,28 @@ All md devices contain: available. It will then appear at md/dev-XXX (depending on the name of the device) and further configuration is then possible. + sync_speed_min + sync_speed_max + These are similar to /proc/sys/dev/raid/speed_limit_{min,max} + however they only apply to the particular array. + If no value has been written to these, or if the word 'system' + is written, then the system-wide value is used. If a value, + in kibibytes-per-second is written, then it is used. + When the files are read, they show the currently active value + followed by "(local)" or "(system)" depending on whether it is + a locally set or system-wide value. + + sync_completed + This shows the number of sectors that have been completed of + whatever the current sync_action is, followed by the number of + sectors in total that could need to be processed. The two + numbers are separated by a '/' thus effectively showing one + value, a fraction of the process that is complete. + + sync_speed + This shows the current actual speed, in K/sec, of the current + sync_action. It is averaged over the last 30 seconds. 
+ As component devices are added to an md array, they appear in the 'md' directory as new directories named diff --git a/drivers/md/md.c b/drivers/md/md.c index 825e235b791b..1b76fb29fb70 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -81,10 +81,22 @@ static DEFINE_SPINLOCK(pers_lock); * idle IO detection. * * you can change it via /proc/sys/dev/raid/speed_limit_min and _max. + * or /sys/block/mdX/md/sync_speed_{min,max} */ static int sysctl_speed_limit_min = 1000; static int sysctl_speed_limit_max = 200000; +static inline int speed_min(mddev_t *mddev) +{ + return mddev->sync_speed_min ? + mddev->sync_speed_min : sysctl_speed_limit_min; +} + +static inline int speed_max(mddev_t *mddev) +{ + return mddev->sync_speed_max ? + mddev->sync_speed_max : sysctl_speed_limit_max; +} static struct ctl_table_header *raid_table_header; @@ -2197,6 +2209,90 @@ md_scan_mode = __ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store); static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt); +static ssize_t +sync_min_show(mddev_t *mddev, char *page) +{ + return sprintf(page, "%d (%s)\n", speed_min(mddev), + mddev->sync_speed_min ? "local": "system"); +} + +static ssize_t +sync_min_store(mddev_t *mddev, const char *buf, size_t len) +{ + int min; + char *e; + if (strncmp(buf, "system", 6)==0) { + mddev->sync_speed_min = 0; + return len; + } + min = simple_strtoul(buf, &e, 10); + if (buf == e || (*e && *e != '\n') || min <= 0) + return -EINVAL; + mddev->sync_speed_min = min; + return len; +} + +static struct md_sysfs_entry md_sync_min = +__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store); + +static ssize_t +sync_max_show(mddev_t *mddev, char *page) +{ + return sprintf(page, "%d (%s)\n", speed_max(mddev), + mddev->sync_speed_max ? 
"local": "system"); +} + +static ssize_t +sync_max_store(mddev_t *mddev, const char *buf, size_t len) +{ + int max; + char *e; + if (strncmp(buf, "system", 6)==0) { + mddev->sync_speed_max = 0; + return len; + } + max = simple_strtoul(buf, &e, 10); + if (buf == e || (*e && *e != '\n') || max <= 0) + return -EINVAL; + mddev->sync_speed_max = max; + return len; +} + +static struct md_sysfs_entry md_sync_max = +__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store); + + +static ssize_t +sync_speed_show(mddev_t *mddev, char *page) +{ + unsigned long resync, dt, db; + resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active)); + dt = ((jiffies - mddev->resync_mark) / HZ); + if (!dt) dt++; + db = resync - (mddev->resync_mark_cnt); + return sprintf(page, "%ld\n", db/dt/2); /* K/sec */ +} + +static struct md_sysfs_entry +md_sync_speed = __ATTR_RO(sync_speed); + +static ssize_t +sync_completed_show(mddev_t *mddev, char *page) +{ + unsigned long max_blocks, resync; + + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) + max_blocks = mddev->resync_max_sectors; + else + max_blocks = mddev->size << 1; + + resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active)); + return sprintf(page, "%lu / %lu\n", resync, max_blocks); +} + +static struct md_sysfs_entry +md_sync_completed = __ATTR_RO(sync_completed); + static struct attribute *md_default_attrs[] = { &md_level.attr, &md_raid_disks.attr, @@ -2210,6 +2306,10 @@ static struct attribute *md_default_attrs[] = { static struct attribute *md_redundancy_attrs[] = { &md_scan_mode.attr, &md_mismatches.attr, + &md_sync_min.attr, + &md_sync_max.attr, + &md_sync_speed.attr, + &md_sync_completed.attr, NULL, }; static struct attribute_group md_redundancy_group = { @@ -4433,10 +4533,10 @@ static void md_do_sync(mddev_t *mddev) printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" - " %d KB/sec/disc.\n", sysctl_speed_limit_min); + " 
%d KB/sec/disc.\n", speed_min(mddev)); printk(KERN_INFO "md: using maximum available idle IO bandwidth " "(but not more than %d KB/sec) for reconstruction.\n", - sysctl_speed_limit_max); + speed_max(mddev)); is_mddev_idle(mddev); /* this also initializes IO event counters */ /* we don't use the checkpoint if there's a bitmap */ @@ -4477,7 +4577,7 @@ static void md_do_sync(mddev_t *mddev) skipped = 0; sectors = mddev->pers->sync_request(mddev, j, &skipped, - currspeed < sysctl_speed_limit_min); + currspeed < speed_min(mddev)); if (sectors == 0) { set_bit(MD_RECOVERY_ERR, &mddev->recovery); goto out; @@ -4542,8 +4642,8 @@ static void md_do_sync(mddev_t *mddev) currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 /((jiffies-mddev->resync_mark)/HZ +1) +1; - if (currspeed > sysctl_speed_limit_min) { - if ((currspeed > sysctl_speed_limit_max) || + if (currspeed > speed_min(mddev)) { + if ((currspeed > speed_max(mddev)) || !is_mddev_idle(mddev)) { msleep(500); goto repeat; diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 68b929c079ab..617b9506c760 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -143,6 +143,10 @@ struct mddev_s sector_t resync_mismatches; /* count of sectors where * parity/replica mismatch found */ + /* if zero, use the system-wide default */ + int sync_speed_min; + int sync_speed_max; + int ok_start_degraded; /* recovery/resync flags * NEEDED: we might need to start a resync/recover -- cgit v1.2.3-71-gd317 From 9b847548663ef1039dd49f0eb4463d001e596bc3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 6 Jan 2006 09:28:07 +0100 Subject: [PATCH] Suspend support for libata This patch adds suspend patch to libata, and ata_piix in particular. For most low level drivers, they should just need to add the 4 hooks to work. As I can only test ata_piix, I didn't enable it for more though. Suspend support is the single most important feature on a notebook, and most new notebooks have sata drives. 
It's quite embarrassing that we _still_ do not support this. Right now, it's perfectly possible to suspend the drive in mid-transfer. Signed-off-by: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/scsi/ata_piix.c | 4 ++ drivers/scsi/libata-core.c | 114 +++++++++++++++++++++++++++++++++++++++++++++ drivers/scsi/libata-scsi.c | 16 +++++++ drivers/scsi/scsi_sysfs.c | 31 ++++++++++++ include/linux/ata.h | 2 + include/linux/libata.h | 8 ++++ include/scsi/scsi_host.h | 6 +++ 7 files changed, 181 insertions(+) (limited to 'include/linux') diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c index 0ea27873b9ff..f79630340028 100644 --- a/drivers/scsi/ata_piix.c +++ b/drivers/scsi/ata_piix.c @@ -166,6 +166,8 @@ static struct pci_driver piix_pci_driver = { .id_table = piix_pci_tbl, .probe = piix_init_one, .remove = ata_pci_remove_one, + .suspend = ata_pci_device_suspend, + .resume = ata_pci_device_resume, }; static struct scsi_host_template piix_sht = { @@ -186,6 +188,8 @@ static struct scsi_host_template piix_sht = { .slave_configure = ata_scsi_slave_config, .bios_param = ata_std_bios_param, .ordered_flush = 1, + .resume = ata_scsi_device_resume, + .suspend = ata_scsi_device_suspend, }; static const struct ata_port_operations piix_pata_ops = { diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 9ea102587914..9c66d4059399 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -4154,6 +4154,96 @@ err_out: * Inherited from caller. 
*/ +/* + * Execute a 'simple' command, that only consists of the opcode 'cmd' itself, + * without filling any other registers + */ +static int ata_do_simple_cmd(struct ata_port *ap, struct ata_device *dev, + u8 cmd) +{ + struct ata_taskfile tf; + int err; + + ata_tf_init(ap, &tf, dev->devno); + + tf.command = cmd; + tf.flags |= ATA_TFLAG_DEVICE; + tf.protocol = ATA_PROT_NODATA; + + err = ata_exec_internal(ap, dev, &tf, DMA_NONE, NULL, 0); + if (err) + printk(KERN_ERR "%s: ata command failed: %d\n", + __FUNCTION__, err); + + return err; +} + +static int ata_flush_cache(struct ata_port *ap, struct ata_device *dev) +{ + u8 cmd; + + if (!ata_try_flush_cache(dev)) + return 0; + + if (ata_id_has_flush_ext(dev->id)) + cmd = ATA_CMD_FLUSH_EXT; + else + cmd = ATA_CMD_FLUSH; + + return ata_do_simple_cmd(ap, dev, cmd); +} + +static int ata_standby_drive(struct ata_port *ap, struct ata_device *dev) +{ + return ata_do_simple_cmd(ap, dev, ATA_CMD_STANDBYNOW1); +} + +static int ata_start_drive(struct ata_port *ap, struct ata_device *dev) +{ + return ata_do_simple_cmd(ap, dev, ATA_CMD_IDLEIMMEDIATE); +} + +/** + * ata_device_resume - wakeup a previously suspended devices + * + * Kick the drive back into action, by sending it an idle immediate + * command and making sure its transfer mode matches between drive + * and host. + * + */ +int ata_device_resume(struct ata_port *ap, struct ata_device *dev) +{ + if (ap->flags & ATA_FLAG_SUSPENDED) { + ap->flags &= ~ATA_FLAG_SUSPENDED; + ata_set_mode(ap); + } + if (!ata_dev_present(dev)) + return 0; + if (dev->class == ATA_DEV_ATA) + ata_start_drive(ap, dev); + + return 0; +} + +/** + * ata_device_suspend - prepare a device for suspend + * + * Flush the cache on the drive, if appropriate, then issue a + * standbynow command. 
+ * + */ +int ata_device_suspend(struct ata_port *ap, struct ata_device *dev) +{ + if (!ata_dev_present(dev)) + return 0; + if (dev->class == ATA_DEV_ATA) + ata_flush_cache(ap, dev); + + ata_standby_drive(ap, dev); + ap->flags |= ATA_FLAG_SUSPENDED; + return 0; +} + int ata_port_start (struct ata_port *ap) { struct device *dev = ap->host_set->dev; @@ -4902,6 +4992,23 @@ int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits) return (tmp == bits->val) ? 1 : 0; } + +int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state) +{ + pci_save_state(pdev); + pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D3hot); + return 0; +} + +int ata_pci_device_resume(struct pci_dev *pdev) +{ + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + pci_enable_device(pdev); + pci_set_master(pdev); + return 0; +} #endif /* CONFIG_PCI */ @@ -5005,4 +5112,11 @@ EXPORT_SYMBOL_GPL(ata_pci_host_stop); EXPORT_SYMBOL_GPL(ata_pci_init_native_mode); EXPORT_SYMBOL_GPL(ata_pci_init_one); EXPORT_SYMBOL_GPL(ata_pci_remove_one); +EXPORT_SYMBOL_GPL(ata_pci_device_suspend); +EXPORT_SYMBOL_GPL(ata_pci_device_resume); #endif /* CONFIG_PCI */ + +EXPORT_SYMBOL_GPL(ata_device_suspend); +EXPORT_SYMBOL_GPL(ata_device_resume); +EXPORT_SYMBOL_GPL(ata_scsi_device_suspend); +EXPORT_SYMBOL_GPL(ata_scsi_device_resume); diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index e0439be4b573..c1ebede14a48 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -396,6 +396,22 @@ void ata_dump_status(unsigned id, struct ata_taskfile *tf) } } +int ata_scsi_device_resume(struct scsi_device *sdev) +{ + struct ata_port *ap = (struct ata_port *) &sdev->host->hostdata[0]; + struct ata_device *dev = &ap->device[sdev->id]; + + return ata_device_resume(ap, dev); +} + +int ata_scsi_device_suspend(struct scsi_device *sdev) +{ + struct ata_port *ap = (struct ata_port *) &sdev->host->hostdata[0]; + struct ata_device *dev = &ap->device[sdev->id]; + 
+ return ata_device_suspend(ap, dev); +} + /** * ata_to_sense_error - convert ATA error to SCSI error * @id: ATA device number diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 15842b1f0f4a..ea7f3a433572 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -263,9 +263,40 @@ static int scsi_bus_match(struct device *dev, struct device_driver *gendrv) return (sdp->inq_periph_qual == SCSI_INQ_PQ_CON)? 1: 0; } +static int scsi_bus_suspend(struct device * dev, pm_message_t state) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct scsi_host_template *sht = sdev->host->hostt; + int err; + + err = scsi_device_quiesce(sdev); + if (err) + return err; + + if (sht->suspend) + err = sht->suspend(sdev); + + return err; +} + +static int scsi_bus_resume(struct device * dev) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct scsi_host_template *sht = sdev->host->hostt; + int err = 0; + + if (sht->resume) + err = sht->resume(sdev); + + scsi_device_resume(sdev); + return err; +} + struct bus_type scsi_bus_type = { .name = "scsi", .match = scsi_bus_match, + .suspend = scsi_bus_suspend, + .resume = scsi_bus_resume, }; int scsi_sysfs_register(void) diff --git a/include/linux/ata.h b/include/linux/ata.h index d2873b732bb1..3eb80c391b39 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -141,6 +141,8 @@ enum { ATA_CMD_PACKET = 0xA0, ATA_CMD_VERIFY = 0x40, ATA_CMD_VERIFY_EXT = 0x42, + ATA_CMD_STANDBYNOW1 = 0xE0, + ATA_CMD_IDLEIMMEDIATE = 0xE1, ATA_CMD_INIT_DEV_PARAMS = 0x91, /* SETFEATURES stuff */ diff --git a/include/linux/libata.h b/include/linux/libata.h index e828e172ccbf..cdab75c209a0 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -124,6 +124,8 @@ enum { ATA_FLAG_DEBUGMSG = (1 << 10), ATA_FLAG_NO_ATAPI = (1 << 11), /* No ATAPI support */ + ATA_FLAG_SUSPENDED = (1 << 12), /* port is suspended */ + ATA_QCFLAG_ACTIVE = (1 << 1), /* cmd not yet ack'd to scsi lyer */ ATA_QCFLAG_SG = (1 << 3), 
/* have s/g table? */ ATA_QCFLAG_SINGLE = (1 << 4), /* no s/g, just a single buffer */ @@ -436,6 +438,8 @@ extern void ata_std_ports(struct ata_ioports *ioaddr); extern int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info, unsigned int n_ports); extern void ata_pci_remove_one (struct pci_dev *pdev); +extern int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state); +extern int ata_pci_device_resume(struct pci_dev *pdev); #endif /* CONFIG_PCI */ extern int ata_device_add(const struct ata_probe_ent *ent); extern void ata_host_set_remove(struct ata_host_set *host_set); @@ -445,6 +449,10 @@ extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn extern int ata_scsi_error(struct Scsi_Host *host); extern int ata_scsi_release(struct Scsi_Host *host); extern unsigned int ata_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc); +extern int ata_scsi_device_resume(struct scsi_device *); +extern int ata_scsi_device_suspend(struct scsi_device *); +extern int ata_device_resume(struct ata_port *, struct ata_device *); +extern int ata_device_suspend(struct ata_port *, struct ata_device *); extern int ata_ratelimit(void); /* diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 6cbb1982ed03..6297885a35e7 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -295,6 +295,12 @@ struct scsi_host_template { */ int (*proc_info)(struct Scsi_Host *, char *, char **, off_t, int, int); + /* + * suspend support + */ + int (*resume)(struct scsi_device *); + int (*suspend)(struct scsi_device *); + /* * Name of proc directory */ -- cgit v1.2.3-71-gd317 From 22905f775dd6a8b73be99826dcad07ceec00244b Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 16 Nov 2005 15:07:01 -0800 Subject: identify multipage ->writepages() calls NFS needs to be able to distinguish between single-page ->writepage() calls and multipage ->writepages() calls. 
For the single-page writepage calls NFS can kick off the I/O within the context of ->writepage(). For multipage ->writepages calls, nfs_writepage() will leave the I/O pending and nfs_writepages() will kick off the I/O when it all has been queued up within NFS. Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- include/linux/writeback.h | 9 +++++---- mm/page-writeback.c | 10 ++++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 64a36ba43b2f..b096159086e8 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -53,10 +53,11 @@ struct writeback_control { loff_t start; loff_t end; - unsigned nonblocking:1; /* Don't get stuck on request queues */ - unsigned encountered_congestion:1; /* An output: a queue is full */ - unsigned for_kupdate:1; /* A kupdate writeback */ - unsigned for_reclaim:1; /* Invoked from the page allocator */ + unsigned nonblocking:1; /* Don't get stuck on request queues */ + unsigned encountered_congestion:1; /* An output: a queue is full */ + unsigned for_kupdate:1; /* A kupdate writeback */ + unsigned for_reclaim:1; /* Invoked from the page allocator */ + unsigned for_writepages:1; /* This is a writepages() call */ }; /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0166ea15c9ee..5240e426c1f7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -550,11 +550,17 @@ void __init page_writeback_init(void) int do_writepages(struct address_space *mapping, struct writeback_control *wbc) { + int ret; + if (wbc->nr_to_write <= 0) return 0; + wbc->for_writepages = 1; if (mapping->a_ops->writepages) - return mapping->a_ops->writepages(mapping, wbc); - return generic_writepages(mapping, wbc); + ret = mapping->a_ops->writepages(mapping, wbc); + else + ret = generic_writepages(mapping, wbc); + wbc->for_writepages = 0; + return ret; } /** -- cgit v1.2.3-71-gd317 From 
abbcf28f23d53e8ec56a91f3528743913fa2694a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:03 +0100 Subject: SUNRPC: Yet more RPC cleanups Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 ++- net/sunrpc/clnt.c | 32 +++++++++++++++++--------------- net/sunrpc/pmap_clnt.c | 8 +++----- net/sunrpc/sched.c | 31 ++++++++++++------------------- 4 files changed, 34 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 4d77e90d0b30..4c4b2dc8aca5 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -233,6 +233,7 @@ struct rpc_task *rpc_new_child(struct rpc_clnt *, struct rpc_task *parent); void rpc_init_task(struct rpc_task *, struct rpc_clnt *, rpc_action exitfunc, int flags); void rpc_release_task(struct rpc_task *); +void rpc_exit_task(struct rpc_task *); void rpc_killall_tasks(struct rpc_clnt *); int rpc_execute(struct rpc_task *); void rpc_run_child(struct rpc_task *parent, struct rpc_task *child, @@ -259,7 +260,7 @@ void rpc_destroy_mempool(void); static inline void rpc_exit(struct rpc_task *task, int status) { task->tk_status = status; - task->tk_action = NULL; + task->tk_action = rpc_exit_task; } #ifdef RPC_DEBUG diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 61c3abeaccae..6ab4cbd8a901 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -511,7 +511,7 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) if (task->tk_status == 0) task->tk_action = call_start; else - task->tk_action = NULL; + task->tk_action = rpc_exit_task; } void @@ -892,7 +892,7 @@ call_transmit(struct rpc_task *task) if (task->tk_status < 0) return; if (!task->tk_msg.rpc_proc->p_decode) { - task->tk_action = NULL; + task->tk_action = rpc_exit_task; rpc_wake_up_task(task); } return; @@ -1039,13 +1039,14 @@ call_decode(struct rpc_task *task) sizeof(req->rq_rcv_buf)) != 0); /* Verify the RPC header */ - if 
(!(p = call_verify(task))) { - if (task->tk_action == NULL) - return; - goto out_retry; + p = call_verify(task); + if (IS_ERR(p)) { + if (p == ERR_PTR(-EAGAIN)) + goto out_retry; + return; } - task->tk_action = NULL; + task->tk_action = rpc_exit_task; if (decode) task->tk_status = rpcauth_unwrap_resp(task, decode, req, p, @@ -1138,7 +1139,7 @@ call_verify(struct rpc_task *task) if ((n = ntohl(*p++)) != RPC_REPLY) { printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n); - goto out_retry; + goto out_garbage; } if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { if (--len < 0) @@ -1168,7 +1169,7 @@ call_verify(struct rpc_task *task) task->tk_pid); rpcauth_invalcred(task); task->tk_action = call_refresh; - return NULL; + goto out_retry; case RPC_AUTH_BADCRED: case RPC_AUTH_BADVERF: /* possibly garbled cred/verf? */ @@ -1178,7 +1179,7 @@ call_verify(struct rpc_task *task) dprintk("RPC: %4d call_verify: retry garbled creds\n", task->tk_pid); task->tk_action = call_bind; - return NULL; + goto out_retry; case RPC_AUTH_TOOWEAK: printk(KERN_NOTICE "call_verify: server requires stronger " "authentication.\n"); @@ -1193,7 +1194,7 @@ call_verify(struct rpc_task *task) } if (!(p = rpcauth_checkverf(task, p))) { printk(KERN_WARNING "call_verify: auth check failed\n"); - goto out_retry; /* bad verifier, retry */ + goto out_garbage; /* bad verifier, retry */ } len = p - (u32 *)iov->iov_base - 1; if (len < 0) @@ -1230,23 +1231,24 @@ call_verify(struct rpc_task *task) /* Also retry */ } -out_retry: +out_garbage: task->tk_client->cl_stats->rpcgarbage++; if (task->tk_garb_retry) { task->tk_garb_retry--; dprintk("RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid); task->tk_action = call_bind; - return NULL; +out_retry: + return ERR_PTR(-EAGAIN); } printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__); out_eio: error = -EIO; out_err: rpc_exit(task, error); - return NULL; + return ERR_PTR(error); out_overflow: printk(KERN_WARNING "RPC %s: server reply was truncated.\n", 
__FUNCTION__); - goto out_retry; + goto out_garbage; } static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj) diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index a398575f94b8..cad4568fbbe2 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -90,8 +90,7 @@ bailout: map->pm_binding = 0; rpc_wake_up(&map->pm_bindwait); spin_unlock(&pmap_lock); - task->tk_status = -EIO; - task->tk_action = NULL; + rpc_exit(task, -EIO); } #ifdef CONFIG_ROOT_NFS @@ -138,11 +137,10 @@ pmap_getport_done(struct rpc_task *task) task->tk_pid, task->tk_status, clnt->cl_port); if (task->tk_status < 0) { /* Make the calling task exit with an error */ - task->tk_action = NULL; + task->tk_action = rpc_exit_task; } else if (clnt->cl_port == 0) { /* Program not registered */ - task->tk_status = -EACCES; - task->tk_action = NULL; + rpc_exit(task, -EACCES); } else { /* byte-swap port number first */ clnt->cl_port = htons(clnt->cl_port); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 54e60a657500..3fcf7b0e1f6c 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -555,28 +555,22 @@ __rpc_atrun(struct rpc_task *task) } /* - * Helper that calls task->tk_exit if it exists and then returns - * true if we should exit __rpc_execute. 
+ * Helper that calls task->tk_exit if it exists */ -static inline int __rpc_do_exit(struct rpc_task *task) +void rpc_exit_task(struct rpc_task *task) { + task->tk_action = NULL; if (task->tk_exit != NULL) { - lock_kernel(); task->tk_exit(task); - unlock_kernel(); - /* If tk_action is non-null, we should restart the call */ if (task->tk_action != NULL) { - if (!RPC_ASSASSINATED(task)) { - /* Release RPC slot and buffer memory */ - xprt_release(task); - rpc_free(task); - return 0; - } - printk(KERN_ERR "RPC: dead task tried to walk away.\n"); + WARN_ON(RPC_ASSASSINATED(task)); + /* Always release the RPC slot and buffer memory */ + xprt_release(task); + rpc_free(task); } } - return 1; } +EXPORT_SYMBOL(rpc_exit_task); static int rpc_wait_bit_interruptible(void *word) { @@ -631,12 +625,11 @@ static int __rpc_execute(struct rpc_task *task) * by someone else. */ if (!RPC_IS_QUEUED(task)) { - if (task->tk_action != NULL) { - lock_kernel(); - task->tk_action(task); - unlock_kernel(); - } else if (__rpc_do_exit(task)) + if (task->tk_action == NULL) break; + lock_kernel(); + task->tk_action(task); + unlock_kernel(); } /* -- cgit v1.2.3-71-gd317 From 963d8fe53339128ee46a7701f2e36305f0ccff8c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:04 +0100 Subject: RPC: Clean up RPC task structure Shrink the RPC task structure. Instead of storing separate pointers for task->tk_exit and task->tk_release, put them in a structure. Also pass the user data pointer as a parameter instead of passing it via task->tk_calldata. This enables us to nest callbacks. 
Signed-off-by: Trond Myklebust --- fs/lockd/clntproc.c | 38 ++++++++------- fs/lockd/svc4proc.c | 15 +++--- fs/lockd/svclock.c | 14 ++++-- fs/lockd/svcproc.c | 14 ++++-- fs/nfs/direct.c | 1 - fs/nfs/nfs3proc.c | 44 +++++++++++------- fs/nfs/nfs4proc.c | 107 +++++++++++++++++++++++++------------------ fs/nfs/proc.c | 28 +++++++---- fs/nfs/read.c | 10 ++-- fs/nfs/unlink.c | 19 ++++---- fs/nfs/write.c | 21 +++------ fs/nfsd/nfs4callback.c | 10 ++-- include/linux/lockd/lockd.h | 2 +- include/linux/nfs_fs.h | 12 +++-- include/linux/sunrpc/clnt.h | 3 +- include/linux/sunrpc/sched.h | 20 +++++--- net/sunrpc/clnt.c | 15 +++--- net/sunrpc/sched.c | 53 +++++++++++---------- 18 files changed, 241 insertions(+), 185 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index c5a33648e9fd..816333cd377b 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -26,11 +26,12 @@ static int nlmclnt_test(struct nlm_rqst *, struct file_lock *); static int nlmclnt_lock(struct nlm_rqst *, struct file_lock *); static int nlmclnt_unlock(struct nlm_rqst *, struct file_lock *); -static void nlmclnt_unlock_callback(struct rpc_task *); -static void nlmclnt_cancel_callback(struct rpc_task *); static int nlm_stat_to_errno(u32 stat); static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host); +static const struct rpc_call_ops nlmclnt_unlock_ops; +static const struct rpc_call_ops nlmclnt_cancel_ops; + /* * Cookie counter for NLM requests */ @@ -399,8 +400,7 @@ in_grace_period: /* * Generic NLM call, async version. 
*/ -int -nlmsvc_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) +int nlmsvc_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) { struct nlm_host *host = req->a_host; struct rpc_clnt *clnt; @@ -419,13 +419,12 @@ nlmsvc_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) msg.rpc_proc = &clnt->cl_procinfo[proc]; /* bootstrap and kick off the async RPC call */ - status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, callback, req); + status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req); return status; } -static int -nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) +static int nlmclnt_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) { struct nlm_host *host = req->a_host; struct rpc_clnt *clnt; @@ -448,7 +447,7 @@ nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) /* Increment host refcount */ nlm_get_host(host); /* bootstrap and kick off the async RPC call */ - status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, callback, req); + status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req); if (status < 0) nlm_release_host(host); return status; @@ -664,7 +663,7 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) if (req->a_flags & RPC_TASK_ASYNC) { status = nlmclnt_async_call(req, NLMPROC_UNLOCK, - nlmclnt_unlock_callback); + &nlmclnt_unlock_ops); /* Hrmf... 
Do the unlock early since locks_remove_posix() * really expects us to free the lock synchronously */ do_vfs_lock(fl); @@ -692,10 +691,9 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) return -ENOLCK; } -static void -nlmclnt_unlock_callback(struct rpc_task *task) +static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) { - struct nlm_rqst *req = (struct nlm_rqst *) task->tk_calldata; + struct nlm_rqst *req = data; int status = req->a_res.status; if (RPC_ASSASSINATED(task)) @@ -722,6 +720,10 @@ die: rpc_restart_call(task); } +static const struct rpc_call_ops nlmclnt_unlock_ops = { + .rpc_call_done = nlmclnt_unlock_callback, +}; + /* * Cancel a blocked lock request. * We always use an async RPC call for this in order not to hang a @@ -750,8 +752,7 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl) nlmclnt_setlockargs(req, fl); - status = nlmclnt_async_call(req, NLMPROC_CANCEL, - nlmclnt_cancel_callback); + status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops); if (status < 0) { nlmclnt_release_lockargs(req); kfree(req); @@ -765,10 +766,9 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl) return status; } -static void -nlmclnt_cancel_callback(struct rpc_task *task) +static void nlmclnt_cancel_callback(struct rpc_task *task, void *data) { - struct nlm_rqst *req = (struct nlm_rqst *) task->tk_calldata; + struct nlm_rqst *req = data; if (RPC_ASSASSINATED(task)) goto die; @@ -807,6 +807,10 @@ retry_cancel: rpc_delay(task, 30 * HZ); } +static const struct rpc_call_ops nlmclnt_cancel_ops = { + .rpc_call_done = nlmclnt_cancel_callback, +}; + /* * Convert an NLM status code to a generic kernel errno */ diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 489670e21769..4063095d849e 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -22,7 +22,8 @@ #define NLMDBG_FACILITY NLMDBG_CLIENT static u32 nlm4svc_callback(struct svc_rqst *, u32, struct nlm_res *); -static void 
nlm4svc_callback_exit(struct rpc_task *); + +static const struct rpc_call_ops nlm4svc_callback_ops; /* * Obtain client and file from arguments @@ -470,7 +471,6 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp, } - /* * This is the generic lockd callback for async RPC calls */ @@ -494,7 +494,7 @@ nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) call->a_host = host; memcpy(&call->a_args, resp, sizeof(*resp)); - if (nlmsvc_async_call(call, proc, nlm4svc_callback_exit) < 0) + if (nlmsvc_async_call(call, proc, &nlm4svc_callback_ops) < 0) goto error; return rpc_success; @@ -504,10 +504,9 @@ nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) return rpc_system_err; } -static void -nlm4svc_callback_exit(struct rpc_task *task) +static void nlm4svc_callback_exit(struct rpc_task *task, void *data) { - struct nlm_rqst *call = (struct nlm_rqst *) task->tk_calldata; + struct nlm_rqst *call = data; if (task->tk_status < 0) { dprintk("lockd: %4d callback failed (errno = %d)\n", @@ -517,6 +516,10 @@ nlm4svc_callback_exit(struct rpc_task *task) kfree(call); } +static const struct rpc_call_ops nlm4svc_callback_ops = { + .rpc_call_done = nlm4svc_callback_exit, +}; + /* * NLM Server procedures. 
*/ diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 49f959796b66..87d09a0d8f64 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -41,7 +41,8 @@ static void nlmsvc_insert_block(struct nlm_block *block, unsigned long); static int nlmsvc_remove_block(struct nlm_block *block); -static void nlmsvc_grant_callback(struct rpc_task *task); + +static const struct rpc_call_ops nlmsvc_grant_ops; /* * The list of blocked locks to retry @@ -562,7 +563,7 @@ callback: /* Call the client */ nlm_get_host(block->b_call.a_host); if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG, - nlmsvc_grant_callback) < 0) + &nlmsvc_grant_ops) < 0) nlm_release_host(block->b_call.a_host); up(&file->f_sema); } @@ -575,10 +576,9 @@ callback: * chain once more in order to have it removed by lockd itself (which can * then sleep on the file semaphore without disrupting e.g. the nfs client). */ -static void -nlmsvc_grant_callback(struct rpc_task *task) +static void nlmsvc_grant_callback(struct rpc_task *task, void *data) { - struct nlm_rqst *call = (struct nlm_rqst *) task->tk_calldata; + struct nlm_rqst *call = data; struct nlm_block *block; unsigned long timeout; struct sockaddr_in *peer_addr = RPC_PEERADDR(task->tk_client); @@ -614,6 +614,10 @@ nlmsvc_grant_callback(struct rpc_task *task) nlm_release_host(call->a_host); } +static const struct rpc_call_ops nlmsvc_grant_ops = { + .rpc_call_done = nlmsvc_grant_callback, +}; + /* * We received a GRANT_RES callback. Try to find the corresponding * block. 
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 757e344cf200..3bc437e0cf5b 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -23,7 +23,8 @@ #define NLMDBG_FACILITY NLMDBG_CLIENT static u32 nlmsvc_callback(struct svc_rqst *, u32, struct nlm_res *); -static void nlmsvc_callback_exit(struct rpc_task *); + +static const struct rpc_call_ops nlmsvc_callback_ops; #ifdef CONFIG_LOCKD_V4 static u32 @@ -518,7 +519,7 @@ nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) call->a_host = host; memcpy(&call->a_args, resp, sizeof(*resp)); - if (nlmsvc_async_call(call, proc, nlmsvc_callback_exit) < 0) + if (nlmsvc_async_call(call, proc, &nlmsvc_callback_ops) < 0) goto error; return rpc_success; @@ -528,10 +529,9 @@ nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp) return rpc_system_err; } -static void -nlmsvc_callback_exit(struct rpc_task *task) +static void nlmsvc_callback_exit(struct rpc_task *task, void *data) { - struct nlm_rqst *call = (struct nlm_rqst *) task->tk_calldata; + struct nlm_rqst *call = data; if (task->tk_status < 0) { dprintk("lockd: %4d callback failed (errno = %d)\n", @@ -541,6 +541,10 @@ nlmsvc_callback_exit(struct rpc_task *task) kfree(call); } +static const struct rpc_call_ops nlmsvc_callback_ops = { + .rpc_call_done = nlmsvc_callback_exit, +}; + /* * NLM Server procedures. 
*/ diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 079228817603..a834423942c7 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -269,7 +269,6 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, data->task.tk_cookie = (unsigned long) inode; data->task.tk_calldata = data; - data->task.tk_release = nfs_readdata_release; data->complete = nfs_direct_read_result; lock_kernel(); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 92c870d19ccd..c172a7584646 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -732,19 +732,23 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); -static void -nfs3_read_done(struct rpc_task *task) +static void nfs3_read_done(struct rpc_task *task, void *calldata) { - struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; + struct nfs_read_data *data = calldata; if (nfs3_async_handle_jukebox(task)) return; /* Call back common NFS readpage processing */ if (task->tk_status >= 0) nfs_refresh_inode(data->inode, &data->fattr); - nfs_readpage_result(task); + nfs_readpage_result(task, calldata); } +static const struct rpc_call_ops nfs3_read_ops = { + .rpc_call_done = nfs3_read_done, + .rpc_release = nfs_readdata_release, +}; + static void nfs3_proc_read_setup(struct nfs_read_data *data) { @@ -762,23 +766,26 @@ nfs3_proc_read_setup(struct nfs_read_data *data) flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); /* Finalize the task. 
*/ - rpc_init_task(task, NFS_CLIENT(inode), nfs3_read_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data); rpc_call_setup(task, &msg, 0); } -static void -nfs3_write_done(struct rpc_task *task) +static void nfs3_write_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data; + struct nfs_write_data *data = calldata; if (nfs3_async_handle_jukebox(task)) return; - data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_writeback_done(task); + nfs_writeback_done(task, calldata); } +static const struct rpc_call_ops nfs3_write_ops = { + .rpc_call_done = nfs3_write_done, + .rpc_release = nfs_writedata_release, +}; + static void nfs3_proc_write_setup(struct nfs_write_data *data, int how) { @@ -806,23 +813,26 @@ nfs3_proc_write_setup(struct nfs_write_data *data, int how) flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), nfs3_write_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data); rpc_call_setup(task, &msg, 0); } -static void -nfs3_commit_done(struct rpc_task *task) +static void nfs3_commit_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data; + struct nfs_write_data *data = calldata; if (nfs3_async_handle_jukebox(task)) return; - data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_commit_done(task); + nfs_commit_done(task, calldata); } +static const struct rpc_call_ops nfs3_commit_ops = { + .rpc_call_done = nfs3_commit_done, + .rpc_release = nfs_commit_release, +}; + static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how) { @@ -840,7 +850,7 @@ nfs3_proc_commit_setup(struct nfs_write_data *data, int how) flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; /* Finalize the task. 
*/ - rpc_init_task(task, NFS_CLIENT(inode), nfs3_commit_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data); rpc_call_setup(task, &msg, 0); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f988a9417b13..3d5d3c07d621 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -196,14 +196,12 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf /* Helper for asynchronous RPC calls */ static int nfs4_call_async(struct rpc_clnt *clnt, rpc_action tk_begin, - rpc_action tk_exit, void *calldata) + const struct rpc_call_ops *tk_ops, void *calldata) { struct rpc_task *task; - if (!(task = rpc_new_task(clnt, tk_exit, RPC_TASK_ASYNC))) + if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata))) return -ENOMEM; - - task->tk_calldata = calldata; task->tk_action = tk_begin; rpc_execute(task); return 0; @@ -867,10 +865,10 @@ struct nfs4_closedata { struct nfs_fattr fattr; }; -static void nfs4_free_closedata(struct nfs4_closedata *calldata) +static void nfs4_free_closedata(void *data) { - struct nfs4_state *state = calldata->state; - struct nfs4_state_owner *sp = state->owner; + struct nfs4_closedata *calldata = data; + struct nfs4_state_owner *sp = calldata->state->owner; nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); @@ -878,9 +876,9 @@ static void nfs4_free_closedata(struct nfs4_closedata *calldata) kfree(calldata); } -static void nfs4_close_done(struct rpc_task *task) +static void nfs4_close_done(struct rpc_task *task, void *data) { - struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; + struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; struct nfs_server *server = NFS_SERVER(calldata->inode); @@ -904,7 +902,6 @@ static void nfs4_close_done(struct rpc_task *task) } } nfs_refresh_inode(calldata->inode, calldata->res.fattr); - nfs4_free_closedata(calldata); } static void nfs4_close_begin(struct rpc_task *task) 
@@ -918,10 +915,8 @@ static void nfs4_close_begin(struct rpc_task *task) .rpc_cred = state->owner->so_cred, }; int mode = 0, old_mode; - int status; - status = nfs_wait_on_sequence(calldata->arg.seqid, task); - if (status != 0) + if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) return; /* Recalculate the new open mode in case someone reopened the file * while we were waiting in line to be scheduled. @@ -937,9 +932,8 @@ static void nfs4_close_begin(struct rpc_task *task) spin_unlock(&calldata->inode->i_lock); spin_unlock(&state->owner->so_lock); if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) { - nfs4_free_closedata(calldata); - task->tk_exit = NULL; - rpc_exit(task, 0); + /* Note: exit _without_ calling nfs4_close_done */ + task->tk_action = NULL; return; } nfs_fattr_init(calldata->res.fattr); @@ -949,6 +943,11 @@ static void nfs4_close_begin(struct rpc_task *task) rpc_call_setup(task, &msg, 0); } +static const struct rpc_call_ops nfs4_close_ops = { + .rpc_call_done = nfs4_close_done, + .rpc_release = nfs4_free_closedata, +}; + /* * It is possible for data to be read/written from a mem-mapped file * after the sys_close call (which hits the vfs layer as a flush). 
@@ -982,7 +981,7 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state) calldata->res.server = server; status = nfs4_call_async(server->client, nfs4_close_begin, - nfs4_close_done, calldata); + &nfs4_close_ops, calldata); if (status == 0) goto out; @@ -2125,10 +2124,9 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } -static void -nfs4_read_done(struct rpc_task *task) +static void nfs4_read_done(struct rpc_task *task, void *calldata) { - struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; + struct nfs_read_data *data = calldata; struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { @@ -2138,9 +2136,14 @@ nfs4_read_done(struct rpc_task *task) if (task->tk_status > 0) renew_lease(NFS_SERVER(inode), data->timestamp); /* Call back common NFS readpage processing */ - nfs_readpage_result(task); + nfs_readpage_result(task, calldata); } +static const struct rpc_call_ops nfs4_read_ops = { + .rpc_call_done = nfs4_read_done, + .rpc_release = nfs_readdata_release, +}; + static void nfs4_proc_read_setup(struct nfs_read_data *data) { @@ -2160,14 +2163,13 @@ nfs4_proc_read_setup(struct nfs_read_data *data) flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); /* Finalize the task. 
*/ - rpc_init_task(task, NFS_CLIENT(inode), nfs4_read_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data); rpc_call_setup(task, &msg, 0); } -static void -nfs4_write_done(struct rpc_task *task) +static void nfs4_write_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data = calldata; struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { @@ -2179,9 +2181,14 @@ nfs4_write_done(struct rpc_task *task) nfs_post_op_update_inode(inode, data->res.fattr); } /* Call back common NFS writeback processing */ - nfs_writeback_done(task); + nfs_writeback_done(task, calldata); } +static const struct rpc_call_ops nfs4_write_ops = { + .rpc_call_done = nfs4_write_done, + .rpc_release = nfs_writedata_release, +}; + static void nfs4_proc_write_setup(struct nfs_write_data *data, int how) { @@ -2214,14 +2221,13 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how) flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; /* Finalize the task. 
*/ - rpc_init_task(task, NFS_CLIENT(inode), nfs4_write_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data); rpc_call_setup(task, &msg, 0); } -static void -nfs4_commit_done(struct rpc_task *task) +static void nfs4_commit_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data = calldata; struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { @@ -2231,9 +2237,14 @@ nfs4_commit_done(struct rpc_task *task) if (task->tk_status >= 0) nfs_post_op_update_inode(inode, data->res.fattr); /* Call back common NFS writeback processing */ - nfs_commit_done(task); + nfs_commit_done(task, calldata); } +static const struct rpc_call_ops nfs4_commit_ops = { + .rpc_call_done = nfs4_commit_done, + .rpc_release = nfs_commit_release, +}; + static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how) { @@ -2255,7 +2266,7 @@ nfs4_proc_commit_setup(struct nfs_write_data *data, int how) flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), nfs4_commit_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data); rpc_call_setup(task, &msg, 0); } @@ -2263,11 +2274,10 @@ nfs4_proc_commit_setup(struct nfs_write_data *data, int how) * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special * standalone procedure for queueing an asynchronous RENEW. 
*/ -static void -renew_done(struct rpc_task *task) +static void nfs4_renew_done(struct rpc_task *task, void *data) { struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; - unsigned long timestamp = (unsigned long)task->tk_calldata; + unsigned long timestamp = (unsigned long)data; if (task->tk_status < 0) { switch (task->tk_status) { @@ -2284,6 +2294,10 @@ renew_done(struct rpc_task *task) spin_unlock(&clp->cl_lock); } +static const struct rpc_call_ops nfs4_renew_ops = { + .rpc_call_done = nfs4_renew_done, +}; + int nfs4_proc_async_renew(struct nfs4_client *clp) { @@ -2294,7 +2308,7 @@ nfs4_proc_async_renew(struct nfs4_client *clp) }; return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, - renew_done, (void *)jiffies); + &nfs4_renew_ops, (void *)jiffies); } int @@ -2866,15 +2880,16 @@ static void nfs4_locku_release_calldata(struct nfs4_unlockdata *calldata) } } -static void nfs4_locku_complete(struct nfs4_unlockdata *calldata) +static void nfs4_locku_complete(void *data) { + struct nfs4_unlockdata *calldata = data; complete(&calldata->completion); nfs4_locku_release_calldata(calldata); } -static void nfs4_locku_done(struct rpc_task *task) +static void nfs4_locku_done(struct rpc_task *task, void *data) { - struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata; + struct nfs4_unlockdata *calldata = data; nfs_increment_lock_seqid(task->tk_status, calldata->luargs.seqid); switch (task->tk_status) { @@ -2890,10 +2905,8 @@ static void nfs4_locku_done(struct rpc_task *task) default: if (nfs4_async_handle_error(task, calldata->res.server) == -EAGAIN) { rpc_restart_call(task); - return; } } - nfs4_locku_complete(calldata); } static void nfs4_locku_begin(struct rpc_task *task) @@ -2911,14 +2924,18 @@ static void nfs4_locku_begin(struct rpc_task *task) if (status != 0) return; if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) { - nfs4_locku_complete(calldata); - task->tk_exit = NULL; - rpc_exit(task, 0); + /* Note: 
exit _without_ running nfs4_locku_done */ + task->tk_action = NULL; return; } rpc_call_setup(task, &msg, 0); } +static const struct rpc_call_ops nfs4_locku_ops = { + .rpc_call_done = nfs4_locku_done, + .rpc_release = nfs4_locku_complete, +}; + static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) { struct nfs4_unlockdata *calldata; @@ -2963,7 +2980,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * init_completion(&calldata->completion); status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_locku_begin, - nfs4_locku_done, calldata); + &nfs4_locku_ops, calldata); if (status == 0) wait_for_completion_interruptible(&calldata->completion); do_vfs_lock(request->fl_file, request); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index e1e3ca5d746b..6145e82b45e8 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -547,10 +547,9 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); -static void -nfs_read_done(struct rpc_task *task) +static void nfs_read_done(struct rpc_task *task, void *calldata) { - struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; + struct nfs_read_data *data = calldata; if (task->tk_status >= 0) { nfs_refresh_inode(data->inode, data->res.fattr); @@ -560,9 +559,14 @@ nfs_read_done(struct rpc_task *task) if (data->args.offset + data->args.count >= data->res.fattr->size) data->res.eof = 1; } - nfs_readpage_result(task); + nfs_readpage_result(task, calldata); } +static const struct rpc_call_ops nfs_read_ops = { + .rpc_call_done = nfs_read_done, + .rpc_release = nfs_readdata_release, +}; + static void nfs_proc_read_setup(struct nfs_read_data *data) { @@ -580,20 +584,24 @@ nfs_proc_read_setup(struct nfs_read_data *data) flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); /* Finalize the task. 
*/ - rpc_init_task(task, NFS_CLIENT(inode), nfs_read_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data); rpc_call_setup(task, &msg, 0); } -static void -nfs_write_done(struct rpc_task *task) +static void nfs_write_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data = calldata; if (task->tk_status >= 0) nfs_post_op_update_inode(data->inode, data->res.fattr); - nfs_writeback_done(task); + nfs_writeback_done(task, calldata); } +static const struct rpc_call_ops nfs_write_ops = { + .rpc_call_done = nfs_write_done, + .rpc_release = nfs_writedata_release, +}; + static void nfs_proc_write_setup(struct nfs_write_data *data, int how) { @@ -614,7 +622,7 @@ nfs_proc_write_setup(struct nfs_write_data *data, int how) flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; /* Finalize the task. */ - rpc_init_task(task, NFS_CLIENT(inode), nfs_write_done, flags); + rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data); rpc_call_setup(task, &msg, 0); } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 5f20eafba8ec..21486242c3d3 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -42,9 +42,8 @@ mempool_t *nfs_rdata_mempool; #define MIN_POOL_READ (32) -void nfs_readdata_release(struct rpc_task *task) +void nfs_readdata_release(void *data) { - struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; nfs_readdata_free(data); } @@ -220,9 +219,6 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, NFS_PROTO(inode)->read_setup(data); data->task.tk_cookie = (unsigned long)inode; - data->task.tk_calldata = data; - /* Release requests */ - data->task.tk_release = nfs_readdata_release; dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n", data->task.tk_pid, @@ -452,9 +448,9 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status) * This is the callback from RPC telling 
us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -void nfs_readpage_result(struct rpc_task *task) +void nfs_readpage_result(struct rpc_task *task, void *calldata) { - struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; + struct nfs_read_data *data = calldata; struct nfs_readargs *argp = &data->args; struct nfs_readres *resp = &data->res; int status = task->tk_status; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index d639d172d568..1494484ba86d 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -116,10 +116,9 @@ nfs_async_unlink_init(struct rpc_task *task) * * Do the directory attribute update. */ -static void -nfs_async_unlink_done(struct rpc_task *task) +static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) { - struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata; + struct nfs_unlinkdata *data = calldata; struct dentry *dir = data->dir; struct inode *dir_i; @@ -141,13 +140,17 @@ nfs_async_unlink_done(struct rpc_task *task) * We need to call nfs_put_unlinkdata as a 'tk_release' task since the * rpc_task would be freed too. 
*/ -static void -nfs_async_unlink_release(struct rpc_task *task) +static void nfs_async_unlink_release(void *calldata) { - struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata; + struct nfs_unlinkdata *data = calldata; nfs_put_unlinkdata(data); } +static const struct rpc_call_ops nfs_unlink_ops = { + .rpc_call_done = nfs_async_unlink_done, + .rpc_release = nfs_async_unlink_release, +}; + /** * nfs_async_unlink - asynchronous unlinking of a file * @dentry: dentry to unlink @@ -179,10 +182,8 @@ nfs_async_unlink(struct dentry *dentry) data->count = 1; task = &data->task; - rpc_init_task(task, clnt, nfs_async_unlink_done , RPC_TASK_ASYNC); - task->tk_calldata = data; + rpc_init_task(task, clnt, RPC_TASK_ASYNC, &nfs_unlink_ops, data); task->tk_action = nfs_async_unlink_init; - task->tk_release = nfs_async_unlink_release; spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_NFSFS_RENAMED; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 95d00f9132d0..80bc4ea1b824 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -104,9 +104,8 @@ static inline void nfs_commit_free(struct nfs_write_data *p) mempool_free(p, nfs_commit_mempool); } -static void nfs_writedata_release(struct rpc_task *task) +void nfs_writedata_release(void *wdata) { - struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata; nfs_writedata_free(wdata); } @@ -871,9 +870,6 @@ static void nfs_write_rpcsetup(struct nfs_page *req, data->task.tk_priority = flush_task_priority(how); data->task.tk_cookie = (unsigned long)inode; - data->task.tk_calldata = data; - /* Release requests */ - data->task.tk_release = nfs_writedata_release; dprintk("NFS: %4d initiated write call (req %s/%Ld, %u bytes @ offset %Lu)\n", data->task.tk_pid, @@ -1131,9 +1127,9 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status) /* * This function is called when the WRITE call is complete. 
*/ -void nfs_writeback_done(struct rpc_task *task) +void nfs_writeback_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data = calldata; struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; @@ -1200,9 +1196,8 @@ void nfs_writeback_done(struct rpc_task *task) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -static void nfs_commit_release(struct rpc_task *task) +void nfs_commit_release(void *wdata) { - struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata; nfs_commit_free(wdata); } @@ -1238,9 +1233,6 @@ static void nfs_commit_rpcsetup(struct list_head *head, data->task.tk_priority = flush_task_priority(how); data->task.tk_cookie = (unsigned long)inode; - data->task.tk_calldata = data; - /* Release requests */ - data->task.tk_release = nfs_commit_release; dprintk("NFS: %4d initiated commit call\n", data->task.tk_pid); } @@ -1277,10 +1269,9 @@ nfs_commit_list(struct list_head *head, int how) /* * COMMIT call returned */ -void -nfs_commit_done(struct rpc_task *task) +void nfs_commit_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata; + struct nfs_write_data *data = calldata; struct nfs_page *req; int res = 0; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 583c0710e45e..cf92008f219a 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -53,7 +53,7 @@ #define NFSPROC4_CB_COMPOUND 1 /* declarations */ -static void nfs4_cb_null(struct rpc_task *task); +static const struct rpc_call_ops nfs4_cb_null_ops; /* Index of predefined Linux callback client operations */ @@ -447,7 +447,7 @@ nfsd4_probe_callback(struct nfs4_client *clp) msg.rpc_cred = nfsd4_lookupcred(clp,0); if (IS_ERR(msg.rpc_cred)) goto out_rpciod; - status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, nfs4_cb_null, NULL); + status = rpc_call_async(clnt, &msg, 
RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL); put_rpccred(msg.rpc_cred); if (status != 0) { @@ -469,7 +469,7 @@ out_err: } static void -nfs4_cb_null(struct rpc_task *task) +nfs4_cb_null(struct rpc_task *task, void *dummy) { struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; struct nfs4_callback *cb = &clp->cl_callback; @@ -488,6 +488,10 @@ out: put_nfs4_client(clp); } +static const struct rpc_call_ops nfs4_cb_null_ops = { + .rpc_call_done = nfs4_cb_null, +}; + /* * called with dp->dl_count inc'ed. * nfs4_lock_state() may or may not have been called. diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 16d4e5a08e1d..95c8fea293ba 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -172,7 +172,7 @@ extern struct nlm_host *nlm_find_client(void); /* * Server-side lock handling */ -int nlmsvc_async_call(struct nlm_rqst *, u32, rpc_action); +int nlmsvc_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *); u32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, struct nlm_lock *, int, struct nlm_cookie *); u32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 2516adeccecf..4dff705d2ff2 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -406,10 +406,12 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); -extern void nfs_writeback_done(struct rpc_task *task); +extern void nfs_writeback_done(struct rpc_task *task, void *data); +extern void nfs_writedata_release(void *data); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -extern void nfs_commit_done(struct rpc_task *); +extern void nfs_commit_done(struct rpc_task *, void *data); +extern void 
nfs_commit_release(void *data); #endif /* @@ -481,7 +483,9 @@ static inline void nfs_writedata_free(struct nfs_write_data *p) extern int nfs_readpage(struct file *, struct page *); extern int nfs_readpages(struct file *, struct address_space *, struct list_head *, unsigned); -extern void nfs_readpage_result(struct rpc_task *); +extern void nfs_readpage_result(struct rpc_task *, void *); +extern void nfs_readdata_release(void *data); + /* * Allocate and free nfs_read_data structures @@ -501,8 +505,6 @@ static inline void nfs_readdata_free(struct nfs_read_data *p) mempool_free(p, nfs_rdata_mempool); } -extern void nfs_readdata_release(struct rpc_task *task); - /* * linux/fs/nfs3proc.c */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ab151bbb66df..b0ab959eca65 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -126,7 +126,8 @@ int rpc_register(u32, u32, int, unsigned short, int *); void rpc_call_setup(struct rpc_task *, struct rpc_message *, int); int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, - int flags, rpc_action callback, void *clntdata); + int flags, const struct rpc_call_ops *tk_ops, + void *calldata); int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags); void rpc_restart_call(struct rpc_task *); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 4c4b2dc8aca5..581d8cdc3b86 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -27,6 +27,7 @@ struct rpc_message { struct rpc_cred * rpc_cred; /* Credentials */ }; +struct rpc_call_ops; struct rpc_wait_queue; struct rpc_wait { struct list_head list; /* wait queue links */ @@ -61,13 +62,12 @@ struct rpc_task { * timeout_fn to be executed by timer bottom half * callback to be executed after waking up * action next procedure for async tasks - * exit exit async task and report to caller + * tk_ops caller callbacks */ void (*tk_timeout_fn)(struct rpc_task *); void 
(*tk_callback)(struct rpc_task *); void (*tk_action)(struct rpc_task *); - void (*tk_exit)(struct rpc_task *); - void (*tk_release)(struct rpc_task *); + const struct rpc_call_ops *tk_ops; void * tk_calldata; /* @@ -111,6 +111,12 @@ struct rpc_task { typedef void (*rpc_action)(struct rpc_task *); +struct rpc_call_ops { + void (*rpc_call_done)(struct rpc_task *, void *); + void (*rpc_release)(void *); +}; + + /* * RPC task flags */ @@ -228,10 +234,12 @@ struct rpc_wait_queue { /* * Function prototypes */ -struct rpc_task *rpc_new_task(struct rpc_clnt *, rpc_action, int flags); +struct rpc_task *rpc_new_task(struct rpc_clnt *, int flags, + const struct rpc_call_ops *ops, void *data); struct rpc_task *rpc_new_child(struct rpc_clnt *, struct rpc_task *parent); -void rpc_init_task(struct rpc_task *, struct rpc_clnt *, - rpc_action exitfunc, int flags); +void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, + int flags, const struct rpc_call_ops *ops, + void *data); void rpc_release_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_killall_tasks(struct rpc_clnt *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 6ab4cbd8a901..8b2f75bc006d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -374,10 +374,14 @@ out: * Default callback for async RPC calls */ static void -rpc_default_callback(struct rpc_task *task) +rpc_default_callback(struct rpc_task *task, void *data) { } +static const struct rpc_call_ops rpc_default_ops = { + .rpc_call_done = rpc_default_callback, +}; + /* * Export the signal mask handling for synchronous code that * sleeps on RPC calls @@ -432,7 +436,7 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) BUG_ON(flags & RPC_TASK_ASYNC); status = -ENOMEM; - task = rpc_new_task(clnt, NULL, flags); + task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL); if (task == NULL) goto out; @@ -459,7 +463,7 @@ out: */ int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int 
flags, - rpc_action callback, void *data) + const struct rpc_call_ops *tk_ops, void *data) { struct rpc_task *task; sigset_t oldset; @@ -472,12 +476,9 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, flags |= RPC_TASK_ASYNC; /* Create/initialize a new RPC task */ - if (!callback) - callback = rpc_default_callback; status = -ENOMEM; - if (!(task = rpc_new_task(clnt, callback, flags))) + if (!(task = rpc_new_task(clnt, flags, tk_ops, data))) goto out; - task->tk_calldata = data; /* Mask signals on GSS_AUTH upcalls */ rpc_task_sigmask(task, &oldset); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 3fcf7b0e1f6c..8d6233d3248b 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -555,13 +555,13 @@ __rpc_atrun(struct rpc_task *task) } /* - * Helper that calls task->tk_exit if it exists + * Helper that calls task->tk_ops->rpc_call_done if it exists */ void rpc_exit_task(struct rpc_task *task) { task->tk_action = NULL; - if (task->tk_exit != NULL) { - task->tk_exit(task); + if (task->tk_ops->rpc_call_done != NULL) { + task->tk_ops->rpc_call_done(task, task->tk_calldata); if (task->tk_action != NULL) { WARN_ON(RPC_ASSASSINATED(task)); /* Always release the RPC slot and buffer memory */ @@ -747,7 +747,7 @@ rpc_free(struct rpc_task *task) /* * Creation and deletion of RPC task structures */ -void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action callback, int flags) +void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) { memset(task, 0, sizeof(*task)); init_timer(&task->tk_timer); @@ -755,7 +755,8 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer; task->tk_client = clnt; task->tk_flags = flags; - task->tk_exit = callback; + task->tk_ops = tk_ops; + task->tk_calldata = calldata; /* Initialize retry counters */ task->tk_garb_retry = 2; @@ 
-784,6 +785,8 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call list_add_tail(&task->tk_task, &all_tasks); spin_unlock(&rpc_sched_lock); + BUG_ON(task->tk_ops == NULL); + dprintk("RPC: %4d new task procpid %d\n", task->tk_pid, current->pid); } @@ -794,8 +797,7 @@ rpc_alloc_task(void) return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); } -static void -rpc_default_free_task(struct rpc_task *task) +static void rpc_free_task(struct rpc_task *task) { dprintk("RPC: %4d freeing task\n", task->tk_pid); mempool_free(task, rpc_task_mempool); @@ -806,8 +808,7 @@ rpc_default_free_task(struct rpc_task *task) * clean up after an allocation failure, as the client may * have specified "oneshot". */ -struct rpc_task * -rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags) +struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) { struct rpc_task *task; @@ -815,10 +816,7 @@ rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags) if (!task) goto cleanup; - rpc_init_task(task, clnt, callback, flags); - - /* Replace tk_release */ - task->tk_release = rpc_default_free_task; + rpc_init_task(task, clnt, flags, tk_ops, calldata); dprintk("RPC: %4d allocated task\n", task->tk_pid); task->tk_flags |= RPC_TASK_DYNAMIC; @@ -838,6 +836,8 @@ cleanup: void rpc_release_task(struct rpc_task *task) { + const struct rpc_call_ops *tk_ops = task->tk_ops; + void *calldata = task->tk_calldata; dprintk("RPC: %4d release task\n", task->tk_pid); #ifdef RPC_DEBUG @@ -869,8 +869,10 @@ void rpc_release_task(struct rpc_task *task) #ifdef RPC_DEBUG task->tk_magic = 0; #endif - if (task->tk_release) - task->tk_release(task); + if (task->tk_flags & RPC_TASK_DYNAMIC) + rpc_free_task(task); + if (tk_ops->rpc_release) + tk_ops->rpc_release(calldata); } /** @@ -883,12 +885,11 @@ void rpc_release_task(struct rpc_task *task) * * Caller must hold childq.lock */ -static inline struct 
rpc_task *rpc_find_parent(struct rpc_task *child) +static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent) { - struct rpc_task *task, *parent; + struct rpc_task *task; struct list_head *le; - parent = (struct rpc_task *) child->tk_calldata; task_for_each(task, le, &childq.tasks[0]) if (task == parent) return parent; @@ -896,18 +897,22 @@ static inline struct rpc_task *rpc_find_parent(struct rpc_task *child) return NULL; } -static void rpc_child_exit(struct rpc_task *child) +static void rpc_child_exit(struct rpc_task *child, void *calldata) { struct rpc_task *parent; spin_lock_bh(&childq.lock); - if ((parent = rpc_find_parent(child)) != NULL) { + if ((parent = rpc_find_parent(child, calldata)) != NULL) { parent->tk_status = child->tk_status; __rpc_wake_up_task(parent); } spin_unlock_bh(&childq.lock); } +static const struct rpc_call_ops rpc_child_ops = { + .rpc_call_done = rpc_child_exit, +}; + /* * Note: rpc_new_task releases the client after a failure. */ @@ -916,11 +921,9 @@ rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent) { struct rpc_task *task; - task = rpc_new_task(clnt, NULL, RPC_TASK_ASYNC | RPC_TASK_CHILD); + task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent); if (!task) goto fail; - task->tk_exit = rpc_child_exit; - task->tk_calldata = parent; return task; fail: @@ -1056,7 +1059,7 @@ void rpc_show_tasks(void) return; } printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " - "-rpcwait -action- --exit--\n"); + "-rpcwait -action- ---ops--\n"); alltask_for_each(t, le, &all_tasks) { const char *rpc_waitq = "none"; @@ -1071,7 +1074,7 @@ void rpc_show_tasks(void) (t->tk_client ? 
t->tk_client->cl_prog : 0), t->tk_rqstp, t->tk_timeout, rpc_waitq, - t->tk_action, t->tk_exit); + t->tk_action, t->tk_ops); } spin_unlock(&rpc_sched_lock); } -- cgit v1.2.3-71-gd317 From 4ce70ada1ff1d0b80916ec9ec5764ce44a50a54f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:05 +0100 Subject: SUNRPC: Further cleanups Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 21 +++++++++++---------- fs/nfs/unlink.c | 13 +++++-------- include/linux/sunrpc/sched.h | 1 + net/sunrpc/sched.c | 10 ++++++++++ 4 files changed, 27 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3d5d3c07d621..368b75b3bcba 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -195,14 +195,13 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf } /* Helper for asynchronous RPC calls */ -static int nfs4_call_async(struct rpc_clnt *clnt, rpc_action tk_begin, +static int nfs4_call_async(struct rpc_clnt *clnt, const struct rpc_call_ops *tk_ops, void *calldata) { struct rpc_task *task; if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata))) return -ENOMEM; - task->tk_action = tk_begin; rpc_execute(task); return 0; } @@ -882,6 +881,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) struct nfs4_state *state = calldata->state; struct nfs_server *server = NFS_SERVER(calldata->inode); + if (RPC_ASSASSINATED(task)) + return; /* hmm. we are done with the inode, and in the process of freeing * the state_owner. 
we keep this around to process errors */ @@ -904,9 +905,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data) nfs_refresh_inode(calldata->inode, calldata->res.fattr); } -static void nfs4_close_begin(struct rpc_task *task) +static void nfs4_close_prepare(struct rpc_task *task, void *data) { - struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; + struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], @@ -944,6 +945,7 @@ static void nfs4_close_begin(struct rpc_task *task) } static const struct rpc_call_ops nfs4_close_ops = { + .rpc_call_prepare = nfs4_close_prepare, .rpc_call_done = nfs4_close_done, .rpc_release = nfs4_free_closedata, }; @@ -980,8 +982,7 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state) calldata->res.fattr = &calldata->fattr; calldata->res.server = server; - status = nfs4_call_async(server->client, nfs4_close_begin, - &nfs4_close_ops, calldata); + status = nfs4_call_async(server->client, &nfs4_close_ops, calldata); if (status == 0) goto out; @@ -2909,9 +2910,9 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) } } -static void nfs4_locku_begin(struct rpc_task *task) +static void nfs4_locku_prepare(struct rpc_task *task, void *data) { - struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata; + struct nfs4_unlockdata *calldata = data; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], .rpc_argp = &calldata->arg, @@ -2932,6 +2933,7 @@ static void nfs4_locku_begin(struct rpc_task *task) } static const struct rpc_call_ops nfs4_locku_ops = { + .rpc_call_prepare = nfs4_locku_prepare, .rpc_call_done = nfs4_locku_done, .rpc_release = nfs4_locku_complete, }; @@ -2979,8 +2981,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * atomic_set(&calldata->refcount, 2); init_completion(&calldata->completion); 
- status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_locku_begin, - &nfs4_locku_ops, calldata); + status = nfs4_call_async(NFS_SERVER(inode)->client, &nfs4_locku_ops, calldata); if (status == 0) wait_for_completion_interruptible(&calldata->completion); do_vfs_lock(request->fl_file, request); diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 1494484ba86d..a65c7b53d558 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -87,10 +87,9 @@ nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data) * We delay initializing RPC info until after the call to dentry_iput() * in order to minimize races against rename(). */ -static void -nfs_async_unlink_init(struct rpc_task *task) +static void nfs_async_unlink_init(struct rpc_task *task, void *calldata) { - struct nfs_unlinkdata *data = (struct nfs_unlinkdata *)task->tk_calldata; + struct nfs_unlinkdata *data = calldata; struct dentry *dir = data->dir; struct rpc_message msg = { .rpc_cred = data->cred, @@ -147,6 +146,7 @@ static void nfs_async_unlink_release(void *calldata) } static const struct rpc_call_ops nfs_unlink_ops = { + .rpc_call_prepare = nfs_async_unlink_init, .rpc_call_done = nfs_async_unlink_done, .rpc_release = nfs_async_unlink_release, }; @@ -160,7 +160,6 @@ nfs_async_unlink(struct dentry *dentry) { struct dentry *dir = dentry->d_parent; struct nfs_unlinkdata *data; - struct rpc_task *task; struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode); int status = -ENOMEM; @@ -181,15 +180,13 @@ nfs_async_unlink(struct dentry *dentry) nfs_deletes = data; data->count = 1; - task = &data->task; - rpc_init_task(task, clnt, RPC_TASK_ASYNC, &nfs_unlink_ops, data); - task->tk_action = nfs_async_unlink_init; + rpc_init_task(&data->task, clnt, RPC_TASK_ASYNC, &nfs_unlink_ops, data); spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_NFSFS_RENAMED; spin_unlock(&dentry->d_lock); - rpc_sleep_on(&nfs_delete_queue, task, NULL, NULL); + rpc_sleep_on(&nfs_delete_queue, &data->task, NULL, NULL); status = 0; out: return 
status; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 581d8cdc3b86..ac1326fc3e1a 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -112,6 +112,7 @@ struct rpc_task { typedef void (*rpc_action)(struct rpc_task *); struct rpc_call_ops { + void (*rpc_call_prepare)(struct rpc_task *, void *); void (*rpc_call_done)(struct rpc_task *, void *); void (*rpc_release)(void *); }; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 8d6233d3248b..2d74a1672028 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -554,6 +554,14 @@ __rpc_atrun(struct rpc_task *task) rpc_wake_up_task(task); } +/* + * Helper to call task->tk_ops->rpc_call_prepare + */ +static void rpc_prepare_task(struct rpc_task *task) +{ + task->tk_ops->rpc_call_prepare(task, task->tk_calldata); +} + /* * Helper that calls task->tk_ops->rpc_call_done if it exists */ @@ -756,6 +764,8 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons task->tk_client = clnt; task->tk_flags = flags; task->tk_ops = tk_ops; + if (tk_ops->rpc_call_prepare != NULL) + task->tk_action = rpc_prepare_task; task->tk_calldata = calldata; /* Initialize retry counters */ -- cgit v1.2.3-71-gd317 From 44c288732fdbd7e38460d156a40d29590bf93bce Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:06 +0100 Subject: NFSv4: stateful NFSv4 RPC call interface The NFSv4 model requires us to complete all RPC calls that might establish state on the server whether or not the user wants to interrupt it. We may also need to schedule new work (including new RPC calls) in order to cancel the new state. The asynchronous RPC model will allow us to ensure that RPC calls always complete, but in order to allow for "synchronous" RPC, we want to add the ability to wait for completion. The waits are, of course, interruptible. 
Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 1 - include/linux/sunrpc/sched.h | 21 ++++++++++-- net/sunrpc/sched.c | 78 +++++++++++++++++++++++++++++++++----------- 3 files changed, 78 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index a834423942c7..ae2be0744191 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -268,7 +268,6 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, NFS_PROTO(inode)->read_setup(data); data->task.tk_cookie = (unsigned long) inode; - data->task.tk_calldata = data; data->complete = nfs_direct_read_result; lock_kernel(); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index ac1326fc3e1a..94b0afa4ab05 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -42,6 +42,7 @@ struct rpc_task { #ifdef RPC_DEBUG unsigned long tk_magic; /* 0xf00baa */ #endif + atomic_t tk_count; /* Reference count */ struct list_head tk_task; /* global list of tasks */ struct rpc_clnt * tk_client; /* RPC client */ struct rpc_rqst * tk_rqstp; /* RPC request */ @@ -78,7 +79,6 @@ struct rpc_task { struct timer_list tk_timer; /* kernel timer */ unsigned long tk_timeout; /* timeout for rpc_sleep() */ unsigned short tk_flags; /* misc flags */ - unsigned char tk_active : 1;/* Task has been activated */ unsigned char tk_priority : 2;/* Task priority */ unsigned long tk_runstate; /* Task run status */ struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could @@ -136,7 +136,6 @@ struct rpc_call_ops { #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) #define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) -#define RPC_IS_ACTIVATED(t) ((t)->tk_active) #define RPC_DO_CALLBACK(t) ((t)->tk_callback != NULL) #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) #define RPC_TASK_UNINTERRUPTIBLE(t) ((t)->tk_flags & RPC_TASK_NOINTR) @@ -145,6 +144,7 @@ 
struct rpc_call_ops { #define RPC_TASK_QUEUED 1 #define RPC_TASK_WAKEUP 2 #define RPC_TASK_HAS_TIMER 3 +#define RPC_TASK_ACTIVE 4 #define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) #define rpc_set_running(t) (set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) @@ -175,6 +175,15 @@ struct rpc_call_ops { smp_mb__after_clear_bit(); \ } while (0) +#define RPC_IS_ACTIVATED(t) (test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)) +#define rpc_set_active(t) (set_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)) +#define rpc_clear_active(t) \ + do { \ + smp_mb__before_clear_bit(); \ + clear_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate); \ + smp_mb__after_clear_bit(); \ + } while(0) + /* * Task priorities. * Note: if you change these, you must also change @@ -237,6 +246,8 @@ struct rpc_wait_queue { */ struct rpc_task *rpc_new_task(struct rpc_clnt *, int flags, const struct rpc_call_ops *ops, void *data); +struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, + const struct rpc_call_ops *ops, void *data); struct rpc_task *rpc_new_child(struct rpc_clnt *, struct rpc_task *parent); void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *ops, @@ -260,6 +271,7 @@ void * rpc_malloc(struct rpc_task *, size_t); int rpciod_up(void); void rpciod_down(void); void rpciod_wake_up(void); +int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *)); #ifdef RPC_DEBUG void rpc_show_tasks(void); #endif @@ -272,6 +284,11 @@ static inline void rpc_exit(struct rpc_task *task, int status) task->tk_action = rpc_exit_task; } +static inline int rpc_wait_for_completion_task(struct rpc_task *task) +{ + return __rpc_wait_for_completion_task(task, NULL); +} + #ifdef RPC_DEBUG static inline const char * rpc_qname(struct rpc_wait_queue *q) { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 2d74a1672028..82d158dad16d 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -264,6 +264,35 @@ void rpc_init_wait_queue(struct 
rpc_wait_queue *queue, const char *qname) } EXPORT_SYMBOL(rpc_init_wait_queue); +static int rpc_wait_bit_interruptible(void *word) +{ + if (signal_pending(current)) + return -ERESTARTSYS; + schedule(); + return 0; +} + +/* + * Mark an RPC call as having completed by clearing the 'active' bit + */ +static inline void rpc_mark_complete_task(struct rpc_task *task) +{ + rpc_clear_active(task); + wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE); +} + +/* + * Allow callers to wait for completion of an RPC call + */ +int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) +{ + if (action == NULL) + action = rpc_wait_bit_interruptible; + return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, + action, TASK_INTERRUPTIBLE); +} +EXPORT_SYMBOL(__rpc_wait_for_completion_task); + /* * Make an RPC task runnable. * @@ -299,10 +328,7 @@ static void rpc_make_runnable(struct rpc_task *task) static inline void rpc_schedule_run(struct rpc_task *task) { - /* Don't run a child twice! */ - if (RPC_IS_ACTIVATED(task)) - return; - task->tk_active = 1; + rpc_set_active(task); rpc_make_runnable(task); } @@ -324,8 +350,7 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, } /* Mark the task as being activated if so needed */ - if (!RPC_IS_ACTIVATED(task)) - task->tk_active = 1; + rpc_set_active(task); __rpc_add_wait_queue(q, task); @@ -580,14 +605,6 @@ void rpc_exit_task(struct rpc_task *task) } EXPORT_SYMBOL(rpc_exit_task); -static int rpc_wait_bit_interruptible(void *word) -{ - if (signal_pending(current)) - return -ERESTARTSYS; - schedule(); - return 0; -} - /* * This is the RPC `scheduler' (or rather, the finite state machine). 
*/ @@ -680,6 +697,8 @@ static int __rpc_execute(struct rpc_task *task) dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status); status = task->tk_status; + /* Wake up anyone who is waiting for task completion */ + rpc_mark_complete_task(task); /* Release all resources associated with the task */ rpc_release_task(task); return status; @@ -697,9 +716,7 @@ static int __rpc_execute(struct rpc_task *task) int rpc_execute(struct rpc_task *task) { - BUG_ON(task->tk_active); - - task->tk_active = 1; + rpc_set_active(task); rpc_set_running(task); return __rpc_execute(task); } @@ -761,6 +778,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons init_timer(&task->tk_timer); task->tk_timer.data = (unsigned long) task; task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer; + atomic_set(&task->tk_count, 1); task->tk_client = clnt; task->tk_flags = flags; task->tk_ops = tk_ops; @@ -848,11 +866,13 @@ void rpc_release_task(struct rpc_task *task) { const struct rpc_call_ops *tk_ops = task->tk_ops; void *calldata = task->tk_calldata; - dprintk("RPC: %4d release task\n", task->tk_pid); #ifdef RPC_DEBUG BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID); #endif + if (!atomic_dec_and_test(&task->tk_count)) + return; + dprintk("RPC: %4d release task\n", task->tk_pid); /* Remove from global task list */ spin_lock(&rpc_sched_lock); @@ -860,7 +880,6 @@ void rpc_release_task(struct rpc_task *task) spin_unlock(&rpc_sched_lock); BUG_ON (RPC_IS_QUEUED(task)); - task->tk_active = 0; /* Synchronously delete any running timer */ rpc_delete_timer(task); @@ -885,6 +904,27 @@ void rpc_release_task(struct rpc_task *task) tk_ops->rpc_release(calldata); } +/** + * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it + * @clnt - pointer to RPC client + * @flags - RPC flags + * @ops - RPC call ops + * @data - user call data + */ +struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, + const struct rpc_call_ops *ops, + void *data) 
+{ + struct rpc_task *task; + task = rpc_new_task(clnt, flags, ops, data); + if (task == NULL) + return ERR_PTR(-ENOMEM); + atomic_inc(&task->tk_count); + rpc_execute(task); + return task; +} +EXPORT_SYMBOL(rpc_run_task); + /** * rpc_find_parent - find the parent of a child task. * @child: child task -- cgit v1.2.3-71-gd317 From cdd4e68b5f0ed12c64b3e2be83655d2a47588a74 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:12 +0100 Subject: NFSv4: Make open_confirm() asynchronous too Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 101 +++++++++++++++++++++++++++++++++++++----------- fs/nfs/nfs4xdr.c | 7 +--- include/linux/nfs_xdr.h | 2 +- 3 files changed, 81 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index aed8701c1a36..8154f2579469 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -57,7 +57,8 @@ #define NFS4_POLL_RETRY_MIN (1*HZ) #define NFS4_POLL_RETRY_MAX (15*HZ) -static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid); +struct nfs4_opendata; +static int _nfs4_proc_open_confirm(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); @@ -198,6 +199,8 @@ struct nfs4_opendata { atomic_t count; struct nfs_openargs o_arg; struct nfs_openres o_res; + struct nfs_open_confirmargs c_arg; + struct nfs_open_confirmres c_res; struct nfs_fattr f_attr; struct nfs_fattr dir_attr; struct dentry *dentry; @@ -249,6 +252,9 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.u.attrs = &p->attrs; memcpy(&p->attrs, attrs, sizeof(p->attrs)); } + p->c_arg.fh = &p->o_res.fh; + p->c_arg.stateid = &p->o_res.stateid; + p->c_arg.seqid = 
p->o_arg.seqid; return p; err_free: kfree(p); @@ -433,8 +439,7 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state if (status != 0) goto out_free; if(opendata->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) { - status = _nfs4_proc_open_confirm(server->client, NFS_FH(inode), - sp, &opendata->o_res.stateid, opendata->o_arg.seqid); + status = _nfs4_proc_open_confirm(opendata); if (status != 0) goto out_free; } @@ -472,28 +477,79 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) return err; } -static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid) +static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata) { - struct nfs_open_confirmargs arg = { - .fh = fh, - .seqid = seqid, - .stateid = *stateid, - }; - struct nfs_open_confirmres res; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], - .rpc_argp = &arg, - .rpc_resp = &res, - .rpc_cred = sp->so_cred, + struct nfs4_opendata *data = calldata; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], + .rpc_argp = &data->c_arg, + .rpc_resp = &data->c_res, + .rpc_cred = data->owner->so_cred, }; + rpc_call_setup(task, &msg, 0); +} + +static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_opendata *data = calldata; + + data->rpc_status = task->tk_status; + if (RPC_ASSASSINATED(task)) + return; + if (data->rpc_status == 0) + memcpy(data->o_res.stateid.data, data->c_res.stateid.data, + sizeof(data->o_res.stateid.data)); + nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid); + nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status); +} + +static void nfs4_open_confirm_release(void *calldata) +{ + struct nfs4_opendata *data = calldata; + struct nfs4_state *state = NULL; + + /* If this request hasn't been cancelled, do nothing */ + 
if (data->cancelled == 0) + goto out_free; + /* In case of error, no cleanup! */ + if (data->rpc_status != 0) + goto out_free; + nfs_confirm_seqid(&data->owner->so_seqid, 0); + state = nfs4_opendata_to_nfs4_state(data); + if (state != NULL) + nfs4_close_state(state, data->o_arg.open_flags); +out_free: + nfs4_opendata_free(data); +} + +static const struct rpc_call_ops nfs4_open_confirm_ops = { + .rpc_call_prepare = nfs4_open_confirm_prepare, + .rpc_call_done = nfs4_open_confirm_done, + .rpc_release = nfs4_open_confirm_release, +}; + +/* + * Note: On error, nfs4_proc_open_confirm will free the struct nfs4_opendata + */ +static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) +{ + struct nfs_server *server = NFS_SERVER(data->dir->d_inode); + struct rpc_task *task; int status; - status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR); - /* Confirm the sequence as being established */ - nfs_confirm_seqid(&sp->so_seqid, status); - nfs_increment_open_seqid(status, seqid); - if (status >= 0) - memcpy(stateid, &res.stateid, sizeof(*stateid)); + atomic_inc(&data->count); + task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data); + if (IS_ERR(task)) { + nfs4_opendata_free(data); + return PTR_ERR(task); + } + status = nfs4_wait_for_completion_rpc_task(task); + if (status != 0) { + data->cancelled = 1; + smp_wmb(); + } else + status = data->rpc_status; + rpc_release_task(task); return status; } @@ -602,8 +658,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) } else nfs_refresh_inode(dir, o_res->dir_attr); if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { - status = _nfs4_proc_open_confirm(server->client, &o_res->fh, - data->owner, &o_res->stateid, o_arg->seqid); + status = _nfs4_proc_open_confirm(data); if (status != 0) return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index db2bcf722f91..2ba9906f2a51 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -964,9 +964,9 @@ static int encode_open_confirm(struct xdr_stream *xdr, 
const struct nfs_open_con { uint32_t *p; - RESERVE_SPACE(8+sizeof(arg->stateid.data)); + RESERVE_SPACE(8+sizeof(arg->stateid->data)); WRITE32(OP_OPEN_CONFIRM); - WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); WRITE32(arg->seqid->sequence->counter); return 0; @@ -1535,9 +1535,6 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct n }; int status; - status = nfs_wait_on_sequence(args->seqid, req->rq_task); - if (status != 0) - goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 40718669b9c8..518cfa5cd024 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -137,7 +137,7 @@ struct nfs_openres { */ struct nfs_open_confirmargs { const struct nfs_fh * fh; - nfs4_stateid stateid; + nfs4_stateid * stateid; struct nfs_seqid * seqid; }; -- cgit v1.2.3-71-gd317 From 911d1aaf26fc4d771174d98fcab710a44e2a5fa0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:16 +0100 Subject: NFSv4: locking XDR cleanup Get rid of some unnecessary intermediate structures Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 192 ++++++++++++++++++++---------------------------- fs/nfs/nfs4xdr.c | 131 +++++++++++++++++++-------------- include/linux/nfs_xdr.h | 52 ++++++------- 3 files changed, 179 insertions(+), 196 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3ecb7da220f5..857125705b6f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2935,43 +2935,17 @@ nfs4_set_lock_task_retry(unsigned long timeout) return timeout; } -static inline int -nfs4_lck_type(int cmd, struct file_lock *request) -{ - /* set lock type */ - switch (request->fl_type) { - case F_RDLCK: - return IS_SETLKW(cmd) ? NFS4_READW_LT : NFS4_READ_LT; - case F_WRLCK: - return IS_SETLKW(cmd) ? 
NFS4_WRITEW_LT : NFS4_WRITE_LT; - case F_UNLCK: - return NFS4_WRITE_LT; - } - BUG(); - return 0; -} - -static inline uint64_t -nfs4_lck_length(struct file_lock *request) -{ - if (request->fl_end == OFFSET_MAX) - return ~(uint64_t)0; - return request->fl_end - request->fl_start + 1; -} - static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); struct nfs4_client *clp = server->nfs4_state; - struct nfs_lockargs arg = { + struct nfs_lockt_args arg = { .fh = NFS_FH(inode), - .type = nfs4_lck_type(cmd, request), - .offset = request->fl_start, - .length = nfs4_lck_length(request), + .fl = request, }; - struct nfs_lockres res = { - .server = server, + struct nfs_lockt_res res = { + .denied = request, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKT], @@ -2979,36 +2953,23 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock .rpc_resp = &res, .rpc_cred = state->owner->so_cred, }; - struct nfs_lowner nlo; struct nfs4_lock_state *lsp; int status; down_read(&clp->cl_sem); - nlo.clientid = clp->cl_clientid; + arg.lock_owner.clientid = clp->cl_clientid; status = nfs4_set_lock_state(state, request); if (status != 0) goto out; lsp = request->fl_u.nfs4_fl.owner; - nlo.id = lsp->ls_id; - arg.u.lockt = &nlo; + arg.lock_owner.id = lsp->ls_id; status = rpc_call_sync(server->client, &msg, 0); - if (!status) { - request->fl_type = F_UNLCK; - } else if (status == -NFS4ERR_DENIED) { - int64_t len, start, end; - start = res.u.denied.offset; - len = res.u.denied.length; - end = start + len - 1; - if (end < 0 || len == 0) - request->fl_end = OFFSET_MAX; - else - request->fl_end = (loff_t)end; - request->fl_start = (loff_t)start; - request->fl_type = F_WRLCK; - if (res.u.denied.type & 1) - request->fl_type = F_RDLCK; - request->fl_pid = 0; - status = 0; + switch (status) { + case 0: + request->fl_type = F_UNLCK; + 
break; + case -NFS4ERR_DENIED: + status = 0; } out: up_read(&clp->cl_sem); @@ -3048,17 +3009,42 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) } struct nfs4_unlockdata { - struct nfs_lockargs arg; - struct nfs_locku_opargs luargs; - struct nfs_lockres res; + struct nfs_locku_args arg; + struct nfs_locku_res res; struct nfs4_lock_state *lsp; struct nfs_open_context *ctx; + struct file_lock fl; + const struct nfs_server *server; }; +static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, + struct nfs_open_context *ctx, + struct nfs4_lock_state *lsp, + struct nfs_seqid *seqid) +{ + struct nfs4_unlockdata *p; + struct inode *inode = lsp->ls_state->inode; + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) + return NULL; + p->arg.fh = NFS_FH(inode); + p->arg.fl = &p->fl; + p->arg.seqid = seqid; + p->arg.stateid = &lsp->ls_stateid; + p->lsp = lsp; + atomic_inc(&lsp->ls_count); + /* Ensure we don't close file until we're done freeing locks! */ + p->ctx = get_nfs_open_context(ctx); + memcpy(&p->fl, fl, sizeof(p->fl)); + p->server = NFS_SERVER(inode); + return p; +} + static void nfs4_locku_release_calldata(void *data) { struct nfs4_unlockdata *calldata = data; - nfs_free_seqid(calldata->luargs.seqid); + nfs_free_seqid(calldata->arg.seqid); nfs4_put_lock_state(calldata->lsp); put_nfs_open_context(calldata->ctx); kfree(calldata); @@ -3070,19 +3056,19 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) if (RPC_ASSASSINATED(task)) return; - nfs_increment_lock_seqid(task->tk_status, calldata->luargs.seqid); + nfs_increment_lock_seqid(task->tk_status, calldata->arg.seqid); switch (task->tk_status) { case 0: memcpy(calldata->lsp->ls_stateid.data, - calldata->res.u.stateid.data, + calldata->res.stateid.data, sizeof(calldata->lsp->ls_stateid.data)); break; case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - nfs4_schedule_state_recovery(calldata->res.server->nfs4_state); + 
nfs4_schedule_state_recovery(calldata->server->nfs4_state); break; default: - if (nfs4_async_handle_error(task, calldata->res.server) == -EAGAIN) { + if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) { rpc_restart_call(task); } } @@ -3097,10 +3083,8 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) .rpc_resp = &calldata->res, .rpc_cred = calldata->lsp->ls_state->owner->so_cred, }; - int status; - status = nfs_wait_on_sequence(calldata->luargs.seqid, task); - if (status != 0) + if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) return; if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) { /* Note: exit _without_ running nfs4_locku_done */ @@ -3121,43 +3105,32 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * struct nfs4_unlockdata *calldata; struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); + struct nfs_seqid *seqid; struct nfs4_lock_state *lsp; struct rpc_task *task; int status = 0; /* Is this a delegated lock? */ if (test_bit(NFS_DELEGATED_STATE, &state->flags)) - goto out; + goto out_unlock; + /* Is this open_owner holding any locks on the server? */ + if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) + goto out_unlock; status = nfs4_set_lock_state(state, request); if (status != 0) - goto out; + goto out_unlock; lsp = request->fl_u.nfs4_fl.owner; - /* We might have lost the locks! 
*/ - if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) - goto out; status = -ENOMEM; - calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); + seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (seqid == NULL) + goto out_unlock; + calldata = nfs4_alloc_unlockdata(request, request->fl_file->private_data, + lsp, seqid); if (calldata == NULL) - goto out; - calldata->luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid); - if (calldata->luargs.seqid == NULL) { - kfree(calldata); - goto out; - } - calldata->luargs.stateid = &lsp->ls_stateid; - calldata->arg.fh = NFS_FH(inode); - calldata->arg.type = nfs4_lck_type(cmd, request); - calldata->arg.offset = request->fl_start; - calldata->arg.length = nfs4_lck_length(request); - calldata->arg.u.locku = &calldata->luargs; - calldata->res.server = server; - calldata->lsp = lsp; - atomic_inc(&lsp->ls_count); - - /* Ensure we don't close file until we're done freeing locks! */ - calldata->ctx = get_nfs_open_context((struct nfs_open_context*)request->fl_file->private_data); - + goto out_free_seqid; + /* Unlock _before_ we do the RPC call */ + do_vfs_lock(request->fl_file, request); task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_locku_ops, calldata); if (!IS_ERR(task)) { status = nfs4_wait_for_completion_rpc_task(task); @@ -3166,7 +3139,10 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * status = PTR_ERR(task); nfs4_locku_release_calldata(calldata); } -out: + return status; +out_free_seqid: + nfs_free_seqid(seqid); +out_unlock: do_vfs_lock(request->fl_file, request); return status; } @@ -3176,27 +3152,19 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; - struct nfs_lock_opargs largs = { + struct nfs_lock_args arg = { + .fh = NFS_FH(inode), + .fl = request, .lock_stateid = &lsp->ls_stateid, .open_stateid = &state->stateid, 
.lock_owner = { .clientid = server->nfs4_state->cl_clientid, .id = lsp->ls_id, }, + .block = (IS_SETLKW(cmd)) ? 1 : 0, .reclaim = reclaim, }; - struct nfs_lockargs arg = { - .fh = NFS_FH(inode), - .type = nfs4_lck_type(cmd, request), - .offset = request->fl_start, - .length = nfs4_lck_length(request), - .u = { - .lock = &largs, - }, - }; - struct nfs_lockres res = { - .server = server, - }; + struct nfs_lock_res res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK], .rpc_argp = &arg, @@ -3205,37 +3173,37 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r }; int status = -ENOMEM; - largs.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); - if (largs.lock_seqid == NULL) + arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (arg.lock_seqid == NULL) return -ENOMEM; if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) { struct nfs4_state_owner *owner = state->owner; - largs.open_seqid = nfs_alloc_seqid(&owner->so_seqid); - if (largs.open_seqid == NULL) + arg.open_seqid = nfs_alloc_seqid(&owner->so_seqid); + if (arg.open_seqid == NULL) goto out; - largs.new_lock_owner = 1; + arg.new_lock_owner = 1; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); /* increment open seqid on success, and seqid mutating errors */ - if (largs.new_lock_owner != 0) { - nfs_increment_open_seqid(status, largs.open_seqid); + if (arg.new_lock_owner != 0) { + nfs_increment_open_seqid(status, arg.open_seqid); if (status == 0) nfs_confirm_seqid(&lsp->ls_seqid, 0); } - nfs_free_seqid(largs.open_seqid); + nfs_free_seqid(arg.open_seqid); } else status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); /* increment lock seqid on success, and seqid mutating errors*/ - nfs_increment_lock_seqid(status, largs.lock_seqid); + nfs_increment_lock_seqid(status, arg.lock_seqid); /* save the returned stateid. 
*/ if (status == 0) { - memcpy(lsp->ls_stateid.data, res.u.stateid.data, + memcpy(lsp->ls_stateid.data, res.stateid.data, sizeof(lsp->ls_stateid.data)); lsp->ls_flags |= NFS_LOCK_INITIALIZED; } else if (status == -NFS4ERR_DENIED) status = -EAGAIN; out: - nfs_free_seqid(largs.lock_seqid); + nfs_free_seqid(arg.lock_seqid); return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 3100172822c9..a7b5de899c6d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -742,69 +742,80 @@ static int encode_link(struct xdr_stream *xdr, const struct qstr *name) return 0; } +static inline int nfs4_lock_type(struct file_lock *fl, int block) +{ + if ((fl->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) == F_RDLCK) + return block ? NFS4_READW_LT : NFS4_READ_LT; + return block ? NFS4_WRITEW_LT : NFS4_WRITE_LT; +} + +static inline uint64_t nfs4_lock_length(struct file_lock *fl) +{ + if (fl->fl_end == OFFSET_MAX) + return ~(uint64_t)0; + return fl->fl_end - fl->fl_start + 1; +} + /* * opcode,type,reclaim,offset,length,new_lock_owner = 32 * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 */ -static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg) +static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args) { uint32_t *p; - struct nfs_lock_opargs *opargs = arg->u.lock; RESERVE_SPACE(32); WRITE32(OP_LOCK); - WRITE32(arg->type); - WRITE32(opargs->reclaim); - WRITE64(arg->offset); - WRITE64(arg->length); - WRITE32(opargs->new_lock_owner); - if (opargs->new_lock_owner){ + WRITE32(nfs4_lock_type(args->fl, args->block)); + WRITE32(args->reclaim); + WRITE64(args->fl->fl_start); + WRITE64(nfs4_lock_length(args->fl)); + WRITE32(args->new_lock_owner); + if (args->new_lock_owner){ RESERVE_SPACE(40); - WRITE32(opargs->open_seqid->sequence->counter); - WRITEMEM(opargs->open_stateid->data, sizeof(opargs->open_stateid->data)); - WRITE32(opargs->lock_seqid->sequence->counter); - WRITE64(opargs->lock_owner.clientid); + 
WRITE32(args->open_seqid->sequence->counter); + WRITEMEM(args->open_stateid->data, sizeof(args->open_stateid->data)); + WRITE32(args->lock_seqid->sequence->counter); + WRITE64(args->lock_owner.clientid); WRITE32(4); - WRITE32(opargs->lock_owner.id); + WRITE32(args->lock_owner.id); } else { RESERVE_SPACE(20); - WRITEMEM(opargs->lock_stateid->data, sizeof(opargs->lock_stateid->data)); - WRITE32(opargs->lock_seqid->sequence->counter); + WRITEMEM(args->lock_stateid->data, sizeof(args->lock_stateid->data)); + WRITE32(args->lock_seqid->sequence->counter); } return 0; } -static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockargs *arg) +static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args) { uint32_t *p; - struct nfs_lowner *opargs = arg->u.lockt; RESERVE_SPACE(40); WRITE32(OP_LOCKT); - WRITE32(arg->type); - WRITE64(arg->offset); - WRITE64(arg->length); - WRITE64(opargs->clientid); + WRITE32(nfs4_lock_type(args->fl, 0)); + WRITE64(args->fl->fl_start); + WRITE64(nfs4_lock_length(args->fl)); + WRITE64(args->lock_owner.clientid); WRITE32(4); - WRITE32(opargs->id); + WRITE32(args->lock_owner.id); return 0; } -static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg) +static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args) { uint32_t *p; - struct nfs_locku_opargs *opargs = arg->u.locku; RESERVE_SPACE(44); WRITE32(OP_LOCKU); - WRITE32(arg->type); - WRITE32(opargs->seqid->sequence->counter); - WRITEMEM(opargs->stateid->data, sizeof(opargs->stateid->data)); - WRITE64(arg->offset); - WRITE64(arg->length); + WRITE32(nfs4_lock_type(args->fl, 0)); + WRITE32(args->seqid->sequence->counter); + WRITEMEM(args->stateid->data, sizeof(args->stateid->data)); + WRITE64(args->fl->fl_start); + WRITE64(nfs4_lock_length(args->fl)); return 0; } @@ -1596,21 +1607,20 @@ out: /* * Encode a LOCK request */ -static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +static int 
nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lock_args *args) { struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, }; - struct nfs_lock_opargs *opargs = args->u.lock; int status; - status = nfs_wait_on_sequence(opargs->lock_seqid, req->rq_task); + status = nfs_wait_on_sequence(args->lock_seqid, req->rq_task); if (status != 0) goto out; /* Do we need to do an open_to_lock_owner? */ - if (opargs->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED) - opargs->new_lock_owner = 0; + if (args->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED) + args->new_lock_owner = 0; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1624,7 +1634,7 @@ out: /* * Encode a LOCKT request */ -static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockt_args *args) { struct xdr_stream xdr; struct compound_hdr hdr = { @@ -1645,7 +1655,7 @@ out: /* * Encode a LOCKU request */ -static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_locku_args *args) { struct xdr_stream xdr; struct compound_hdr hdr = { @@ -2949,55 +2959,64 @@ static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) /* * We create the owner, so we know a proper owner.id length is 4. 
*/ -static int decode_lock_denied (struct xdr_stream *xdr, struct nfs_lock_denied *denied) +static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) { + uint64_t offset, length, clientid; uint32_t *p; - uint32_t namelen; + uint32_t namelen, type; READ_BUF(32); - READ64(denied->offset); - READ64(denied->length); - READ32(denied->type); - READ64(denied->owner.clientid); + READ64(offset); + READ64(length); + READ32(type); + if (fl != NULL) { + fl->fl_start = (loff_t)offset; + fl->fl_end = fl->fl_start + (loff_t)length - 1; + if (length == ~(uint64_t)0) + fl->fl_end = OFFSET_MAX; + fl->fl_type = F_WRLCK; + if (type & 1) + fl->fl_type = F_RDLCK; + fl->fl_pid = 0; + } + READ64(clientid); READ32(namelen); READ_BUF(namelen); - if (namelen == 4) - READ32(denied->owner.id); return -NFS4ERR_DENIED; } -static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res) +static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res) { uint32_t *p; int status; status = decode_op_hdr(xdr, OP_LOCK); if (status == 0) { - READ_BUF(sizeof(res->u.stateid.data)); - COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data)); + READ_BUF(sizeof(res->stateid.data)); + COPYMEM(res->stateid.data, sizeof(res->stateid.data)); } else if (status == -NFS4ERR_DENIED) - return decode_lock_denied(xdr, &res->u.denied); + return decode_lock_denied(xdr, NULL); return status; } -static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockres *res) +static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockt_res *res) { int status; status = decode_op_hdr(xdr, OP_LOCKT); if (status == -NFS4ERR_DENIED) - return decode_lock_denied(xdr, &res->u.denied); + return decode_lock_denied(xdr, res->denied); return status; } -static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res) +static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res) { uint32_t *p; int status; status = decode_op_hdr(xdr, OP_LOCKU); if (status == 0) { - 
READ_BUF(sizeof(res->u.stateid.data)); - COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data)); + READ_BUF(sizeof(res->stateid.data)); + COPYMEM(res->stateid.data, sizeof(res->stateid.data)); } return status; } @@ -3861,7 +3880,7 @@ out: /* * Decode LOCK response */ -static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lock_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -3882,7 +3901,7 @@ out: /* * Decode LOCKT response */ -static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockt_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -3903,7 +3922,7 @@ out: /* * Decode LOCKU response */ -static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_locku_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 518cfa5cd024..b8b0eed98ec9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -165,50 +165,46 @@ struct nfs_closeres { * * Arguments to the lock,lockt, and locku call. 
* */ struct nfs_lowner { - __u64 clientid; - u32 id; + __u64 clientid; + u32 id; }; -struct nfs_lock_opargs { +struct nfs_lock_args { + struct nfs_fh * fh; + struct file_lock * fl; struct nfs_seqid * lock_seqid; nfs4_stateid * lock_stateid; struct nfs_seqid * open_seqid; nfs4_stateid * open_stateid; - struct nfs_lowner lock_owner; - __u32 reclaim; - __u32 new_lock_owner; + struct nfs_lowner lock_owner; + unsigned char block : 1; + unsigned char reclaim : 1; + unsigned char new_lock_owner : 1; +}; + +struct nfs_lock_res { + nfs4_stateid stateid; }; -struct nfs_locku_opargs { +struct nfs_locku_args { + struct nfs_fh * fh; + struct file_lock * fl; struct nfs_seqid * seqid; nfs4_stateid * stateid; }; -struct nfs_lockargs { - struct nfs_fh * fh; - __u32 type; - __u64 offset; - __u64 length; - union { - struct nfs_lock_opargs *lock; /* LOCK */ - struct nfs_lowner *lockt; /* LOCKT */ - struct nfs_locku_opargs *locku; /* LOCKU */ - } u; +struct nfs_locku_res { + nfs4_stateid stateid; }; -struct nfs_lock_denied { - __u64 offset; - __u64 length; - __u32 type; - struct nfs_lowner owner; +struct nfs_lockt_args { + struct nfs_fh * fh; + struct file_lock * fl; + struct nfs_lowner lock_owner; }; -struct nfs_lockres { - union { - nfs4_stateid stateid;/* LOCK success, LOCKU */ - struct nfs_lock_denied denied; /* LOCK failed, LOCKT success */ - } u; - const struct nfs_server * server; +struct nfs_lockt_res { + struct file_lock * denied; /* LOCK, LOCKT failed */ }; struct nfs4_delegreturnargs { -- cgit v1.2.3-71-gd317 From a911fd9a6046200e439b4af172e8379c0942eec3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 30 Nov 2005 18:08:59 -0500 Subject: NFS: simplify inlined bit ops in nfs_page.h Minor cleanup: inlined bit ops in nfs_page.h can be simpler. Test plan: Write-intensive workload against a server that requires COMMITs. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index da2e077b65e2..66e2ed658527 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -79,9 +79,7 @@ extern void nfs_clear_page_writeback(struct nfs_page *req); static inline int nfs_lock_request_dontget(struct nfs_page *req) { - if (test_and_set_bit(PG_BUSY, &req->wb_flags)) - return 0; - return 1; + return !test_and_set_bit(PG_BUSY, &req->wb_flags); } /* @@ -125,9 +123,7 @@ nfs_list_remove_request(struct nfs_page *req) static inline int nfs_defer_commit(struct nfs_page *req) { - if (test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) - return 0; - return 1; + return !test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags); } static inline void @@ -141,9 +137,7 @@ nfs_clear_commit(struct nfs_page *req) static inline int nfs_defer_reschedule(struct nfs_page *req) { - if (test_and_set_bit(PG_NEED_RESCHED, &req->wb_flags)) - return 0; - return 1; + return !test_and_set_bit(PG_NEED_RESCHED, &req->wb_flags); } static inline void -- cgit v1.2.3-71-gd317 From 40859d7ee64ed6bfad8a4e93f9bb5c1074afadff Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 30 Nov 2005 18:09:02 -0500 Subject: NFS: support large reads and writes on the wire Most NFS server implementations allow up to 64KB reads and writes on the wire. The Solaris NFS server allows up to a megabyte, for instance. Now the Linux NFS client supports transfer sizes up to 1MB, too. This will help reduce protocol and context switch overhead on read/write intensive NFS workloads, and support larger atomic read and write operations on servers that support them. Test-plan: Connectathon and iozone on mount point with wsize=rsize>32768 over TCP. Tests with NFS over UDP to verify the maximum RPC payload size cap. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 5 +++-- fs/nfs/inode.c | 25 ++++++++++--------------- fs/nfs/nfsroot.c | 4 ++-- fs/nfs/read.c | 6 +++--- fs/nfs/write.c | 29 ++++++++++++++++++++++------- include/linux/nfs_fs.h | 41 +++++++++++++++++++++++++++++++++++------ include/linux/nfs_xdr.h | 29 ++++++++++++++++------------- include/linux/sunrpc/xdr.h | 5 ----- 8 files changed, 91 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f69d95aa78b2..fd7ac5e841c1 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -154,6 +154,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int struct list_head *list; struct nfs_direct_req *dreq; unsigned int reads = 0; + unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); if (!dreq) @@ -167,7 +168,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int list = &dreq->list; for(;;) { - struct nfs_read_data *data = nfs_readdata_alloc(); + struct nfs_read_data *data = nfs_readdata_alloc(rpages); if (unlikely(!data)) { while (!list_empty(list)) { @@ -431,7 +432,7 @@ static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_writeverf first_verf; struct nfs_write_data *wdata; - wdata = nfs_writedata_alloc(); + wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); if (!wdata) return -ENOMEM; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 4e6558df54b8..acde2c5725bf 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -221,10 +221,10 @@ nfs_calc_block_size(u64 tsize) static inline unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) { - if (bsize < 1024) - bsize = NFS_DEF_FILE_IO_BUFFER_SIZE; - else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE) - bsize = NFS_MAX_FILE_IO_BUFFER_SIZE; + if (bsize < NFS_MIN_FILE_IO_SIZE) + bsize = NFS_DEF_FILE_IO_SIZE; + else if (bsize >= 
NFS_MAX_FILE_IO_SIZE) + bsize = NFS_MAX_FILE_IO_SIZE; return nfs_block_bits(bsize, nrbitsp); } @@ -307,20 +307,15 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); if (server->rsize > max_rpc_payload) server->rsize = max_rpc_payload; - if (server->wsize > max_rpc_payload) - server->wsize = max_rpc_payload; - + if (server->rsize > NFS_MAX_FILE_IO_SIZE) + server->rsize = NFS_MAX_FILE_IO_SIZE; server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->rpages > NFS_READ_MAXIOV) { - server->rpages = NFS_READ_MAXIOV; - server->rsize = server->rpages << PAGE_CACHE_SHIFT; - } + if (server->wsize > max_rpc_payload) + server->wsize = max_rpc_payload; + if (server->wsize > NFS_MAX_FILE_IO_SIZE) + server->wsize = NFS_MAX_FILE_IO_SIZE; server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (server->wpages > NFS_WRITE_MAXIOV) { - server->wpages = NFS_WRITE_MAXIOV; - server->wsize = server->wpages << PAGE_CACHE_SHIFT; - } if (sb->s_blocksize == 0) sb->s_blocksize = nfs_block_bits(server->wsize, diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 1b272a135a31..985cc53b8dd5 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -296,8 +296,8 @@ static int __init root_nfs_name(char *name) nfs_port = -1; nfs_data.version = NFS_MOUNT_VERSION; nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ - nfs_data.rsize = NFS_DEF_FILE_IO_BUFFER_SIZE; - nfs_data.wsize = NFS_DEF_FILE_IO_BUFFER_SIZE; + nfs_data.rsize = NFS_DEF_FILE_IO_SIZE; + nfs_data.wsize = NFS_DEF_FILE_IO_SIZE; nfs_data.acregmin = 3; nfs_data.acregmax = 60; nfs_data.acdirmin = 30; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 21486242c3d3..05eb43fadf8e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -83,7 +83,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, int result; struct nfs_read_data *rdata; - rdata = 
nfs_readdata_alloc(); + rdata = nfs_readdata_alloc(1); if (!rdata) return -ENOMEM; @@ -283,7 +283,7 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode) nbytes = req->wb_bytes; for(;;) { - data = nfs_readdata_alloc(); + data = nfs_readdata_alloc(1); if (!data) goto out_bad; INIT_LIST_HEAD(&data->pages); @@ -339,7 +339,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode) if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) return nfs_pagein_multi(head, inode); - data = nfs_readdata_alloc(); + data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages); if (!data) goto out_bad; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 80bc4ea1b824..1ce0c200df16 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -89,18 +89,33 @@ static mempool_t *nfs_commit_mempool; static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); -static inline struct nfs_write_data *nfs_commit_alloc(void) +static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) { struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); + if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + if (pagecount < NFS_PAGEVEC_SIZE) + p->pagevec = &p->page_array[0]; + else { + size_t size = ++pagecount * sizeof(struct page *); + p->pagevec = kmalloc(size, GFP_NOFS); + if (p->pagevec) { + memset(p->pagevec, 0, size); + } else { + mempool_free(p, nfs_commit_mempool); + p = NULL; + } + } } return p; } static inline void nfs_commit_free(struct nfs_write_data *p) { + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); mempool_free(p, nfs_commit_mempool); } @@ -167,7 +182,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, int result, written = 0; struct nfs_write_data *wdata; - wdata = nfs_writedata_alloc(); + wdata = nfs_writedata_alloc(1); if (!wdata) return -ENOMEM; @@ -909,7 +924,7 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how) nbytes = req->wb_bytes; for (;;) { - 
data = nfs_writedata_alloc(); + data = nfs_writedata_alloc(1); if (!data) goto out_bad; list_add(&data->pages, &list); @@ -973,7 +988,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE) return nfs_flush_multi(head, inode, how); - data = nfs_writedata_alloc(); + data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); if (!data) goto out_bad; @@ -1241,12 +1256,12 @@ static void nfs_commit_rpcsetup(struct list_head *head, * Commit dirty pages */ static int -nfs_commit_list(struct list_head *head, int how) +nfs_commit_list(struct inode *inode, struct list_head *head, int how) { struct nfs_write_data *data; struct nfs_page *req; - data = nfs_commit_alloc(); + data = nfs_commit_alloc(NFS_SERVER(inode)->wpages); if (!data) goto out_bad; @@ -1351,7 +1366,7 @@ int nfs_commit_inode(struct inode *inode, int how) res = nfs_scan_commit(inode, &head, 0, 0); spin_unlock(&nfsi->req_lock); if (res) { - error = nfs_commit_list(&head, how); + error = nfs_commit_list(inode, &head, how); if (error < 0) return error; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 4dff705d2ff2..d38010ba6477 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -38,9 +38,6 @@ # define NFS_DEBUG #endif -#define NFS_MAX_FILE_IO_BUFFER_SIZE 32768 -#define NFS_DEF_FILE_IO_BUFFER_SIZE 4096 - /* Default timeout values */ #define NFS_MAX_UDP_TIMEOUT (60*HZ) #define NFS_MAX_TCP_TIMEOUT (600*HZ) @@ -462,18 +459,33 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page) */ extern mempool_t *nfs_wdata_mempool; -static inline struct nfs_write_data *nfs_writedata_alloc(void) +static inline struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) { struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); + if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + if (pagecount < NFS_PAGEVEC_SIZE) + p->pagevec = &p->page_array[0]; + else { + size_t size = 
++pagecount * sizeof(struct page *); + p->pagevec = kmalloc(size, GFP_NOFS); + if (p->pagevec) { + memset(p->pagevec, 0, size); + } else { + mempool_free(p, nfs_wdata_mempool); + p = NULL; + } + } } return p; } static inline void nfs_writedata_free(struct nfs_write_data *p) { + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); mempool_free(p, nfs_wdata_mempool); } @@ -492,16 +504,33 @@ extern void nfs_readdata_release(void *data); */ extern mempool_t *nfs_rdata_mempool; -static inline struct nfs_read_data *nfs_readdata_alloc(void) +static inline struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) { struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); - if (p) + + if (p) { memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + if (pagecount < NFS_PAGEVEC_SIZE) + p->pagevec = &p->page_array[0]; + else { + size_t size = ++pagecount * sizeof(struct page *); + p->pagevec = kmalloc(size, GFP_NOFS); + if (p->pagevec) { + memset(p->pagevec, 0, size); + } else { + mempool_free(p, nfs_rdata_mempool); + p = NULL; + } + } + } return p; } static inline void nfs_readdata_free(struct nfs_read_data *p) { + if (p && (p->pagevec != &p->page_array[0])) + kfree(p->pagevec); mempool_free(p, nfs_rdata_mempool); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b8b0eed98ec9..9f422fd87673 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -4,6 +4,16 @@ #include #include +/* + * To change the maximum rsize and wsize supported by the NFS client, adjust + * NFS_MAX_FILE_IO_SIZE. 64KB is a typical maximum, but some servers can + * support a megabyte or more. The default is left at 4096 bytes, which is + * reasonable for NFS over UDP. + */ +#define NFS_MAX_FILE_IO_SIZE (1048576U) +#define NFS_DEF_FILE_IO_SIZE (4096U) +#define NFS_MIN_FILE_IO_SIZE (1024U) + struct nfs4_fsid { __u64 major; __u64 minor; @@ -215,12 +225,6 @@ struct nfs4_delegreturnargs { /* * Arguments to the read call. 
*/ - -#define NFS_READ_MAXIOV (9U) -#if (NFS_READ_MAXIOV > (MAX_IOVEC -2)) -#error "NFS_READ_MAXIOV is too large" -#endif - struct nfs_readargs { struct nfs_fh * fh; struct nfs_open_context *context; @@ -239,11 +243,6 @@ struct nfs_readres { /* * Arguments to the write call. */ -#define NFS_WRITE_MAXIOV (9U) -#if (NFS_WRITE_MAXIOV > (MAX_IOVEC -2)) -#error "NFS_WRITE_MAXIOV is too large" -#endif - struct nfs_writeargs { struct nfs_fh * fh; struct nfs_open_context *context; @@ -674,6 +673,8 @@ struct nfs4_server_caps_res { struct nfs_page; +#define NFS_PAGEVEC_SIZE (8U) + struct nfs_read_data { int flags; struct rpc_task task; @@ -682,13 +683,14 @@ struct nfs_read_data { struct nfs_fattr fattr; /* fattr storage */ struct list_head pages; /* Coalesced read requests */ struct nfs_page *req; /* multi ops per nfs_page */ - struct page *pagevec[NFS_READ_MAXIOV]; + struct page **pagevec; struct nfs_readargs args; struct nfs_readres res; #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ #endif void (*complete) (struct nfs_read_data *, int); + struct page *page_array[NFS_PAGEVEC_SIZE + 1]; }; struct nfs_write_data { @@ -700,13 +702,14 @@ struct nfs_write_data { struct nfs_writeverf verf; struct list_head pages; /* Coalesced requests we wish to flush */ struct nfs_page *req; /* multi ops per nfs_page */ - struct page *pagevec[NFS_WRITE_MAXIOV]; + struct page **pagevec; struct nfs_writeargs args; /* argument struct */ struct nfs_writeres res; /* result struct */ #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ #endif void (*complete) (struct nfs_write_data *, int); + struct page *page_array[NFS_PAGEVEC_SIZE + 1]; }; struct nfs_access_entry; diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5da968729cf8..5676794ee34f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -134,11 +134,6 @@ xdr_adjust_iovec(struct kvec *iov, u32 *p) return iov->iov_len = ((u8 *) p - (u8 *) iov->iov_base); } 
-/* - * Maximum number of iov's we use. - */ -#define MAX_IOVEC (12) - /* * XDR buffer helper functions */ -- cgit v1.2.3-71-gd317 From 70b9ecbdb9c5fdc731f8780bffd45d9519020c4a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:34 +0100 Subject: NFS: Make stat() return updated mtimes after a write() The SuS states that a call to write() will cause mtime to be updated on the file. In order to satisfy that requirement, we need to flush out any cached writes in nfs_getattr(). Speed things up slightly by not committing the writes. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 ++ fs/nfs/write.c | 23 ++++++++++++----------- include/linux/nfs_fs.h | 1 + 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index acde2c5725bf..2c7f8aac1dec 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -952,6 +952,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; + /* Flush out writes to the server in order to update c/mtime */ + nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT); if (__IS_FLG(inode, MS_NOATIME)) need_atime = 0; else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode)) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1ce0c200df16..9449b6835509 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1377,22 +1377,23 @@ int nfs_commit_inode(struct inode *inode, int how) int nfs_sync_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how) { - int error, - wait; + int nocommit = how & FLUSH_NOCOMMIT; + int wait = how & FLUSH_WAIT; + int error; - wait = how & FLUSH_WAIT; - how &= ~FLUSH_WAIT; + how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT); do { - error = 0; - if (wait) + if (wait) { error = nfs_wait_on_requests(inode, idx_start, npages); - if (error == 0) - error = nfs_flush_inode(inode, idx_start, npages, how); -#if 
defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) - if (error == 0) + if (error != 0) + continue; + } + error = nfs_flush_inode(inode, idx_start, npages, how); + if (error != 0) + continue; + if (!nocommit) error = nfs_commit_inode(inode, how); -#endif } while (error > 0); return error; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d38010ba6477..408d82d3d97c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -62,6 +62,7 @@ #define FLUSH_STABLE 4 /* commit to stable storage */ #define FLUSH_LOWPRI 8 /* low priority background flush */ #define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */ +#define FLUSH_NOCOMMIT 32 /* Don't send the NFSv3/v4 COMMIT */ #ifdef __KERNEL__ -- cgit v1.2.3-71-gd317 From fa178f29c0f8a0dce748181a5351f4a92fd4f455 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:38 +0100 Subject: NFSv4: Ensure DELEGRETURN returns attributes Upon return of a write delegation, the server will almost always bump the change attribute. Ensure that we pick up that change so that we don't invalidate our data cache unnecessarily. 
Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 -- fs/nfs/nfs4proc.c | 17 +++++++++++++---- fs/nfs/nfs4xdr.c | 33 ++++++++++++++++++++++----------- include/linux/nfs_xdr.h | 6 ++++++ 4 files changed, 41 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d2ee09b38cee..66cc720e3927 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -159,8 +159,6 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * { int res = 0; - __nfs_revalidate_inode(NFS_SERVER(inode), inode); - res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid); nfs_free_delegation(delegation); return res; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b3349154994b..984ca3454d04 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2920,11 +2920,12 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred) struct nfs4_delegreturndata { struct nfs4_delegreturnargs args; + struct nfs4_delegreturnres res; struct nfs_fh fh; nfs4_stateid stateid; struct rpc_cred *cred; unsigned long timestamp; - const struct nfs_server *server; + struct nfs_fattr fattr; int rpc_status; }; @@ -2934,8 +2935,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *calldata) struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN], .rpc_argp = &data->args, + .rpc_resp = &data->res, .rpc_cred = data->cred, }; + nfs_fattr_init(data->res.fattr); rpc_call_setup(task, &msg, 0); } @@ -2944,7 +2947,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) struct nfs4_delegreturndata *data = calldata; data->rpc_status = task->tk_status; if (data->rpc_status == 0) - renew_lease(data->server, data->timestamp); + renew_lease(data->res.server, data->timestamp); } static void nfs4_delegreturn_release(void *calldata) @@ -2964,6 +2967,7 @@ const static struct rpc_call_ops nfs4_delegreturn_ops = { static int 
_nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid) { struct nfs4_delegreturndata *data; + struct nfs_server *server = NFS_SERVER(inode); struct rpc_task *task; int status; @@ -2972,11 +2976,13 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co return -ENOMEM; data->args.fhandle = &data->fh; data->args.stateid = &data->stateid; + data->args.bitmask = server->attr_bitmask; nfs_copy_fh(&data->fh, NFS_FH(inode)); memcpy(&data->stateid, stateid, sizeof(data->stateid)); + data->res.fattr = &data->fattr; + data->res.server = server; data->cred = get_rpccred(cred); data->timestamp = jiffies; - data->server = NFS_SERVER(inode); data->rpc_status = 0; task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data); @@ -2985,8 +2991,11 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co return PTR_ERR(task); } status = nfs4_wait_for_completion_rpc_task(task); - if (status == 0) + if (status == 0) { status = data->rpc_status; + if (status == 0) + nfs_post_op_update_inode(inode, &data->fattr); + } rpc_release_task(task); return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5d6bda43dfaa..12be1d682164 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -392,9 +392,11 @@ static int nfs_stat_to_errno(int); decode_getattr_maxsz) #define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - encode_delegreturn_maxsz) + encode_delegreturn_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ - decode_delegreturn_maxsz) + decode_delegreturn_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz) @@ -1983,14 +1985,20 @@ static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, uint32_t *p, const str { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 3, }; int status; 
xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->fhandle)) == 0) - status = encode_delegreturn(&xdr, args->stateid); + status = encode_putfh(&xdr, args->fhandle); + if (status != 0) + goto out; + status = encode_delegreturn(&xdr, args->stateid); + if (status != 0) + goto out; + status = encode_getfattr(&xdr, args->bitmask); +out: return status; } @@ -4184,7 +4192,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, s /* * DELEGRETURN request */ -static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, void *dummy) +static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_delegreturnres *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4192,11 +4200,14 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, void *d xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); status = decode_compound_hdr(&xdr, &hdr); - if (status == 0) { - status = decode_putfh(&xdr); - if (status == 0) - status = decode_delegreturn(&xdr); - } + if (status != 0) + goto out; + status = decode_putfh(&xdr); + if (status != 0) + goto out; + status = decode_delegreturn(&xdr); + decode_getfattr(&xdr, res->fattr, res->server); +out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9f422fd87673..6d6f69ec5675 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -220,6 +220,12 @@ struct nfs_lockt_res { struct nfs4_delegreturnargs { const struct nfs_fh *fhandle; const nfs4_stateid *stateid; + const u32 * bitmask; +}; + +struct nfs4_delegreturnres { + struct nfs_fattr * fattr; + const struct nfs_server *server; }; /* -- cgit v1.2.3-71-gd317 From a72b44222d222749d54b3e370d825094352e389f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:41 +0100 Subject: NFSv4: Allow user to set the port used by the NFSv4 callback channel Signed-off-by: Trond Myklebust --- 
Documentation/kernel-parameters.txt | 4 ++ fs/nfs/Makefile | 1 + fs/nfs/callback.c | 3 +- fs/nfs/callback.h | 1 + fs/nfs/inode.c | 37 ++++++++++++++++++- fs/nfs/sysctl.c | 74 +++++++++++++++++++++++++++++++++++++ include/linux/nfs_fs.h | 11 ++++++ 7 files changed, 128 insertions(+), 3 deletions(-) create mode 100644 fs/nfs/sysctl.c (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 61a56b100c62..309c9cec6e7c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -910,6 +910,10 @@ running once the system is up. nfsroot= [NFS] nfs root filesystem for disk-less boxes. See Documentation/nfsroot.txt. + nfs.callback_tcpport= + [NFS] set the TCP port on which the NFSv4 callback + channel should listen. + nmi_watchdog= [KNL,BUGS=IA-32] Debugging features for SMP kernels no387 [BUGS=IA-32] Tells the kernel to use the 387 maths diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 8b3bb715d177..ec61fd56a1a9 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -13,4 +13,5 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o nfs-$(CONFIG_NFS_DIRECTIO) += direct.o +nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-objs := $(nfs-y) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 30cae3602867..fcd97406a778 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -34,6 +34,7 @@ static struct nfs_callback_data nfs_callback_info; static DECLARE_MUTEX(nfs_callback_sema); static struct svc_program nfs4_callback_program; +unsigned int nfs_callback_set_tcpport; unsigned short nfs_callback_tcpport; /* @@ -98,7 +99,7 @@ int nfs_callback_up(void) if (!serv) goto out_err; /* FIXME: We don't want to register this socket with the portmapper */ - ret = svc_makesock(serv, IPPROTO_TCP, 0); + ret = svc_makesock(serv, IPPROTO_TCP, nfs_callback_set_tcpport); if (ret < 0) goto out_destroy; if 
(!list_empty(&serv->sv_permsocks)) { diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index a0db2d4f9415..b252e7fe53a5 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -65,6 +65,7 @@ extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy); extern int nfs_callback_up(void); extern int nfs_callback_down(void); +extern unsigned int nfs_callback_set_tcpport; extern unsigned short nfs_callback_tcpport; #endif /* __LINUX_FS_NFS_CALLBACK_H */ diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7270b1d73d30..648cb1aef3b1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -40,6 +40,7 @@ #include #include "nfs4_fs.h" +#include "callback.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -2036,6 +2037,21 @@ static struct file_system_type nfs4_fs_type = { .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +static const int nfs_set_port_min = 0; +static const int nfs_set_port_max = 65535; +static int param_set_port(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) + return -EINVAL; + *((int *)kp->arg) = num; + return 0; +} + +module_param_call(callback_tcpport, param_set_port, param_get_int, + &nfs_callback_set_tcpport, 0644); + #define nfs4_init_once(nfsi) \ do { \ INIT_LIST_HEAD(&(nfsi)->open_states); \ @@ -2043,8 +2059,25 @@ static struct file_system_type nfs4_fs_type = { nfsi->delegation_state = 0; \ init_rwsem(&nfsi->rwsem); \ } while(0) -#define register_nfs4fs() register_filesystem(&nfs4_fs_type) -#define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type) + +static inline int register_nfs4fs(void) +{ + int ret; + + ret = nfs_register_sysctl(); + if (ret != 0) + return ret; + ret = register_filesystem(&nfs4_fs_type); + if (ret != 0) + nfs_unregister_sysctl(); + return ret; +} + +static inline void unregister_nfs4fs(void) +{ + unregister_filesystem(&nfs4_fs_type); + 
nfs_unregister_sysctl(); +} #else #define nfs4_init_once(nfsi) \ do { } while (0) diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c new file mode 100644 index 000000000000..fdc64b59a4ee --- /dev/null +++ b/fs/nfs/sysctl.c @@ -0,0 +1,74 @@ +/* + * linux/fs/nfs/sysctl.c + * + * Sysctl interface to NFS parameters + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "callback.h" + +static const int nfs_set_port_min = 0; +static const int nfs_set_port_max = 65535; +static struct ctl_table_header *nfs_callback_sysctl_table; +/* + * Something that isn't CTL_ANY, CTL_NONE or a value that may clash. + * Use the same values as fs/lockd/svc.c + */ +#define CTL_UNNUMBERED -2 + +static ctl_table nfs_cb_sysctls[] = { +#ifdef CONFIG_NFS_V4 + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nfs_callback_tcpport", + .data = &nfs_callback_set_tcpport, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = (int *)&nfs_set_port_min, + .extra2 = (int *)&nfs_set_port_max, + }, +#endif + { .ctl_name = 0 } +}; + +static ctl_table nfs_cb_sysctl_dir[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nfs", + .mode = 0555, + .child = nfs_cb_sysctls, + }, + { .ctl_name = 0 } +}; + +static ctl_table nfs_cb_sysctl_root[] = { + { + .ctl_name = CTL_FS, + .procname = "fs", + .mode = 0555, + .child = nfs_cb_sysctl_dir, + }, + { .ctl_name = 0 } +}; + +int nfs_register_sysctl(void) +{ + nfs_callback_sysctl_table = register_sysctl_table(nfs_cb_sysctl_root, 0); + if (nfs_callback_sysctl_table == NULL) + return -ENOMEM; + return 0; +} + +void nfs_unregister_sysctl(void) +{ + unregister_sysctl_table(nfs_callback_sysctl_table); + nfs_callback_sysctl_table = NULL; +} diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 408d82d3d97c..547d649b274e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -391,6 +391,17 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_ */ 
extern struct inode_operations nfs_symlink_inode_operations; +/* + * linux/fs/nfs/sysctl.c + */ +#ifdef CONFIG_SYSCTL +extern int nfs_register_sysctl(void); +extern void nfs_unregister_sysctl(void); +#else +#define nfs_register_sysctl() do { } while(0) +#define nfs_unregister_sysctl() do { } while(0) +#endif + /* * linux/fs/nfs/unlink.c */ -- cgit v1.2.3-71-gd317 From fb459f45f7c7689714023d41b3dca999bb90a5d3 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 3 Jan 2006 09:55:41 +0100 Subject: SUNRPC: net/sunrpc/xdr.c: remove xdr_decode_string() This patch removes the unused function xdr_decode_string(). Signed-off-by: Adrian Bunk Acked-by: Neil Brown Acked-by: Charles Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 1 - net/sunrpc/xdr.c | 21 --------------------- 2 files changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5676794ee34f..84c35d42d250 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -91,7 +91,6 @@ struct xdr_buf { u32 * xdr_encode_opaque_fixed(u32 *p, const void *ptr, unsigned int len); u32 * xdr_encode_opaque(u32 *p, const void *ptr, unsigned int len); u32 * xdr_encode_string(u32 *p, const char *s); -u32 * xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen); u32 * xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen); u32 * xdr_encode_netobj(u32 *p, const struct xdr_netobj *); u32 * xdr_decode_netobj(u32 *p, struct xdr_netobj *); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index aaf08cdd19f0..ca4bfa57e116 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -92,27 +92,6 @@ xdr_encode_string(u32 *p, const char *string) return xdr_encode_array(p, string, strlen(string)); } -u32 * -xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen) -{ - unsigned int len; - char *string; - - if ((len = ntohl(*p++)) > maxlen) - return NULL; - if (lenp) - *lenp = len; - if ((len % 4) != 0) { - string = (char 
*) p; - } else { - string = (char *) (p - 1); - memmove(string, p, len); - } - string[len] = '\0'; - *sp = string; - return p + XDR_QUADLEN(len); -} - u32 * xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen) { -- cgit v1.2.3-71-gd317 From 64a318ee2af9000df482d7a125c3b3e1f1007404 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 3 Jan 2006 09:55:46 +0100 Subject: NLM: Further cancel fixes If the server receives an NLM cancel call and finds no waiting lock to cancel, then chances are the lock has already been applied, and the client just hadn't yet processed the NLM granted callback before it sent the cancel. The Open Group text, for example, permits a server to return either success (LCK_GRANTED) or failure (LCK_DENIED) in this case. But returning an error seems more helpful; the client may be able to use it to recognize that a race has occurred and to recover from the race. So, modify the relevant functions to return an error in this case. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/lockd/svclock.c | 15 ++++++++++----- fs/locks.c | 7 ++++++- include/linux/fs.h | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index b56d439bad82..9cfced65d4a2 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -227,25 +227,27 @@ failed: * It is the caller's responsibility to check whether the file * can be closed hereafter. */ -static void +static int nlmsvc_delete_block(struct nlm_block *block, int unlock) { struct file_lock *fl = &block->b_call.a_args.lock.fl; struct nlm_file *file = block->b_file; struct nlm_block **bp; + int status = 0; dprintk("lockd: deleting block %p...\n", block); /* Remove block from list */ nlmsvc_remove_block(block); - posix_unblock_lock(file->f_file, fl); + if (unlock) + status = posix_unblock_lock(file->f_file, fl); /* If the block is in the middle of a GRANT callback, * don't kill it yet. 
*/ if (block->b_incall) { nlmsvc_insert_block(block, NLM_NEVER); block->b_done = 1; - return; + return status; } /* Remove block from file's list of blocks */ @@ -260,6 +262,7 @@ nlmsvc_delete_block(struct nlm_block *block, int unlock) nlm_release_host(block->b_host); nlmclnt_freegrantargs(&block->b_call); kfree(block); + return status; } /* @@ -270,6 +273,7 @@ int nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action) { struct nlm_block *block, *next; + /* XXX: Will everything get cleaned up if we don't unlock here? */ down(&file->f_sema); for (block = file->f_blocks; block; block = next) { @@ -439,6 +443,7 @@ u32 nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) { struct nlm_block *block; + int status = 0; dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n", file->f_file->f_dentry->d_inode->i_sb->s_id, @@ -449,9 +454,9 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) down(&file->f_sema); if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL) - nlmsvc_delete_block(block, 1); + status = nlmsvc_delete_block(block, 1); up(&file->f_sema); - return nlm_granted; + return status ? nlm_lck_denied : nlm_granted; } /* diff --git a/fs/locks.c b/fs/locks.c index 75650d52fe60..fb32d6218e21 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1958,13 +1958,18 @@ EXPORT_SYMBOL(posix_block_lock); * * lockd needs to block waiting for locks. 
*/ -void +int posix_unblock_lock(struct file *filp, struct file_lock *waiter) { + int status = 0; + lock_kernel(); if (waiter->fl_next) __locks_delete_block(waiter); + else + status = -ENOENT; unlock_kernel(); + return status; } EXPORT_SYMBOL(posix_unblock_lock); diff --git a/include/linux/fs.h b/include/linux/fs.h index 115e72be25d0..2c9c48d65630 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -760,7 +760,7 @@ extern struct file_lock *posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern void posix_block_lock(struct file_lock *, struct file_lock *); -extern void posix_unblock_lock(struct file *, struct file_lock *); +extern int posix_unblock_lock(struct file *, struct file_lock *); extern int posix_locks_deadlock(struct file_lock *, struct file_lock *); extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags); -- cgit v1.2.3-71-gd317 From 02107148349f31eee7c0fb06fd7a880df73dbd20 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Jan 2006 09:55:49 +0100 Subject: SUNRPC: switchable buffer allocation Add RPC client transport switch support for replacing buffer management on a per-transport basis. In the current IPv4 socket transport implementation, RPC buffers are allocated as needed for each RPC message that is sent. Some transport implementations may choose to use pre-allocated buffers for encoding, sending, receiving, and unmarshalling RPC messages, however. For transports capable of direct data placement, the buffers can be carved out of a pre-registered area of memory rather than from a slab cache. Test-plan: Millions of fsx operations. Performance characterization with "sio" and "iozone". Use oprofile and other tools to look for significant regression in CPU utilization. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 +-- include/linux/sunrpc/xprt.h | 10 ++++----- net/sunrpc/clnt.c | 14 +++++++------ net/sunrpc/sched.c | 50 ++++++++++++++++++++++++-------------------- net/sunrpc/xprt.c | 3 +++ net/sunrpc/xprtsock.c | 5 +++++ 6 files changed, 49 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 94b0afa4ab05..8b25629accd8 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -52,8 +52,6 @@ struct rpc_task { * RPC call state */ struct rpc_message tk_msg; /* RPC call info */ - __u32 * tk_buffer; /* XDR buffer */ - size_t tk_bufsize; __u8 tk_garb_retry; __u8 tk_cred_retry; @@ -268,6 +266,7 @@ struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); void rpc_wake_up_status(struct rpc_wait_queue *, int); void rpc_delay(struct rpc_task *, unsigned long); void * rpc_malloc(struct rpc_task *, size_t); +void rpc_free(struct rpc_task *); int rpciod_up(void); void rpciod_down(void); void rpciod_wake_up(void); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3b8b6e823c70..7885b9621ce3 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -79,21 +79,19 @@ struct rpc_rqst { void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ struct list_head rq_list; + __u32 * rq_buffer; /* XDR encode buffer */ + size_t rq_bufsize; + struct xdr_buf rq_private_buf; /* The receive buffer * used in the softirq. */ unsigned long rq_majortimeo; /* major timeout alarm */ unsigned long rq_timeout; /* Current timeout value */ unsigned int rq_retries; /* # of retries */ - /* - * For authentication (e.g. 
auth_des) - */ - u32 rq_creddata[2]; /* * Partial send handling */ - u32 rq_bytes_sent; /* Bytes we have sent */ unsigned long rq_xtime; /* when transmitted */ @@ -107,6 +105,8 @@ struct rpc_xprt_ops { int (*reserve_xprt)(struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*connect)(struct rpc_task *task); + void * (*buf_alloc)(struct rpc_task *task, size_t size); + void (*buf_free)(struct rpc_task *task); int (*send_request)(struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); void (*timer)(struct rpc_task *task); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b23c0d328c9c..25cba94c5683 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -644,24 +644,26 @@ call_reserveresult(struct rpc_task *task) /* * 2. Allocate the buffer. For details, see sched.c:rpc_malloc. - * (Note: buffer memory is freed in rpc_task_release). + * (Note: buffer memory is freed in xprt_release). */ static void call_allocate(struct rpc_task *task) { + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = task->tk_xprt; unsigned int bufsiz; dprintk("RPC: %4d call_allocate (status %d)\n", task->tk_pid, task->tk_status); task->tk_action = call_bind; - if (task->tk_buffer) + if (req->rq_buffer) return; /* FIXME: compute buffer requirements more exactly using * auth->au_wslack */ bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE; - if (rpc_malloc(task, bufsiz << 1) != NULL) + if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL) return; printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); @@ -704,14 +706,14 @@ call_encode(struct rpc_task *task) task->tk_pid, task->tk_status); /* Default buffer setup */ - bufsiz = task->tk_bufsize >> 1; - sndbuf->head[0].iov_base = (void *)task->tk_buffer; + bufsiz = req->rq_bufsize >> 1; + sndbuf->head[0].iov_base = (void *)req->rq_buffer; sndbuf->head[0].iov_len = bufsiz; sndbuf->tail[0].iov_len = 0; sndbuf->page_len = 0; sndbuf->len = 0; 
sndbuf->buflen = bufsiz; - rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz); + rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz); rcvbuf->head[0].iov_len = bufsiz; rcvbuf->tail[0].iov_len = 0; rcvbuf->page_len = 0; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 48510e3ffa02..7415406aa1ae 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -41,8 +41,6 @@ static mempool_t *rpc_buffer_mempool __read_mostly; static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); -static void rpc_free(struct rpc_task *task); - static void rpc_async_schedule(void *); /* @@ -599,7 +597,6 @@ void rpc_exit_task(struct rpc_task *task) WARN_ON(RPC_ASSASSINATED(task)); /* Always release the RPC slot and buffer memory */ xprt_release(task); - rpc_free(task); } } } @@ -724,17 +721,19 @@ static void rpc_async_schedule(void *arg) __rpc_execute((struct rpc_task *)arg); } -/* - * Allocate memory for RPC purposes. +/** + * rpc_malloc - allocate an RPC buffer + * @task: RPC task that will use this buffer + * @size: requested byte size * * We try to ensure that some NFS reads and writes can always proceed * by using a mempool when allocating 'small' buffers. * In order to avoid memory starvation triggering more writebacks of * NFS requests, we use GFP_NOFS rather than GFP_KERNEL. 
*/ -void * -rpc_malloc(struct rpc_task *task, size_t size) +void * rpc_malloc(struct rpc_task *task, size_t size) { + struct rpc_rqst *req = task->tk_rqstp; gfp_t gfp; if (task->tk_flags & RPC_TASK_SWAPPER) @@ -743,27 +742,33 @@ rpc_malloc(struct rpc_task *task, size_t size) gfp = GFP_NOFS; if (size > RPC_BUFFER_MAXSIZE) { - task->tk_buffer = kmalloc(size, gfp); - if (task->tk_buffer) - task->tk_bufsize = size; + req->rq_buffer = kmalloc(size, gfp); + if (req->rq_buffer) + req->rq_bufsize = size; } else { - task->tk_buffer = mempool_alloc(rpc_buffer_mempool, gfp); - if (task->tk_buffer) - task->tk_bufsize = RPC_BUFFER_MAXSIZE; + req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp); + if (req->rq_buffer) + req->rq_bufsize = RPC_BUFFER_MAXSIZE; } - return task->tk_buffer; + return req->rq_buffer; } -static void -rpc_free(struct rpc_task *task) +/** + * rpc_free - free buffer allocated via rpc_malloc + * @task: RPC task with a buffer to be freed + * + */ +void rpc_free(struct rpc_task *task) { - if (task->tk_buffer) { - if (task->tk_bufsize == RPC_BUFFER_MAXSIZE) - mempool_free(task->tk_buffer, rpc_buffer_mempool); + struct rpc_rqst *req = task->tk_rqstp; + + if (req->rq_buffer) { + if (req->rq_bufsize == RPC_BUFFER_MAXSIZE) + mempool_free(req->rq_buffer, rpc_buffer_mempool); else - kfree(task->tk_buffer); - task->tk_buffer = NULL; - task->tk_bufsize = 0; + kfree(req->rq_buffer); + req->rq_buffer = NULL; + req->rq_bufsize = 0; } } @@ -887,7 +892,6 @@ void rpc_release_task(struct rpc_task *task) xprt_release(task); if (task->tk_msg.rpc_cred) rpcauth_unbindcred(task); - rpc_free(task); if (task->tk_client) { rpc_release_client(task->tk_client); task->tk_client = NULL; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 6dda3860351f..069a6cbd49ea 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -838,6 +838,8 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_timeout = xprt->timeout.to_initval; req->rq_task = task; 
req->rq_xprt = xprt; + req->rq_buffer = NULL; + req->rq_bufsize = 0; req->rq_xid = xprt_alloc_xid(xprt); req->rq_release_snd_buf = NULL; dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, @@ -867,6 +869,7 @@ void xprt_release(struct rpc_task *task) mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); spin_unlock_bh(&xprt->transport_lock); + xprt->ops->buf_free(task); task->tk_rqstp = NULL; if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 77e8800d4127..51f07c9a751b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -1161,6 +1162,8 @@ static struct rpc_xprt_ops xs_udp_ops = { .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, .connect = xs_connect, + .buf_alloc = rpc_malloc, + .buf_free = rpc_free, .send_request = xs_udp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_rtt, .timer = xs_udp_timer, @@ -1173,6 +1176,8 @@ static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xprt_release_xprt, .connect = xs_connect, + .buf_alloc = rpc_malloc, + .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, .close = xs_close, -- cgit v1.2.3-71-gd317 From 35f5a422ce1af836007f811b613c440d0e348e06 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Jan 2006 09:55:50 +0100 Subject: SUNRPC: new interface to force an RPC rebind We'd like to hide fields in rpc_xprt and rpc_clnt from upper layer protocols. Start by creating an API to force RPC rebind, replacing logic that simply sets cl_port to zero. Test-plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. NFSv2/3 and NFSv4 mounting should be carefully checked. 
Probably need to rig a server where certain services aren't running, or that returns an error for some typical operation. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 4 ++-- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 21 +++++++++++++++------ 3 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index c4c8601096e0..82f7a0b1d8ae 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -177,7 +177,7 @@ nlm_bind_host(struct nlm_host *host) if ((clnt = host->h_rpcclnt) != NULL) { xprt = clnt->cl_xprt; if (time_after_eq(jiffies, host->h_nextrebind)) { - clnt->cl_port = 0; + rpc_force_rebind(clnt); host->h_nextrebind = jiffies + NLM_HOST_REBIND; dprintk("lockd: next rebind in %ld jiffies\n", host->h_nextrebind - jiffies); @@ -217,7 +217,7 @@ nlm_rebind_host(struct nlm_host *host) { dprintk("lockd: rebind host %s\n", host->h_name); if (host->h_rpcclnt && time_after_eq(jiffies, host->h_nextrebind)) { - host->h_rpcclnt->cl_port = 0; + rpc_force_rebind(host->h_rpcclnt); host->h_nextrebind = jiffies + NLM_HOST_REBIND; } } diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b0ab959eca65..3d605765f84b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -135,6 +135,7 @@ void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); size_t rpc_max_payload(struct rpc_clnt *); +void rpc_force_rebind(struct rpc_clnt *); int rpc_ping(struct rpc_clnt *clnt, int flags); static __inline__ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 25cba94c5683..2789d3083fe7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -538,6 +538,18 @@ size_t rpc_max_payload(struct rpc_clnt *clnt) } EXPORT_SYMBOL(rpc_max_payload); +/** + * rpc_force_rebind - force transport to check that remote port 
is unchanged + * @clnt: client to rebind + * + */ +void rpc_force_rebind(struct rpc_clnt *clnt) +{ + if (clnt->cl_autobind) + clnt->cl_port = 0; +} +EXPORT_SYMBOL(rpc_force_rebind); + /* * Restart an (async) RPC call. Usually called from within the * exit handler. @@ -853,8 +865,7 @@ call_connect_status(struct rpc_task *task) } /* Something failed: remote service port may have changed */ - if (clnt->cl_autobind) - clnt->cl_port = 0; + rpc_force_rebind(clnt); switch (status) { case -ENOTCONN: @@ -935,8 +946,7 @@ call_status(struct rpc_task *task) break; case -ECONNREFUSED: case -ENOTCONN: - if (clnt->cl_autobind) - clnt->cl_port = 0; + rpc_force_rebind(clnt); task->tk_action = call_bind; break; case -EAGAIN: @@ -995,8 +1005,7 @@ call_timeout(struct rpc_task *task) printk(KERN_NOTICE "%s: server %s not responding, still trying\n", clnt->cl_protname, clnt->cl_server); } - if (clnt->cl_autobind) - clnt->cl_port = 0; + rpc_force_rebind(clnt); retry: clnt->cl_stats->rpcretrans++; -- cgit v1.2.3-71-gd317 From 922004120b10dcb0ce04b55014168e8a7a8c1a0e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Jan 2006 09:55:51 +0100 Subject: SUNRPC: transport switch API for setting port number At some point, transport endpoint addresses will no longer be IPv4. To hide the structure of the rpc_xprt's address field from ULPs and port mappers, add an API for setting the port number during an RPC bind operation. Test-plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. NFSv2/3 and NFSv4 mounting should be carefully checked. Probably need to rig a server where certain services aren't running, or that returns an error for some typical operation. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/pmap_clnt.c | 8 +++++--- net/sunrpc/xprtsock.c | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 7885b9621ce3..dd860128ceda 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -104,6 +104,7 @@ struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); int (*reserve_xprt)(struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); + void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_task *task); void * (*buf_alloc)(struct rpc_task *task, size_t size); void (*buf_free)(struct rpc_task *task); diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index cad4568fbbe2..0935adb91b3c 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -131,10 +131,13 @@ static void pmap_getport_done(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + struct rpc_xprt *xprt = task->tk_xprt; struct rpc_portmap *map = clnt->cl_pmap; dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", task->tk_pid, task->tk_status, clnt->cl_port); + + xprt->ops->set_port(xprt, 0); if (task->tk_status < 0) { /* Make the calling task exit with an error */ task->tk_action = rpc_exit_task; @@ -142,9 +145,8 @@ pmap_getport_done(struct rpc_task *task) /* Program not registered */ rpc_exit(task, -EACCES); } else { - /* byte-swap port number first */ + xprt->ops->set_port(xprt, clnt->cl_port); clnt->cl_port = htons(clnt->cl_port); - clnt->cl_xprt->addr.sin_port = clnt->cl_port; } spin_lock(&pmap_lock); map->pm_binding = 0; @@ -205,7 +207,7 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg xprt = xprt_create_proto(proto, srvaddr, NULL); if (IS_ERR(xprt)) return (struct 
rpc_clnt *)xprt; - xprt->addr.sin_port = htons(RPC_PMAP_PORT); + xprt->ops->set_port(xprt, RPC_PMAP_PORT); if (!privileged) xprt->resvport = 0; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 51f07c9a751b..3e8893001479 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -921,6 +921,18 @@ static void xs_udp_timer(struct rpc_task *task) xprt_adjust_cwnd(task, -ETIMEDOUT); } +/** + * xs_set_port - reset the port number in the remote endpoint address + * @xprt: generic transport + * @port: new port number + * + */ +static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) +{ + dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); + xprt->addr.sin_port = htons(port); +} + static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) { struct sockaddr_in myaddr = { @@ -1161,6 +1173,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, + .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, .buf_free = rpc_free, @@ -1175,6 +1188,7 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xprt_release_xprt, + .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, .buf_free = rpc_free, -- cgit v1.2.3-71-gd317 From f518e35aec984036903c1003e867f833747a9d79 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 3 Jan 2006 09:55:52 +0100 Subject: SUNRPC: get rid of cl_chatty Clean up: Every ULP that uses the in-kernel RPC client, except the NLM client, sets cl_chatty. There's no reason why NLM shouldn't set it, so just get rid of cl_chatty and always be verbose. Test-plan: Compile with CONFIG_NFS enabled. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/lockd/clntproc.c | 3 +-- fs/lockd/mon.c | 1 - fs/nfs/inode.c | 2 -- fs/nfs/mount_clnt.c | 1 - fs/nfsd/nfs4callback.c | 1 - include/linux/sunrpc/clnt.h | 1 - net/sunrpc/clnt.c | 10 ++++------ net/sunrpc/pmap_clnt.c | 1 - 8 files changed, 5 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 816333cd377b..145524039577 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -222,8 +222,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) goto done; } clnt->cl_softrtry = nfssrv->client->cl_softrtry; - clnt->cl_intr = nfssrv->client->cl_intr; - clnt->cl_chatty = nfssrv->client->cl_chatty; + clnt->cl_intr = nfssrv->client->cl_intr; } /* Keep the old signal mask */ diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 2d144abe84ad..0edc03e67966 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -123,7 +123,6 @@ nsm_create(void) if (IS_ERR(clnt)) goto out_err; clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; clnt->cl_oneshot = 1; return clnt; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 648cb1aef3b1..4625479a6b62 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -413,7 +413,6 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) clnt->cl_intr = 1; clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; return clnt; @@ -1838,7 +1837,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, } clnt->cl_intr = 1; clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; clp->cl_rpcclient = clnt; memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); nfs_idmap_new(clp); diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 0e82617f2de0..db99b8f678f8 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -82,7 +82,6 @@ mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, RPC_AUTH_UNIX); if (!IS_ERR(clnt)) { clnt->cl_softrtry = 1; - clnt->cl_chatty 
= 1; clnt->cl_oneshot = 1; clnt->cl_intr = 1; } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index cf92008f219a..d828662d737d 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -431,7 +431,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) } clnt->cl_intr = 0; clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; /* Kick rpciod, put the call on the wire. */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 3d605765f84b..f147e6b84332 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -49,7 +49,6 @@ struct rpc_clnt { unsigned int cl_softrtry : 1,/* soft timeouts */ cl_intr : 1,/* interruptible */ - cl_chatty : 1,/* be verbose */ cl_autobind : 1,/* use getport() */ cl_oneshot : 1,/* dispose after use */ cl_dead : 1;/* abandoned */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 2789d3083fe7..5530ac8c6df9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -957,8 +957,7 @@ call_status(struct rpc_task *task) rpc_exit(task, status); break; default: - if (clnt->cl_chatty) - printk("%s: RPC call returned error %d\n", + printk("%s: RPC call returned error %d\n", clnt->cl_protname, -status); rpc_exit(task, status); break; @@ -993,14 +992,13 @@ call_timeout(struct rpc_task *task) dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid); if (RPC_IS_SOFT(task)) { - if (clnt->cl_chatty) - printk(KERN_NOTICE "%s: server %s not responding, timed out\n", + printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, clnt->cl_server); rpc_exit(task, -EIO); return; } - if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) { + if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { task->tk_flags |= RPC_CALL_MAJORSEEN; printk(KERN_NOTICE "%s: server %s not responding, still trying\n", clnt->cl_protname, clnt->cl_server); @@ -1027,7 +1025,7 @@ call_decode(struct rpc_task *task) dprintk("RPC: %4d call_decode (status %d)\n", task->tk_pid, task->tk_status); - if 
(clnt->cl_chatty && (task->tk_flags & RPC_CALL_MAJORSEEN)) { + if (task->tk_flags & RPC_CALL_MAJORSEEN) { printk(KERN_NOTICE "%s: server %s OK\n", clnt->cl_protname, clnt->cl_server); task->tk_flags &= ~RPC_CALL_MAJORSEEN; diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 0935adb91b3c..8139ce68e915 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -217,7 +217,6 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg RPC_AUTH_UNIX); if (!IS_ERR(clnt)) { clnt->cl_softrtry = 1; - clnt->cl_chatty = 1; clnt->cl_oneshot = 1; } return clnt; -- cgit v1.2.3-71-gd317 From 632e3bdc5006334cea894d078660b691685e1075 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:55 +0100 Subject: SUNRPC: Ensure client closes the socket when server initiates a close If the server decides to close the RPC socket, we currently don't actually respond until either another RPC call is scheduled, or until xprt_autoclose() gets called by the socket expiry timer (which may be up to 5 minutes later). This patch ensures that xprt_autoclose() is called much sooner if the server closes the socket. 
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprt.c | 33 ++++++++++++++++----------------- net/sunrpc/xprtsock.c | 12 ++++++++++-- 3 files changed, 27 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index dd860128ceda..6ef99b14ff09 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -254,6 +254,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to); #define XPRT_LOCKED (0) #define XPRT_CONNECTED (1) #define XPRT_CONNECTING (2) +#define XPRT_CLOSE_WAIT (3) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 069a6cbd49ea..8bc0d5acf0da 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -119,6 +119,17 @@ out_sleep: return 0; } +static void xprt_clear_locked(struct rpc_xprt *xprt) +{ + xprt->snd_task = NULL; + if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state) || xprt->shutdown) { + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->state); + smp_mb__after_clear_bit(); + } else + schedule_work(&xprt->task_cleanup); +} + /* * xprt_reserve_xprt_cong - serialize write access to transports * @task: task that is requesting access to the transport @@ -145,9 +156,7 @@ int xprt_reserve_xprt_cong(struct rpc_task *task) } return 1; } - smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + xprt_clear_locked(xprt); out_sleep: dprintk("RPC: %4d failed to lock transport %p\n", task->tk_pid, xprt); task->tk_timeout = 0; @@ -193,9 +202,7 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt) return; out_unlock: - smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + xprt_clear_locked(xprt); } static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) @@ -222,9 +229,7 @@ static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) return; } out_unlock: - 
smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + xprt_clear_locked(xprt); } /** @@ -237,10 +242,7 @@ out_unlock: void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { if (xprt->snd_task == task) { - xprt->snd_task = NULL; - smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + xprt_clear_locked(xprt); __xprt_lock_write_next(xprt); } } @@ -256,10 +258,7 @@ void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) { if (xprt->snd_task == task) { - xprt->snd_task = NULL; - smp_mb__before_clear_bit(); - clear_bit(XPRT_LOCKED, &xprt->state); - smp_mb__after_clear_bit(); + xprt_clear_locked(xprt); __xprt_lock_write_next_cong(xprt); } } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3e8893001479..c458f8d1d6d1 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -425,7 +425,7 @@ static void xs_close(struct rpc_xprt *xprt) struct sock *sk = xprt->inet; if (!sk) - return; + goto clear_close_wait; dprintk("RPC: xs_close xprt %p\n", xprt); @@ -442,6 +442,10 @@ static void xs_close(struct rpc_xprt *xprt) sk->sk_no_check = 0; sock_release(sock); +clear_close_wait: + smp_mb__before_clear_bit(); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + smp_mb__after_clear_bit(); } /** @@ -801,9 +805,13 @@ static void xs_tcp_state_change(struct sock *sk) case TCP_SYN_SENT: case TCP_SYN_RECV: break; + case TCP_CLOSE_WAIT: + /* Try to schedule an autoclose RPC calls */ + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) + schedule_work(&xprt->task_cleanup); default: xprt_disconnect(xprt); - break; } out: read_unlock(&sk->sk_callback_lock); -- cgit v1.2.3-71-gd317 From 58df095b732529ade8f4051b41d7c29731afecd6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Jan 2006 09:55:57 +0100 Subject: NFSv4: Allow entries in the 
idmap cache to expire If someone changes the uid/gid mapping in userland, then we do eventually want those changes to be propagated to the kernel. Currently the kernel assumes that it may cache entries forever. Add an expiration time + garbage collector for idmap entries. Signed-off-by: Trond Myklebust --- Documentation/kernel-parameters.txt | 4 ++++ fs/nfs/idmap.c | 9 +++++++++ fs/nfs/inode.c | 14 ++++++++++++++ fs/nfs/sysctl.c | 10 ++++++++++ include/linux/nfs_idmap.h | 2 ++ 5 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 309c9cec6e7c..a482fde09bbb 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -914,6 +914,10 @@ running once the system is up. [NFS] set the TCP port on which the NFSv4 callback channel should listen. + nfs.idmap_cache_timeout= + [NFS] set the maximum lifetime for idmapper cache + entries. + nmi_watchdog= [KNL,BUGS=IA-32] Debugging features for SMP kernels no387 [BUGS=IA-32] Tells the kernel to use the 387 maths diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index ffb8df91dc34..821edd30333b 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -54,7 +54,11 @@ #define IDMAP_HASH_SZ 128 +/* Default cache timeout is 10 minutes */ +unsigned int nfs_idmap_cache_timeout = 600 * HZ; + struct idmap_hashent { + unsigned long ih_expires; __u32 ih_id; int ih_namelen; char ih_name[IDMAP_NAMESZ]; @@ -149,6 +153,8 @@ idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0) return NULL; + if (time_after(jiffies, he->ih_expires)) + return NULL; return he; } @@ -164,6 +170,8 @@ idmap_lookup_id(struct idmap_hashtable *h, __u32 id) struct idmap_hashent *he = idmap_id_hash(h, id); if (he->ih_id != id || he->ih_namelen == 0) return NULL; + if (time_after(jiffies, he->ih_expires)) + return NULL; return he; } @@ -192,6 +200,7 @@ 
idmap_update_entry(struct idmap_hashent *he, const char *name, memcpy(he->ih_name, name, namelen); he->ih_name[namelen] = '\0'; he->ih_namelen = namelen; + he->ih_expires = jiffies + nfs_idmap_cache_timeout; } /* diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 4625479a6b62..e7bd0d92600f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2050,6 +2050,20 @@ static int param_set_port(const char *val, struct kernel_param *kp) module_param_call(callback_tcpport, param_set_port, param_get_int, &nfs_callback_set_tcpport, 0644); +static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) +{ + char *endp; + int num = simple_strtol(val, &endp, 0); + int jif = num * HZ; + if (endp == val || *endp || num < 0 || jif < num) + return -EINVAL; + *((int *)kp->arg) = jif; + return 0; +} + +module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, + &nfs_idmap_cache_timeout, 0644); + #define nfs4_init_once(nfsi) \ do { \ INIT_LIST_HEAD(&(nfsi)->open_states); \ diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index fdc64b59a4ee..4c486eb867ca 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "callback.h" @@ -35,6 +36,15 @@ static ctl_table nfs_cb_sysctls[] = { .extra1 = (int *)&nfs_set_port_min, .extra2 = (int *)&nfs_set_port_max, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "idmap_cache_timeout", + .data = &nfs_idmap_cache_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, #endif { .ctl_name = 0 } }; diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index a0f1f25e0ead..102e56094296 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -71,6 +71,8 @@ int nfs_map_name_to_uid(struct nfs4_client *, const char *, size_t, __u32 *); int nfs_map_group_to_gid(struct nfs4_client *, const char *, size_t, __u32 *); int nfs_map_uid_to_name(struct nfs4_client *, __u32, char 
*); int nfs_map_gid_to_group(struct nfs4_client *, __u32, char *); + +extern unsigned int nfs_idmap_cache_timeout; #endif /* __KERNEL__ */ #endif /* NFS_IDMAP_H */ -- cgit v1.2.3-71-gd317 From 9eed129bbde80cbd7ffeacaa1555ba1e0c9a0997 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 3 Jan 2006 09:56:00 +0100 Subject: SUNRPC: Update the spkm3 code to use the make_checksum interface Also update the tokenlen calculations to accommodate g_token_size(). Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/gss_spkm3.h | 2 +- net/sunrpc/auth_gss/gss_spkm3_seal.c | 11 +++++------ net/sunrpc/auth_gss/gss_spkm3_token.c | 3 ++- net/sunrpc/auth_gss/gss_spkm3_unseal.c | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_spkm3.h b/include/linux/sunrpc/gss_spkm3.h index 0beb2cf00a84..336e218c2782 100644 --- a/include/linux/sunrpc/gss_spkm3.h +++ b/include/linux/sunrpc/gss_spkm3.h @@ -48,7 +48,7 @@ u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struc #define CKSUMTYPE_RSA_MD5 0x0007 s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, - struct xdr_netobj *cksum); + int body_offset, struct xdr_netobj *cksum); void asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits); int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, int explen); diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index d1e12b25d6e2..86fbf7c3e39c 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c @@ -59,7 +59,7 @@ spkm3_make_token(struct spkm3_ctx *ctx, char tokhdrbuf[25]; struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; struct xdr_netobj mic_hdr = {.len = 0, .data = tokhdrbuf}; - int tmsglen, tokenlen = 0; + int tokenlen = 0; unsigned char *ptr; s32 now; int ctxelen = 0, ctxzbit = 0; @@ -92,24 +92,23
@@ spkm3_make_token(struct spkm3_ctx *ctx, } if (toktype == SPKM_MIC_TOK) { - tmsglen = 0; /* Calculate checksum over the mic-header */ asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit); spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data, ctxelen, ctxzbit); if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len, - text, &md5cksum)) + text, 0, &md5cksum)) goto out_err; asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit); - tokenlen = 10 + ctxelen + 1 + 2 + md5elen + 1; + tokenlen = 10 + ctxelen + 1 + md5elen + 1; /* Create token header using generic routines */ - token->len = g_token_size(&ctx->mech_used, tokenlen + tmsglen); + token->len = g_token_size(&ctx->mech_used, tokenlen); ptr = token->data; - g_make_token_header(&ctx->mech_used, tokenlen + tmsglen, &ptr); + g_make_token_header(&ctx->mech_used, tokenlen, &ptr); spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit); } else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */ diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c index 1f824578d773..af0d7ce74686 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_token.c +++ b/net/sunrpc/auth_gss/gss_spkm3_token.c @@ -182,6 +182,7 @@ spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ct * *tokp points to the beginning of the SPKM_MIC token described * in rfc 2025, section 3.2.1: * + * toklen is the inner token length */ void spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit) @@ -189,7 +190,7 @@ spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hd unsigned char *ict = *tokp; *(u8 *)ict++ = 0xa4; - *(u8 *)ict++ = toklen - 2; + *(u8 *)ict++ = toklen; memcpy(ict, mic_hdr->data, mic_hdr->len); ict += mic_hdr->len; diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c index 241d5b30dfcb..96851b0ba1ba 100644 --- 
a/net/sunrpc/auth_gss/gss_spkm3_unseal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c @@ -95,7 +95,7 @@ spkm3_read_token(struct spkm3_ctx *ctx, ret = GSS_S_DEFECTIVE_TOKEN; code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2, mic_hdrlen + 2, - message_buffer, &md5cksum); + message_buffer, 0, &md5cksum); if (code) goto out; -- cgit v1.2.3-71-gd317 From 367cb704212cd0c9273ba2b1e62523139210563b Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Fri, 6 Jan 2006 21:17:50 +0100 Subject: kbuild: un-stringify KBUILD_MODNAME Now when kbuild passes KBUILD_MODNAME with "" do not __stringify it when used. Remove __stringify for all users. This also fixes the output of: $ ls -l /sys/module/ drwxr-xr-x 4 root root 0 2006-01-05 14:24 pcmcia drwxr-xr-x 4 root root 0 2006-01-05 14:24 pcmcia_core drwxr-xr-x 3 root root 0 2006-01-05 14:24 "processor" drwxr-xr-x 3 root root 0 2006-01-05 14:24 "psmouse" The quoting of the module names will be gone again. Thanks to GregKH + Kay Sievers for reporting this. Signed-off-by: Sam Ravnborg --- drivers/media/dvb/cinergyT2/cinergyT2.c | 2 +- drivers/media/dvb/ttpci/budget.h | 2 +- drivers/media/video/tda9840.c | 2 +- drivers/media/video/tea6415c.c | 2 +- drivers/media/video/tea6420.c | 2 +- include/linux/moduleparam.h | 2 +- include/media/saa7146.h | 6 +++--- net/ipv4/netfilter/ip_nat_ftp.c | 2 +- net/ipv4/netfilter/ip_nat_irc.c | 2 +- security/capability.c | 6 ++---- 10 files changed, 13 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c index b996fb59b7e4..1d69bf031fb9 100644 --- a/drivers/media/dvb/cinergyT2/cinergyT2.c +++ b/drivers/media/dvb/cinergyT2/cinergyT2.c @@ -60,7 +60,7 @@ MODULE_PARM_DESC(debug, "Turn on/off debugging (default:off)."); #define dprintk(level, args...)
\ do { \ if ((debug & level)) { \ - printk("%s: %s(): ", __stringify(KBUILD_MODNAME), \ + printk("%s: %s(): ", KBUILD_MODNAME, \ __FUNCTION__); \ printk(args); } \ } while (0) diff --git a/drivers/media/dvb/ttpci/budget.h b/drivers/media/dvb/ttpci/budget.h index fdaa3318ad3a..c8d48cfba277 100644 --- a/drivers/media/dvb/ttpci/budget.h +++ b/drivers/media/dvb/ttpci/budget.h @@ -19,7 +19,7 @@ extern int budget_debug; #endif #define dprintk(level,args...) \ - do { if ((budget_debug & level)) { printk("%s: %s(): ",__stringify(KBUILD_MODNAME), __FUNCTION__); printk(args); } } while (0) + do { if ((budget_debug & level)) { printk("%s: %s(): ", KBUILD_MODNAME, __FUNCTION__); printk(args); } } while (0) struct budget_info { char *name; diff --git a/drivers/media/video/tda9840.c b/drivers/media/video/tda9840.c index 1794686612c6..0cb5c7e9a884 100644 --- a/drivers/media/video/tda9840.c +++ b/drivers/media/video/tda9840.c @@ -34,7 +34,7 @@ static int debug = 0; /* insmod parameter */ module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "Turn on/off device debugging (default:off)."); #define dprintk(args...) \ - do { if (debug) { printk("%s: %s()[%d]: ",__stringify(KBUILD_MODNAME), __FUNCTION__, __LINE__); printk(args); } } while (0) + do { if (debug) { printk("%s: %s()[%d]: ", KBUILD_MODNAME, __FUNCTION__, __LINE__); printk(args); } } while (0) #define SWITCH 0x00 #define LEVEL_ADJUST 0x02 diff --git a/drivers/media/video/tea6415c.c b/drivers/media/video/tea6415c.c index ee3688348b66..09149dad1b84 100644 --- a/drivers/media/video/tea6415c.c +++ b/drivers/media/video/tea6415c.c @@ -36,7 +36,7 @@ static int debug = 0; /* insmod parameter */ module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "Turn on/off device debugging (default:off)."); #define dprintk(args...) 
\ - do { if (debug) { printk("%s: %s()[%d]: ",__stringify(KBUILD_MODNAME), __FUNCTION__, __LINE__); printk(args); } } while (0) + do { if (debug) { printk("%s: %s()[%d]: ", KBUILD_MODNAME, __FUNCTION__, __LINE__); printk(args); } } while (0) #define TEA6415C_NUM_INPUTS 8 #define TEA6415C_NUM_OUTPUTS 6 diff --git a/drivers/media/video/tea6420.c b/drivers/media/video/tea6420.c index 17975c19da5e..e908f917c8d2 100644 --- a/drivers/media/video/tea6420.c +++ b/drivers/media/video/tea6420.c @@ -36,7 +36,7 @@ static int debug = 0; /* insmod parameter */ module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "Turn on/off device debugging (default:off)."); #define dprintk(args...) \ - do { if (debug) { printk("%s: %s()[%d]: ",__stringify(KBUILD_MODNAME), __FUNCTION__, __LINE__); printk(args); } } while (0) + do { if (debug) { printk("%s: %s()[%d]: ", KBUILD_MODNAME, __FUNCTION__, __LINE__); printk(args); } } while (0) /* addresses to scan, found only at 0x4c and/or 0x4d (7-Bit) */ static unsigned short normal_i2c[] = { I2C_TEA6420_1, I2C_TEA6420_2, I2C_CLIENT_END }; diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 368ec8e45bd0..b5c98c43779e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -10,7 +10,7 @@ #ifdef MODULE #define MODULE_PARAM_PREFIX /* empty */ #else -#define MODULE_PARAM_PREFIX __stringify(KBUILD_MODNAME) "." +#define MODULE_PARAM_PREFIX KBUILD_MODNAME "." 
#endif #ifdef MODULE diff --git a/include/media/saa7146.h b/include/media/saa7146.h index e5be2b9b846b..2bc634fcb7bb 100644 --- a/include/media/saa7146.h +++ b/include/media/saa7146.h @@ -21,14 +21,14 @@ extern unsigned int saa7146_debug; -//#define DEBUG_PROLOG printk("(0x%08x)(0x%08x) %s: %s(): ",(dev==0?-1:(dev->mem==0?-1:saa7146_read(dev,RPS_ADDR0))),(dev==0?-1:(dev->mem==0?-1:saa7146_read(dev,IER))),__stringify(KBUILD_MODNAME),__FUNCTION__) +//#define DEBUG_PROLOG printk("(0x%08x)(0x%08x) %s: %s(): ",(dev==0?-1:(dev->mem==0?-1:saa7146_read(dev,RPS_ADDR0))),(dev==0?-1:(dev->mem==0?-1:saa7146_read(dev,IER))),KBUILD_MODNAME,__FUNCTION__) #ifndef DEBUG_VARIABLE #define DEBUG_VARIABLE saa7146_debug #endif -#define DEBUG_PROLOG printk("%s: %s(): ",__stringify(KBUILD_MODNAME),__FUNCTION__) -#define INFO(x) { printk("%s: ",__stringify(KBUILD_MODNAME)); printk x; } +#define DEBUG_PROLOG printk("%s: %s(): ",KBUILD_MODNAME,__FUNCTION__) +#define INFO(x) { printk("%s: ",KBUILD_MODNAME); printk x; } #define ERR(x) { DEBUG_PROLOG; printk x; } diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c index d83757a70d9f..b8daab3c64af 100644 --- a/net/ipv4/netfilter/ip_nat_ftp.c +++ b/net/ipv4/netfilter/ip_nat_ftp.c @@ -171,7 +171,7 @@ static int __init init(void) /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ static int warn_set(const char *val, struct kernel_param *kp) { - printk(KERN_INFO __stringify(KBUILD_MODNAME) + printk(KERN_INFO KBUILD_MODNAME ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); return 0; } diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c index de31942babe3..461c833eaca1 100644 --- a/net/ipv4/netfilter/ip_nat_irc.c +++ b/net/ipv4/netfilter/ip_nat_irc.c @@ -113,7 +113,7 @@ static int __init init(void) /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. 
*/ static int warn_set(const char *val, struct kernel_param *kp) { - printk(KERN_INFO __stringify(KBUILD_MODNAME) + printk(KERN_INFO KBUILD_MODNAME ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); return 0; } diff --git a/security/capability.c b/security/capability.c index ec18d6075625..f9b35cc0b248 100644 --- a/security/capability.c +++ b/security/capability.c @@ -49,8 +49,6 @@ static struct security_operations capability_ops = { .vm_enough_memory = cap_vm_enough_memory, }; -#define MY_NAME __stringify(KBUILD_MODNAME) - /* flag to keep track of how we were registered */ static int secondary; @@ -67,7 +65,7 @@ static int __init capability_init (void) /* register ourselves with the security framework */ if (register_security (&capability_ops)) { /* try registering with primary module */ - if (mod_reg_security (MY_NAME, &capability_ops)) { + if (mod_reg_security (KBUILD_MODNAME, &capability_ops)) { printk (KERN_INFO "Failure registering capabilities " "with primary security module.\n"); return -EINVAL; @@ -85,7 +83,7 @@ static void __exit capability_exit (void) return; /* remove ourselves from the security framework */ if (secondary) { - if (mod_unreg_security (MY_NAME, &capability_ops)) + if (mod_unreg_security (KBUILD_MODNAME, &capability_ops)) printk (KERN_INFO "Failure unregistering capabilities " "with primary module.\n"); return; -- cgit v1.2.3-71-gd317 From 4bad4dc919573dbe9a5b41dd9edff279e99822d7 Mon Sep 17 00:00:00 2001 From: Kris Katterjohn Date: Fri, 6 Jan 2006 13:08:20 -0800 Subject: [NET]: Change sk_run_filter()'s return type in net/core/filter.c It should return an unsigned value, and fix sk_filter() as well. Signed-off-by: Kris Katterjohn Signed-off-by: David S. 
Miller --- include/linux/filter.h | 2 +- include/net/sock.h | 4 ++-- net/core/filter.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 3ba843c46382..c6cb8f095088 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -143,7 +143,7 @@ static inline unsigned int sk_filter_len(struct sk_filter *fp) struct sk_buff; struct sock; -extern int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen); +extern unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen); extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); extern int sk_chk_filter(struct sock_filter *filter, int flen); #endif /* __KERNEL__ */ diff --git a/include/net/sock.h b/include/net/sock.h index 6961700ff3a0..1806e5b61419 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -856,8 +856,8 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock) filter = sk->sk_filter; if (filter) { - int pkt_len = sk_run_filter(skb, filter->insns, - filter->len); + unsigned int pkt_len = sk_run_filter(skb, filter->insns, + filter->len); if (!pkt_len) err = -EPERM; else diff --git a/net/core/filter.c b/net/core/filter.c index 8964d3445588..9eb9d0017a01 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -75,7 +75,7 @@ static inline void *load_pointer(struct sk_buff *skb, int k, * len is the number of filter blocks in the array. 
*/ -int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) +unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) { struct sock_filter *fentry; /* We walk down these */ void *ptr; @@ -241,9 +241,9 @@ load_b: A = X; continue; case BPF_RET|BPF_K: - return ((unsigned int)fentry->k); + return fentry->k; case BPF_RET|BPF_A: - return ((unsigned int)A); + return A; case BPF_ST: mem[fentry->k] = A; continue; -- cgit v1.2.3-71-gd317 From 76ab608d86cf1ef5c5c46819b5733eb9f9f964f8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 6 Jan 2006 13:24:29 -0800 Subject: [NET]: Endian-annotate struct iphdr And fix trivial warnings that emerged. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/ip.h | 10 +++++----- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_output.c | 4 ++-- net/ipv4/ipvs/ip_vs_xmit.c | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ip.h b/include/linux/ip.h index 9e2eb9a602eb..4b55cf1df732 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -90,14 +90,14 @@ struct iphdr { #error "Please fix " #endif __u8 tos; - __u16 tot_len; - __u16 id; - __u16 frag_off; + __be16 tot_len; + __be16 id; + __be16 frag_off; __u8 ttl; __u8 protocol; __u16 check; - __u32 saddr; - __u32 daddr; + __be32 saddr; + __be32 daddr; /*The options start here. 
*/ }; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index ce2b70ce4018..2a8adda15e11 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -383,7 +383,7 @@ out_nomem: */ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) { - __u16 id = iph->id; + __be16 id = iph->id; __u32 saddr = iph->saddr; __u32 daddr = iph->daddr; __u8 protocol = iph->protocol; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 71da31818cfc..8b1c9bd0091e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -418,7 +418,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) struct sk_buff *skb2; unsigned int mtu, hlen, left, len, ll_rs; int offset; - int not_last_frag; + __be16 not_last_frag; struct rtable *rt = (struct rtable*)skb->dst; int err = 0; @@ -1180,7 +1180,7 @@ int ip_push_pending_frames(struct sock *sk) struct ip_options *opt = NULL; struct rtable *rt = inet->cork.rt; struct iphdr *iph; - int df = 0; + __be16 df = 0; __u8 ttl; int err = 0; diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 3b87482049cf..52c12e9edbbc 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -322,7 +322,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct net_device *tdev; /* Device to other host */ struct iphdr *old_iph = skb->nh.iph; u8 tos = old_iph->tos; - u16 df = old_iph->frag_off; + __be16 df = old_iph->frag_off; struct iphdr *iph; /* Our new IP header */ int max_headroom; /* The extra header space needed */ int mtu; -- cgit v1.2.3-71-gd317 From a2167dc62e9142b9a4bfb20f7e001c0f0a26fd8c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 6 Jan 2006 13:24:54 -0800 Subject: [NET]: Endian-annotate in_aton() Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- include/linux/inet.h | 2 +- net/core/utils.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inet.h b/include/linux/inet.h index 3b5e9fdff872..6c5587af118d 100644 --- a/include/linux/inet.h +++ b/include/linux/inet.h @@ -45,6 +45,6 @@ #ifdef __KERNEL__ #include -extern __u32 in_aton(const char *str); +extern __be32 in_aton(const char *str); #endif #endif /* _LINUX_INET_H */ diff --git a/net/core/utils.c b/net/core/utils.c index 587eb7787deb..ac1d1fcf8673 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -162,7 +162,7 @@ EXPORT_SYMBOL(net_srandom); * is otherwise not dependent on the TCP/IP stack. */ -__u32 in_aton(const char *str) +__be32 in_aton(const char *str) { unsigned long l; unsigned int val; -- cgit v1.2.3-71-gd317 From a62c80e559809e6c7851ec04d30575e85ad6f6ed Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 7 Jan 2006 13:52:45 +0000 Subject: [ARM] Move AMBA include files to include/linux/amba/ Since the ARM AMBA bus is used on MIPS as well as ARM, we need to make the bus available for other architectures to use. 
Move the AMBA include files from include/asm-arm/hardware/ to include/linux/amba/ Signed-off-by: Russell King --- arch/arm/common/amba.c | 2 +- arch/arm/mach-aaec2000/core.c | 2 +- arch/arm/mach-aaec2000/core.h | 2 +- arch/arm/mach-integrator/core.c | 2 +- arch/arm/mach-integrator/impd1.c | 4 +- arch/arm/mach-integrator/integrator_ap.c | 4 +- arch/arm/mach-integrator/integrator_cp.c | 6 +- arch/arm/mach-integrator/time.c | 2 +- arch/arm/mach-realview/core.c | 4 +- arch/arm/mach-realview/core.h | 3 +- arch/arm/mach-realview/realview_eb.c | 2 +- arch/arm/mach-versatile/core.c | 4 +- arch/arm/mach-versatile/core.h | 2 +- arch/arm/mach-versatile/versatile_ab.c | 2 +- arch/arm/mach-versatile/versatile_pb.c | 2 +- drivers/input/serio/ambakmi.c | 4 +- drivers/mmc/mmci.c | 2 +- drivers/serial/amba-pl010.c | 4 +- drivers/serial/amba-pl011.c | 4 +- drivers/video/amba-clcd.c | 5 +- include/asm-arm/arch-integrator/debug-macro.S | 2 +- include/asm-arm/arch-realview/debug-macro.S | 2 +- include/asm-arm/arch-versatile/debug-macro.S | 2 +- include/asm-arm/hardware/amba.h | 55 ------ include/asm-arm/hardware/amba_clcd.h | 271 -------------------------- include/asm-arm/hardware/amba_kmi.h | 92 --------- include/asm-arm/hardware/amba_serial.h | 161 --------------- include/linux/amba/bus.h | 55 ++++++ include/linux/amba/clcd.h | 271 ++++++++++++++++++++++++++ include/linux/amba/kmi.h | 92 +++++++++ include/linux/amba/serial.h | 161 +++++++++++++++ sound/arm/aaci.c | 2 +- 32 files changed, 614 insertions(+), 614 deletions(-) delete mode 100644 include/asm-arm/hardware/amba.h delete mode 100644 include/asm-arm/hardware/amba_clcd.h delete mode 100644 include/asm-arm/hardware/amba_kmi.h delete mode 100644 include/asm-arm/hardware/amba_serial.h create mode 100644 include/linux/amba/bus.h create mode 100644 include/linux/amba/clcd.h create mode 100644 include/linux/amba/kmi.h create mode 100644 include/linux/amba/serial.h (limited to 'include/linux') diff --git a/arch/arm/common/amba.c 
b/arch/arm/common/amba.c index e1013112c354..2bb0ce81bb69 100644 --- a/arch/arm/common/amba.c +++ b/arch/arm/common/amba.c @@ -12,10 +12,10 @@ #include #include #include +#include #include #include -#include #include #define to_amba_device(d) container_of(d, struct amba_device, dev) diff --git a/arch/arm/mach-aaec2000/core.c b/arch/arm/mach-aaec2000/core.c index 4e706d9ad368..dce4815cf53c 100644 --- a/arch/arm/mach-aaec2000/core.c +++ b/arch/arm/mach-aaec2000/core.c @@ -20,11 +20,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/arch/arm/mach-aaec2000/core.h b/arch/arm/mach-aaec2000/core.h index daefc0ea14a1..b6029a95f19c 100644 --- a/arch/arm/mach-aaec2000/core.h +++ b/arch/arm/mach-aaec2000/core.h @@ -9,7 +9,7 @@ * */ -#include +#include struct sys_timer; diff --git a/arch/arm/mach-integrator/core.c b/arch/arm/mach-integrator/core.c index dacbf504dae2..20071a2767cc 100644 --- a/arch/arm/mach-integrator/core.c +++ b/arch/arm/mach-integrator/core.c @@ -15,11 +15,11 @@ #include #include #include +#include #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c index a4bafee77a06..a85d471c5bfa 100644 --- a/arch/arm/mach-integrator/impd1.c +++ b/arch/arm/mach-integrator/impd1.c @@ -18,11 +18,11 @@ #include #include #include +#include +#include #include #include -#include -#include #include #include #include diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index 4c0f7c65facf..3afedeb56a6e 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include @@ -32,8 +34,6 @@ #include #include /* HZ */ #include -#include -#include #include diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index 
93f7ccb22c27..16cf2482a3e9 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -16,15 +16,15 @@ #include #include #include +#include +#include +#include #include #include #include #include #include -#include -#include -#include #include #include diff --git a/arch/arm/mach-integrator/time.c b/arch/arm/mach-integrator/time.c index 1a844ca139e0..9f46aaef8968 100644 --- a/arch/arm/mach-integrator/time.c +++ b/arch/arm/mach-integrator/time.c @@ -14,8 +14,8 @@ #include #include #include +#include -#include #include #include #include diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index af6580f1ceb8..4a222f59f2cf 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -24,14 +24,14 @@ #include #include #include +#include +#include #include #include #include #include #include -#include -#include #include #include diff --git a/arch/arm/mach-realview/core.h b/arch/arm/mach-realview/core.h index c06e6041df41..93e86d9f439c 100644 --- a/arch/arm/mach-realview/core.h +++ b/arch/arm/mach-realview/core.h @@ -22,7 +22,8 @@ #ifndef __ASM_ARCH_REALVIEW_H #define __ASM_ARCH_REALVIEW_H -#include +#include + #include #include diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c index 7dc32503fdf2..112f7592aca9 100644 --- a/arch/arm/mach-realview/realview_eb.c +++ b/arch/arm/mach-realview/realview_eb.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -30,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index a1ca46630dda..90023745b23a 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -25,14 +25,14 @@ #include #include #include +#include +#include #include #include #include #include #include -#include -#include #include #include diff --git a/arch/arm/mach-versatile/core.h 
b/arch/arm/mach-versatile/core.h index 588c20669d5d..afcaa858eb1f 100644 --- a/arch/arm/mach-versatile/core.h +++ b/arch/arm/mach-versatile/core.h @@ -22,7 +22,7 @@ #ifndef __ASM_ARCH_VERSATILE_H #define __ASM_ARCH_VERSATILE_H -#include +#include extern void __init versatile_init(void); extern void __init versatile_init_irq(void); diff --git a/arch/arm/mach-versatile/versatile_ab.c b/arch/arm/mach-versatile/versatile_ab.c index 8b0b3bef24ae..e74c8a2fbb95 100644 --- a/arch/arm/mach-versatile/versatile_ab.c +++ b/arch/arm/mach-versatile/versatile_ab.c @@ -23,12 +23,12 @@ #include #include #include +#include #include #include #include #include -#include #include diff --git a/arch/arm/mach-versatile/versatile_pb.c b/arch/arm/mach-versatile/versatile_pb.c index 7c3078c38916..22d5ca07f75d 100644 --- a/arch/arm/mach-versatile/versatile_pb.c +++ b/arch/arm/mach-versatile/versatile_pb.c @@ -23,12 +23,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include diff --git a/drivers/input/serio/ambakmi.c b/drivers/input/serio/ambakmi.c index d847ed51cfb1..cbab5d26377b 100644 --- a/drivers/input/serio/ambakmi.c +++ b/drivers/input/serio/ambakmi.c @@ -19,11 +19,11 @@ #include #include #include +#include +#include #include #include -#include -#include #include #define KMI_BASE (kmi->base) diff --git a/drivers/mmc/mmci.c b/drivers/mmc/mmci.c index 31b0b6d612bf..57375bc12372 100644 --- a/drivers/mmc/mmci.c +++ b/drivers/mmc/mmci.c @@ -19,12 +19,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c index ddd0307fece2..48f6e872314b 100644 --- a/drivers/serial/amba-pl010.c +++ b/drivers/serial/amba-pl010.c @@ -47,12 +47,12 @@ #include #include #include +#include +#include #include #include #include -#include -#include #define UART_NR 2 diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c index 
531b0e4f25e5..4ae4dff59795 100644 --- a/drivers/serial/amba-pl011.c +++ b/drivers/serial/amba-pl011.c @@ -47,12 +47,12 @@ #include #include #include +#include +#include #include #include -#include #include -#include #define UART_NR 14 diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index 69421c86252c..3358a1429651 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -21,13 +21,12 @@ #include #include #include +#include +#include #include -#include #include -#include - #define to_clcd(info) container_of(info, struct clcd_fb, fb) /* This is limited to 16 characters when displayed by X startup */ diff --git a/include/asm-arm/arch-integrator/debug-macro.S b/include/asm-arm/arch-integrator/debug-macro.S index 484a1aa47098..031d30941791 100644 --- a/include/asm-arm/arch-integrator/debug-macro.S +++ b/include/asm-arm/arch-integrator/debug-macro.S @@ -11,7 +11,7 @@ * */ -#include +#include .macro addruart,rx mrc p15, 0, \rx, c1, c0 diff --git a/include/asm-arm/arch-realview/debug-macro.S b/include/asm-arm/arch-realview/debug-macro.S index ed28bd012236..017ad996848d 100644 --- a/include/asm-arm/arch-realview/debug-macro.S +++ b/include/asm-arm/arch-realview/debug-macro.S @@ -11,7 +11,7 @@ * */ -#include +#include .macro addruart,rx mrc p15, 0, \rx, c1, c0 diff --git a/include/asm-arm/arch-versatile/debug-macro.S b/include/asm-arm/arch-versatile/debug-macro.S index 89e38ac1444e..ef6167116dbb 100644 --- a/include/asm-arm/arch-versatile/debug-macro.S +++ b/include/asm-arm/arch-versatile/debug-macro.S @@ -11,7 +11,7 @@ * */ -#include +#include .macro addruart,rx mrc p15, 0, \rx, c1, c0 diff --git a/include/asm-arm/hardware/amba.h b/include/asm-arm/hardware/amba.h deleted file mode 100644 index 51e6e54b2aa1..000000000000 --- a/include/asm-arm/hardware/amba.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * linux/include/asm-arm/hardware/amba.h - * - * Copyright (C) 2003 Deep Blue Solutions Ltd, All Rights Reserved. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef ASMARM_AMBA_H -#define ASMARM_AMBA_H - -#define AMBA_NR_IRQS 2 - -struct amba_device { - struct device dev; - struct resource res; - u64 dma_mask; - unsigned int periphid; - unsigned int irq[AMBA_NR_IRQS]; -}; - -struct amba_id { - unsigned int id; - unsigned int mask; - void *data; -}; - -struct amba_driver { - struct device_driver drv; - int (*probe)(struct amba_device *, void *); - int (*remove)(struct amba_device *); - void (*shutdown)(struct amba_device *); - int (*suspend)(struct amba_device *, pm_message_t); - int (*resume)(struct amba_device *); - struct amba_id *id_table; -}; - -#define amba_get_drvdata(d) dev_get_drvdata(&d->dev) -#define amba_set_drvdata(d,p) dev_set_drvdata(&d->dev, p) - -int amba_driver_register(struct amba_driver *); -void amba_driver_unregister(struct amba_driver *); -int amba_device_register(struct amba_device *, struct resource *); -void amba_device_unregister(struct amba_device *); -struct amba_device *amba_find_device(const char *, struct device *, unsigned int, unsigned int); -int amba_request_regions(struct amba_device *, const char *); -void amba_release_regions(struct amba_device *); - -#define amba_config(d) (((d)->periphid >> 24) & 0xff) -#define amba_rev(d) (((d)->periphid >> 20) & 0x0f) -#define amba_manf(d) (((d)->periphid >> 12) & 0xff) -#define amba_part(d) ((d)->periphid & 0xfff) - -#endif diff --git a/include/asm-arm/hardware/amba_clcd.h b/include/asm-arm/hardware/amba_clcd.h deleted file mode 100644 index 6b8d73dc1ab0..000000000000 --- a/include/asm-arm/hardware/amba_clcd.h +++ /dev/null @@ -1,271 +0,0 @@ -/* - * linux/include/asm-arm/hardware/amba_clcd.h -- Integrator LCD panel. 
- * - * David A Rusling - * - * Copyright (C) 2001 ARM Limited - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive - * for more details. - */ -#include -#include - -/* - * CLCD Controller Internal Register addresses - */ -#define CLCD_TIM0 0x00000000 -#define CLCD_TIM1 0x00000004 -#define CLCD_TIM2 0x00000008 -#define CLCD_TIM3 0x0000000c -#define CLCD_UBAS 0x00000010 -#define CLCD_LBAS 0x00000014 - -#if !defined(CONFIG_ARCH_VERSATILE) && !defined(CONFIG_ARCH_REALVIEW) -#define CLCD_IENB 0x00000018 -#define CLCD_CNTL 0x0000001c -#else -/* - * Someone rearranged these two registers on the Versatile - * platform... - */ -#define CLCD_IENB 0x0000001c -#define CLCD_CNTL 0x00000018 -#endif - -#define CLCD_STAT 0x00000020 -#define CLCD_INTR 0x00000024 -#define CLCD_UCUR 0x00000028 -#define CLCD_LCUR 0x0000002C -#define CLCD_PALL 0x00000200 -#define CLCD_PALETTE 0x00000200 - -#define TIM2_CLKSEL (1 << 5) -#define TIM2_IVS (1 << 11) -#define TIM2_IHS (1 << 12) -#define TIM2_IPC (1 << 13) -#define TIM2_IOE (1 << 14) -#define TIM2_BCD (1 << 26) - -#define CNTL_LCDEN (1 << 0) -#define CNTL_LCDBPP1 (0 << 1) -#define CNTL_LCDBPP2 (1 << 1) -#define CNTL_LCDBPP4 (2 << 1) -#define CNTL_LCDBPP8 (3 << 1) -#define CNTL_LCDBPP16 (4 << 1) -#define CNTL_LCDBPP24 (5 << 1) -#define CNTL_LCDBW (1 << 4) -#define CNTL_LCDTFT (1 << 5) -#define CNTL_LCDMONO8 (1 << 6) -#define CNTL_LCDDUAL (1 << 7) -#define CNTL_BGR (1 << 8) -#define CNTL_BEBO (1 << 9) -#define CNTL_BEPO (1 << 10) -#define CNTL_LCDPWR (1 << 11) -#define CNTL_LCDVCOMP(x) ((x) << 12) -#define CNTL_LDMAFIFOTIME (1 << 15) -#define CNTL_WATERMARK (1 << 16) - -struct clcd_panel { - struct fb_videomode mode; - signed short width; /* width in mm */ - signed short height; /* height in mm */ - u32 tim2; - u32 tim3; - u32 cntl; - unsigned int bpp:8, - fixedtimings:1, - grayscale:1; - unsigned int connector; -}; - -struct clcd_regs { - 
u32 tim0; - u32 tim1; - u32 tim2; - u32 tim3; - u32 cntl; - unsigned long pixclock; -}; - -struct clcd_fb; - -/* - * the board-type specific routines - */ -struct clcd_board { - const char *name; - - /* - * Optional. Check whether the var structure is acceptable - * for this display. - */ - int (*check)(struct clcd_fb *fb, struct fb_var_screeninfo *var); - - /* - * Compulsary. Decode fb->fb.var into regs->*. In the case of - * fixed timing, set regs->* to the register values required. - */ - void (*decode)(struct clcd_fb *fb, struct clcd_regs *regs); - - /* - * Optional. Disable any extra display hardware. - */ - void (*disable)(struct clcd_fb *); - - /* - * Optional. Enable any extra display hardware. - */ - void (*enable)(struct clcd_fb *); - - /* - * Setup platform specific parts of CLCD driver - */ - int (*setup)(struct clcd_fb *); - - /* - * mmap the framebuffer memory - */ - int (*mmap)(struct clcd_fb *, struct vm_area_struct *); - - /* - * Remove platform specific parts of CLCD driver - */ - void (*remove)(struct clcd_fb *); -}; - -struct amba_device; -struct clk; - -/* this data structure describes each frame buffer device we find */ -struct clcd_fb { - struct fb_info fb; - struct amba_device *dev; - struct clk *clk; - struct clcd_panel *panel; - struct clcd_board *board; - void *board_data; - void __iomem *regs; - u32 clcd_cntl; - u32 cmap[16]; -}; - -static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs) -{ - u32 val, cpl; - - /* - * Program the CLCD controller registers and start the CLCD - */ - val = ((fb->fb.var.xres / 16) - 1) << 2; - val |= (fb->fb.var.hsync_len - 1) << 8; - val |= (fb->fb.var.right_margin - 1) << 16; - val |= (fb->fb.var.left_margin - 1) << 24; - regs->tim0 = val; - - val = fb->fb.var.yres; - if (fb->panel->cntl & CNTL_LCDDUAL) - val /= 2; - val -= 1; - val |= (fb->fb.var.vsync_len - 1) << 10; - val |= fb->fb.var.lower_margin << 16; - val |= fb->fb.var.upper_margin << 24; - regs->tim1 = val; - - val = 
fb->panel->tim2; - val |= fb->fb.var.sync & FB_SYNC_HOR_HIGH_ACT ? 0 : TIM2_IHS; - val |= fb->fb.var.sync & FB_SYNC_VERT_HIGH_ACT ? 0 : TIM2_IVS; - - cpl = fb->fb.var.xres_virtual; - if (fb->panel->cntl & CNTL_LCDTFT) /* TFT */ - /* / 1 */; - else if (!fb->fb.var.grayscale) /* STN color */ - cpl = cpl * 8 / 3; - else if (fb->panel->cntl & CNTL_LCDMONO8) /* STN monochrome, 8bit */ - cpl /= 8; - else /* STN monochrome, 4bit */ - cpl /= 4; - - regs->tim2 = val | ((cpl - 1) << 16); - - regs->tim3 = fb->panel->tim3; - - val = fb->panel->cntl; - if (fb->fb.var.grayscale) - val |= CNTL_LCDBW; - - switch (fb->fb.var.bits_per_pixel) { - case 1: - val |= CNTL_LCDBPP1; - break; - case 2: - val |= CNTL_LCDBPP2; - break; - case 4: - val |= CNTL_LCDBPP4; - break; - case 8: - val |= CNTL_LCDBPP8; - break; - case 16: - val |= CNTL_LCDBPP16; - break; - case 32: - val |= CNTL_LCDBPP24; - break; - } - - regs->cntl = val; - regs->pixclock = fb->fb.var.pixclock; -} - -static inline int clcdfb_check(struct clcd_fb *fb, struct fb_var_screeninfo *var) -{ - var->xres_virtual = var->xres = (var->xres + 15) & ~15; - var->yres_virtual = var->yres = (var->yres + 1) & ~1; - -#define CHECK(e,l,h) (var->e < l || var->e > h) - if (CHECK(right_margin, (5+1), 256) || /* back porch */ - CHECK(left_margin, (5+1), 256) || /* front porch */ - CHECK(hsync_len, (5+1), 256) || - var->xres > 4096 || - var->lower_margin > 255 || /* back porch */ - var->upper_margin > 255 || /* front porch */ - var->vsync_len > 32 || - var->yres > 1024) - return -EINVAL; -#undef CHECK - - /* single panel mode: PCD = max(PCD, 1) */ - /* dual panel mode: PCD = max(PCD, 5) */ - - /* - * You can't change the grayscale setting, and - * we can only do non-interlaced video. 
- */ - if (var->grayscale != fb->fb.var.grayscale || - (var->vmode & FB_VMODE_MASK) != FB_VMODE_NONINTERLACED) - return -EINVAL; - -#define CHECK(e) (var->e != fb->fb.var.e) - if (fb->panel->fixedtimings && - (CHECK(xres) || - CHECK(yres) || - CHECK(bits_per_pixel) || - CHECK(pixclock) || - CHECK(left_margin) || - CHECK(right_margin) || - CHECK(upper_margin) || - CHECK(lower_margin) || - CHECK(hsync_len) || - CHECK(vsync_len) || - CHECK(sync))) - return -EINVAL; -#undef CHECK - - var->nonstd = 0; - var->accel_flags = 0; - - return 0; -} diff --git a/include/asm-arm/hardware/amba_kmi.h b/include/asm-arm/hardware/amba_kmi.h deleted file mode 100644 index a39e5be751b3..000000000000 --- a/include/asm-arm/hardware/amba_kmi.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * linux/include/asm-arm/hardware/amba_kmi.h - * - * Internal header file for AMBA KMI ports - * - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * - * --------------------------------------------------------------------------- - * From ARM PrimeCell(tm) PS2 Keyboard/Mouse Interface (PL050) Technical - * Reference Manual - ARM DDI 0143B - see http://www.arm.com/ - * --------------------------------------------------------------------------- - */ -#ifndef ASM_ARM_HARDWARE_AMBA_KMI_H -#define ASM_ARM_HARDWARE_AMBA_KMI_H - -/* - * KMI control register: - * KMICR_TYPE 0 = PS2/AT mode, 1 = No line control bit mode - * KMICR_RXINTREN 1 = enable RX interrupts - * KMICR_TXINTREN 1 = enable TX interrupts - * KMICR_EN 1 = enable KMI - * KMICR_FD 1 = force KMI data low - * KMICR_FC 1 = force KMI clock low - */ -#define KMICR (KMI_BASE + 0x00) -#define KMICR_TYPE (1 << 5) -#define KMICR_RXINTREN (1 << 4) -#define KMICR_TXINTREN (1 << 3) -#define KMICR_EN (1 << 2) -#define KMICR_FD (1 << 1) -#define KMICR_FC (1 << 0) - -/* - * KMI status register: - * KMISTAT_TXEMPTY 1 = transmitter register empty - * KMISTAT_TXBUSY 1 = currently sending data - * KMISTAT_RXFULL 1 = receiver register ready to be read - * KMISTAT_RXBUSY 1 = currently receiving data - * KMISTAT_RXPARITY parity of last databyte received - * KMISTAT_IC current level of KMI clock input - * KMISTAT_ID current level of KMI data input - */ -#define KMISTAT (KMI_BASE + 0x04) -#define KMISTAT_TXEMPTY (1 << 6) -#define KMISTAT_TXBUSY (1 << 5) -#define KMISTAT_RXFULL (1 << 4) -#define KMISTAT_RXBUSY (1 << 3) -#define KMISTAT_RXPARITY (1 << 2) -#define KMISTAT_IC (1 << 1) -#define KMISTAT_ID (1 << 0) - -/* - * KMI data register - */ -#define KMIDATA (KMI_BASE + 0x08) - -/* - * KMI clock divisor: to generate 8MHz internal clock - * div = (ref / 8MHz) - 1; 0 <= div <= 15 - */ -#define KMICLKDIV (KMI_BASE + 0x0c) - -/* - * KMI interrupt register: - * 
KMIIR_TXINTR 1 = transmit interrupt asserted - * KMIIR_RXINTR 1 = receive interrupt asserted - */ -#define KMIIR (KMI_BASE + 0x10) -#define KMIIR_TXINTR (1 << 1) -#define KMIIR_RXINTR (1 << 0) - -/* - * The size of the KMI primecell - */ -#define KMI_SIZE (0x100) - -#endif diff --git a/include/asm-arm/hardware/amba_serial.h b/include/asm-arm/hardware/amba_serial.h deleted file mode 100644 index dc726ffccebd..000000000000 --- a/include/asm-arm/hardware/amba_serial.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * linux/include/asm-arm/hardware/serial_amba.h - * - * Internal header file for AMBA serial ports - * - * Copyright (C) ARM Limited - * Copyright (C) 2000 Deep Blue Solutions Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#ifndef ASM_ARM_HARDWARE_SERIAL_AMBA_H -#define ASM_ARM_HARDWARE_SERIAL_AMBA_H - -/* ------------------------------------------------------------------------------- - * From AMBA UART (PL010) Block Specification - * ------------------------------------------------------------------------------- - * UART Register Offsets. - */ -#define UART01x_DR 0x00 /* Data read or written from the interface. */ -#define UART01x_RSR 0x04 /* Receive status register (Read). */ -#define UART01x_ECR 0x04 /* Error clear register (Write). 
*/ -#define UART010_LCRH 0x08 /* Line control register, high byte. */ -#define UART010_LCRM 0x0C /* Line control register, middle byte. */ -#define UART010_LCRL 0x10 /* Line control register, low byte. */ -#define UART010_CR 0x14 /* Control register. */ -#define UART01x_FR 0x18 /* Flag register (Read only). */ -#define UART010_IIR 0x1C /* Interrupt indentification register (Read). */ -#define UART010_ICR 0x1C /* Interrupt clear register (Write). */ -#define UART01x_ILPR 0x20 /* IrDA low power counter register. */ -#define UART011_IBRD 0x24 /* Integer baud rate divisor register. */ -#define UART011_FBRD 0x28 /* Fractional baud rate divisor register. */ -#define UART011_LCRH 0x2c /* Line control register. */ -#define UART011_CR 0x30 /* Control register. */ -#define UART011_IFLS 0x34 /* Interrupt fifo level select. */ -#define UART011_IMSC 0x38 /* Interrupt mask. */ -#define UART011_RIS 0x3c /* Raw interrupt status. */ -#define UART011_MIS 0x40 /* Masked interrupt status. */ -#define UART011_ICR 0x44 /* Interrupt clear register. */ -#define UART011_DMACR 0x48 /* DMA control register. 
*/ - -#define UART011_DR_OE (1 << 11) -#define UART011_DR_BE (1 << 10) -#define UART011_DR_PE (1 << 9) -#define UART011_DR_FE (1 << 8) - -#define UART01x_RSR_OE 0x08 -#define UART01x_RSR_BE 0x04 -#define UART01x_RSR_PE 0x02 -#define UART01x_RSR_FE 0x01 - -#define UART011_FR_RI 0x100 -#define UART011_FR_TXFE 0x080 -#define UART011_FR_RXFF 0x040 -#define UART01x_FR_TXFF 0x020 -#define UART01x_FR_RXFE 0x010 -#define UART01x_FR_BUSY 0x008 -#define UART01x_FR_DCD 0x004 -#define UART01x_FR_DSR 0x002 -#define UART01x_FR_CTS 0x001 -#define UART01x_FR_TMSK (UART01x_FR_TXFF + UART01x_FR_BUSY) - -#define UART011_CR_CTSEN 0x8000 /* CTS hardware flow control */ -#define UART011_CR_RTSEN 0x4000 /* RTS hardware flow control */ -#define UART011_CR_OUT2 0x2000 /* OUT2 */ -#define UART011_CR_OUT1 0x1000 /* OUT1 */ -#define UART011_CR_RTS 0x0800 /* RTS */ -#define UART011_CR_DTR 0x0400 /* DTR */ -#define UART011_CR_RXE 0x0200 /* receive enable */ -#define UART011_CR_TXE 0x0100 /* transmit enable */ -#define UART011_CR_LBE 0x0080 /* loopback enable */ -#define UART010_CR_RTIE 0x0040 -#define UART010_CR_TIE 0x0020 -#define UART010_CR_RIE 0x0010 -#define UART010_CR_MSIE 0x0008 -#define UART01x_CR_IIRLP 0x0004 /* SIR low power mode */ -#define UART01x_CR_SIREN 0x0002 /* SIR enable */ -#define UART01x_CR_UARTEN 0x0001 /* UART enable */ - -#define UART011_LCRH_SPS 0x80 -#define UART01x_LCRH_WLEN_8 0x60 -#define UART01x_LCRH_WLEN_7 0x40 -#define UART01x_LCRH_WLEN_6 0x20 -#define UART01x_LCRH_WLEN_5 0x00 -#define UART01x_LCRH_FEN 0x10 -#define UART01x_LCRH_STP2 0x08 -#define UART01x_LCRH_EPS 0x04 -#define UART01x_LCRH_PEN 0x02 -#define UART01x_LCRH_BRK 0x01 - -#define UART010_IIR_RTIS 0x08 -#define UART010_IIR_TIS 0x04 -#define UART010_IIR_RIS 0x02 -#define UART010_IIR_MIS 0x01 - -#define UART011_IFLS_RX1_8 (0 << 3) -#define UART011_IFLS_RX2_8 (1 << 3) -#define UART011_IFLS_RX4_8 (2 << 3) -#define UART011_IFLS_RX6_8 (3 << 3) -#define UART011_IFLS_RX7_8 (4 << 3) -#define UART011_IFLS_TX1_8 (0 
<< 0) -#define UART011_IFLS_TX2_8 (1 << 0) -#define UART011_IFLS_TX4_8 (2 << 0) -#define UART011_IFLS_TX6_8 (3 << 0) -#define UART011_IFLS_TX7_8 (4 << 0) - -#define UART011_OEIM (1 << 10) /* overrun error interrupt mask */ -#define UART011_BEIM (1 << 9) /* break error interrupt mask */ -#define UART011_PEIM (1 << 8) /* parity error interrupt mask */ -#define UART011_FEIM (1 << 7) /* framing error interrupt mask */ -#define UART011_RTIM (1 << 6) /* receive timeout interrupt mask */ -#define UART011_TXIM (1 << 5) /* transmit interrupt mask */ -#define UART011_RXIM (1 << 4) /* receive interrupt mask */ -#define UART011_DSRMIM (1 << 3) /* DSR interrupt mask */ -#define UART011_DCDMIM (1 << 2) /* DCD interrupt mask */ -#define UART011_CTSMIM (1 << 1) /* CTS interrupt mask */ -#define UART011_RIMIM (1 << 0) /* RI interrupt mask */ - -#define UART011_OEIS (1 << 10) /* overrun error interrupt status */ -#define UART011_BEIS (1 << 9) /* break error interrupt status */ -#define UART011_PEIS (1 << 8) /* parity error interrupt status */ -#define UART011_FEIS (1 << 7) /* framing error interrupt status */ -#define UART011_RTIS (1 << 6) /* receive timeout interrupt status */ -#define UART011_TXIS (1 << 5) /* transmit interrupt status */ -#define UART011_RXIS (1 << 4) /* receive interrupt status */ -#define UART011_DSRMIS (1 << 3) /* DSR interrupt status */ -#define UART011_DCDMIS (1 << 2) /* DCD interrupt status */ -#define UART011_CTSMIS (1 << 1) /* CTS interrupt status */ -#define UART011_RIMIS (1 << 0) /* RI interrupt status */ - -#define UART011_OEIC (1 << 10) /* overrun error interrupt clear */ -#define UART011_BEIC (1 << 9) /* break error interrupt clear */ -#define UART011_PEIC (1 << 8) /* parity error interrupt clear */ -#define UART011_FEIC (1 << 7) /* framing error interrupt clear */ -#define UART011_RTIC (1 << 6) /* receive timeout interrupt clear */ -#define UART011_TXIC (1 << 5) /* transmit interrupt clear */ -#define UART011_RXIC (1 << 4) /* receive interrupt clear 
*/ -#define UART011_DSRMIC (1 << 3) /* DSR interrupt clear */ -#define UART011_DCDMIC (1 << 2) /* DCD interrupt clear */ -#define UART011_CTSMIC (1 << 1) /* CTS interrupt clear */ -#define UART011_RIMIC (1 << 0) /* RI interrupt clear */ - -#define UART011_DMAONERR (1 << 2) /* disable dma on error */ -#define UART011_TXDMAE (1 << 1) /* enable transmit dma */ -#define UART011_RXDMAE (1 << 0) /* enable receive dma */ - -#define UART01x_RSR_ANY (UART01x_RSR_OE|UART01x_RSR_BE|UART01x_RSR_PE|UART01x_RSR_FE) -#define UART01x_FR_MODEM_ANY (UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS) - -#endif diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h new file mode 100644 index 000000000000..51e6e54b2aa1 --- /dev/null +++ b/include/linux/amba/bus.h @@ -0,0 +1,55 @@ +/* + * linux/include/asm-arm/hardware/amba.h + * + * Copyright (C) 2003 Deep Blue Solutions Ltd, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef ASMARM_AMBA_H +#define ASMARM_AMBA_H + +#define AMBA_NR_IRQS 2 + +struct amba_device { + struct device dev; + struct resource res; + u64 dma_mask; + unsigned int periphid; + unsigned int irq[AMBA_NR_IRQS]; +}; + +struct amba_id { + unsigned int id; + unsigned int mask; + void *data; +}; + +struct amba_driver { + struct device_driver drv; + int (*probe)(struct amba_device *, void *); + int (*remove)(struct amba_device *); + void (*shutdown)(struct amba_device *); + int (*suspend)(struct amba_device *, pm_message_t); + int (*resume)(struct amba_device *); + struct amba_id *id_table; +}; + +#define amba_get_drvdata(d) dev_get_drvdata(&d->dev) +#define amba_set_drvdata(d,p) dev_set_drvdata(&d->dev, p) + +int amba_driver_register(struct amba_driver *); +void amba_driver_unregister(struct amba_driver *); +int amba_device_register(struct amba_device *, struct resource *); +void amba_device_unregister(struct amba_device *); +struct amba_device *amba_find_device(const char *, struct device *, unsigned int, unsigned int); +int amba_request_regions(struct amba_device *, const char *); +void amba_release_regions(struct amba_device *); + +#define amba_config(d) (((d)->periphid >> 24) & 0xff) +#define amba_rev(d) (((d)->periphid >> 20) & 0x0f) +#define amba_manf(d) (((d)->periphid >> 12) & 0xff) +#define amba_part(d) ((d)->periphid & 0xfff) + +#endif diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h new file mode 100644 index 000000000000..6b8d73dc1ab0 --- /dev/null +++ b/include/linux/amba/clcd.h @@ -0,0 +1,271 @@ +/* + * linux/include/asm-arm/hardware/amba_clcd.h -- Integrator LCD panel. + * + * David A Rusling + * + * Copyright (C) 2001 ARM Limited + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. 
+ */ +#include +#include + +/* + * CLCD Controller Internal Register addresses + */ +#define CLCD_TIM0 0x00000000 +#define CLCD_TIM1 0x00000004 +#define CLCD_TIM2 0x00000008 +#define CLCD_TIM3 0x0000000c +#define CLCD_UBAS 0x00000010 +#define CLCD_LBAS 0x00000014 + +#if !defined(CONFIG_ARCH_VERSATILE) && !defined(CONFIG_ARCH_REALVIEW) +#define CLCD_IENB 0x00000018 +#define CLCD_CNTL 0x0000001c +#else +/* + * Someone rearranged these two registers on the Versatile + * platform... + */ +#define CLCD_IENB 0x0000001c +#define CLCD_CNTL 0x00000018 +#endif + +#define CLCD_STAT 0x00000020 +#define CLCD_INTR 0x00000024 +#define CLCD_UCUR 0x00000028 +#define CLCD_LCUR 0x0000002C +#define CLCD_PALL 0x00000200 +#define CLCD_PALETTE 0x00000200 + +#define TIM2_CLKSEL (1 << 5) +#define TIM2_IVS (1 << 11) +#define TIM2_IHS (1 << 12) +#define TIM2_IPC (1 << 13) +#define TIM2_IOE (1 << 14) +#define TIM2_BCD (1 << 26) + +#define CNTL_LCDEN (1 << 0) +#define CNTL_LCDBPP1 (0 << 1) +#define CNTL_LCDBPP2 (1 << 1) +#define CNTL_LCDBPP4 (2 << 1) +#define CNTL_LCDBPP8 (3 << 1) +#define CNTL_LCDBPP16 (4 << 1) +#define CNTL_LCDBPP24 (5 << 1) +#define CNTL_LCDBW (1 << 4) +#define CNTL_LCDTFT (1 << 5) +#define CNTL_LCDMONO8 (1 << 6) +#define CNTL_LCDDUAL (1 << 7) +#define CNTL_BGR (1 << 8) +#define CNTL_BEBO (1 << 9) +#define CNTL_BEPO (1 << 10) +#define CNTL_LCDPWR (1 << 11) +#define CNTL_LCDVCOMP(x) ((x) << 12) +#define CNTL_LDMAFIFOTIME (1 << 15) +#define CNTL_WATERMARK (1 << 16) + +struct clcd_panel { + struct fb_videomode mode; + signed short width; /* width in mm */ + signed short height; /* height in mm */ + u32 tim2; + u32 tim3; + u32 cntl; + unsigned int bpp:8, + fixedtimings:1, + grayscale:1; + unsigned int connector; +}; + +struct clcd_regs { + u32 tim0; + u32 tim1; + u32 tim2; + u32 tim3; + u32 cntl; + unsigned long pixclock; +}; + +struct clcd_fb; + +/* + * the board-type specific routines + */ +struct clcd_board { + const char *name; + + /* + * Optional. 
Check whether the var structure is acceptable + * for this display. + */ + int (*check)(struct clcd_fb *fb, struct fb_var_screeninfo *var); + + /* + * Compulsary. Decode fb->fb.var into regs->*. In the case of + * fixed timing, set regs->* to the register values required. + */ + void (*decode)(struct clcd_fb *fb, struct clcd_regs *regs); + + /* + * Optional. Disable any extra display hardware. + */ + void (*disable)(struct clcd_fb *); + + /* + * Optional. Enable any extra display hardware. + */ + void (*enable)(struct clcd_fb *); + + /* + * Setup platform specific parts of CLCD driver + */ + int (*setup)(struct clcd_fb *); + + /* + * mmap the framebuffer memory + */ + int (*mmap)(struct clcd_fb *, struct vm_area_struct *); + + /* + * Remove platform specific parts of CLCD driver + */ + void (*remove)(struct clcd_fb *); +}; + +struct amba_device; +struct clk; + +/* this data structure describes each frame buffer device we find */ +struct clcd_fb { + struct fb_info fb; + struct amba_device *dev; + struct clk *clk; + struct clcd_panel *panel; + struct clcd_board *board; + void *board_data; + void __iomem *regs; + u32 clcd_cntl; + u32 cmap[16]; +}; + +static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs) +{ + u32 val, cpl; + + /* + * Program the CLCD controller registers and start the CLCD + */ + val = ((fb->fb.var.xres / 16) - 1) << 2; + val |= (fb->fb.var.hsync_len - 1) << 8; + val |= (fb->fb.var.right_margin - 1) << 16; + val |= (fb->fb.var.left_margin - 1) << 24; + regs->tim0 = val; + + val = fb->fb.var.yres; + if (fb->panel->cntl & CNTL_LCDDUAL) + val /= 2; + val -= 1; + val |= (fb->fb.var.vsync_len - 1) << 10; + val |= fb->fb.var.lower_margin << 16; + val |= fb->fb.var.upper_margin << 24; + regs->tim1 = val; + + val = fb->panel->tim2; + val |= fb->fb.var.sync & FB_SYNC_HOR_HIGH_ACT ? 0 : TIM2_IHS; + val |= fb->fb.var.sync & FB_SYNC_VERT_HIGH_ACT ? 
0 : TIM2_IVS; + + cpl = fb->fb.var.xres_virtual; + if (fb->panel->cntl & CNTL_LCDTFT) /* TFT */ + /* / 1 */; + else if (!fb->fb.var.grayscale) /* STN color */ + cpl = cpl * 8 / 3; + else if (fb->panel->cntl & CNTL_LCDMONO8) /* STN monochrome, 8bit */ + cpl /= 8; + else /* STN monochrome, 4bit */ + cpl /= 4; + + regs->tim2 = val | ((cpl - 1) << 16); + + regs->tim3 = fb->panel->tim3; + + val = fb->panel->cntl; + if (fb->fb.var.grayscale) + val |= CNTL_LCDBW; + + switch (fb->fb.var.bits_per_pixel) { + case 1: + val |= CNTL_LCDBPP1; + break; + case 2: + val |= CNTL_LCDBPP2; + break; + case 4: + val |= CNTL_LCDBPP4; + break; + case 8: + val |= CNTL_LCDBPP8; + break; + case 16: + val |= CNTL_LCDBPP16; + break; + case 32: + val |= CNTL_LCDBPP24; + break; + } + + regs->cntl = val; + regs->pixclock = fb->fb.var.pixclock; +} + +static inline int clcdfb_check(struct clcd_fb *fb, struct fb_var_screeninfo *var) +{ + var->xres_virtual = var->xres = (var->xres + 15) & ~15; + var->yres_virtual = var->yres = (var->yres + 1) & ~1; + +#define CHECK(e,l,h) (var->e < l || var->e > h) + if (CHECK(right_margin, (5+1), 256) || /* back porch */ + CHECK(left_margin, (5+1), 256) || /* front porch */ + CHECK(hsync_len, (5+1), 256) || + var->xres > 4096 || + var->lower_margin > 255 || /* back porch */ + var->upper_margin > 255 || /* front porch */ + var->vsync_len > 32 || + var->yres > 1024) + return -EINVAL; +#undef CHECK + + /* single panel mode: PCD = max(PCD, 1) */ + /* dual panel mode: PCD = max(PCD, 5) */ + + /* + * You can't change the grayscale setting, and + * we can only do non-interlaced video. 
+ */ + if (var->grayscale != fb->fb.var.grayscale || + (var->vmode & FB_VMODE_MASK) != FB_VMODE_NONINTERLACED) + return -EINVAL; + +#define CHECK(e) (var->e != fb->fb.var.e) + if (fb->panel->fixedtimings && + (CHECK(xres) || + CHECK(yres) || + CHECK(bits_per_pixel) || + CHECK(pixclock) || + CHECK(left_margin) || + CHECK(right_margin) || + CHECK(upper_margin) || + CHECK(lower_margin) || + CHECK(hsync_len) || + CHECK(vsync_len) || + CHECK(sync))) + return -EINVAL; +#undef CHECK + + var->nonstd = 0; + var->accel_flags = 0; + + return 0; +} diff --git a/include/linux/amba/kmi.h b/include/linux/amba/kmi.h new file mode 100644 index 000000000000..a39e5be751b3 --- /dev/null +++ b/include/linux/amba/kmi.h @@ -0,0 +1,92 @@ +/* + * linux/include/asm-arm/hardware/amba_kmi.h + * + * Internal header file for AMBA KMI ports + * + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * --------------------------------------------------------------------------- + * From ARM PrimeCell(tm) PS2 Keyboard/Mouse Interface (PL050) Technical + * Reference Manual - ARM DDI 0143B - see http://www.arm.com/ + * --------------------------------------------------------------------------- + */ +#ifndef ASM_ARM_HARDWARE_AMBA_KMI_H +#define ASM_ARM_HARDWARE_AMBA_KMI_H + +/* + * KMI control register: + * KMICR_TYPE 0 = PS2/AT mode, 1 = No line control bit mode + * KMICR_RXINTREN 1 = enable RX interrupts + * KMICR_TXINTREN 1 = enable TX interrupts + * KMICR_EN 1 = enable KMI + * KMICR_FD 1 = force KMI data low + * KMICR_FC 1 = force KMI clock low + */ +#define KMICR (KMI_BASE + 0x00) +#define KMICR_TYPE (1 << 5) +#define KMICR_RXINTREN (1 << 4) +#define KMICR_TXINTREN (1 << 3) +#define KMICR_EN (1 << 2) +#define KMICR_FD (1 << 1) +#define KMICR_FC (1 << 0) + +/* + * KMI status register: + * KMISTAT_TXEMPTY 1 = transmitter register empty + * KMISTAT_TXBUSY 1 = currently sending data + * KMISTAT_RXFULL 1 = receiver register ready to be read + * KMISTAT_RXBUSY 1 = currently receiving data + * KMISTAT_RXPARITY parity of last databyte received + * KMISTAT_IC current level of KMI clock input + * KMISTAT_ID current level of KMI data input + */ +#define KMISTAT (KMI_BASE + 0x04) +#define KMISTAT_TXEMPTY (1 << 6) +#define KMISTAT_TXBUSY (1 << 5) +#define KMISTAT_RXFULL (1 << 4) +#define KMISTAT_RXBUSY (1 << 3) +#define KMISTAT_RXPARITY (1 << 2) +#define KMISTAT_IC (1 << 1) +#define KMISTAT_ID (1 << 0) + +/* + * KMI data register + */ +#define KMIDATA (KMI_BASE + 0x08) + +/* + * KMI clock divisor: to generate 8MHz internal clock + * div = (ref / 8MHz) - 1; 0 <= div <= 15 + */ +#define KMICLKDIV (KMI_BASE + 0x0c) + +/* + * KMI interrupt register: + * 
KMIIR_TXINTR 1 = transmit interrupt asserted + * KMIIR_RXINTR 1 = receive interrupt asserted + */ +#define KMIIR (KMI_BASE + 0x10) +#define KMIIR_TXINTR (1 << 1) +#define KMIIR_RXINTR (1 << 0) + +/* + * The size of the KMI primecell + */ +#define KMI_SIZE (0x100) + +#endif diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h new file mode 100644 index 000000000000..dc726ffccebd --- /dev/null +++ b/include/linux/amba/serial.h @@ -0,0 +1,161 @@ +/* + * linux/include/asm-arm/hardware/serial_amba.h + * + * Internal header file for AMBA serial ports + * + * Copyright (C) ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef ASM_ARM_HARDWARE_SERIAL_AMBA_H +#define ASM_ARM_HARDWARE_SERIAL_AMBA_H + +/* ------------------------------------------------------------------------------- + * From AMBA UART (PL010) Block Specification + * ------------------------------------------------------------------------------- + * UART Register Offsets. + */ +#define UART01x_DR 0x00 /* Data read or written from the interface. */ +#define UART01x_RSR 0x04 /* Receive status register (Read). */ +#define UART01x_ECR 0x04 /* Error clear register (Write). */ +#define UART010_LCRH 0x08 /* Line control register, high byte. 
*/ +#define UART010_LCRM 0x0C /* Line control register, middle byte. */ +#define UART010_LCRL 0x10 /* Line control register, low byte. */ +#define UART010_CR 0x14 /* Control register. */ +#define UART01x_FR 0x18 /* Flag register (Read only). */ +#define UART010_IIR 0x1C /* Interrupt identification register (Read). */ +#define UART010_ICR 0x1C /* Interrupt clear register (Write). */ +#define UART01x_ILPR 0x20 /* IrDA low power counter register. */ +#define UART011_IBRD 0x24 /* Integer baud rate divisor register. */ +#define UART011_FBRD 0x28 /* Fractional baud rate divisor register. */ +#define UART011_LCRH 0x2c /* Line control register. */ +#define UART011_CR 0x30 /* Control register. */ +#define UART011_IFLS 0x34 /* Interrupt fifo level select. */ +#define UART011_IMSC 0x38 /* Interrupt mask. */ +#define UART011_RIS 0x3c /* Raw interrupt status. */ +#define UART011_MIS 0x40 /* Masked interrupt status. */ +#define UART011_ICR 0x44 /* Interrupt clear register. */ +#define UART011_DMACR 0x48 /* DMA control register. 
*/ + +#define UART011_DR_OE (1 << 11) +#define UART011_DR_BE (1 << 10) +#define UART011_DR_PE (1 << 9) +#define UART011_DR_FE (1 << 8) + +#define UART01x_RSR_OE 0x08 +#define UART01x_RSR_BE 0x04 +#define UART01x_RSR_PE 0x02 +#define UART01x_RSR_FE 0x01 + +#define UART011_FR_RI 0x100 +#define UART011_FR_TXFE 0x080 +#define UART011_FR_RXFF 0x040 +#define UART01x_FR_TXFF 0x020 +#define UART01x_FR_RXFE 0x010 +#define UART01x_FR_BUSY 0x008 +#define UART01x_FR_DCD 0x004 +#define UART01x_FR_DSR 0x002 +#define UART01x_FR_CTS 0x001 +#define UART01x_FR_TMSK (UART01x_FR_TXFF + UART01x_FR_BUSY) + +#define UART011_CR_CTSEN 0x8000 /* CTS hardware flow control */ +#define UART011_CR_RTSEN 0x4000 /* RTS hardware flow control */ +#define UART011_CR_OUT2 0x2000 /* OUT2 */ +#define UART011_CR_OUT1 0x1000 /* OUT1 */ +#define UART011_CR_RTS 0x0800 /* RTS */ +#define UART011_CR_DTR 0x0400 /* DTR */ +#define UART011_CR_RXE 0x0200 /* receive enable */ +#define UART011_CR_TXE 0x0100 /* transmit enable */ +#define UART011_CR_LBE 0x0080 /* loopback enable */ +#define UART010_CR_RTIE 0x0040 +#define UART010_CR_TIE 0x0020 +#define UART010_CR_RIE 0x0010 +#define UART010_CR_MSIE 0x0008 +#define UART01x_CR_IIRLP 0x0004 /* SIR low power mode */ +#define UART01x_CR_SIREN 0x0002 /* SIR enable */ +#define UART01x_CR_UARTEN 0x0001 /* UART enable */ + +#define UART011_LCRH_SPS 0x80 +#define UART01x_LCRH_WLEN_8 0x60 +#define UART01x_LCRH_WLEN_7 0x40 +#define UART01x_LCRH_WLEN_6 0x20 +#define UART01x_LCRH_WLEN_5 0x00 +#define UART01x_LCRH_FEN 0x10 +#define UART01x_LCRH_STP2 0x08 +#define UART01x_LCRH_EPS 0x04 +#define UART01x_LCRH_PEN 0x02 +#define UART01x_LCRH_BRK 0x01 + +#define UART010_IIR_RTIS 0x08 +#define UART010_IIR_TIS 0x04 +#define UART010_IIR_RIS 0x02 +#define UART010_IIR_MIS 0x01 + +#define UART011_IFLS_RX1_8 (0 << 3) +#define UART011_IFLS_RX2_8 (1 << 3) +#define UART011_IFLS_RX4_8 (2 << 3) +#define UART011_IFLS_RX6_8 (3 << 3) +#define UART011_IFLS_RX7_8 (4 << 3) +#define UART011_IFLS_TX1_8 (0 
<< 0) +#define UART011_IFLS_TX2_8 (1 << 0) +#define UART011_IFLS_TX4_8 (2 << 0) +#define UART011_IFLS_TX6_8 (3 << 0) +#define UART011_IFLS_TX7_8 (4 << 0) + +#define UART011_OEIM (1 << 10) /* overrun error interrupt mask */ +#define UART011_BEIM (1 << 9) /* break error interrupt mask */ +#define UART011_PEIM (1 << 8) /* parity error interrupt mask */ +#define UART011_FEIM (1 << 7) /* framing error interrupt mask */ +#define UART011_RTIM (1 << 6) /* receive timeout interrupt mask */ +#define UART011_TXIM (1 << 5) /* transmit interrupt mask */ +#define UART011_RXIM (1 << 4) /* receive interrupt mask */ +#define UART011_DSRMIM (1 << 3) /* DSR interrupt mask */ +#define UART011_DCDMIM (1 << 2) /* DCD interrupt mask */ +#define UART011_CTSMIM (1 << 1) /* CTS interrupt mask */ +#define UART011_RIMIM (1 << 0) /* RI interrupt mask */ + +#define UART011_OEIS (1 << 10) /* overrun error interrupt status */ +#define UART011_BEIS (1 << 9) /* break error interrupt status */ +#define UART011_PEIS (1 << 8) /* parity error interrupt status */ +#define UART011_FEIS (1 << 7) /* framing error interrupt status */ +#define UART011_RTIS (1 << 6) /* receive timeout interrupt status */ +#define UART011_TXIS (1 << 5) /* transmit interrupt status */ +#define UART011_RXIS (1 << 4) /* receive interrupt status */ +#define UART011_DSRMIS (1 << 3) /* DSR interrupt status */ +#define UART011_DCDMIS (1 << 2) /* DCD interrupt status */ +#define UART011_CTSMIS (1 << 1) /* CTS interrupt status */ +#define UART011_RIMIS (1 << 0) /* RI interrupt status */ + +#define UART011_OEIC (1 << 10) /* overrun error interrupt clear */ +#define UART011_BEIC (1 << 9) /* break error interrupt clear */ +#define UART011_PEIC (1 << 8) /* parity error interrupt clear */ +#define UART011_FEIC (1 << 7) /* framing error interrupt clear */ +#define UART011_RTIC (1 << 6) /* receive timeout interrupt clear */ +#define UART011_TXIC (1 << 5) /* transmit interrupt clear */ +#define UART011_RXIC (1 << 4) /* receive interrupt clear 
*/ +#define UART011_DSRMIC (1 << 3) /* DSR interrupt clear */ +#define UART011_DCDMIC (1 << 2) /* DCD interrupt clear */ +#define UART011_CTSMIC (1 << 1) /* CTS interrupt clear */ +#define UART011_RIMIC (1 << 0) /* RI interrupt clear */ + +#define UART011_DMAONERR (1 << 2) /* disable dma on error */ +#define UART011_TXDMAE (1 << 1) /* enable transmit dma */ +#define UART011_RXDMAE (1 << 0) /* enable receive dma */ + +#define UART01x_RSR_ANY (UART01x_RSR_OE|UART01x_RSR_BE|UART01x_RSR_PE|UART01x_RSR_FE) +#define UART01x_FR_MODEM_ANY (UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS) + +#endif diff --git a/sound/arm/aaci.c b/sound/arm/aaci.c index 559ead6367da..5b6cae50d0d5 100644 --- a/sound/arm/aaci.c +++ b/sound/arm/aaci.c @@ -17,11 +17,11 @@ #include #include #include +#include #include #include #include -#include #include #include -- cgit v1.2.3-71-gd317 From f8ce25476d5f12ffa29b885e49c38cd95053437e Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 7 Jan 2006 16:15:52 +0000 Subject: [ARM] Move asm/hardware/clock.h to linux/clk.h This needs to be visible to other architectures using the AMBA bus and peripherals. 
Signed-off-by: Russell King --- arch/arm/mach-aaec2000/clock.c | 2 +- arch/arm/mach-integrator/clock.c | 2 +- arch/arm/mach-omap1/board-palmte.c | 2 +- arch/arm/mach-omap1/clock.c | 2 +- arch/arm/mach-omap1/serial.c | 2 +- arch/arm/mach-omap2/clock.c | 2 +- arch/arm/mach-omap2/serial.c | 2 +- arch/arm/mach-omap2/timer-gp.c | 3 +- arch/arm/mach-realview/clock.c | 2 +- arch/arm/mach-s3c2410/clock.c | 3 +- arch/arm/mach-s3c2410/s3c2440-clock.c | 3 +- arch/arm/mach-s3c2410/s3c2440.c | 2 +- arch/arm/mach-s3c2410/time.c | 2 +- arch/arm/mach-versatile/clock.c | 2 +- arch/arm/plat-omap/clock.c | 2 +- arch/arm/plat-omap/common.c | 2 +- arch/arm/plat-omap/cpu-omap.c | 3 +- arch/arm/plat-omap/gpio.c | 2 +- arch/arm/plat-omap/mcbsp.c | 3 +- arch/arm/plat-omap/ocpi.c | 2 +- drivers/char/s3c2410-rtc.c | 2 +- drivers/char/watchdog/s3c2410_wdt.c | 2 +- drivers/i2c/busses/i2c-s3c2410.c | 2 +- drivers/input/serio/ambakmi.c | 2 +- drivers/mmc/mmci.c | 2 +- drivers/mtd/nand/s3c2410.c | 2 +- drivers/serial/amba-pl011.c | 2 +- drivers/serial/s3c2410.c | 2 +- drivers/usb/host/ohci-omap.c | 2 +- drivers/usb/host/ohci-s3c2410.c | 2 +- drivers/video/amba-clcd.c | 2 +- drivers/video/s3c2410fb.c | 2 +- include/asm-arm/arch-omap/system.h | 3 +- include/asm-arm/hardware/clock.h | 124 ---------------------------------- include/linux/clk.h | 124 ++++++++++++++++++++++++++++++++++ 35 files changed, 159 insertions(+), 161 deletions(-) delete mode 100644 include/asm-arm/hardware/clock.h create mode 100644 include/linux/clk.h (limited to 'include/linux') diff --git a/arch/arm/mach-aaec2000/clock.c b/arch/arm/mach-aaec2000/clock.c index 828208348b76..1c84c60941e1 100644 --- a/arch/arm/mach-aaec2000/clock.c +++ b/arch/arm/mach-aaec2000/clock.c @@ -15,9 +15,9 @@ #include #include #include +#include #include -#include #include "clock.h" diff --git a/arch/arm/mach-integrator/clock.c b/arch/arm/mach-integrator/clock.c index bbfe46cd91fe..40684e01e865 100644 --- a/arch/arm/mach-integrator/clock.c +++ 
b/arch/arm/mach-integrator/clock.c @@ -14,9 +14,9 @@ #include #include #include +#include #include -#include #include #include "clock.h" diff --git a/arch/arm/mach-omap1/board-palmte.c b/arch/arm/mach-omap1/board-palmte.c index 540b20d78cca..5c975eb5c34b 100644 --- a/arch/arm/mach-omap1/board-palmte.c +++ b/arch/arm/mach-omap1/board-palmte.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -30,7 +31,6 @@ #include #include #include -#include static void __init omap_generic_init_irq(void) { diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c index 4277eee44ed5..9d862f86bba6 100644 --- a/arch/arm/mach-omap1/clock.c +++ b/arch/arm/mach-omap1/clock.c @@ -16,9 +16,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/arch/arm/mach-omap1/serial.c b/arch/arm/mach-omap1/serial.c index 6810cfb84462..fcfb81d13cfe 100644 --- a/arch/arm/mach-omap1/serial.c +++ b/arch/arm/mach-omap1/serial.c @@ -17,10 +17,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index 85818d9f2635..5407b9549150 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -22,10 +22,10 @@ #include #include #include +#include #include -#include #include #include #include diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index f4df04fe1dd8..e1bd46a96e11 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -16,9 +16,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c index 9ec11443200f..23d36b1c40fe 100644 --- a/arch/arm/mach-omap2/timer-gp.c +++ b/arch/arm/mach-omap2/timer-gp.c @@ -21,10 +21,11 @@ #include #include #include +#include + #include #include #include -#include #define OMAP2_GP_TIMER1_BASE 0x48028000 #define OMAP2_GP_TIMER2_BASE 
0x4802a000 diff --git a/arch/arm/mach-realview/clock.c b/arch/arm/mach-realview/clock.c index 331e1b483aa7..ec3f7e798623 100644 --- a/arch/arm/mach-realview/clock.c +++ b/arch/arm/mach-realview/clock.c @@ -13,9 +13,9 @@ #include #include #include +#include #include -#include #include #include "clock.h" diff --git a/arch/arm/mach-s3c2410/clock.c b/arch/arm/mach-s3c2410/clock.c index 77f321fac281..5830ae3ddd19 100644 --- a/arch/arm/mach-s3c2410/clock.c +++ b/arch/arm/mach-s3c2410/clock.c @@ -34,16 +34,15 @@ #include #include #include - #include #include +#include #include #include #include #include -#include #include #include "clock.h" diff --git a/arch/arm/mach-s3c2410/s3c2440-clock.c b/arch/arm/mach-s3c2410/s3c2440-clock.c index c67e0979aec3..b557a2be8a01 100644 --- a/arch/arm/mach-s3c2410/s3c2440-clock.c +++ b/arch/arm/mach-s3c2410/s3c2440-clock.c @@ -29,16 +29,15 @@ #include #include #include - #include #include +#include #include #include #include #include -#include #include #include "clock.h" diff --git a/arch/arm/mach-s3c2410/s3c2440.c b/arch/arm/mach-s3c2410/s3c2440.c index 4d63e7133b48..b7fe6d9453fb 100644 --- a/arch/arm/mach-s3c2410/s3c2440.c +++ b/arch/arm/mach-s3c2410/s3c2440.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -36,7 +37,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mach-s3c2410/time.c b/arch/arm/mach-s3c2410/time.c index 9acda44b25a6..10a2976aefdd 100644 --- a/arch/arm/mach-s3c2410/time.c +++ b/arch/arm/mach-s3c2410/time.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -35,7 +36,6 @@ #include #include #include -#include #include "clock.h" #include "cpu.h" diff --git a/arch/arm/mach-versatile/clock.c b/arch/arm/mach-versatile/clock.c index ada3142da8dc..dcf10014f5cd 100644 --- a/arch/arm/mach-versatile/clock.c +++ b/arch/arm/mach-versatile/clock.c @@ -14,9 +14,9 @@ #include #include #include +#include #include -#include #include #include "clock.h" 
diff --git a/arch/arm/plat-omap/clock.c b/arch/arm/plat-omap/clock.c index 7ce39b986e23..84fd65656fcf 100644 --- a/arch/arm/plat-omap/clock.c +++ b/arch/arm/plat-omap/clock.c @@ -19,10 +19,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c index ccdb452630cf..adffc5a859ee 100644 --- a/arch/arm/plat-omap/common.c +++ b/arch/arm/plat-omap/common.c @@ -18,12 +18,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include diff --git a/arch/arm/plat-omap/cpu-omap.c b/arch/arm/plat-omap/cpu-omap.c index fd894bb00107..98edc9fdd6d1 100644 --- a/arch/arm/plat-omap/cpu-omap.c +++ b/arch/arm/plat-omap/cpu-omap.c @@ -19,13 +19,12 @@ #include #include #include +#include #include #include #include -#include - /* TODO: Add support for SDRAM timing changes */ int omap_verify_speed(struct cpufreq_policy *policy) diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c index 76f721d85137..ca3681a824ac 100644 --- a/arch/arm/plat-omap/gpio.c +++ b/arch/arm/plat-omap/gpio.c @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include #include #include #include diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index ea9475c86656..be0e0f32a598 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -30,8 +31,6 @@ #include #include -#include - #ifdef CONFIG_MCBSP_DEBUG #define DBG(x...) 
printk(x) #else diff --git a/arch/arm/plat-omap/ocpi.c b/arch/arm/plat-omap/ocpi.c index b86148227480..e40fcc8b43d4 100644 --- a/arch/arm/plat-omap/ocpi.c +++ b/arch/arm/plat-omap/ocpi.c @@ -31,9 +31,9 @@ #include #include #include +#include #include -#include #include #define OCPI_BASE 0xfffec320 diff --git a/drivers/char/s3c2410-rtc.c b/drivers/char/s3c2410-rtc.c index 3df7a574267b..2e308657f6f6 100644 --- a/drivers/char/s3c2410-rtc.c +++ b/drivers/char/s3c2410-rtc.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -33,7 +34,6 @@ #include -#include #include /* need this for the RTC_AF definitions */ diff --git a/drivers/char/watchdog/s3c2410_wdt.c b/drivers/char/watchdog/s3c2410_wdt.c index 621e8a99e733..9dc54736e4eb 100644 --- a/drivers/char/watchdog/s3c2410_wdt.c +++ b/drivers/char/watchdog/s3c2410_wdt.c @@ -46,12 +46,12 @@ #include #include #include +#include #include #include #include -#include #undef S3C24XX_VA_WATCHDOG #define S3C24XX_VA_WATCHDOG (0) diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index 2a2f86d8c2d8..f7d40f8e5f5c 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -34,12 +34,12 @@ #include #include #include +#include #include #include #include -#include #include #include #include diff --git a/drivers/input/serio/ambakmi.c b/drivers/input/serio/ambakmi.c index cbab5d26377b..3df5eedf8f31 100644 --- a/drivers/input/serio/ambakmi.c +++ b/drivers/input/serio/ambakmi.c @@ -21,10 +21,10 @@ #include #include #include +#include #include #include -#include #define KMI_BASE (kmi->base) diff --git a/drivers/mmc/mmci.c b/drivers/mmc/mmci.c index 57375bc12372..a0cd916ab792 100644 --- a/drivers/mmc/mmci.c +++ b/drivers/mmc/mmci.c @@ -20,12 +20,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include "mmci.h" diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c index b796a9a6b924..5b55599739f3 
100644 --- a/drivers/mtd/nand/s3c2410.c +++ b/drivers/mtd/nand/s3c2410.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -60,7 +61,6 @@ #include #include -#include #include #include diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c index 4ae4dff59795..129670556162 100644 --- a/drivers/serial/amba-pl011.c +++ b/drivers/serial/amba-pl011.c @@ -49,10 +49,10 @@ #include #include #include +#include #include #include -#include #define UART_NR 14 diff --git a/drivers/serial/s3c2410.c b/drivers/serial/s3c2410.c index eb47f5b71aeb..fe83ce6fef52 100644 --- a/drivers/serial/s3c2410.c +++ b/drivers/serial/s3c2410.c @@ -72,12 +72,12 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/drivers/usb/host/ohci-omap.c b/drivers/usb/host/ohci-omap.c index c9e29d808711..3785b3f7df1b 100644 --- a/drivers/usb/host/ohci-omap.c +++ b/drivers/usb/host/ohci-omap.c @@ -17,6 +17,7 @@ #include /* SA_INTERRUPT */ #include #include +#include #include #include @@ -27,7 +28,6 @@ #include #include #include -#include /* OMAP-1510 OHCI has its own MMU for DMA */ diff --git a/drivers/usb/host/ohci-s3c2410.c b/drivers/usb/host/ohci-s3c2410.c index add198a4be79..372527a83593 100644 --- a/drivers/usb/host/ohci-s3c2410.c +++ b/drivers/usb/host/ohci-s3c2410.c @@ -20,9 +20,9 @@ */ #include +#include #include -#include #include #define valid_port(idx) ((idx) == 1 || (idx) == 2) diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index 3358a1429651..0da4083ba908 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -23,9 +23,9 @@ #include #include #include +#include #include -#include #define to_clcd(info) container_of(info, struct clcd_fb, fb) diff --git a/drivers/video/s3c2410fb.c b/drivers/video/s3c2410fb.c index d9c08cc7ac44..fe99d17a21d7 100644 --- a/drivers/video/s3c2410fb.c +++ b/drivers/video/s3c2410fb.c @@ -87,6 +87,7 @@ #include #include #include +#include 
#include #include @@ -96,7 +97,6 @@ #include #include #include -#include #ifdef CONFIG_PM #include diff --git a/include/asm-arm/arch-omap/system.h b/include/asm-arm/arch-omap/system.h index 9af415d2944a..6724a81bd10b 100644 --- a/include/asm-arm/arch-omap/system.h +++ b/include/asm-arm/arch-omap/system.h @@ -5,8 +5,9 @@ #ifndef __ASM_ARCH_SYSTEM_H #define __ASM_ARCH_SYSTEM_H #include +#include + #include -#include #include #include diff --git a/include/asm-arm/hardware/clock.h b/include/asm-arm/hardware/clock.h deleted file mode 100644 index 69f33215e437..000000000000 --- a/include/asm-arm/hardware/clock.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * linux/include/asm-arm/hardware/clock.h - * - * Copyright (C) 2004 ARM Limited. - * Written by Deep Blue Solutions Limited. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef ASMARM_CLOCK_H -#define ASMARM_CLOCK_H - -struct device; - -/* - * The base API. - */ - - -/* - * struct clk - an machine class defined object / cookie. - */ -struct clk; - -/** - * clk_get - lookup and obtain a reference to a clock producer. - * @dev: device for clock "consumer" - * @id: clock comsumer ID - * - * Returns a struct clk corresponding to the clock producer, or - * valid IS_ERR() condition containing errno. The implementation - * uses @dev and @id to determine the clock consumer, and thereby - * the clock producer. (IOW, @id may be identical strings, but - * clk_get may return different clock producers depending on @dev.) - * - * Drivers must assume that the clock source is not enabled. - */ -struct clk *clk_get(struct device *dev, const char *id); - -/** - * clk_enable - inform the system when the clock source should be running. - * @clk: clock source - * - * If the clock can not be enabled/disabled, this should return success. - * - * Returns success (0) or negative errno. 
- */ -int clk_enable(struct clk *clk); - -/** - * clk_disable - inform the system when the clock source is no longer required. - * @clk: clock source - * - * Inform the system that a clock source is no longer required by - * a driver and may be shut down. - * - * Implementation detail: if the clock source is shared between - * multiple drivers, clk_enable() calls must be balanced by the - * same number of clk_disable() calls for the clock source to be - * disabled. - */ -void clk_disable(struct clk *clk); - -/** - * clk_get_rate - obtain the current clock rate (in Hz) for a clock source. - * This is only valid once the clock source has been enabled. - * @clk: clock source - */ -unsigned long clk_get_rate(struct clk *clk); - -/** - * clk_put - "free" the clock source - * @clk: clock source - * - * Note: drivers must ensure that all clk_enable calls made on this - * clock source are balanced by clk_disable calls prior to calling - * this function. - */ -void clk_put(struct clk *clk); - - -/* - * The remaining APIs are optional for machine class support. - */ - - -/** - * clk_round_rate - adjust a rate to the exact rate a clock can provide - * @clk: clock source - * @rate: desired clock rate in Hz - * - * Returns rounded clock rate in Hz, or negative errno. - */ -long clk_round_rate(struct clk *clk, unsigned long rate); - -/** - * clk_set_rate - set the clock rate for a clock source - * @clk: clock source - * @rate: desired clock rate in Hz - * - * Returns success (0) or negative errno. - */ -int clk_set_rate(struct clk *clk, unsigned long rate); - -/** - * clk_set_parent - set the parent clock source for this clock - * @clk: clock source - * @parent: parent clock source - * - * Returns success (0) or negative errno. 
- */ -int clk_set_parent(struct clk *clk, struct clk *parent); - -/** - * clk_get_parent - get the parent clock source for this clock - * @clk: clock source - * - * Returns struct clk corresponding to parent clock source, or - * valid IS_ERR() condition containing errno. - */ -struct clk *clk_get_parent(struct clk *clk); - -#endif diff --git a/include/linux/clk.h b/include/linux/clk.h new file mode 100644 index 000000000000..12848f81bb37 --- /dev/null +++ b/include/linux/clk.h @@ -0,0 +1,124 @@ +/* + * linux/include/linux/clk.h + * + * Copyright (C) 2004 ARM Limited. + * Written by Deep Blue Solutions Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef ASMARM_CLOCK_H +#define ASMARM_CLOCK_H + +struct device; + +/* + * The base API. + */ + + +/* + * struct clk - a machine class defined object / cookie. + */ +struct clk; + +/** + * clk_get - lookup and obtain a reference to a clock producer. + * @dev: device for clock "consumer" + * @id: clock consumer ID + * + * Returns a struct clk corresponding to the clock producer, or + * valid IS_ERR() condition containing errno. The implementation + * uses @dev and @id to determine the clock consumer, and thereby + * the clock producer. (IOW, @id may be identical strings, but + * clk_get may return different clock producers depending on @dev.) + * + * Drivers must assume that the clock source is not enabled. + */ +struct clk *clk_get(struct device *dev, const char *id); + +/** + * clk_enable - inform the system when the clock source should be running. + * @clk: clock source + * + * If the clock can not be enabled/disabled, this should return success. + * + * Returns success (0) or negative errno. + */ +int clk_enable(struct clk *clk); + +/** + * clk_disable - inform the system when the clock source is no longer required. 
+ * @clk: clock source + * + * Inform the system that a clock source is no longer required by + * a driver and may be shut down. + * + * Implementation detail: if the clock source is shared between + * multiple drivers, clk_enable() calls must be balanced by the + * same number of clk_disable() calls for the clock source to be + * disabled. + */ +void clk_disable(struct clk *clk); + +/** + * clk_get_rate - obtain the current clock rate (in Hz) for a clock source. + * This is only valid once the clock source has been enabled. + * @clk: clock source + */ +unsigned long clk_get_rate(struct clk *clk); + +/** + * clk_put - "free" the clock source + * @clk: clock source + * + * Note: drivers must ensure that all clk_enable calls made on this + * clock source are balanced by clk_disable calls prior to calling + * this function. + */ +void clk_put(struct clk *clk); + + +/* + * The remaining APIs are optional for machine class support. + */ + + +/** + * clk_round_rate - adjust a rate to the exact rate a clock can provide + * @clk: clock source + * @rate: desired clock rate in Hz + * + * Returns rounded clock rate in Hz, or negative errno. + */ +long clk_round_rate(struct clk *clk, unsigned long rate); + +/** + * clk_set_rate - set the clock rate for a clock source + * @clk: clock source + * @rate: desired clock rate in Hz + * + * Returns success (0) or negative errno. + */ +int clk_set_rate(struct clk *clk, unsigned long rate); + +/** + * clk_set_parent - set the parent clock source for this clock + * @clk: clock source + * @parent: parent clock source + * + * Returns success (0) or negative errno. + */ +int clk_set_parent(struct clk *clk, struct clk *parent); + +/** + * clk_get_parent - get the parent clock source for this clock + * @clk: clock source + * + * Returns struct clk corresponding to parent clock source, or + * valid IS_ERR() condition containing errno. 
+ */ +struct clk *clk_get_parent(struct clk *clk); + +#endif -- cgit v1.2.3-71-gd317 From 16a6677fdf1d1194f688f8291b06fbaff248c353 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 6 Jan 2006 23:01:48 -0800 Subject: [XFRM]: Netfilter IPsec output hooks Call netfilter hooks before IPsec transforms. Packets visit the FORWARD/LOCAL_OUT and POST_ROUTING hook before the first encapsulation and the LOCAL_OUT and POST_ROUTING hook before each following tunnel mode transform. Patch from Herbert Xu : Move the loop from dst_output into xfrm4_output/xfrm6_output since they're the only ones who need to it. xfrm{4,6}_output_one() processes the first SA all subsequent transport mode SAs and is called in a loop that calls the netfilter hooks between each two calls. In order to avoid the tail call issue, I've added the inline function nf_hook which is nf_hook_slow plus the empty list check. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 61 +++++++++++++++++++++++--------------- include/net/dst.h | 11 +------ net/ipv4/xfrm4_output.c | 71 +++++++++++++++++++++++++++++++++----------- net/ipv6/xfrm6_output.c | 75 +++++++++++++++++++++++++++++++++++------------ 4 files changed, 148 insertions(+), 70 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index be365e70ee99..79bb977afeac 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -168,6 +168,37 @@ void nf_log_packet(int pf, const struct net_device *out, struct nf_loginfo *li, const char *fmt, ...); + +int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, + struct net_device *indev, struct net_device *outdev, + int (*okfn)(struct sk_buff *), int thresh); + +/** + * nf_hook_thresh - call a netfilter hook + * + * Returns 1 if the hook has allowed the packet to pass. The function + * okfn must be invoked by the caller in this case. 
Any other return + * value indicates the packet has been consumed by the hook. + */ +static inline int nf_hook_thresh(int pf, unsigned int hook, + struct sk_buff **pskb, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), int thresh) +{ +#ifndef CONFIG_NETFILTER_DEBUG + if (list_empty(&nf_hooks[pf][hook])) + return 1; +#endif + return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh); +} + +static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, + struct net_device *indev, struct net_device *outdev, + int (*okfn)(struct sk_buff *)) +{ + return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN); +} /* Activate hook; either okfn or kfree_skb called, unless a hook returns NF_STOLEN (in which case, it's up to the hook to deal with @@ -188,35 +219,17 @@ void nf_log_packet(int pf, /* This is gross, but inline doesn't cut it for avoiding the function call in fast path: gcc doesn't inline (needs value tracking?). --RR */ -#ifdef CONFIG_NETFILTER_DEBUG -#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \ -({int __ret; \ -if ((__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, INT_MIN)) == 1) \ - __ret = (okfn)(skb); \ -__ret;}) -#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ -({int __ret; \ -if ((__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1) \ - __ret = (okfn)(skb); \ -__ret;}) -#else -#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \ -({int __ret; \ -if (list_empty(&nf_hooks[pf][hook]) || \ - (__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, INT_MIN)) == 1) \ - __ret = (okfn)(skb); \ -__ret;}) + +/* HX: It's slightly less gross now. 
*/ + #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ ({int __ret; \ -if (list_empty(&nf_hooks[pf][hook]) || \ - (__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1) \ +if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1)\ __ret = (okfn)(skb); \ __ret;}) -#endif -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh); +#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \ + NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN) /* Call setsockopt() */ int nf_setsockopt(struct sock *sk, int pf, int optval, char __user *opt, diff --git a/include/net/dst.h b/include/net/dst.h index bee8b84d329d..5161e89017f9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -225,16 +225,7 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout) /* Output packet to network from transport. */ static inline int dst_output(struct sk_buff *skb) { - int err; - - for (;;) { - err = skb->dst->output(skb); - - if (likely(err == 0)) - return err; - if (unlikely(err != NET_XMIT_BYPASS)) - return err; - } + return skb->dst->output(skb); } /* Input packet from network to transport. */ diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 66620a95942a..51fabb8f7c54 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -8,8 +8,10 @@ * 2 of the License, or (at your option) any later version. 
*/ +#include #include #include +#include #include #include #include @@ -95,7 +97,7 @@ out: return ret; } -int xfrm4_output(struct sk_buff *skb) +static int xfrm4_output_one(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; @@ -113,27 +115,32 @@ int xfrm4_output(struct sk_buff *skb) goto error_nolock; } - spin_lock_bh(&x->lock); - err = xfrm_state_check(x, skb); - if (err) - goto error; + do { + spin_lock_bh(&x->lock); + err = xfrm_state_check(x, skb); + if (err) + goto error; - xfrm4_encap(skb); + xfrm4_encap(skb); - err = x->type->output(x, skb); - if (err) - goto error; + err = x->type->output(x, skb); + if (err) + goto error; - x->curlft.bytes += skb->len; - x->curlft.packets++; + x->curlft.bytes += skb->len; + x->curlft.packets++; - spin_unlock_bh(&x->lock); + spin_unlock_bh(&x->lock); - if (!(skb->dst = dst_pop(dst))) { - err = -EHOSTUNREACH; - goto error_nolock; - } - err = NET_XMIT_BYPASS; + if (!(skb->dst = dst_pop(dst))) { + err = -EHOSTUNREACH; + goto error_nolock; + } + dst = skb->dst; + x = dst->xfrm; + } while (x && !x->props.mode); + + err = 0; out_exit: return err; @@ -143,3 +150,33 @@ error_nolock: kfree_skb(skb); goto out_exit; } + +static int xfrm4_output_finish(struct sk_buff *skb) +{ + int err; + + while (likely((err = xfrm4_output_one(skb)) == 0)) { + nf_reset(skb); + + err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL, + skb->dst->dev, dst_output); + if (unlikely(err != 1)) + break; + + if (!skb->dst->xfrm) + return dst_output(skb); + + err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL, + skb->dst->dev, xfrm4_output_finish); + if (unlikely(err != 1)) + break; + } + + return err; +} + +int xfrm4_output(struct sk_buff *skb) +{ + return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, + xfrm4_output_finish); +} diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6b9867717d11..fc0ea38953c4 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -9,9 +9,11 @@ 
* 2 of the License, or (at your option) any later version. */ +#include #include #include #include +#include #include #include #include @@ -92,7 +94,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) return ret; } -int xfrm6_output(struct sk_buff *skb) +static int xfrm6_output_one(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; @@ -110,29 +112,34 @@ int xfrm6_output(struct sk_buff *skb) goto error_nolock; } - spin_lock_bh(&x->lock); - err = xfrm_state_check(x, skb); - if (err) - goto error; + do { + spin_lock_bh(&x->lock); + err = xfrm_state_check(x, skb); + if (err) + goto error; - xfrm6_encap(skb); + xfrm6_encap(skb); - err = x->type->output(x, skb); - if (err) - goto error; + err = x->type->output(x, skb); + if (err) + goto error; - x->curlft.bytes += skb->len; - x->curlft.packets++; + x->curlft.bytes += skb->len; + x->curlft.packets++; - spin_unlock_bh(&x->lock); + spin_unlock_bh(&x->lock); - skb->nh.raw = skb->data; - - if (!(skb->dst = dst_pop(dst))) { - err = -EHOSTUNREACH; - goto error_nolock; - } - err = NET_XMIT_BYPASS; + skb->nh.raw = skb->data; + + if (!(skb->dst = dst_pop(dst))) { + err = -EHOSTUNREACH; + goto error_nolock; + } + dst = skb->dst; + x = dst->xfrm; + } while (x && !x->props.mode); + + err = 0; out_exit: return err; @@ -142,3 +149,33 @@ error_nolock: kfree_skb(skb); goto out_exit; } + +static int xfrm6_output_finish(struct sk_buff *skb) +{ + int err; + + while (likely((err = xfrm6_output_one(skb)) == 0)) { + nf_reset(skb); + + err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL, + skb->dst->dev, dst_output); + if (unlikely(err != 1)) + break; + + if (!skb->dst->xfrm) + return dst_output(skb); + + err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL, + skb->dst->dev, xfrm6_output_finish); + if (unlikely(err != 1)) + break; + } + + return err; +} + +int xfrm6_output(struct sk_buff *skb) +{ + return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, + xfrm6_output_finish); 
+} -- cgit v1.2.3-71-gd317 From 951dbc8ac714b04c36296b8b5c36c8e036ce433f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 6 Jan 2006 23:02:34 -0800 Subject: [IPV6]: Move nextheader offset to the IP6CB Move nextheader offset to the IP6CB to make it possible to pass a packet to ip6_input_finish multiple times and have it skip already parsed headers. As a nice side effect this gets rid of the manual hopopts skipping in ip6_input_finish. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + include/net/protocol.h | 2 +- include/net/xfrm.h | 6 +++--- net/dccp/ipv6.c | 2 +- net/ipv6/exthdrs.c | 19 ++++++++++++------- net/ipv6/icmp.c | 4 ++-- net/ipv6/ip6_input.c | 21 ++++++--------------- net/ipv6/ip6_tunnel.c | 2 +- net/ipv6/reassembly.c | 11 +++++------ net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 2 +- net/ipv6/xfrm6_input.c | 8 ++++---- net/ipv6/xfrm6_tunnel.c | 6 +++--- net/sctp/ipv6.c | 2 +- 14 files changed, 42 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 93bbed5c6cf4..5cfc71529595 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -191,6 +191,7 @@ struct inet6_skb_parm { __u16 srcrt; __u16 dst1; __u16 lastopt; + __u32 nhoff; }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) diff --git a/include/net/protocol.h b/include/net/protocol.h index 63f7db99c2a6..6dc5970612d7 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -43,7 +43,7 @@ struct net_protocol { #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) struct inet6_protocol { - int (*handler)(struct sk_buff **skb, unsigned int *nhoffp); + int (*handler)(struct sk_buff **skb); void (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 07d7b50cdd76..297d09d28fe4 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -831,7 +831,7 @@ struct xfrm_tunnel { }; struct xfrm6_tunnel 
{ - int (*handler)(struct sk_buff **pskb, unsigned int *nhoffp); + int (*handler)(struct sk_buff **pskb); void (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __u32 info); }; @@ -868,8 +868,8 @@ extern int xfrm4_rcv(struct sk_buff *skb); extern int xfrm4_output(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler); -extern int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi); -extern int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp); +extern int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi); +extern int xfrm6_rcv(struct sk_buff **pskb); extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler); extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler); extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 683250a05f58..df074259f9c3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1029,7 +1029,7 @@ discard: return 0; } -static int dccp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int dccp_v6_rcv(struct sk_buff **pskb) { const struct dccp_hdr *dh; struct sk_buff *skb = *pskb; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 113374dc342c..2a1e7e45b890 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -152,7 +152,7 @@ static struct tlvtype_proc tlvprocdestopt_lst[] = { {-1, NULL} }; -static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_destopt_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); @@ -169,7 +169,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp) if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { skb->h.raw += ((skb->h.raw[1]+1)<<3); - *nhoffp = opt->dst1; + opt->nhoff = opt->dst1; return 1; } @@ -192,7 +192,7 @@ void __init ipv6_destopt_init(void) NONE header. 
No data in packet. ********************************/ -static int ipv6_nodata_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_nodata_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; @@ -215,7 +215,7 @@ void __init ipv6_nodata_init(void) Routing header. ********************************/ -static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_rthdr_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); @@ -249,7 +249,7 @@ looped_back: skb->h.raw += (hdr->hdrlen + 1) << 3; opt->dst0 = opt->dst1; opt->dst1 = 0; - *nhoffp = (&hdr->nexthdr) - skb->nh.raw; + opt->nhoff = (&hdr->nexthdr) - skb->nh.raw; return 1; } @@ -487,9 +487,14 @@ static struct tlvtype_proc tlvprochopopt_lst[] = { int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff) { - IP6CB(skb)->hop = sizeof(struct ipv6hdr); - if (ip6_parse_tlv(tlvprochopopt_lst, skb)) + struct inet6_skb_parm *opt = IP6CB(skb); + + opt->hop = sizeof(struct ipv6hdr); + if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { + skb->h.raw += (skb->h.raw[1]+1)<<3; + opt->nhoff = sizeof(struct ipv6hdr); return sizeof(struct ipv6hdr); + } return -1; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6ec6a2b549bb..53c81fcd20ba 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -79,7 +79,7 @@ DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly; static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL; #define icmpv6_socket __get_cpu_var(__icmpv6_socket) -static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp); +static int icmpv6_rcv(struct sk_buff **pskb); static struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, @@ -581,7 +581,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info) * Handle icmp messages */ -static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int icmpv6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct net_device 
*dev = skb->dev; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index a6026d2787d2..13d724150f33 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -97,6 +97,9 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->version != 6) goto err; + skb->h.raw = (u8 *)(hdr + 1); + IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + pkt_len = ntohs(hdr->payload_len); /* pkt_len may be zero if Jumbo payload option is present */ @@ -111,8 +114,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt } if (hdr->nexthdr == NEXTHDR_HOP) { - skb->h.raw = (u8*)(hdr+1); - if (ipv6_parse_hopopts(skb, offsetof(struct ipv6hdr, nexthdr)) < 0) { + if (ipv6_parse_hopopts(skb, IP6CB(skb)->nhoff) < 0) { IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); return 0; } @@ -143,26 +145,15 @@ static inline int ip6_input_finish(struct sk_buff *skb) int nexthdr; u8 hash; - skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr); - /* * Parse extension headers */ - nexthdr = skb->nh.ipv6h->nexthdr; - nhoff = offsetof(struct ipv6hdr, nexthdr); - - /* Skip hop-by-hop options, they are already parsed. 
*/ - if (nexthdr == NEXTHDR_HOP) { - nhoff = sizeof(struct ipv6hdr); - nexthdr = skb->h.raw[0]; - skb->h.raw += (skb->h.raw[1]+1)<<3; - } - rcu_read_lock(); resubmit: if (!pskb_pull(skb, skb->h.raw - skb->data)) goto discard; + nhoff = IP6CB(skb)->nhoff; nexthdr = skb->nh.raw[nhoff]; raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); @@ -194,7 +185,7 @@ resubmit: !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - ret = ipprot->handler(&skb, &nhoff); + ret = ipprot->handler(&skb); if (ret > 0) goto resubmit; else if (ret == 0) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e315d0f80af1..f079621c8b67 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -510,7 +510,7 @@ static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph, **/ static int -ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +ip6ip6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct ipv6hdr *ipv6h; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 5d316cb72ec9..15e1456b3f18 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -581,7 +581,6 @@ err: * the last and the first frames arrived and all the bits are here. 
*/ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, - unsigned int *nhoffp, struct net_device *dev) { struct sk_buff *fp, *head = fq->fragments; @@ -654,6 +653,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, head->dev = dev; skb_set_timestamp(head, &fq->stamp); head->nh.ipv6h->payload_len = htons(payload_len); + IP6CB(head)->nhoff = nhoff; *skb_in = head; @@ -663,7 +663,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); fq->fragments = NULL; - *nhoffp = nhoff; return 1; out_oversize: @@ -678,7 +677,7 @@ out_fail: return -1; } -static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp) +static int ipv6_frag_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct net_device *dev = skb->dev; @@ -710,7 +709,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp) skb->h.raw += sizeof(struct frag_hdr); IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); - *nhoffp = (u8*)fhdr - skb->nh.raw; + IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw; return 1; } @@ -722,11 +721,11 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp) spin_lock(&fq->lock); - ip6_frag_queue(fq, skb, fhdr, *nhoffp); + ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) - ret = ip6_frag_reasm(fq, skbp, nhoffp, dev); + ret = ip6_frag_reasm(fq, skbp, dev); spin_unlock(&fq->lock); fq_put(fq, NULL); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2947bc56d8a0..a25f4e8a8ada 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1153,7 +1153,7 @@ ipv6_pktoptions: return 0; } -static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int tcp_v6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct tcphdr *th; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d8538dcea813..c47648892c04 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ 
-435,7 +435,7 @@ out: read_unlock(&udp_hash_lock); } -static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int udpv6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct sock *sk; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 28c29d78338e..1079e47f3933 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -26,7 +26,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) IP6_ECN_set_ce(inner_iph); } -int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) +int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi) { struct sk_buff *skb = *pskb; int err; @@ -38,7 +38,7 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi) int nexthdr; unsigned int nhoff; - nhoff = *nhoffp; + nhoff = IP6CB(skb)->nhoff; nexthdr = skb->nh.raw[nhoff]; seq = 0; @@ -144,7 +144,7 @@ drop: EXPORT_SYMBOL(xfrm6_rcv_spi); -int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +int xfrm6_rcv(struct sk_buff **pskb) { - return xfrm6_rcv_spi(pskb, nhoffp, 0); + return xfrm6_rcv_spi(pskb, 0); } diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index fbef7826a74f..da09ff258648 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -397,7 +397,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler) EXPORT_SYMBOL(xfrm6_tunnel_deregister); -static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int xfrm6_tunnel_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct xfrm6_tunnel *handler = xfrm6_tunnel_handler; @@ -405,11 +405,11 @@ static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp) u32 spi; /* device-like_ip6ip6_handler() */ - if (handler && handler->handler(pskb, nhoffp) == 0) + if (handler && handler->handler(pskb) == 0) return 0; spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); - return xfrm6_rcv_spi(pskb, nhoffp, spi); + return xfrm6_rcv_spi(pskb, spi); } static void 
xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 15c05165c905..04c7fab4edc4 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -905,7 +905,7 @@ static struct inet_protosw sctpv6_stream_protosw = { .flags = SCTP_PROTOSW_FLAG, }; -static int sctp6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) +static int sctp6_rcv(struct sk_buff **pskb) { return sctp_rcv(*pskb) ? -1 : 0; } -- cgit v1.2.3-71-gd317 From 3e3850e989c5d2eb1aab6f0fd9257759f0f4cbc6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 6 Jan 2006 23:04:54 -0800 Subject: [NETFILTER]: Fix xfrm lookup in ip_route_me_harder/ip6_route_me_harder ip_route_me_harder doesn't use the port numbers of the xfrm lookup and uses ip_route_input for non-local addresses which doesn't do a xfrm lookup, ip6_route_me_harder doesn't do a xfrm lookup at all. Use xfrm_decode_session and do the lookup manually, make sure both only do the lookup if the packet hasn't been transformed already. Making sure the lookup only happens once needs a new field in the IP6CB, which exceeds the size of skb->cb. The size of skb->cb is increased to 48b. Apparently the IPv6 mobile extensions need some more room anyway. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 3 +++ include/linux/skbuff.h | 2 +- include/net/ip.h | 3 ++- include/net/xfrm.h | 2 +- net/ipv4/ip_gre.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv4/netfilter.c | 12 ++++++++++-- net/ipv4/xfrm4_output.c | 1 + net/ipv6/netfilter.c | 9 ++++++++- net/ipv6/xfrm6_output.c | 1 + net/xfrm/xfrm_policy.c | 9 +++++---- 11 files changed, 34 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5cfc71529595..9c8f4c9ed429 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -192,6 +192,9 @@ struct inet6_skb_parm { __u16 dst1; __u16 lastopt; __u32 nhoff; + __u16 flags; + +#define IP6SKB_XFRM_TRANSFORMED 1 }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 483cfc47ec34..e5fd66c5650b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -251,7 +251,7 @@ struct sk_buff { * want to keep them across layers you have to do a skb_clone() * first. This is owned by whoever has the skb queued ATM. 
*/ - char cb[40]; + char cb[48]; unsigned int len, data_len, diff --git a/include/net/ip.h b/include/net/ip.h index 52f4d9c69704..a494d04e5dea 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -39,7 +39,8 @@ struct inet_skb_parm #define IPSKB_FORWARDED 1 #define IPSKB_XFRM_TUNNEL_SIZE 2 -#define IPSKB_FRAG_COMPLETE 4 +#define IPSKB_XFRM_TRANSFORMED 4 +#define IPSKB_FRAG_COMPLETE 8 }; struct ipcm_cookie diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 297d09d28fe4..d6111a2f0a23 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -668,7 +668,7 @@ static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *s return xfrm_policy_check(sk, dir, skb, AF_INET6); } - +extern int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family); extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 65c3a91ed85e..de16e944777f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -832,7 +832,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, gre_hlen); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~IPSKB_XFRM_TUNNEL_SIZE; + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); dst_release(skb->dst); skb->dst = &rt->u.dst; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 078b59be91f4..bbd85f5ec985 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -621,7 +621,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~IPSKB_XFRM_TUNNEL_SIZE; + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); dst_release(skb->dst); skb->dst = &rt->u.dst; 
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index ae0779d82c5d..4c637a1cbd23 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -7,11 +7,13 @@ #include #include +#include #include #include #include #include -#include +#include +#include /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ int ip_route_me_harder(struct sk_buff **pskb) @@ -33,7 +35,6 @@ int ip_route_me_harder(struct sk_buff **pskb) #ifdef CONFIG_IP_ROUTE_FWMARK fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; #endif - fl.proto = iph->protocol; if (ip_route_output_key(&rt, &fl) != 0) return -1; @@ -60,6 +61,13 @@ int ip_route_me_harder(struct sk_buff **pskb) if ((*pskb)->dst->error) return -1; +#ifdef CONFIG_XFRM + if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) && + xfrm_decode_session(*pskb, &fl, AF_INET) == 0) + if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0)) + return -1; +#endif + /* Change in oif may mean change in hh_len. */ hh_len = (*pskb)->dst->dev->hard_header_len; if (skb_headroom(*pskb) < hh_len) { diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 51fabb8f7c54..160c48800ab8 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -140,6 +140,7 @@ static int xfrm4_output_one(struct sk_buff *skb) x = dst->xfrm; } while (x && !x->props.mode); + IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; err = 0; out_exit: diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index f8626ebf90fd..b63678328a3b 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -10,6 +10,7 @@ #include #include #include +#include int ip6_route_me_harder(struct sk_buff *skb) { @@ -21,11 +22,17 @@ int ip6_route_me_harder(struct sk_buff *skb) { .ip6_u = { .daddr = iph->daddr, .saddr = iph->saddr, } }, - .proto = iph->nexthdr, }; dst = ip6_route_output(skb->sk, &fl); +#ifdef CONFIG_XFRM + if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + xfrm_decode_session(skb, &fl, AF_INET6) == 0) + if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + 
return -1; +#endif + if (dst->error) { IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index fc0ea38953c4..80242172a5df 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -139,6 +139,7 @@ static int xfrm6_output_one(struct sk_buff *skb) x = dst->xfrm; } while (x && !x->props.mode); + IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; err = 0; out_exit: diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 64a447375fdb..f2edc9225b6a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -951,8 +951,8 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, return start; } -static int -_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) +int +xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); @@ -963,6 +963,7 @@ _decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) xfrm_policy_put_afinfo(afinfo); return 0; } +EXPORT_SYMBOL(xfrm_decode_session); static inline int secpath_has_tunnel(struct sec_path *sp, int k) { @@ -982,7 +983,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, u8 fl_dir = policy_to_flow_dir(dir); u32 sk_sid; - if (_decode_session(skb, &fl, family) < 0) + if (xfrm_decode_session(skb, &fl, family) < 0) return 0; sk_sid = security_sk_sid(sk, &fl, fl_dir); @@ -1055,7 +1056,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct flowi fl; - if (_decode_session(skb, &fl, family) < 0) + if (xfrm_decode_session(skb, &fl, family) < 0) return 0; return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; -- cgit v1.2.3-71-gd317 From eb9c7ebe6980c41cf6ae889e301c3b49f473ee9f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 6 Jan 2006 23:06:30 -0800 Subject: [NETFILTER]: Handle NAT in IPsec 
policy checks Handle NAT of decapsulated IPsec packets by reconstructing the struct flowi of the original packet from the conntrack information for IPsec policy checks. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 16 +++++++++++ net/dccp/ipv4.c | 2 +- net/ipv4/netfilter.c | 3 ++ net/ipv4/netfilter/ip_nat_standalone.c | 50 ++++++++++++++++++++++++++++++++-- net/xfrm/xfrm_policy.c | 2 ++ 5 files changed, 70 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 79bb977afeac..84506dfa1f37 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -274,6 +274,20 @@ struct nf_queue_rerouter { extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer); extern int nf_unregister_queue_rerouter(int pf); +#include +extern void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); + +static inline void +nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) +{ +#ifdef CONFIG_IP_NF_NAT_NEEDED + void (*decodefn)(struct sk_buff *, struct flowi *); + + if (family == AF_INET && (decodefn = ip_nat_decode_session) != NULL) + decodefn(skb, fl); +#endif +} + #ifdef CONFIG_PROC_FS #include extern struct proc_dir_entry *proc_net_netfilter; @@ -282,6 +296,8 @@ extern struct proc_dir_entry *proc_net_netfilter; #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} +static inline void +nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) {} #endif /*CONFIG_NETFILTER*/ #endif /*__KERNEL__*/ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 23ba177c1150..00f983226672 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -986,6 +986,7 @@ int dccp_v4_rcv(struct sk_buff *skb) if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; + nf_reset(skb); return 
sk_receive_skb(sk, skb); @@ -1099,7 +1100,6 @@ int dccp_v4_destroy_sock(struct sock *sk) kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; } - nf_reset(skb); /* Clean up a referenced DCCP bind bucket. */ if (inet_csk(sk)->icsk_bind_hash != NULL) diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 4c637a1cbd23..3321092b0914 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -86,6 +86,9 @@ int ip_route_me_harder(struct sk_buff **pskb) } EXPORT_SYMBOL(ip_route_me_harder); +void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); +EXPORT_SYMBOL(ip_nat_decode_session); + /* * Extra routing may needed on local out, as the QUEUE target never * returns control to the table. diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index b518697af4db..8b8a1f00bbf4 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -55,6 +55,44 @@ : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \ : "*ERROR*"))) +#ifdef CONFIG_XFRM +static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + struct ip_conntrack *ct; + struct ip_conntrack_tuple *t; + enum ip_conntrack_info ctinfo; + enum ip_conntrack_dir dir; + unsigned long statusbit; + + ct = ip_conntrack_get(skb, &ctinfo); + if (ct == NULL) + return; + dir = CTINFO2DIR(ctinfo); + t = &ct->tuplehash[dir].tuple; + + if (dir == IP_CT_DIR_ORIGINAL) + statusbit = IPS_DST_NAT; + else + statusbit = IPS_SRC_NAT; + + if (ct->status & statusbit) { + fl->fl4_dst = t->dst.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP) + fl->fl_ip_dport = t->dst.u.tcp.port; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl->fl4_src = t->src.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP) + fl->fl_ip_sport = t->src.u.tcp.port; + } +} +#endif + static unsigned int ip_nat_fn(unsigned int hooknum, struct sk_buff **pskb, @@ -330,10 +368,14 @@ static int 
init_or_cleanup(int init) if (!init) goto cleanup; +#ifdef CONFIG_XFRM + BUG_ON(ip_nat_decode_session != NULL); + ip_nat_decode_session = nat_decode_session; +#endif ret = ip_nat_rule_init(); if (ret < 0) { printk("ip_nat_init: can't setup rules.\n"); - goto cleanup_nothing; + goto cleanup_decode_session; } ret = nf_register_hook(&ip_nat_in_ops); if (ret < 0) { @@ -381,7 +423,11 @@ static int init_or_cleanup(int init) nf_unregister_hook(&ip_nat_in_ops); cleanup_rule_init: ip_nat_rule_cleanup(); - cleanup_nothing: + cleanup_decode_session: +#ifdef CONFIG_XFRM + ip_nat_decode_session = NULL; + synchronize_net(); +#endif return ret; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f2edc9225b6a..59614a994b4e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -985,6 +986,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (xfrm_decode_session(skb, &fl, family) < 0) return 0; + nf_nat_decode_session(skb, &fl, family); sk_sid = security_sk_sid(sk, &fl, fl_dir); -- cgit v1.2.3-71-gd317 From e16a8f0b8c53312beb1d8b52e463aae79aa809c7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 6 Jan 2006 23:06:48 -0800 Subject: [NETFILTER]: Add ipt_policy/ip6t_policy matches Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ipt_policy.h | 52 +++++++++ include/linux/netfilter_ipv6/ip6t_policy.h | 52 +++++++++ net/ipv4/netfilter/Kconfig | 10 ++ net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_policy.c | 170 ++++++++++++++++++++++++++++ net/ipv6/netfilter/Kconfig | 10 ++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_policy.c | 175 +++++++++++++++++++++++++++++ 8 files changed, 471 insertions(+) create mode 100644 include/linux/netfilter_ipv4/ipt_policy.h create mode 100644 include/linux/netfilter_ipv6/ip6t_policy.h create mode 100644 net/ipv4/netfilter/ipt_policy.c create mode 100644 net/ipv6/netfilter/ip6t_policy.c (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ipt_policy.h b/include/linux/netfilter_ipv4/ipt_policy.h new file mode 100644 index 000000000000..7fd1bec453f1 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_policy.h @@ -0,0 +1,52 @@ +#ifndef _IPT_POLICY_H +#define _IPT_POLICY_H + +#define IPT_POLICY_MAX_ELEM 4 + +enum ipt_policy_flags +{ + IPT_POLICY_MATCH_IN = 0x1, + IPT_POLICY_MATCH_OUT = 0x2, + IPT_POLICY_MATCH_NONE = 0x4, + IPT_POLICY_MATCH_STRICT = 0x8, +}; + +enum ipt_policy_modes +{ + IPT_POLICY_MODE_TRANSPORT, + IPT_POLICY_MODE_TUNNEL +}; + +struct ipt_policy_spec +{ + u_int8_t saddr:1, + daddr:1, + proto:1, + mode:1, + spi:1, + reqid:1; +}; + +struct ipt_policy_elem +{ + u_int32_t saddr; + u_int32_t smask; + u_int32_t daddr; + u_int32_t dmask; + u_int32_t spi; + u_int32_t reqid; + u_int8_t proto; + u_int8_t mode; + + struct ipt_policy_spec match; + struct ipt_policy_spec invert; +}; + +struct ipt_policy_info +{ + struct ipt_policy_elem pol[IPT_POLICY_MAX_ELEM]; + u_int16_t flags; + u_int16_t len; +}; + +#endif /* _IPT_POLICY_H */ diff --git a/include/linux/netfilter_ipv6/ip6t_policy.h b/include/linux/netfilter_ipv6/ip6t_policy.h new file mode 100644 index 000000000000..5a93afcd2ff1 --- /dev/null +++ b/include/linux/netfilter_ipv6/ip6t_policy.h @@ -0,0 +1,52 @@ +#ifndef 
_IP6T_POLICY_H +#define _IP6T_POLICY_H + +#define IP6T_POLICY_MAX_ELEM 4 + +enum ip6t_policy_flags +{ + IP6T_POLICY_MATCH_IN = 0x1, + IP6T_POLICY_MATCH_OUT = 0x2, + IP6T_POLICY_MATCH_NONE = 0x4, + IP6T_POLICY_MATCH_STRICT = 0x8, +}; + +enum ip6t_policy_modes +{ + IP6T_POLICY_MODE_TRANSPORT, + IP6T_POLICY_MODE_TUNNEL +}; + +struct ip6t_policy_spec +{ + u_int8_t saddr:1, + daddr:1, + proto:1, + mode:1, + spi:1, + reqid:1; +}; + +struct ip6t_policy_elem +{ + struct in6_addr saddr; + struct in6_addr smask; + struct in6_addr daddr; + struct in6_addr dmask; + u_int32_t spi; + u_int32_t reqid; + u_int8_t proto; + u_int8_t mode; + + struct ip6t_policy_spec match; + struct ip6t_policy_spec invert; +}; + +struct ip6t_policy_info +{ + struct ip6t_policy_elem pol[IP6T_POLICY_MAX_ELEM]; + u_int16_t flags; + u_int16_t len; +}; + +#endif /* _IP6T_POLICY_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 88a60650e6b8..a9893ec03e02 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -487,6 +487,16 @@ config IP_NF_MATCH_STRING To compile it as a module, choose M here. If unsure, say N. +config IP_NF_MATCH_POLICY + tristate "IPsec policy match support" + depends on IP_NF_IPTABLES && XFRM + help + Policy matching allows you to match packets based on the + IPsec policy that was used during decapsulation/will + be used during encapsulation. + + To compile it as a module, choose M here. If unsure, say N. 
+ # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index d0a447e520a2..549b01a648b3 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -72,6 +72,7 @@ obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o +obj-$(CONFIG_IP_NF_MATCH_POLICY) += ipt_policy.o obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o diff --git a/net/ipv4/netfilter/ipt_policy.c b/net/ipv4/netfilter/ipt_policy.c new file mode 100644 index 000000000000..709debcc69c9 --- /dev/null +++ b/net/ipv4/netfilter/ipt_policy.c @@ -0,0 +1,170 @@ +/* IP tables module for matching IPsec policy + * + * Copyright (c) 2004,2005 Patrick McHardy, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("IPtables IPsec policy matching module"); +MODULE_LICENSE("GPL"); + + +static inline int +match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e) +{ +#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x)) + + return MATCH(saddr, x->props.saddr.a4 & e->smask) && + MATCH(daddr, x->id.daddr.a4 & e->dmask) && + MATCH(proto, x->id.proto) && + MATCH(mode, x->props.mode) && + MATCH(spi, x->id.spi) && + MATCH(reqid, x->props.reqid); +} + +static int +match_policy_in(const struct sk_buff *skb, const struct ipt_policy_info *info) +{ + const struct ipt_policy_elem *e; + struct sec_path *sp = skb->sp; + int strict = info->flags & IPT_POLICY_MATCH_STRICT; + int i, pos; + + if (sp == NULL) + return -1; + if (strict && info->len != sp->len) + return 0; + + for (i = sp->len - 1; i >= 0; i--) { + pos = strict ? i - sp->len + 1 : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(sp->x[i].xvec, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int +match_policy_out(const struct sk_buff *skb, const struct ipt_policy_info *info) +{ + const struct ipt_policy_elem *e; + struct dst_entry *dst = skb->dst; + int strict = info->flags & IPT_POLICY_MATCH_STRICT; + int i, pos; + + if (dst->xfrm == NULL) + return -1; + + for (i = 0; dst && dst->xfrm; dst = dst->child, i++) { + pos = strict ? i : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(dst->xfrm, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 
1 : 0; +} + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, int offset, int *hotdrop) +{ + const struct ipt_policy_info *info = matchinfo; + int ret; + + if (info->flags & IPT_POLICY_MATCH_IN) + ret = match_policy_in(skb, info); + else + ret = match_policy_out(skb, info); + + if (ret < 0) + ret = info->flags & IPT_POLICY_MATCH_NONE ? 1 : 0; + else if (info->flags & IPT_POLICY_MATCH_NONE) + ret = 0; + + return ret; +} + +static int checkentry(const char *tablename, const struct ipt_ip *ip, + void *matchinfo, unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_policy_info *info = matchinfo; + + if (matchsize != IPT_ALIGN(sizeof(*info))) { + printk(KERN_ERR "ipt_policy: matchsize %u != %zu\n", + matchsize, IPT_ALIGN(sizeof(*info))); + return 0; + } + if (!(info->flags & (IPT_POLICY_MATCH_IN|IPT_POLICY_MATCH_OUT))) { + printk(KERN_ERR "ipt_policy: neither incoming nor " + "outgoing policy selected\n"); + return 0; + } + if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN) + && info->flags & IPT_POLICY_MATCH_OUT) { + printk(KERN_ERR "ipt_policy: output policy not valid in " + "PRE_ROUTING and INPUT\n"); + return 0; + } + if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT) + && info->flags & IPT_POLICY_MATCH_IN) { + printk(KERN_ERR "ipt_policy: input policy not valid in " + "POST_ROUTING and OUTPUT\n"); + return 0; + } + if (info->len > IPT_POLICY_MAX_ELEM) { + printk(KERN_ERR "ipt_policy: too many policy elements\n"); + return 0; + } + + return 1; +} + +static struct ipt_match policy_match = { + .name = "policy", + .match = match, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ipt_register_match(&policy_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&policy_match); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig 
index 04912f9b35c3..105dd69ee9fb 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -179,6 +179,16 @@ config IP6_NF_MATCH_PHYSDEV To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_MATCH_POLICY + tristate "IPsec policy match support" + depends on IP6_NF_IPTABLES && XFRM + help + Policy matching allows you to match packets based on the + IPsec policy that was used during decapsulation/will + be used during encapsulation. + + To compile it as a module, choose M here. If unsure, say N. + # The targets config IP6_NF_FILTER tristate "Packet filtering" diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 9ab5b2ca1f59..c0c809b426e8 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o obj-$(CONFIG_IP6_NF_MATCH_AHESP) += ip6t_esp.o ip6t_ah.o +obj-$(CONFIG_IP6_NF_MATCH_POLICY) += ip6t_policy.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o obj-$(CONFIG_IP6_NF_MATCH_MULTIPORT) += ip6t_multiport.o obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o diff --git a/net/ipv6/netfilter/ip6t_policy.c b/net/ipv6/netfilter/ip6t_policy.c new file mode 100644 index 000000000000..13fedad48c1d --- /dev/null +++ b/net/ipv6/netfilter/ip6t_policy.c @@ -0,0 +1,175 @@ +/* IP tables module for matching IPsec policy + * + * Copyright (c) 2004,2005 Patrick McHardy, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("IPtables IPsec policy matching module"); +MODULE_LICENSE("GPL"); + + +static inline int +match_xfrm_state(struct xfrm_state *x, const struct ip6t_policy_elem *e) +{ +#define MATCH_ADDR(x,y,z) (!e->match.x || \ + ((ip6_masked_addrcmp((z), &e->x, &e->y)) == 0) ^ e->invert.x) +#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x)) + + return MATCH_ADDR(saddr, smask, (struct in6_addr *)&x->props.saddr.a6) && + MATCH_ADDR(daddr, dmask, (struct in6_addr *)&x->id.daddr.a6) && + MATCH(proto, x->id.proto) && + MATCH(mode, x->props.mode) && + MATCH(spi, x->id.spi) && + MATCH(reqid, x->props.reqid); +} + +static int +match_policy_in(const struct sk_buff *skb, const struct ip6t_policy_info *info) +{ + const struct ip6t_policy_elem *e; + struct sec_path *sp = skb->sp; + int strict = info->flags & IP6T_POLICY_MATCH_STRICT; + int i, pos; + + if (sp == NULL) + return -1; + if (strict && info->len != sp->len) + return 0; + + for (i = sp->len - 1; i >= 0; i--) { + pos = strict ? i - sp->len + 1 : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(sp->x[i].xvec, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int +match_policy_out(const struct sk_buff *skb, const struct ip6t_policy_info *info) +{ + const struct ip6t_policy_elem *e; + struct dst_entry *dst = skb->dst; + int strict = info->flags & IP6T_POLICY_MATCH_STRICT; + int i, pos; + + if (dst->xfrm == NULL) + return -1; + + for (i = 0; dst && dst->xfrm; dst = dst->child, i++) { + pos = strict ? i : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(dst->xfrm, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 
1 : 0; +} + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + const struct ip6t_policy_info *info = matchinfo; + int ret; + + if (info->flags & IP6T_POLICY_MATCH_IN) + ret = match_policy_in(skb, info); + else + ret = match_policy_out(skb, info); + + if (ret < 0) + ret = info->flags & IP6T_POLICY_MATCH_NONE ? 1 : 0; + else if (info->flags & IP6T_POLICY_MATCH_NONE) + ret = 0; + + return ret; +} + +static int checkentry(const char *tablename, const struct ip6t_ip6 *ip, + void *matchinfo, unsigned int matchsize, + unsigned int hook_mask) +{ + struct ip6t_policy_info *info = matchinfo; + + if (matchsize != IP6T_ALIGN(sizeof(*info))) { + printk(KERN_ERR "ip6t_policy: matchsize %u != %zu\n", + matchsize, IP6T_ALIGN(sizeof(*info))); + return 0; + } + if (!(info->flags & (IP6T_POLICY_MATCH_IN|IP6T_POLICY_MATCH_OUT))) { + printk(KERN_ERR "ip6t_policy: neither incoming nor " + "outgoing policy selected\n"); + return 0; + } + if (hook_mask & (1 << NF_IP6_PRE_ROUTING | 1 << NF_IP6_LOCAL_IN) + && info->flags & IP6T_POLICY_MATCH_OUT) { + printk(KERN_ERR "ip6t_policy: output policy not valid in " + "PRE_ROUTING and INPUT\n"); + return 0; + } + if (hook_mask & (1 << NF_IP6_POST_ROUTING | 1 << NF_IP6_LOCAL_OUT) + && info->flags & IP6T_POLICY_MATCH_IN) { + printk(KERN_ERR "ip6t_policy: input policy not valid in " + "POST_ROUTING and OUTPUT\n"); + return 0; + } + if (info->len > IP6T_POLICY_MAX_ELEM) { + printk(KERN_ERR "ip6t_policy: too many policy elements\n"); + return 0; + } + + return 1; +} + +static struct ip6t_match policy_match = { + .name = "policy", + .match = match, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ip6t_register_match(&policy_match); +} + +static void __exit fini(void) +{ + ip6t_unregister_match(&policy_match); +} + +module_init(init); +module_exit(fini); -- cgit 
v1.2.3-71-gd317 From f53b61d8c385140fe7f09e0c9187ae813ee9f330 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 7 Jan 2006 12:50:27 -0800 Subject: [NETFILTER]: Add dummy nf_hook{_thresh}() when NETFILTER is disabled. Signed-off-by: David S. Miller --- include/linux/netfilter.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 84506dfa1f37..4cf6088625c1 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -295,7 +295,22 @@ extern struct proc_dir_entry *proc_net_netfilter; #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) +static inline int nf_hook_thresh(int pf, unsigned int hook, + struct sk_buff **pskb, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), int thresh) +{ + return okfn(*pskb); +} +static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, + struct net_device *indev, struct net_device *outdev, + int (*okfn)(struct sk_buff *)) +{ + return okfn(*pskb); +} static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} +struct flowi; static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) {} #endif /*CONFIG_NETFILTER*/ -- cgit v1.2.3-71-gd317 From 67207b9664a8d603138ef1556141e6d0a102bea7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 15 Nov 2005 15:53:48 -0500 Subject: [PATCH] spufs: The SPU file system, base This is the current version of the spu file system, used for driving SPEs on the Cell Broadband Engine. This release is almost identical to the version for the 2.6.14 kernel posted earlier, which is available as part of the Cell BE Linux distribution from http://www.bsc.es/projects/deepcomputing/linuxoncell/. The first patch provides all the interfaces for running spu application, but does not have any support for debugging SPU tasks or for scheduling. 
Both these functionalities are added in the subsequent patches. See Documentation/filesystems/spufs.txt on how to use spufs. Signed-off-by: Arnd Bergmann Signed-off-by: Paul Mackerras --- Documentation/filesystems/spufs.txt | 521 +++++++++++++++++++ arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/systbl.S | 2 + arch/powerpc/mm/hash_utils_64.c | 1 + arch/powerpc/platforms/cell/Kconfig | 13 + arch/powerpc/platforms/cell/Makefile | 3 + arch/powerpc/platforms/cell/spu_base.c | 740 +++++++++++++++++++++++++++ arch/powerpc/platforms/cell/spu_syscalls.c | 86 ++++ arch/powerpc/platforms/cell/spufs/Makefile | 3 + arch/powerpc/platforms/cell/spufs/context.c | 67 +++ arch/powerpc/platforms/cell/spufs/file.c | 596 +++++++++++++++++++++ arch/powerpc/platforms/cell/spufs/inode.c | 470 +++++++++++++++++ arch/powerpc/platforms/cell/spufs/spufs.h | 71 +++ arch/powerpc/platforms/cell/spufs/syscalls.c | 106 ++++ arch/ppc/kernel/ppc_ksyms.c | 1 - include/asm-powerpc/spu.h | 498 ++++++++++++++++++ include/asm-powerpc/unistd.h | 2 + include/linux/syscalls.h | 5 + kernel/sys_ni.c | 2 + mm/memory.c | 2 + 20 files changed, 3189 insertions(+), 1 deletion(-) create mode 100644 Documentation/filesystems/spufs.txt create mode 100644 arch/powerpc/platforms/cell/Kconfig create mode 100644 arch/powerpc/platforms/cell/spu_base.c create mode 100644 arch/powerpc/platforms/cell/spu_syscalls.c create mode 100644 arch/powerpc/platforms/cell/spufs/Makefile create mode 100644 arch/powerpc/platforms/cell/spufs/context.c create mode 100644 arch/powerpc/platforms/cell/spufs/file.c create mode 100644 arch/powerpc/platforms/cell/spufs/inode.c create mode 100644 arch/powerpc/platforms/cell/spufs/spufs.h create mode 100644 arch/powerpc/platforms/cell/spufs/syscalls.c create mode 100644 include/asm-powerpc/spu.h (limited to 'include/linux') diff --git a/Documentation/filesystems/spufs.txt b/Documentation/filesystems/spufs.txt new file mode 100644 index 000000000000..8edc3952eff4 --- /dev/null +++ 
b/Documentation/filesystems/spufs.txt @@ -0,0 +1,521 @@ +SPUFS(2) Linux Programmer's Manual SPUFS(2) + + + +NAME + spufs - the SPU file system + + +DESCRIPTION + The SPU file system is used on PowerPC machines that implement the Cell + Broadband Engine Architecture in order to access Synergistic Processor + Units (SPUs). + + The file system provides a name space similar to posix shared memory or + message queues. Users that have write permissions on the file system + can use spu_create(2) to establish SPU contexts in the spufs root. + + Every SPU context is represented by a directory containing a predefined + set of files. These files can be used for manipulating the state of the + logical SPU. Users can change permissions on those files, but not actu- + ally add or remove files. + + +MOUNT OPTIONS + uid= + set the user owning the mount point, the default is 0 (root). + + gid= + set the group owning the mount point, the default is 0 (root). + + +FILES + The files in spufs mostly follow the standard behavior for regular sys- + tem calls like read(2) or write(2), but often support only a subset of + the operations supported on regular file systems. This list details the + supported operations and the deviations from the behaviour in the + respective man pages. + + All files that support the read(2) operation also support readv(2) and + all files that support the write(2) operation also support writev(2). + All files support the access(2) and stat(2) family of operations, but + only the st_mode, st_nlink, st_uid and st_gid fields of struct stat + contain reliable information. + + All files support the chmod(2)/fchmod(2) and chown(2)/fchown(2) opera- + tions, but will not be able to grant permissions that contradict the + possible operations, e.g. read access on the wbox file. + + The current set of files is: + + + /mem + the contents of the local storage memory of the SPU. 
This can be + accessed like a regular shared memory file and contains both code and + data in the address space of the SPU. The possible operations on an + open mem file are: + + read(2), pread(2), write(2), pwrite(2), lseek(2) + These operate as documented, with the exception that lseek(2), + write(2) and pwrite(2) are not supported beyond the end of the + file. The file size is the size of the local storage of the SPU, + which normally is 256 kilobytes. + + mmap(2) + Mapping mem into the process address space gives access to the + SPU local storage within the process address space. Only + MAP_SHARED mappings are allowed. + + + /mbox + The first SPU to CPU communication mailbox. This file is read-only and + can be read in units of 32 bits. The file can only be used in non- + blocking mode and even poll() will not block on it. The possible + operations on an open mbox file are: + + read(2) + If a count smaller than four is requested, read returns -1 and + sets errno to EINVAL. If there is no data available in the mail + box, the return value is set to -1 and errno becomes EAGAIN. + When data has been read successfully, four bytes are placed in + the data buffer and the value four is returned. + + + /ibox + The second SPU to CPU communication mailbox. This file is similar to + the first mailbox file, but can be read in blocking I/O mode, and the + poll family of system calls can be used to wait for it. The possible + operations on an open ibox file are: + + read(2) + If a count smaller than four is requested, read returns -1 and + sets errno to EINVAL. If there is no data available in the mail + box and the file descriptor has been opened with O_NONBLOCK, the + return value is set to -1 and errno becomes EAGAIN. + + If there is no data available in the mail box and the file + descriptor has been opened without O_NONBLOCK, the call will + block until the SPU writes to its interrupt mailbox channel.
+ When data has been read successfully, four bytes are placed in + the data buffer and the value four is returned. + + poll(2) + Poll on the ibox file returns (POLLIN | POLLRDNORM) whenever + data is available for reading. + + + /wbox + The CPU to SPU communication mailbox. It is write-only and can be written + in units of 32 bits. If the mailbox is full, write() will block and + poll can be used to wait for it becoming empty again. The possible + operations on an open wbox file are: write(2) If a count smaller than + four is requested, write returns -1 and sets errno to EINVAL. If there + is no space available in the mail box and the file descriptor has been + opened with O_NONBLOCK, the return value is set to -1 and errno becomes + EAGAIN. + + If there is no space available in the mail box and the file descriptor + has been opened without O_NONBLOCK, the call will block until the SPU + reads from its PPE mailbox channel. When data has been read success- + fully, four bytes are placed in the data buffer and the value four is + returned. + + poll(2) + Poll on the wbox file returns (POLLOUT | POLLWRNORM) whenever + space is available for writing. + + + /mbox_stat + /ibox_stat + /wbox_stat + Read-only files that contain the length of the current queue, i.e. how + many words can be read from mbox or ibox or how many words can be + written to wbox without blocking. The files can be read only in 4-byte + units and return a big-endian binary integer number. The possible + operations on an open *box_stat file are: + + read(2) + If a count smaller than four is requested, read returns -1 and + sets errno to EINVAL. Otherwise, a four byte value is placed in + the data buffer, containing the number of elements that can be + read from (for mbox_stat and ibox_stat) or written to (for + wbox_stat) the respective mail box without blocking or resulting + in EAGAIN. + + + /npc + /decr + /decr_status + /spu_tag_mask + /event_mask + /srr0 + Internal registers of the SPU.
The representation is an ASCII string + with the numeric value of the next instruction to be executed. These + can be used in read/write mode for debugging, but normal operation of + programs should not rely on them because access to any of them except + npc requires an SPU context save and is therefore very inefficient. + + The contents of these files are: + + npc Next Program Counter + + decr SPU Decrementer + + decr_status Decrementer Status + + spu_tag_mask MFC tag mask for SPU DMA + + event_mask Event mask for SPU interrupts + + srr0 Interrupt Return address register + + + The possible operations on an open npc, decr, decr_status, + spu_tag_mask, event_mask or srr0 file are: + + read(2) + When the count supplied to the read call is shorter than the + required length for the pointer value plus a newline character, + subsequent reads from the same file descriptor will result in + completing the string, regardless of changes to the register by + a running SPU task. When a complete string has been read, all + subsequent read operations will return zero bytes and a new file + descriptor needs to be opened to read the value again. + + write(2) + A write operation on the file results in setting the register to + the value given in the string. The string is parsed from the + beginning to the first non-numeric character or the end of the + buffer. Subsequent writes to the same file descriptor overwrite + the previous setting. + + + /fpcr + This file gives access to the Floating Point Status and Control Regis- + ter as a four byte long file. The operations on the fpcr file are: + + read(2) + If a count smaller than four is requested, read returns -1 and + sets errno to EINVAL. Otherwise, a four byte value is placed in + the data buffer, containing the current value of the fpcr regis- + ter. + + write(2) + If a count smaller than four is requested, write returns -1 and + sets errno to EINVAL. 
Otherwise, a four byte value is copied + from the data buffer, updating the value of the fpcr register. + + + /signal1 + /signal2 + The two signal notification channels of an SPU. These are read-write + files that operate on a 32 bit word. Writing to one of these files + triggers an interrupt on the SPU. The value written to the signal + files can be read from the SPU through a channel read or from host user + space through the file. After the value has been read by the SPU, it + is reset to zero. The possible operations on an open signal1 or sig- + nal2 file are: + + read(2) + If a count smaller than four is requested, read returns -1 and + sets errno to EINVAL. Otherwise, a four byte value is placed in + the data buffer, containing the current value of the specified + signal notification register. + + write(2) + If a count smaller than four is requested, write returns -1 and + sets errno to EINVAL. Otherwise, a four byte value is copied + from the data buffer, updating the value of the specified signal + notification register. The signal notification register will + either be replaced with the input data or will be updated to the + bitwise OR of the old value and the input data, depending on the + contents of the signal1_type, or signal2_type respectively, + file. + + + /signal1_type + /signal2_type + These two files change the behavior of the signal1 and signal2 notifi- + cation files. They contain a numerical ASCII string which is read as + either "1" or "0". In mode 0 (overwrite), the hardware replaces the + contents of the signal channel with the data that is written to it. In + mode 1 (logical OR), the hardware accumulates the bits that are subse- + quently written to it.
The possible operations on an open signal1_type + or signal2_type file are: + + read(2) + When the count supplied to the read call is shorter than the + required length for the digit plus a newline character, subse- + quent reads from the same file descriptor will result in com- + pleting the string. When a complete string has been read, all + subsequent read operations will return zero bytes and a new file + descriptor needs to be opened to read the value again. + + write(2) + A write operation on the file results in setting the register to + the value given in the string. The string is parsed from the + beginning to the first non-numeric character or the end of the + buffer. Subsequent writes to the same file descriptor overwrite + the previous setting. + + +EXAMPLES + /etc/fstab entry + none /spu spufs gid=spu 0 0 + + +AUTHORS + Arnd Bergmann , Mark Nutter , + Ulrich Weigand + +SEE ALSO + capabilities(7), close(2), spu_create(2), spu_run(2), spufs(7) + + + +Linux 2005-09-28 SPUFS(2) + +------------------------------------------------------------------------------ + +SPU_RUN(2) Linux Programmer's Manual SPU_RUN(2) + + + +NAME + spu_run - execute an spu context + + +SYNOPSIS + #include + + int spu_run(int fd, unsigned int *npc, unsigned int *event); + +DESCRIPTION + The spu_run system call is used on PowerPC machines that implement the + Cell Broadband Engine Architecture in order to access Synergistic Pro- + cessor Units (SPUs). It uses the fd that was returned from spu_cre- + ate(2) to address a specific SPU context. When the context gets sched- + uled to a physical SPU, it starts execution at the instruction pointer + passed in npc. + + Execution of SPU code happens synchronously, meaning that spu_run does + not return while the SPU is still running. If there is a need to exe- + cute SPU code in parallel with other code on either the main CPU or + other SPUs, you need to create a new thread of execution first, e.g. + using the pthread_create(3) call. 
+ + When spu_run returns, the current value of the SPU instruction pointer + is written back to npc, so you can call spu_run again without updating + the pointers. + + event can be a NULL pointer or point to an extended status code that + gets filled when spu_run returns. It can be one of the following con- + stants: + + SPE_EVENT_DMA_ALIGNMENT + A DMA alignment error + + SPE_EVENT_SPE_DATA_SEGMENT + A DMA segmentation error + + SPE_EVENT_SPE_DATA_STORAGE + A DMA storage error + + If NULL is passed as the event argument, these errors will result in a + signal delivered to the calling process. + +RETURN VALUE + spu_run returns the value of the spu_status register or -1 to indicate + an error and set errno to one of the error codes listed below. The + spu_status register value contains a bit mask of status codes and + optionally a 14 bit code returned from the stop-and-signal instruction + on the SPU. The bit masks for the status codes are: + + 0x02 SPU was stopped by stop-and-signal. + + 0x04 SPU was stopped by halt. + + 0x08 SPU is waiting for a channel. + + 0x10 SPU is in single-step mode. + + 0x20 SPU has tried to execute an invalid instruction. + + 0x40 SPU has tried to access an invalid channel. + + 0x3fff0000 + The bits masked with this value contain the code returned from + stop-and-signal. + + There are always one or more of the lower eight bits set or an error + code is returned from spu_run. + +ERRORS + EAGAIN or EWOULDBLOCK + fd is in non-blocking mode and spu_run would block. + + EBADF fd is not a valid file descriptor. + + EFAULT npc is not a valid pointer or event is neither NULL nor a valid + pointer. + + EINTR A signal occurred while spu_run was in progress. The npc value + has been updated to the new program counter value if necessary. + + EINVAL fd is not a file descriptor returned from spu_create(2). + + ENOMEM Insufficient memory was available to handle a page fault result- + ing from an MFC direct memory access.
+ + ENOSYS the functionality is not provided by the current system, because + either the hardware does not provide SPUs or the spufs module is + not loaded. + + +NOTES + spu_run is meant to be used from libraries that implement a more + abstract interface to SPUs, not to be used from regular applications. + See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec- + ommended libraries. + + +CONFORMING TO + This call is Linux specific and only implemented by the ppc64 architec- + ture. Programs using this system call are not portable. + + +BUGS + The code does not yet fully implement all features outlined here. + + +AUTHOR + Arnd Bergmann + +SEE ALSO + capabilities(7), close(2), spu_create(2), spufs(7) + + + +Linux 2005-09-28 SPU_RUN(2) + +------------------------------------------------------------------------------ + +SPU_CREATE(2) Linux Programmer's Manual SPU_CREATE(2) + + + +NAME + spu_create - create a new spu context + + +SYNOPSIS + #include + #include + + int spu_create(const char *pathname, int flags, mode_t mode); + +DESCRIPTION + The spu_create system call is used on PowerPC machines that implement + the Cell Broadband Engine Architecture in order to access Synergistic + Processor Units (SPUs). It creates a new logical context for an SPU in + pathname and returns a handle associated with it. pathname must + point to a non-existing directory in the mount point of the SPU file + system (spufs). When spu_create is successful, a directory gets cre- + ated on pathname and it is populated with files. + + The returned file handle can only be passed to spu_run(2) or closed, + other operations are not defined on it. When it is closed, all associ- + ated directory entries in spufs are removed. When the last file handle + pointing either inside of the context directory or to this file + descriptor is closed, the logical SPU context is destroyed.
+ + The parameter flags can be zero or any bitwise or'd combination of the + following constants: + + SPU_RAWIO + Allow mapping of some of the hardware registers of the SPU into + user space. This flag requires the CAP_SYS_RAWIO capability, see + capabilities(7). + + The mode parameter specifies the permissions used for creating the new + directory in spufs. mode is modified with the user's umask(2) value + and then used for both the directory and the files contained in it. The + file permissions mask out some more bits of mode because they typically + support only read or write access. See stat(2) for a full list of the + possible mode values. + + +RETURN VALUE + spu_create returns a new file descriptor. It may return -1 to indicate + an error condition and set errno to one of the error codes listed + below. + + +ERRORS + EACCES + The current user does not have write access on the spufs mount + point. + + EEXIST An SPU context already exists at the given path name. + + EFAULT pathname is not a valid string pointer in the current address + space. + + EINVAL pathname is not a directory in the spufs mount point. + + ELOOP Too many symlinks were found while resolving pathname. + + EMFILE The process has reached its maximum open file limit. + + ENAMETOOLONG + pathname was too long. + + ENFILE The system has reached the global open file limit. + + ENOENT Part of pathname could not be resolved. + + ENOMEM The kernel could not allocate all resources required. + + ENOSPC There are not enough SPU resources available to create a new + context or the user specific limit for the number of SPU con- + texts has been reached. + + ENOSYS the functionality is not provided by the current system, because + either the hardware does not provide SPUs or the spufs module is + not loaded. + + ENOTDIR + A part of pathname is not a directory.
+ + + +NOTES + spu_create is meant to be used from libraries that implement a more + abstract interface to SPUs, not to be used from regular applications. + See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec- + ommended libraries. + + +FILES + pathname must point to a location beneath the mount point of spufs. By + convention, it gets mounted in /spu. + + +CONFORMING TO + This call is Linux specific and only implemented by the ppc64 architec- + ture. Programs using this system call are not portable. + + +BUGS + The code does not yet fully implement all features lined out here. + + +AUTHOR + Arnd Bergmann + +SEE ALSO + capabilities(7), close(2), spu_run(2), spufs(7) + + + +Linux 2005-09-28 SPU_CREATE(2) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4d71aa3ecbb5..39ca7b9da369 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -482,6 +482,7 @@ source arch/powerpc/platforms/embedded6xx/Kconfig source arch/powerpc/platforms/4xx/Kconfig source arch/powerpc/platforms/85xx/Kconfig source arch/powerpc/platforms/8xx/Kconfig +source arch/powerpc/platforms/cell/Kconfig menu "Kernel options" diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 4bb3650420b4..989f6286991a 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -319,3 +319,5 @@ COMPAT_SYS(ioprio_get) SYSCALL(inotify_init) SYSCALL(inotify_add_watch) SYSCALL(inotify_rm_watch) +SYSCALL(spu_run) +SYSCALL(spu_create) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index a606504678bd..846a1894cf95 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -644,6 +644,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) DBG_LOW(" -> rc=%d\n", rc); return rc; } +EXPORT_SYMBOL_GPL(hash_page); void hash_preload(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) diff --git a/arch/powerpc/platforms/cell/Kconfig 
b/arch/powerpc/platforms/cell/Kconfig new file mode 100644 index 000000000000..3157071e241c --- /dev/null +++ b/arch/powerpc/platforms/cell/Kconfig @@ -0,0 +1,13 @@ +menu "Cell Broadband Engine options" + depends on PPC_CELL + +config SPU_FS + tristate "SPU file system" + default m + depends on PPC_CELL + help + The SPU file system is used to access Synergistic Processing + Units on machines implementing the Broadband Processor + Architecture. + +endmenu diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index 55e094b96bc0..74616cf13af9 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -1,2 +1,5 @@ obj-y += interrupt.o iommu.o setup.o spider-pic.o obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SPU_FS) += spufs/ spu_base.o +builtin-spufs-$(CONFIG_SPU_FS) += spu_syscalls.o +obj-y += $(builtin-spufs-m) diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c new file mode 100644 index 000000000000..9e9096590a07 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -0,0 +1,740 @@ +/* + * Low-level SPU handling + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#define DEBUG 1 + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "interrupt.h" + +static int __spu_trap_invalid_dma(struct spu *spu) +{ + pr_debug("%s\n", __FUNCTION__); + force_sig(SIGBUS, /* info, */ current); + return 0; +} + +static int __spu_trap_dma_align(struct spu *spu) +{ + pr_debug("%s\n", __FUNCTION__); + force_sig(SIGBUS, /* info, */ current); + return 0; +} + +static int __spu_trap_error(struct spu *spu) +{ + pr_debug("%s\n", __FUNCTION__); + force_sig(SIGILL, /* info, */ current); + return 0; +} + +static void spu_restart_dma(struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND); +} + +static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) +{ + struct spu_priv2 __iomem *priv2; + struct mm_struct *mm; + + pr_debug("%s\n", __FUNCTION__); + + if (REGION_ID(ea) != USER_REGION_ID) { + pr_debug("invalid region access at %016lx\n", ea); + return 1; + } + + priv2 = spu->priv2; + mm = spu->mm; + + if (spu->slb_replace >= 8) + spu->slb_replace = 0; + + out_be64(&priv2->slb_index_W, spu->slb_replace); + out_be64(&priv2->slb_vsid_RW, + (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) + | SLB_VSID_USER); + out_be64(&priv2->slb_esid_RW, (ea & ESID_MASK) | SLB_ESID_V); + + spu_restart_dma(spu); + + pr_debug("set slb %d context %lx, ea %016lx, vsid %016lx, esid %016lx\n", + spu->slb_replace, mm->context.id, ea, + (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT)| SLB_VSID_USER, + (ea & ESID_MASK) | SLB_ESID_V); + return 0; +} + +static int __spu_trap_data_map(struct spu *spu, unsigned long ea) +{ + unsigned long dsisr; + struct spu_priv1 __iomem *priv1; + + pr_debug("%s\n", __FUNCTION__); + priv1 = spu->priv1; + dsisr = in_be64(&priv1->mfc_dsisr_RW); + + wake_up(&spu->stop_wq); + + return 0; +} + +static int __spu_trap_mailbox(struct spu *spu) +{ + wake_up_all(&spu->ibox_wq); + 
kill_fasync(&spu->ibox_fasync, SIGIO, POLLIN); + + /* atomically disable SPU mailbox interrupts */ + spin_lock(&spu->register_lock); + out_be64(&spu->priv1->int_mask_class2_RW, + in_be64(&spu->priv1->int_mask_class2_RW) & ~0x1); + spin_unlock(&spu->register_lock); + return 0; +} + +static int __spu_trap_stop(struct spu *spu) +{ + pr_debug("%s\n", __FUNCTION__); + spu->stop_code = in_be32(&spu->problem->spu_status_R); + wake_up(&spu->stop_wq); + return 0; +} + +static int __spu_trap_halt(struct spu *spu) +{ + pr_debug("%s\n", __FUNCTION__); + spu->stop_code = in_be32(&spu->problem->spu_status_R); + wake_up(&spu->stop_wq); + return 0; +} + +static int __spu_trap_tag_group(struct spu *spu) +{ + pr_debug("%s\n", __FUNCTION__); + /* wake_up(&spu->dma_wq); */ + return 0; +} + +static int __spu_trap_spubox(struct spu *spu) +{ + wake_up_all(&spu->wbox_wq); + kill_fasync(&spu->wbox_fasync, SIGIO, POLLOUT); + + /* atomically disable SPU mailbox interrupts */ + spin_lock(&spu->register_lock); + out_be64(&spu->priv1->int_mask_class2_RW, + in_be64(&spu->priv1->int_mask_class2_RW) & ~0x10); + spin_unlock(&spu->register_lock); + return 0; +} + +static irqreturn_t +spu_irq_class_0(int irq, void *data, struct pt_regs *regs) +{ + struct spu *spu; + + spu = data; + spu->class_0_pending = 1; + wake_up(&spu->stop_wq); + + return IRQ_HANDLED; +} + +static int +spu_irq_class_0_bottom(struct spu *spu) +{ + unsigned long stat; + + spu->class_0_pending = 0; + + stat = in_be64(&spu->priv1->int_stat_class0_RW); + + if (stat & 1) /* invalid MFC DMA */ + __spu_trap_invalid_dma(spu); + + if (stat & 2) /* invalid DMA alignment */ + __spu_trap_dma_align(spu); + + if (stat & 4) /* error on SPU */ + __spu_trap_error(spu); + + out_be64(&spu->priv1->int_stat_class0_RW, stat); + return 0; +} + +static irqreturn_t +spu_irq_class_1(int irq, void *data, struct pt_regs *regs) +{ + struct spu *spu; + unsigned long stat, dar; + + spu = data; + stat = in_be64(&spu->priv1->int_stat_class1_RW); + dar = 
in_be64(&spu->priv1->mfc_dar_RW); + + if (stat & 1) /* segment fault */ + __spu_trap_data_seg(spu, dar); + + if (stat & 2) { /* mapping fault */ + __spu_trap_data_map(spu, dar); + } + + if (stat & 4) /* ls compare & suspend on get */ + ; + + if (stat & 8) /* ls compare & suspend on put */ + ; + + out_be64(&spu->priv1->int_stat_class1_RW, stat); + return stat ? IRQ_HANDLED : IRQ_NONE; +} + +static irqreturn_t +spu_irq_class_2(int irq, void *data, struct pt_regs *regs) +{ + struct spu *spu; + unsigned long stat; + + spu = data; + stat = in_be64(&spu->priv1->int_stat_class2_RW); + + pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, + in_be64(&spu->priv1->int_mask_class2_RW)); + + + if (stat & 1) /* PPC core mailbox */ + __spu_trap_mailbox(spu); + + if (stat & 2) /* SPU stop-and-signal */ + __spu_trap_stop(spu); + + if (stat & 4) /* SPU halted */ + __spu_trap_halt(spu); + + if (stat & 8) /* DMA tag group complete */ + __spu_trap_tag_group(spu); + + if (stat & 0x10) /* SPU mailbox threshold */ + __spu_trap_spubox(spu); + + out_be64(&spu->priv1->int_stat_class2_RW, stat); + return stat ? 
IRQ_HANDLED : IRQ_NONE; +} + +static int +spu_request_irqs(struct spu *spu) +{ + int ret; + int irq_base; + + irq_base = IIC_NODE_STRIDE * spu->node + IIC_SPE_OFFSET; + + snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0", spu->number); + ret = request_irq(irq_base + spu->isrc, + spu_irq_class_0, 0, spu->irq_c0, spu); + if (ret) + goto out; + out_be64(&spu->priv1->int_mask_class0_RW, 0x7); + + snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1", spu->number); + ret = request_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, + spu_irq_class_1, 0, spu->irq_c1, spu); + if (ret) + goto out1; + out_be64(&spu->priv1->int_mask_class1_RW, 0x3); + + snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2", spu->number); + ret = request_irq(irq_base + 2*IIC_CLASS_STRIDE + spu->isrc, + spu_irq_class_2, 0, spu->irq_c2, spu); + if (ret) + goto out2; + out_be64(&spu->priv1->int_mask_class2_RW, 0xe); + goto out; + +out2: + free_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, spu); +out1: + free_irq(irq_base + spu->isrc, spu); +out: + return ret; +} + +static void +spu_free_irqs(struct spu *spu) +{ + int irq_base; + + irq_base = IIC_NODE_STRIDE * spu->node + IIC_SPE_OFFSET; + + free_irq(irq_base + spu->isrc, spu); + free_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, spu); + free_irq(irq_base + 2*IIC_CLASS_STRIDE + spu->isrc, spu); +} + +static LIST_HEAD(spu_list); +static DECLARE_MUTEX(spu_mutex); + +static void spu_init_channels(struct spu *spu) +{ + static const struct { + unsigned channel; + unsigned count; + } zero_list[] = { + { 0x00, 1, }, { 0x01, 1, }, { 0x03, 1, }, { 0x04, 1, }, + { 0x18, 1, }, { 0x19, 1, }, { 0x1b, 1, }, { 0x1d, 1, }, + }, count_list[] = { + { 0x00, 0, }, { 0x03, 0, }, { 0x04, 0, }, { 0x15, 16, }, + { 0x17, 1, }, { 0x18, 0, }, { 0x19, 0, }, { 0x1b, 0, }, + { 0x1c, 1, }, { 0x1d, 0, }, { 0x1e, 1, }, + }; + struct spu_priv2 *priv2; + int i; + + priv2 = spu->priv2; + + /* initialize all channel data to zero */ + for (i = 0; i < ARRAY_SIZE(zero_list); i++) { + 
int count; + + out_be64(&priv2->spu_chnlcntptr_RW, zero_list[i].channel); + for (count = 0; count < zero_list[i].count; count++) + out_be64(&priv2->spu_chnldata_RW, 0); + } + + /* initialize channel counts to meaningful values */ + for (i = 0; i < ARRAY_SIZE(count_list); i++) { + out_be64(&priv2->spu_chnlcntptr_RW, count_list[i].channel); + out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count); + } +} + +static void spu_init_regs(struct spu *spu) +{ + out_be64(&spu->priv1->int_mask_class0_RW, 0x7); + out_be64(&spu->priv1->int_mask_class1_RW, 0x3); + out_be64(&spu->priv1->int_mask_class2_RW, 0xe); +} + +struct spu *spu_alloc(void) +{ + struct spu *spu; + + down(&spu_mutex); + if (!list_empty(&spu_list)) { + spu = list_entry(spu_list.next, struct spu, list); + list_del_init(&spu->list); + pr_debug("Got SPU %x %d\n", spu->isrc, spu->number); + } else { + pr_debug("No SPU left\n"); + spu = NULL; + } + up(&spu_mutex); + + if (spu) { + spu_init_channels(spu); + spu_init_regs(spu); + } + + return spu; +} +EXPORT_SYMBOL(spu_alloc); + +void spu_free(struct spu *spu) +{ + down(&spu_mutex); + spu->ibox_fasync = NULL; + spu->wbox_fasync = NULL; + list_add_tail(&spu->list, &spu_list); + up(&spu_mutex); +} +EXPORT_SYMBOL(spu_free); + +extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX +static int spu_handle_mm_fault(struct spu *spu) +{ + struct spu_priv1 __iomem *priv1; + struct mm_struct *mm = spu->mm; + struct vm_area_struct *vma; + u64 ea, dsisr, is_write; + int ret; + + priv1 = spu->priv1; + ea = in_be64(&priv1->mfc_dar_RW); + dsisr = in_be64(&priv1->mfc_dsisr_RW); +#if 0 + if (!IS_VALID_EA(ea)) { + return -EFAULT; + } +#endif /* XXX */ + if (mm == NULL) { + return -EFAULT; + } + if (mm->pgd == NULL) { + return -EFAULT; + } + + down_read(&mm->mmap_sem); + vma = find_vma(mm, ea); + if (!vma) + goto bad_area; + if (vma->vm_start <= ea) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +#if 0 + if (expand_stack(vma, 
ea)) + goto bad_area; +#endif /* XXX */ +good_area: + is_write = dsisr & MFC_DSISR_ACCESS_PUT; + if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + } else { + if (dsisr & MFC_DSISR_ACCESS_DENIED) + goto bad_area; + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + ret = 0; + switch (handle_mm_fault(mm, vma, ea, is_write)) { + case VM_FAULT_MINOR: + current->min_flt++; + break; + case VM_FAULT_MAJOR: + current->maj_flt++; + break; + case VM_FAULT_SIGBUS: + ret = -EFAULT; + goto bad_area; + case VM_FAULT_OOM: + ret = -ENOMEM; + goto bad_area; + default: + BUG(); + } + up_read(&mm->mmap_sem); + return ret; + +bad_area: + up_read(&mm->mmap_sem); + return -EFAULT; +} + +static int spu_handle_pte_fault(struct spu *spu) +{ + struct spu_priv1 __iomem *priv1; + u64 ea, dsisr, access, error = 0UL; + int ret = 0; + + priv1 = spu->priv1; + ea = in_be64(&priv1->mfc_dar_RW); + dsisr = in_be64(&priv1->mfc_dsisr_RW); + access = (_PAGE_PRESENT | _PAGE_USER); + if (dsisr & MFC_DSISR_PTE_NOT_FOUND) { + if (hash_page(ea, access, 0x300) != 0) + error |= CLASS1_ENABLE_STORAGE_FAULT_INTR; + } + if ((error & CLASS1_ENABLE_STORAGE_FAULT_INTR) || + (dsisr & MFC_DSISR_ACCESS_DENIED)) { + if ((ret = spu_handle_mm_fault(spu)) != 0) + error |= CLASS1_ENABLE_STORAGE_FAULT_INTR; + else + error &= ~CLASS1_ENABLE_STORAGE_FAULT_INTR; + } + if (!error) + spu_restart_dma(spu); + + return ret; +} + +int spu_run(struct spu *spu) +{ + struct spu_problem __iomem *prob; + struct spu_priv1 __iomem *priv1; + struct spu_priv2 __iomem *priv2; + unsigned long status; + int ret; + + prob = spu->problem; + priv1 = spu->priv1; + priv2 = spu->priv2; + + /* Let SPU run. 
*/ + spu->mm = current->mm; + eieio(); + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + + do { + ret = wait_event_interruptible(spu->stop_wq, + (!((status = in_be32(&prob->spu_status_R)) & 0x1)) + || (in_be64(&priv1->mfc_dsisr_RW) & MFC_DSISR_PTE_NOT_FOUND) + || spu->class_0_pending); + + if (status & SPU_STATUS_STOPPED_BY_STOP) + ret = -EAGAIN; + else if (status & SPU_STATUS_STOPPED_BY_HALT) + ret = -EIO; + else if (in_be64(&priv1->mfc_dsisr_RW) & MFC_DSISR_PTE_NOT_FOUND) + ret = spu_handle_pte_fault(spu); + + if (spu->class_0_pending) + spu_irq_class_0_bottom(spu); + + if (!ret && signal_pending(current)) + ret = -ERESTARTSYS; + + } while (!ret); + + /* Ensure SPU is stopped. */ + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + while (in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) + cpu_relax(); + + out_be64(&priv2->slb_invalidate_all_W, 0); + out_be64(&priv1->tlb_invalidate_entry_W, 0UL); + eieio(); + + spu->mm = NULL; + + /* Check for SPU breakpoint. */ + if (unlikely(current->ptrace & PT_PTRACED)) { + status = in_be32(&prob->spu_status_R); + + if ((status & SPU_STATUS_STOPPED_BY_STOP) + && status >> SPU_STOP_STATUS_SHIFT == 0x3fff) { + force_sig(SIGTRAP, current); + ret = -ERESTARTSYS; + } + } + + return ret; +} +EXPORT_SYMBOL(spu_run); + +static void __iomem * __init map_spe_prop(struct device_node *n, + const char *name) +{ + struct address_prop { + unsigned long address; + unsigned int len; + } __attribute__((packed)) *prop; + + void *p; + int proplen; + + p = get_property(n, name, &proplen); + if (proplen != sizeof (struct address_prop)) + return NULL; + + prop = p; + + return ioremap(prop->address, prop->len); +} + +static void spu_unmap(struct spu *spu) +{ + iounmap(spu->priv2); + iounmap(spu->priv1); + iounmap(spu->problem); + iounmap((u8 __iomem *)spu->local_store); +} + +static int __init spu_map_device(struct spu *spu, struct device_node *spe) +{ + char *prop; + int ret; + + ret = -ENODEV; + prop = get_property(spe, 
"isrc", NULL); + if (!prop) + goto out; + spu->isrc = *(unsigned int *)prop; + + spu->name = get_property(spe, "name", NULL); + if (!spu->name) + goto out; + + prop = get_property(spe, "local-store", NULL); + if (!prop) + goto out; + spu->local_store_phys = *(unsigned long *)prop; + + /* we use local store as ram, not io memory */ + spu->local_store = (void __force *)map_spe_prop(spe, "local-store"); + if (!spu->local_store) + goto out; + + spu->problem= map_spe_prop(spe, "problem"); + if (!spu->problem) + goto out_unmap; + + spu->priv1= map_spe_prop(spe, "priv1"); + if (!spu->priv1) + goto out_unmap; + + spu->priv2= map_spe_prop(spe, "priv2"); + if (!spu->priv2) + goto out_unmap; + ret = 0; + goto out; + +out_unmap: + spu_unmap(spu); +out: + return ret; +} + +static int __init find_spu_node_id(struct device_node *spe) +{ + unsigned int *id; + struct device_node *cpu; + + cpu = spe->parent->parent; + id = (unsigned int *)get_property(cpu, "node-id", NULL); + + return id ? *id : 0; +} + +static int __init create_spu(struct device_node *spe) +{ + struct spu *spu; + int ret; + static int number; + + ret = -ENOMEM; + spu = kmalloc(sizeof (*spu), GFP_KERNEL); + if (!spu) + goto out; + + ret = spu_map_device(spu, spe); + if (ret) + goto out_free; + + spu->node = find_spu_node_id(spe); + spu->stop_code = 0; + spu->slb_replace = 0; + spu->mm = NULL; + spu->class_0_pending = 0; + spin_lock_init(&spu->register_lock); + + out_be64(&spu->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1)); + out_be64(&spu->priv1->mfc_sr1_RW, 0x33); + + init_waitqueue_head(&spu->stop_wq); + init_waitqueue_head(&spu->wbox_wq); + init_waitqueue_head(&spu->ibox_wq); + + spu->ibox_fasync = NULL; + spu->wbox_fasync = NULL; + + down(&spu_mutex); + spu->number = number++; + ret = spu_request_irqs(spu); + if (ret) + goto out_unmap; + + list_add(&spu->list, &spu_list); + up(&spu_mutex); + + pr_debug(KERN_DEBUG "Using SPE %s %02x %p %p %p %p %d\n", + spu->name, spu->isrc, spu->local_store, + spu->problem, spu->priv1, 
spu->priv2, spu->number); + goto out; + +out_unmap: + up(&spu_mutex); + spu_unmap(spu); +out_free: + kfree(spu); +out: + return ret; +} + +static void destroy_spu(struct spu *spu) +{ + list_del_init(&spu->list); + + spu_free_irqs(spu); + spu_unmap(spu); + kfree(spu); +} + +static void cleanup_spu_base(void) +{ + struct spu *spu, *tmp; + down(&spu_mutex); + list_for_each_entry_safe(spu, tmp, &spu_list, list) + destroy_spu(spu); + up(&spu_mutex); +} +module_exit(cleanup_spu_base); + +static int __init init_spu_base(void) +{ + struct device_node *node; + int ret; + + ret = -ENODEV; + for (node = of_find_node_by_type(NULL, "spe"); + node; node = of_find_node_by_type(node, "spe")) { + ret = create_spu(node); + if (ret) { + printk(KERN_WARNING "%s: Error initializing %s\n", + __FUNCTION__, node->name); + cleanup_spu_base(); + break; + } + } + /* in some old firmware versions, the spe is called 'spc', so we + look for that as well */ + for (node = of_find_node_by_type(NULL, "spc"); + node; node = of_find_node_by_type(node, "spc")) { + ret = create_spu(node); + if (ret) { + printk(KERN_WARNING "%s: Error initializing %s\n", + __FUNCTION__, node->name); + cleanup_spu_base(); + break; + } + } + return ret; +} +module_init(init_spu_base); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnd Bergmann "); diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c new file mode 100644 index 000000000000..43e0b187ffde --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -0,0 +1,86 @@ +/* + * SPU file system -- system call stubs + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include +#include +#include + +#include + +struct spufs_calls spufs_calls = { + .owner = NULL, +}; + +/* These stub syscalls are needed to have the actual implementation + * within a loadable module. When spufs is built into the kernel, + * this file is not used and the syscalls directly enter the fs code */ + +asmlinkage long sys_spu_create(const char __user *name, + unsigned int flags, mode_t mode) +{ + long ret; + + ret = -ENOSYS; + if (try_module_get(spufs_calls.owner)) { + ret = spufs_calls.create_thread(name, flags, mode); + module_put(spufs_calls.owner); + } + return ret; +} + +asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) +{ + long ret; + struct file *filp; + int fput_needed; + + ret = -ENOSYS; + if (try_module_get(spufs_calls.owner)) { + ret = -EBADF; + filp = fget_light(fd, &fput_needed); + if (filp) { + ret = spufs_calls.spu_run(filp, unpc, ustatus); + fput_light(filp, fput_needed); + } + module_put(spufs_calls.owner); + } + return ret; +} + +int register_spu_syscalls(struct spufs_calls *calls) +{ + if (spufs_calls.owner) + return -EBUSY; + + spufs_calls.create_thread = calls->create_thread; + spufs_calls.spu_run = calls->spu_run; + smp_mb(); + spufs_calls.owner = calls->owner; + return 0; +} +EXPORT_SYMBOL_GPL(register_spu_syscalls); + +void unregister_spu_syscalls(struct spufs_calls *calls) +{ + BUG_ON(spufs_calls.owner != calls->owner); + spufs_calls.owner = NULL; +} +EXPORT_SYMBOL_GPL(unregister_spu_syscalls); diff --git a/arch/powerpc/platforms/cell/spufs/Makefile 
b/arch/powerpc/platforms/cell/spufs/Makefile new file mode 100644 index 000000000000..6f496e37bcb7 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_SPU_FS) += spufs.o + +spufs-y += inode.o file.o context.o syscalls.o diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c new file mode 100644 index 000000000000..a69b85e2778a --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -0,0 +1,67 @@ +/* + * SPU file system -- SPU context management + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include "spufs.h" + +struct spu_context *alloc_spu_context(void) +{ + struct spu_context *ctx; + ctx = kmalloc(sizeof *ctx, GFP_KERNEL); + if (!ctx) + goto out; + ctx->spu = spu_alloc(); + if (!ctx->spu) + goto out_free; + init_rwsem(&ctx->backing_sema); + spin_lock_init(&ctx->mmio_lock); + kref_init(&ctx->kref); + goto out; +out_free: + kfree(ctx); + ctx = NULL; +out: + return ctx; +} + +void destroy_spu_context(struct kref *kref) +{ + struct spu_context *ctx; + ctx = container_of(kref, struct spu_context, kref); + if (ctx->spu) + spu_free(ctx->spu); + kfree(ctx); +} + +struct spu_context * get_spu_context(struct spu_context *ctx) +{ + kref_get(&ctx->kref); + return ctx; +} + +int put_spu_context(struct spu_context *ctx) +{ + return kref_put(&ctx->kref, &destroy_spu_context); +} + + diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c new file mode 100644 index 000000000000..c1e643310494 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -0,0 +1,596 @@ +/* + * SPU file system -- file contents + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "spufs.h" + +static int +spufs_mem_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + file->private_data = i->i_ctx; + return 0; +} + +static ssize_t +spufs_mem_read(struct file *file, char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu *spu; + struct spu_context *ctx; + int ret; + + ctx = file->private_data; + spu = ctx->spu; + + down_read(&ctx->backing_sema); + if (spu->number & 0/*1*/) { + ret = generic_file_read(file, buffer, size, pos); + goto out; + } + + ret = simple_read_from_buffer(buffer, size, pos, + spu->local_store, LS_SIZE); +out: + up_read(&ctx->backing_sema); + return ret; +} + +static ssize_t +spufs_mem_write(struct file *file, const char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + struct spu *spu = ctx->spu; + + if (spu->number & 0) //1) + return generic_file_write(file, buffer, size, pos); + + size = min_t(ssize_t, LS_SIZE - *pos, size); + if (size <= 0) + return -EFBIG; + *pos += size; + return copy_from_user(spu->local_store + *pos - size, + buffer, size) ? 
-EFAULT : size; +} + +static int +spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct spu_context *ctx = file->private_data; + struct spu *spu = ctx->spu; + unsigned long pfn; + + if (spu->number & 0) //1) + return generic_file_mmap(file, vma); + + vma->vm_flags |= VM_RESERVED; + vma->vm_page_prot = __pgprot(pgprot_val (vma->vm_page_prot) + | _PAGE_NO_CACHE); + pfn = spu->local_store_phys >> PAGE_SHIFT; + /* + * This will work for actual SPUs, but not for vmalloc memory: + */ + if (remap_pfn_range(vma, vma->vm_start, pfn, + vma->vm_end-vma->vm_start, vma->vm_page_prot)) + return -EAGAIN; + return 0; +} + +static struct file_operations spufs_mem_fops = { + .open = spufs_mem_open, + .read = spufs_mem_read, + .write = spufs_mem_write, + .mmap = spufs_mem_mmap, + .llseek = generic_file_llseek, +}; + +/* generic open function for all pipe-like files */ +static int spufs_pipe_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + file->private_data = i->i_ctx; + + return nonseekable_open(inode, file); +} + +static ssize_t spufs_mbox_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + struct spu_problem __iomem *prob; + u32 mbox_stat; + u32 mbox_data; + + if (len < 4) + return -EINVAL; + + ctx = file->private_data; + prob = ctx->spu->problem; + mbox_stat = in_be32(&prob->mb_stat_R); + if (!(mbox_stat & 0x0000ff)) + return -EAGAIN; + + mbox_data = in_be32(&prob->pu_mb_R); + + if (copy_to_user(buf, &mbox_data, sizeof mbox_data)) + return -EFAULT; + + return 4; +} + +static struct file_operations spufs_mbox_fops = { + .open = spufs_pipe_open, + .read = spufs_mbox_read, +}; + +static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + u32 mbox_stat; + + if (len < 4) + return -EINVAL; + + ctx = file->private_data; + mbox_stat = in_be32(&ctx->spu->problem->mb_stat_R) & 0xff; + + if 
(copy_to_user(buf, &mbox_stat, sizeof mbox_stat)) + return -EFAULT; + + return 4; +} + +static struct file_operations spufs_mbox_stat_fops = { + .open = spufs_pipe_open, + .read = spufs_mbox_stat_read, +}; + +/* low-level ibox access function */ +size_t spu_ibox_read(struct spu *spu, u32 *data) +{ + int ret; + + spin_lock_irq(&spu->register_lock); + + if (in_be32(&spu->problem->mb_stat_R) & 0xff0000) { + /* read the first available word */ + *data = in_be64(&spu->priv2->puint_mb_R); + ret = 4; + } else { + /* make sure we get woken up by the interrupt */ + out_be64(&spu->priv1->int_mask_class2_RW, + in_be64(&spu->priv1->int_mask_class2_RW) | 0x1); + ret = 0; + } + + spin_unlock_irq(&spu->register_lock); + return ret; +} +EXPORT_SYMBOL(spu_ibox_read); + +static int spufs_ibox_fasync(int fd, struct file *file, int on) +{ + struct spu_context *ctx; + ctx = file->private_data; + return fasync_helper(fd, file, on, &ctx->spu->ibox_fasync); +} + +static ssize_t spufs_ibox_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + u32 ibox_data; + ssize_t ret; + + if (len < 4) + return -EINVAL; + + ctx = file->private_data; + + ret = 0; + if (file->f_flags & O_NONBLOCK) { + if (!spu_ibox_read(ctx->spu, &ibox_data)) + ret = -EAGAIN; + } else { + ret = wait_event_interruptible(ctx->spu->ibox_wq, + spu_ibox_read(ctx->spu, &ibox_data)); + } + + if (ret) + return ret; + + ret = 4; + if (copy_to_user(buf, &ibox_data, sizeof ibox_data)) + ret = -EFAULT; + + return ret; +} + +static unsigned int spufs_ibox_poll(struct file *file, poll_table *wait) +{ + struct spu_context *ctx; + struct spu_problem __iomem *prob; + u32 mbox_stat; + unsigned int mask; + + ctx = file->private_data; + prob = ctx->spu->problem; + mbox_stat = in_be32(&prob->mb_stat_R); + + poll_wait(file, &ctx->spu->ibox_wq, wait); + + mask = 0; + if (mbox_stat & 0xff0000) + mask |= POLLIN | POLLRDNORM; + + return mask; +} + +static struct file_operations spufs_ibox_fops = { + 
.open = spufs_pipe_open, + .read = spufs_ibox_read, + .poll = spufs_ibox_poll, + .fasync = spufs_ibox_fasync, +}; + +static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + u32 ibox_stat; + + if (len < 4) + return -EINVAL; + + ctx = file->private_data; + ibox_stat = (in_be32(&ctx->spu->problem->mb_stat_R) >> 16) & 0xff; + + if (copy_to_user(buf, &ibox_stat, sizeof ibox_stat)) + return -EFAULT; + + return 4; +} + +static struct file_operations spufs_ibox_stat_fops = { + .open = spufs_pipe_open, + .read = spufs_ibox_stat_read, +}; + +/* low-level mailbox write */ +size_t spu_wbox_write(struct spu *spu, u32 data) +{ + int ret; + + spin_lock_irq(&spu->register_lock); + + if (in_be32(&spu->problem->mb_stat_R) & 0x00ff00) { + /* we have space to write wbox_data to */ + out_be32(&spu->problem->spu_mb_W, data); + ret = 4; + } else { + /* make sure we get woken up by the interrupt when space + becomes available */ + out_be64(&spu->priv1->int_mask_class2_RW, + in_be64(&spu->priv1->int_mask_class2_RW) | 0x10); + ret = 0; + } + + spin_unlock_irq(&spu->register_lock); + return ret; +} +EXPORT_SYMBOL(spu_wbox_write); + +static int spufs_wbox_fasync(int fd, struct file *file, int on) +{ + struct spu_context *ctx; + ctx = file->private_data; + return fasync_helper(fd, file, on, &ctx->spu->wbox_fasync); +} + +static ssize_t spufs_wbox_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + u32 wbox_data; + int ret; + + if (len < 4) + return -EINVAL; + + ctx = file->private_data; + + if (copy_from_user(&wbox_data, buf, sizeof wbox_data)) + return -EFAULT; + + ret = 0; + if (file->f_flags & O_NONBLOCK) { + if (!spu_wbox_write(ctx->spu, wbox_data)) + ret = -EAGAIN; + } else { + ret = wait_event_interruptible(ctx->spu->wbox_wq, + spu_wbox_write(ctx->spu, wbox_data)); + } + + return ret ? 
ret : sizeof wbox_data; +} + +static unsigned int spufs_wbox_poll(struct file *file, poll_table *wait) +{ + struct spu_context *ctx; + struct spu_problem __iomem *prob; + u32 mbox_stat; + unsigned int mask; + + ctx = file->private_data; + prob = ctx->spu->problem; + mbox_stat = in_be32(&prob->mb_stat_R); + + poll_wait(file, &ctx->spu->wbox_wq, wait); + + mask = 0; + if (mbox_stat & 0x00ff00) + mask = POLLOUT | POLLWRNORM; + + return mask; +} + +static struct file_operations spufs_wbox_fops = { + .open = spufs_pipe_open, + .write = spufs_wbox_write, + .poll = spufs_wbox_poll, + .fasync = spufs_wbox_fasync, +}; + +static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + u32 wbox_stat; + + if (len < 4) + return -EINVAL; + + ctx = file->private_data; + wbox_stat = (in_be32(&ctx->spu->problem->mb_stat_R) >> 8) & 0xff; + + if (copy_to_user(buf, &wbox_stat, sizeof wbox_stat)) + return -EFAULT; + + return 4; +} + +static struct file_operations spufs_wbox_stat_fops = { + .open = spufs_pipe_open, + .read = spufs_wbox_stat_read, +}; + +long spufs_run_spu(struct file *file, struct spu_context *ctx, + u32 *npc, u32 *status) +{ + struct spu_problem __iomem *prob; + int ret; + + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + if (!down_write_trylock(&ctx->backing_sema)) + goto out; + } else { + down_write(&ctx->backing_sema); + } + + prob = ctx->spu->problem; + out_be32(&prob->spu_npc_RW, *npc); + + ret = spu_run(ctx->spu); + + *status = in_be32(&prob->spu_status_R); + *npc = in_be32(&prob->spu_npc_RW); + + up_write(&ctx->backing_sema); + +out: + return ret; +} + +static ssize_t spufs_signal1_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + struct spu_problem *prob; + u32 data; + + ctx = file->private_data; + prob = ctx->spu->problem; + + if (len < 4) + return -EINVAL; + + data = in_be32(&prob->signal_notify1); + if (copy_to_user(buf, &data, 4)) + 
return -EFAULT; + + return 4; +} + +static ssize_t spufs_signal1_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + struct spu_problem *prob; + u32 data; + + ctx = file->private_data; + prob = ctx->spu->problem; + + if (len < 4) + return -EINVAL; + + if (copy_from_user(&data, buf, 4)) + return -EFAULT; + + out_be32(&prob->signal_notify1, data); + + return 4; +} + +static struct file_operations spufs_signal1_fops = { + .open = spufs_pipe_open, + .read = spufs_signal1_read, + .write = spufs_signal1_write, +}; + +static ssize_t spufs_signal2_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + struct spu_problem *prob; + u32 data; + + ctx = file->private_data; + prob = ctx->spu->problem; + + if (len < 4) + return -EINVAL; + + data = in_be32(&prob->signal_notify2); + if (copy_to_user(buf, &data, 4)) + return -EFAULT; + + return 4; +} + +static ssize_t spufs_signal2_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + struct spu_problem *prob; + u32 data; + + ctx = file->private_data; + prob = ctx->spu->problem; + + if (len < 4) + return -EINVAL; + + if (copy_from_user(&data, buf, 4)) + return -EFAULT; + + out_be32(&prob->signal_notify2, data); + + return 4; +} + +static struct file_operations spufs_signal2_fops = { + .open = spufs_pipe_open, + .read = spufs_signal2_read, + .write = spufs_signal2_write, +}; + +static void spufs_signal1_type_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct spu_priv2 *priv2 = ctx->spu->priv2; + u64 tmp; + + spin_lock_irq(&ctx->spu->register_lock); + tmp = in_be64(&priv2->spu_cfg_RW); + if (val) + tmp |= 1; + else + tmp &= ~1; + out_be64(&priv2->spu_cfg_RW, tmp); + spin_unlock_irq(&ctx->spu->register_lock); +} + +static u64 spufs_signal1_type_get(void *data) +{ + struct spu_context *ctx = data; + return (in_be64(&ctx->spu->priv2->spu_cfg_RW) & 1) != 0; +} 
+DEFINE_SIMPLE_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get, + spufs_signal1_type_set, "%llu"); + +static void spufs_signal2_type_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct spu_priv2 *priv2 = ctx->spu->priv2; + u64 tmp; + + spin_lock_irq(&ctx->spu->register_lock); + tmp = in_be64(&priv2->spu_cfg_RW); + if (val) + tmp |= 2; + else + tmp &= ~2; + out_be64(&priv2->spu_cfg_RW, tmp); + spin_unlock_irq(&ctx->spu->register_lock); +} + +static u64 spufs_signal2_type_get(void *data) +{ + struct spu_context *ctx = data; + return (in_be64(&ctx->spu->priv2->spu_cfg_RW) & 2) != 0; +} +DEFINE_SIMPLE_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get, + spufs_signal2_type_set, "%llu"); + +static void spufs_npc_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + out_be32(&ctx->spu->problem->spu_npc_RW, val); +} + +static u64 spufs_npc_get(void *data) +{ + struct spu_context *ctx = data; + u64 ret; + ret = in_be32(&ctx->spu->problem->spu_npc_RW); + return ret; +} +DEFINE_SIMPLE_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, "%llx\n") + +struct tree_descr spufs_dir_contents[] = { + { "mem", &spufs_mem_fops, 0666, }, + { "mbox", &spufs_mbox_fops, 0444, }, + { "ibox", &spufs_ibox_fops, 0444, }, + { "wbox", &spufs_wbox_fops, 0222, }, + { "mbox_stat", &spufs_mbox_stat_fops, 0444, }, + { "ibox_stat", &spufs_ibox_stat_fops, 0444, }, + { "wbox_stat", &spufs_wbox_stat_fops, 0444, }, + { "signal1", &spufs_signal1_fops, 0666, }, + { "signal2", &spufs_signal2_fops, 0666, }, + { "signal1_type", &spufs_signal1_type, 0666, }, + { "signal2_type", &spufs_signal2_type, 0666, }, + { "npc", &spufs_npc_ops, 0666, }, + {}, +}; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c new file mode 100644 index 000000000000..f7aa0a6b1ce5 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -0,0 +1,470 @@ +/* + * SPU file system + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * 
Author: Arnd Bergmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "spufs.h" + +static kmem_cache_t *spufs_inode_cache; + +/* Information about the backing dev, same as ramfs */ +#if 0 +static struct backing_dev_info spufs_backing_dev_info = { + .ra_pages = 0, /* No readahead */ + .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | + BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | BDI_CAP_READ_MAP | + BDI_CAP_WRITE_MAP, +}; + +static struct address_space_operations spufs_aops = { + .readpage = simple_readpage, + .prepare_write = simple_prepare_write, + .commit_write = simple_commit_write, +}; +#endif + +/* Inode operations */ + +static struct inode * +spufs_alloc_inode(struct super_block *sb) +{ + struct spufs_inode_info *ei; + + ei = kmem_cache_alloc(spufs_inode_cache, SLAB_KERNEL); + if (!ei) + return NULL; + return &ei->vfs_inode; +} + +static void +spufs_destroy_inode(struct inode *inode) +{ + kmem_cache_free(spufs_inode_cache, SPUFS_I(inode)); +} + +static void +spufs_init_once(void *p, kmem_cache_t * cachep, unsigned long flags) +{ + struct spufs_inode_info *ei = p; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + 
SLAB_CTOR_CONSTRUCTOR) { + inode_init_once(&ei->vfs_inode); + } +} + +static struct inode * +spufs_new_inode(struct super_block *sb, int mode) +{ + struct inode *inode; + + inode = new_inode(sb); + if (!inode) + goto out; + + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; +out: + return inode; +} + +static int +spufs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + +/* dump_stack(); + pr_debug("ia_size %lld, i_size:%lld\n", attr->ia_size, inode->i_size); +*/ + if ((attr->ia_valid & ATTR_SIZE) && + (attr->ia_size != inode->i_size)) + return -EINVAL; + return inode_setattr(inode, attr); +} + + +static int +spufs_new_file(struct super_block *sb, struct dentry *dentry, + struct file_operations *fops, int mode, + struct spu_context *ctx) +{ + static struct inode_operations spufs_file_iops = { + .getattr = simple_getattr, + .setattr = spufs_setattr, + .unlink = simple_unlink, + }; + struct inode *inode; + int ret; + + ret = -ENOSPC; + inode = spufs_new_inode(sb, S_IFREG | mode); + if (!inode) + goto out; + + ret = 0; + inode->i_op = &spufs_file_iops; + inode->i_fop = fops; + inode->u.generic_ip = SPUFS_I(inode)->i_ctx = get_spu_context(ctx); + d_add(dentry, inode); +out: + return ret; +} + +static void +spufs_delete_inode(struct inode *inode) +{ + if (SPUFS_I(inode)->i_ctx) + put_spu_context(SPUFS_I(inode)->i_ctx); + clear_inode(inode); +} + +static int +spufs_fill_dir(struct dentry *dir, struct tree_descr *files, + int mode, struct spu_context *ctx) +{ + struct dentry *dentry; + int ret; + + while (files->name && files->name[0]) { + ret = -ENOMEM; + dentry = d_alloc_name(dir, files->name); + if (!dentry) + goto out; + ret = spufs_new_file(dir->d_sb, dentry, files->ops, + files->mode & mode, ctx); + if (ret) + goto out; + files++; + } + return 0; +out: + // 
FIXME: remove all files that are left + + return ret; +} + +static int spufs_rmdir(struct inode *root, struct dentry *dir_dentry) +{ + struct dentry *dentry; + int err; + + spin_lock(&dcache_lock); + /* remove all entries */ + err = 0; + list_for_each_entry(dentry, &dir_dentry->d_subdirs, d_child) { + if (d_unhashed(dentry) || !dentry->d_inode) + continue; + atomic_dec(&dentry->d_count); + spin_lock(&dentry->d_lock); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); + } + spin_unlock(&dcache_lock); + if (!err) { + shrink_dcache_parent(dir_dentry); + err = simple_rmdir(root, dir_dentry); + } + return err; +} + +static int spufs_dir_close(struct inode *inode, struct file *file) +{ + struct inode *dir; + struct dentry *dentry; + int ret; + + dentry = file->f_dentry; + dir = dentry->d_parent->d_inode; + down(&dir->i_sem); + ret = spufs_rmdir(dir, file->f_dentry); + WARN_ON(ret); + up(&dir->i_sem); + return dcache_dir_close(inode, file); +} + +struct inode_operations spufs_dir_inode_operations = { + .lookup = simple_lookup, +}; + +struct file_operations spufs_autodelete_dir_operations = { + .open = dcache_dir_open, + .release = spufs_dir_close, + .llseek = dcache_dir_lseek, + .read = generic_read_dir, + .readdir = dcache_readdir, + .fsync = simple_sync_file, +}; + +static int +spufs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + int ret; + struct inode *inode; + struct spu_context *ctx; + + ret = -ENOSPC; + inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR); + if (!inode) + goto out; + + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + inode->i_mode &= S_ISGID; + } + ctx = alloc_spu_context(); + SPUFS_I(inode)->i_ctx = ctx; + if (!ctx) + goto out_iput; + + inode->i_op = &spufs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx); + if (ret) + goto out_free_ctx; + + d_instantiate(dentry, inode); + dget(dentry); + dir->i_nlink++; + goto out; + +out_free_ctx: + 
put_spu_context(ctx); +out_iput: + iput(inode); +out: + return ret; +} + +long +spufs_create_thread(struct nameidata *nd, const char *name, + unsigned int flags, mode_t mode) +{ + struct dentry *dentry; + struct file *filp; + int ret; + + /* need to be at the root of spufs */ + ret = -EINVAL; + if (nd->dentry->d_sb->s_magic != SPUFS_MAGIC || + nd->dentry != nd->dentry->d_sb->s_root) + goto out; + + dentry = lookup_create(nd, 1); + ret = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto out_dir; + + ret = -EEXIST; + if (dentry->d_inode) + goto out_dput; + + mode &= ~current->fs->umask; + ret = spufs_mkdir(nd->dentry->d_inode, dentry, mode & S_IRWXUGO); + if (ret) + goto out_dput; + + ret = get_unused_fd(); + if (ret < 0) + goto out_dput; + + dentry->d_inode->i_nlink++; + + filp = filp_open(name, O_RDONLY, mode); + if (IS_ERR(filp)) { + // FIXME: remove directory again + put_unused_fd(ret); + ret = PTR_ERR(filp); + } else { + filp->f_op = &spufs_autodelete_dir_operations; + fd_install(ret, filp); + } + +out_dput: + dput(dentry); +out_dir: + up(&nd->dentry->d_inode->i_sem); +out: + return ret; +} + +/* File system initialization */ +enum { + Opt_uid, Opt_gid, Opt_err, +}; + +static match_table_t spufs_tokens = { + { Opt_uid, "uid=%d" }, + { Opt_gid, "gid=%d" }, + { Opt_err, NULL }, +}; + +static int +spufs_parse_options(char *options, struct inode *root) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + + while ((p = strsep(&options, ",")) != NULL) { + int token, option; + + if (!*p) + continue; + + token = match_token(p, spufs_tokens, args); + switch (token) { + case Opt_uid: + if (match_int(&args[0], &option)) + return 0; + root->i_uid = option; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + root->i_gid = option; + break; + default: + return 0; + } + } + return 1; +} + +static int +spufs_create_root(struct super_block *sb, void *data) { + struct inode *inode; + int ret; + + ret = -ENOMEM; + inode = spufs_new_inode(sb, S_IFDIR | 0775); + if 
(!inode) + goto out; + + inode->i_op = &spufs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + SPUFS_I(inode)->i_ctx = NULL; + + ret = -EINVAL; + if (!spufs_parse_options(data, inode)) + goto out_iput; + + ret = -ENOMEM; + sb->s_root = d_alloc_root(inode); + if (!sb->s_root) + goto out_iput; + + return 0; +out_iput: + iput(inode); +out: + return ret; +} + +static int +spufs_fill_super(struct super_block *sb, void *data, int silent) +{ + static struct super_operations s_ops = { + .alloc_inode = spufs_alloc_inode, + .destroy_inode = spufs_destroy_inode, + .statfs = simple_statfs, + .delete_inode = spufs_delete_inode, + .drop_inode = generic_delete_inode, + }; + + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = SPUFS_MAGIC; + sb->s_op = &s_ops; + + return spufs_create_root(sb, data); +} + +static struct super_block * +spufs_get_sb(struct file_system_type *fstype, int flags, + const char *name, void *data) +{ + return get_sb_single(fstype, flags, data, spufs_fill_super); +} + +static struct file_system_type spufs_type = { + .owner = THIS_MODULE, + .name = "spufs", + .get_sb = spufs_get_sb, + .kill_sb = kill_litter_super, +}; + +static int spufs_init(void) +{ + int ret; + ret = -ENOMEM; + spufs_inode_cache = kmem_cache_create("spufs_inode_cache", + sizeof(struct spufs_inode_info), 0, + SLAB_HWCACHE_ALIGN, spufs_init_once, NULL); + + if (!spufs_inode_cache) + goto out; + ret = register_filesystem(&spufs_type); + if (ret) + goto out_cache; + ret = register_spu_syscalls(&spufs_calls); + if (ret) + goto out_fs; + return 0; +out_fs: + unregister_filesystem(&spufs_type); +out_cache: + kmem_cache_destroy(spufs_inode_cache); +out: + return ret; +} +module_init(spufs_init); + +static void spufs_exit(void) +{ + unregister_spu_syscalls(&spufs_calls); + unregister_filesystem(&spufs_type); + kmem_cache_destroy(spufs_inode_cache); +} +module_exit(spufs_exit); + +MODULE_LICENSE("GPL"); 
+MODULE_AUTHOR("Arnd Bergmann "); + diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h new file mode 100644 index 000000000000..b37fe797ea1c --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -0,0 +1,71 @@ +/* + * SPU file system + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ +#ifndef SPUFS_H +#define SPUFS_H + +#include +#include +#include +#include + +#include + +/* The magic number for our file system */ +enum { + SPUFS_MAGIC = 0x23c9b64e, +}; + +struct spu_context { + struct spu *spu; /* pointer to a physical SPU */ + struct rw_semaphore backing_sema; /* protects the above */ + spinlock_t mmio_lock; /* protects mmio access */ + + struct kref kref; +}; + +struct spufs_inode_info { + struct spu_context *i_ctx; + struct inode vfs_inode; +}; +#define SPUFS_I(inode) \ + container_of(inode, struct spufs_inode_info, vfs_inode) + +extern struct tree_descr spufs_dir_contents[]; + +/* system call implementation */ +long spufs_run_spu(struct file *file, + struct spu_context *ctx, u32 *npc, u32 *status); +long spufs_create_thread(struct nameidata *nd, const char *name, + unsigned int flags, mode_t mode); + +/* context management */ +struct spu_context * alloc_spu_context(void); +void destroy_spu_context(struct kref *kref); +struct spu_context * get_spu_context(struct spu_context *ctx); +int put_spu_context(struct spu_context *ctx); + +void spu_acquire(struct spu_context *ctx); +void spu_release(struct spu_context *ctx); +void spu_acquire_runnable(struct spu_context *ctx); +void spu_acquire_saved(struct spu_context *ctx); + +#endif diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c new file mode 100644 index 000000000000..3f71bb5e9d8e --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include + +#include + +#include "spufs.h" + +/** + * sys_spu_run - run code loaded into an SPU + * + * @unpc: next program counter for the SPU + * @ustatus: status of the SPU + * + * This system call transfers the control of execution of a + * user space thread to an SPU. 
It will return when the + * SPU has finished executing or when it hits an error + * condition and it will be interrupted if a signal needs + * to be delivered to a handler in user space. + * + * The next program counter is set to the passed value + * before the SPU starts fetching code and the user space + * pointer gets updated with the new value when returning + * from kernel space. + * + * The status value returned from spu_run reflects the + * value of the spu_status register after the SPU has stopped. + * + */ +long do_spu_run(struct file *filp, __u32 __user *unpc, __u32 __user *ustatus) +{ + long ret; + struct spufs_inode_info *i; + u32 npc, status; + + ret = -EFAULT; + if (get_user(npc, unpc)) + goto out; + + ret = -EINVAL; + if (filp->f_vfsmnt->mnt_sb->s_magic != SPUFS_MAGIC) + goto out; + + i = SPUFS_I(filp->f_dentry->d_inode); + ret = spufs_run_spu(filp, i->i_ctx, &npc, &status); + + if (ret ==-EAGAIN || ret == -EIO) + ret = status; + + if (put_user(npc, unpc)) + ret = -EFAULT; + + if (ustatus && put_user(status, ustatus)) + ret = -EFAULT; +out: + return ret; +} + +#ifndef MODULE +asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) +{ + int fput_needed; + struct file *filp; + long ret; + + ret = -EBADF; + filp = fget_light(fd, &fput_needed); + if (filp) { + ret = do_spu_run(filp, unpc, ustatus); + fput_light(filp, fput_needed); + } + + return ret; +} +#endif + +asmlinkage long sys_spu_create(const char __user *pathname, + unsigned int flags, mode_t mode) +{ + char *tmp; + int ret; + + tmp = getname(pathname); + ret = PTR_ERR(tmp); + if (!IS_ERR(tmp)) { + struct nameidata nd; + + ret = path_lookup(tmp, LOOKUP_PARENT| + LOOKUP_OPEN|LOOKUP_CREATE, &nd); + if (!ret) { + ret = spufs_create_thread(&nd, pathname, flags, mode); + path_release(&nd); + } + putname(tmp); + } + + return ret; +} + +struct spufs_calls spufs_calls = { + .create_thread = sys_spu_create, + .spu_run = do_spu_run, + .owner = THIS_MODULE, +}; diff --git 
a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c index 28f1082e5040..95075f99a6d4 100644 --- a/arch/ppc/kernel/ppc_ksyms.c +++ b/arch/ppc/kernel/ppc_ksyms.c @@ -307,7 +307,6 @@ EXPORT_SYMBOL(__res); EXPORT_SYMBOL(next_mmu_context); EXPORT_SYMBOL(set_context); -EXPORT_SYMBOL_GPL(__handle_mm_fault); /* For MOL */ EXPORT_SYMBOL(disarm_decr); #ifdef CONFIG_PPC_STD_MMU extern long mol_trampoline; diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h new file mode 100644 index 000000000000..b036385cd831 --- /dev/null +++ b/include/asm-powerpc/spu.h @@ -0,0 +1,498 @@ +/* + * SPU core / file system interface and HW structures + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef _SPU_H +#define _SPU_H +#include +#include +#include + +#define LS_ORDER (6) /* 256 kb */ + +#define LS_SIZE (PAGE_SIZE << LS_ORDER) + +struct spu { + char *name; + unsigned long local_store_phys; + u8 *local_store; + struct spu_problem __iomem *problem; + struct spu_priv1 __iomem *priv1; + struct spu_priv2 __iomem *priv2; + struct list_head list; + int number; + u32 isrc; + u32 node; + struct kref kref; + size_t ls_size; + unsigned int slb_replace; + struct mm_struct *mm; + int class_0_pending; + spinlock_t register_lock; + + u32 stop_code; + wait_queue_head_t stop_wq; + wait_queue_head_t ibox_wq; + wait_queue_head_t wbox_wq; + struct fasync_struct *ibox_fasync; + struct fasync_struct *wbox_fasync; + + char irq_c0[8]; + char irq_c1[8]; + char irq_c2[8]; +}; + +struct spu *spu_alloc(void); +void spu_free(struct spu *spu); +int spu_run(struct spu *spu); + +size_t spu_wbox_write(struct spu *spu, u32 data); +size_t spu_ibox_read(struct spu *spu, u32 *data); + +extern struct spufs_calls { + asmlinkage long (*create_thread)(const char __user *name, + unsigned int flags, mode_t mode); + asmlinkage long (*spu_run)(struct file *filp, __u32 __user *unpc, + __u32 __user *ustatus); + struct module *owner; +} spufs_calls; + +#ifdef CONFIG_SPU_FS_MODULE +int register_spu_syscalls(struct spufs_calls *calls); +void unregister_spu_syscalls(struct spufs_calls *calls); +#else +static inline int register_spu_syscalls(struct spufs_calls *calls) +{ + return 0; +} +static inline void unregister_spu_syscalls(struct spufs_calls *calls) +{ +} +#endif /* CONFIG_SPU_FS_MODULE */ + + +/* + * This defines the Local Store, Problem Area and Privilege Area of an SPU. 
+ */ + +union mfc_tag_size_class_cmd { + struct { + u16 mfc_size; + u16 mfc_tag; + u8 pad; + u8 mfc_rclassid; + u16 mfc_cmd; + } u; + struct { + u32 mfc_size_tag32; + u32 mfc_class_cmd32; + } by32; + u64 all64; +}; + +struct mfc_cq_sr { + u64 mfc_cq_data0_RW; + u64 mfc_cq_data1_RW; + u64 mfc_cq_data2_RW; + u64 mfc_cq_data3_RW; +}; + +struct spu_problem { +#define MS_SYNC_PENDING 1L + u64 spc_mssync_RW; /* 0x0000 */ + u8 pad_0x0008_0x3000[0x3000 - 0x0008]; + + /* DMA Area */ + u8 pad_0x3000_0x3004[0x4]; /* 0x3000 */ + u32 mfc_lsa_W; /* 0x3004 */ + u64 mfc_ea_W; /* 0x3008 */ + union mfc_tag_size_class_cmd mfc_union_W; /* 0x3010 */ + u8 pad_0x3018_0x3104[0xec]; /* 0x3018 */ + u32 dma_qstatus_R; /* 0x3104 */ + u8 pad_0x3108_0x3204[0xfc]; /* 0x3108 */ + u32 dma_querytype_RW; /* 0x3204 */ + u8 pad_0x3208_0x321c[0x14]; /* 0x3208 */ + u32 dma_querymask_RW; /* 0x321c */ + u8 pad_0x3220_0x322c[0xc]; /* 0x3220 */ + u32 dma_tagstatus_R; /* 0x322c */ +#define DMA_TAGSTATUS_INTR_ANY 1u +#define DMA_TAGSTATUS_INTR_ALL 2u + u8 pad_0x3230_0x4000[0x4000 - 0x3230]; /* 0x3230 */ + + /* SPU Control Area */ + u8 pad_0x4000_0x4004[0x4]; /* 0x4000 */ + u32 pu_mb_R; /* 0x4004 */ + u8 pad_0x4008_0x400c[0x4]; /* 0x4008 */ + u32 spu_mb_W; /* 0x400c */ + u8 pad_0x4010_0x4014[0x4]; /* 0x4010 */ + u32 mb_stat_R; /* 0x4014 */ + u8 pad_0x4018_0x401c[0x4]; /* 0x4018 */ + u32 spu_runcntl_RW; /* 0x401c */ +#define SPU_RUNCNTL_STOP 0L +#define SPU_RUNCNTL_RUNNABLE 1L + u8 pad_0x4020_0x4024[0x4]; /* 0x4020 */ + u32 spu_status_R; /* 0x4024 */ +#define SPU_STOP_STATUS_SHIFT 16 +#define SPU_STATUS_STOPPED 0x0 +#define SPU_STATUS_RUNNING 0x1 +#define SPU_STATUS_STOPPED_BY_STOP 0x2 +#define SPU_STATUS_STOPPED_BY_HALT 0x4 +#define SPU_STATUS_WAITING_FOR_CHANNEL 0x8 +#define SPU_STATUS_SINGLE_STEP 0x10 +#define SPU_STATUS_INVALID_INSTR 0x20 +#define SPU_STATUS_INVALID_CH 0x40 +#define SPU_STATUS_ISOLATED_STATE 0x80 +#define SPU_STATUS_ISOLATED_LOAD_STAUTUS 0x200 +#define SPU_STATUS_ISOLATED_EXIT_STAUTUS 0x400 
+ u8 pad_0x4028_0x402c[0x4]; /* 0x4028 */ + u32 spu_spe_R; /* 0x402c */ + u8 pad_0x4030_0x4034[0x4]; /* 0x4030 */ + u32 spu_npc_RW; /* 0x4034 */ + u8 pad_0x4038_0x14000[0x14000 - 0x4038]; /* 0x4038 */ + + /* Signal Notification Area */ + u8 pad_0x14000_0x1400c[0xc]; /* 0x14000 */ + u32 signal_notify1; /* 0x1400c */ + u8 pad_0x14010_0x1c00c[0x7ffc]; /* 0x14010 */ + u32 signal_notify2; /* 0x1c00c */ +} __attribute__ ((aligned(0x20000))); + +/* SPU Privilege 2 State Area */ +struct spu_priv2 { + /* MFC Registers */ + u8 pad_0x0000_0x1100[0x1100 - 0x0000]; /* 0x0000 */ + + /* SLB Management Registers */ + u8 pad_0x1100_0x1108[0x8]; /* 0x1100 */ + u64 slb_index_W; /* 0x1108 */ +#define SLB_INDEX_MASK 0x7L + u64 slb_esid_RW; /* 0x1110 */ + u64 slb_vsid_RW; /* 0x1118 */ +#define SLB_VSID_SUPERVISOR_STATE (0x1ull << 11) +#define SLB_VSID_SUPERVISOR_STATE_MASK (0x1ull << 11) +#define SLB_VSID_PROBLEM_STATE (0x1ull << 10) +#define SLB_VSID_PROBLEM_STATE_MASK (0x1ull << 10) +#define SLB_VSID_EXECUTE_SEGMENT (0x1ull << 9) +#define SLB_VSID_NO_EXECUTE_SEGMENT (0x1ull << 9) +#define SLB_VSID_EXECUTE_SEGMENT_MASK (0x1ull << 9) +#define SLB_VSID_4K_PAGE (0x0 << 8) +#define SLB_VSID_LARGE_PAGE (0x1ull << 8) +#define SLB_VSID_PAGE_SIZE_MASK (0x1ull << 8) +#define SLB_VSID_CLASS_MASK (0x1ull << 7) +#define SLB_VSID_VIRTUAL_PAGE_SIZE_MASK (0x1ull << 6) + u64 slb_invalidate_entry_W; /* 0x1120 */ + u64 slb_invalidate_all_W; /* 0x1128 */ + u8 pad_0x1130_0x2000[0x2000 - 0x1130]; /* 0x1130 */ + + /* Context Save / Restore Area */ + struct mfc_cq_sr spuq[16]; /* 0x2000 */ + struct mfc_cq_sr puq[8]; /* 0x2200 */ + u8 pad_0x2300_0x3000[0x3000 - 0x2300]; /* 0x2300 */ + + /* MFC Control */ + u64 mfc_control_RW; /* 0x3000 */ +#define MFC_CNTL_RESUME_DMA_QUEUE (0ull << 0) +#define MFC_CNTL_SUSPEND_DMA_QUEUE (1ull << 0) +#define MFC_CNTL_SUSPEND_DMA_QUEUE_MASK (1ull << 0) +#define MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION (0ull << 8) +#define MFC_CNTL_SUSPEND_IN_PROGRESS (1ull << 8) +#define 
MFC_CNTL_SUSPEND_COMPLETE (3ull << 8) +#define MFC_CNTL_SUSPEND_DMA_STATUS_MASK (3ull << 8) +#define MFC_CNTL_DMA_QUEUES_EMPTY (1ull << 14) +#define MFC_CNTL_DMA_QUEUES_EMPTY_MASK (1ull << 14) +#define MFC_CNTL_PURGE_DMA_REQUEST (1ull << 15) +#define MFC_CNTL_PURGE_DMA_IN_PROGRESS (1ull << 24) +#define MFC_CNTL_PURGE_DMA_COMPLETE (3ull << 24) +#define MFC_CNTL_PURGE_DMA_STATUS_MASK (3ull << 24) +#define MFC_CNTL_RESTART_DMA_COMMAND (1ull << 32) +#define MFC_CNTL_DMA_COMMAND_REISSUE_PENDING (1ull << 32) +#define MFC_CNTL_DMA_COMMAND_REISSUE_STATUS_MASK (1ull << 32) +#define MFC_CNTL_MFC_PRIVILEGE_STATE (2ull << 33) +#define MFC_CNTL_MFC_PROBLEM_STATE (3ull << 33) +#define MFC_CNTL_MFC_KEY_PROTECTION_STATE_MASK (3ull << 33) +#define MFC_CNTL_DECREMENTER_HALTED (1ull << 35) +#define MFC_CNTL_DECREMENTER_RUNNING (1ull << 40) +#define MFC_CNTL_DECREMENTER_STATUS_MASK (1ull << 40) + u8 pad_0x3008_0x4000[0x4000 - 0x3008]; /* 0x3008 */ + + /* Interrupt Mailbox */ + u64 puint_mb_R; /* 0x4000 */ + u8 pad_0x4008_0x4040[0x4040 - 0x4008]; /* 0x4008 */ + + /* SPU Control */ + u64 spu_privcntl_RW; /* 0x4040 */ +#define SPU_PRIVCNTL_MODE_NORMAL (0x0ull << 0) +#define SPU_PRIVCNTL_MODE_SINGLE_STEP (0x1ull << 0) +#define SPU_PRIVCNTL_MODE_MASK (0x1ull << 0) +#define SPU_PRIVCNTL_NO_ATTENTION_EVENT (0x0ull << 1) +#define SPU_PRIVCNTL_ATTENTION_EVENT (0x1ull << 1) +#define SPU_PRIVCNTL_ATTENTION_EVENT_MASK (0x1ull << 1) +#define SPU_PRIVCNT_LOAD_REQUEST_NORMAL (0x0ull << 2) +#define SPU_PRIVCNT_LOAD_REQUEST_ENABLE_MASK (0x1ull << 2) + u8 pad_0x4048_0x4058[0x10]; /* 0x4048 */ + u64 spu_lslr_RW; /* 0x4058 */ + u64 spu_chnlcntptr_RW; /* 0x4060 */ + u64 spu_chnlcnt_RW; /* 0x4068 */ + u64 spu_chnldata_RW; /* 0x4070 */ + u64 spu_cfg_RW; /* 0x4078 */ + u8 pad_0x4080_0x5000[0x5000 - 0x4080]; /* 0x4080 */ + + /* PV2_ImplRegs: Implementation-specific privileged-state 2 regs */ + u64 spu_pm_trace_tag_status_RW; /* 0x5000 */ + u64 spu_tag_status_query_RW; /* 0x5008 */ +#define 
TAG_STATUS_QUERY_CONDITION_BITS (0x3ull << 32) +#define TAG_STATUS_QUERY_MASK_BITS (0xffffffffull) + u64 spu_cmd_buf1_RW; /* 0x5010 */ +#define SPU_COMMAND_BUFFER_1_LSA_BITS (0x7ffffull << 32) +#define SPU_COMMAND_BUFFER_1_EAH_BITS (0xffffffffull) + u64 spu_cmd_buf2_RW; /* 0x5018 */ +#define SPU_COMMAND_BUFFER_2_EAL_BITS ((0xffffffffull) << 32) +#define SPU_COMMAND_BUFFER_2_TS_BITS (0xffffull << 16) +#define SPU_COMMAND_BUFFER_2_TAG_BITS (0x3full) + u64 spu_atomic_status_RW; /* 0x5020 */ +} __attribute__ ((aligned(0x20000))); + +/* SPU Privilege 1 State Area */ +struct spu_priv1 { + /* Control and Configuration Area */ + u64 mfc_sr1_RW; /* 0x000 */ +#define MFC_STATE1_LOCAL_STORAGE_DECODE_MASK 0x01ull +#define MFC_STATE1_BUS_TLBIE_MASK 0x02ull +#define MFC_STATE1_REAL_MODE_OFFSET_ENABLE_MASK 0x04ull +#define MFC_STATE1_PROBLEM_STATE_MASK 0x08ull +#define MFC_STATE1_RELOCATE_MASK 0x10ull +#define MFC_STATE1_MASTER_RUN_CONTROL_MASK 0x20ull + u64 mfc_lpid_RW; /* 0x008 */ + u64 spu_idr_RW; /* 0x010 */ + u64 mfc_vr_RO; /* 0x018 */ + /* unsigned constants: a plain int 0xffff << 16 shifts into the sign bit and sign-extends when and-ed with a u64 register value */ +#define MFC_VERSION_BITS (0xffffu << 16) +#define MFC_REVISION_BITS (0xffff) +#define MFC_GET_VERSION_BITS(vr) (((vr) & MFC_VERSION_BITS) >> 16) +#define MFC_GET_REVISION_BITS(vr) ((vr) & MFC_REVISION_BITS) + u64 spu_vr_RO; /* 0x020 */ +#define SPU_VERSION_BITS (0xffffu << 16) +#define SPU_REVISION_BITS (0xffff) + /* argument and expansion fully parenthesized, matching the MFC_GET_* accessors above */ +#define SPU_GET_VERSION_BITS(vr) (((vr) & SPU_VERSION_BITS) >> 16) +#define SPU_GET_REVISION_BITS(vr) ((vr) & SPU_REVISION_BITS) + u8 pad_0x28_0x100[0x100 - 0x28]; /* 0x28 */ + + + /* Interrupt Area */ + u64 int_mask_class0_RW; /* 0x100 */ +#define CLASS0_ENABLE_DMA_ALIGNMENT_INTR 0x1L +#define CLASS0_ENABLE_INVALID_DMA_COMMAND_INTR 0x2L +#define CLASS0_ENABLE_SPU_ERROR_INTR 0x4L +#define CLASS0_ENABLE_MFC_FIR_INTR 0x8L + u64 int_mask_class1_RW; /* 0x108 */ +#define CLASS1_ENABLE_SEGMENT_FAULT_INTR 0x1L +#define CLASS1_ENABLE_STORAGE_FAULT_INTR 0x2L +#define CLASS1_ENABLE_LS_COMPARE_SUSPEND_ON_GET_INTR 0x4L +#define 
CLASS1_ENABLE_LS_COMPARE_SUSPEND_ON_PUT_INTR 0x8L + u64 int_mask_class2_RW; /* 0x110 */ +#define CLASS2_ENABLE_MAILBOX_INTR 0x1L +#define CLASS2_ENABLE_SPU_STOP_INTR 0x2L +#define CLASS2_ENABLE_SPU_HALT_INTR 0x4L +#define CLASS2_ENABLE_SPU_DMA_TAG_GROUP_COMPLETE_INTR 0x8L + u8 pad_0x118_0x140[0x28]; /* 0x118 */ + u64 int_stat_class0_RW; /* 0x140 */ + u64 int_stat_class1_RW; /* 0x148 */ + u64 int_stat_class2_RW; /* 0x150 */ + u8 pad_0x158_0x180[0x28]; /* 0x158 */ + u64 int_route_RW; /* 0x180 */ + + /* Interrupt Routing */ + u8 pad_0x188_0x200[0x200 - 0x188]; /* 0x188 */ + + /* Atomic Unit Control Area */ + u64 mfc_atomic_flush_RW; /* 0x200 */ +#define mfc_atomic_flush_enable 0x1L + u8 pad_0x208_0x280[0x78]; /* 0x208 */ + u64 resource_allocation_groupID_RW; /* 0x280 */ + u64 resource_allocation_enable_RW; /* 0x288 */ + u8 pad_0x290_0x3c8[0x3c8 - 0x290]; /* 0x290 */ + + /* SPU_Cache_ImplRegs: Implementation-dependent cache registers */ + + u64 smf_sbi_signal_sel; /* 0x3c8 */ +#define smf_sbi_mask_lsb 56 +#define smf_sbi_shift (63 - smf_sbi_mask_lsb) +#define smf_sbi_mask (0x301LL << smf_sbi_shift) +#define smf_sbi_bus0_bits (0x001LL << smf_sbi_shift) +#define smf_sbi_bus2_bits (0x100LL << smf_sbi_shift) +#define smf_sbi2_bus0_bits (0x201LL << smf_sbi_shift) +#define smf_sbi2_bus2_bits (0x300LL << smf_sbi_shift) + u64 smf_ato_signal_sel; /* 0x3d0 */ +#define smf_ato_mask_lsb 35 +#define smf_ato_shift (63 - smf_ato_mask_lsb) +#define smf_ato_mask (0x3LL << smf_ato_shift) +#define smf_ato_bus0_bits (0x2LL << smf_ato_shift) +#define smf_ato_bus2_bits (0x1LL << smf_ato_shift) + u8 pad_0x3d8_0x400[0x400 - 0x3d8]; /* 0x3d8 */ + + /* TLB Management Registers */ + u64 mfc_sdr_RW; /* 0x400 */ + u8 pad_0x408_0x500[0xf8]; /* 0x408 */ + u64 tlb_index_hint_RO; /* 0x500 */ + u64 tlb_index_W; /* 0x508 */ + u64 tlb_vpn_RW; /* 0x510 */ + u64 tlb_rpn_RW; /* 0x518 */ + u8 pad_0x520_0x540[0x20]; /* 0x520 */ + u64 tlb_invalidate_entry_W; /* 0x540 */ + u64 tlb_invalidate_all_W; /* 0x548 */ 
+ u8 pad_0x550_0x580[0x580 - 0x550]; /* 0x550 */ + + /* SPU_MMU_ImplRegs: Implementation-dependent MMU registers */ + u64 smm_hid; /* 0x580 */ +#define PAGE_SIZE_MASK 0xf000000000000000ull +#define PAGE_SIZE_16MB_64KB 0x2000000000000000ull + u8 pad_0x588_0x600[0x600 - 0x588]; /* 0x588 */ + + /* MFC Status/Control Area */ + u64 mfc_accr_RW; /* 0x600 */ +#define MFC_ACCR_EA_ACCESS_GET (1 << 0) +#define MFC_ACCR_EA_ACCESS_PUT (1 << 1) +#define MFC_ACCR_LS_ACCESS_GET (1 << 3) +#define MFC_ACCR_LS_ACCESS_PUT (1 << 4) + u8 pad_0x608_0x610[0x8]; /* 0x608 */ + u64 mfc_dsisr_RW; /* 0x610 */ +#define MFC_DSISR_PTE_NOT_FOUND (1 << 30) +#define MFC_DSISR_ACCESS_DENIED (1 << 27) +#define MFC_DSISR_ATOMIC (1 << 26) +#define MFC_DSISR_ACCESS_PUT (1 << 25) +#define MFC_DSISR_ADDR_MATCH (1 << 22) +#define MFC_DSISR_LS (1 << 17) +#define MFC_DSISR_L (1 << 16) +#define MFC_DSISR_ADDRESS_OVERFLOW (1 << 0) + u8 pad_0x618_0x620[0x8]; /* 0x618 */ + u64 mfc_dar_RW; /* 0x620 */ + u8 pad_0x628_0x700[0x700 - 0x628]; /* 0x628 */ + + /* Replacement Management Table (RMT) Area */ + u64 rmt_index_RW; /* 0x700 */ + u8 pad_0x708_0x710[0x8]; /* 0x708 */ + u64 rmt_data1_RW; /* 0x710 */ + u8 pad_0x718_0x800[0x800 - 0x718]; /* 0x718 */ + + /* Control/Configuration Registers */ + u64 mfc_dsir_R; /* 0x800 */ +#define MFC_DSIR_Q (1 << 31) +#define MFC_DSIR_SPU_QUEUE MFC_DSIR_Q + u64 mfc_lsacr_RW; /* 0x808 */ +#define MFC_LSACR_COMPARE_MASK ((~0ull) << 32) +#define MFC_LSACR_COMPARE_ADDR ((~0ull) >> 32) + u64 mfc_lscrr_R; /* 0x810 */ +#define MFC_LSCRR_Q (1 << 31) +#define MFC_LSCRR_SPU_QUEUE MFC_LSCRR_Q +#define MFC_LSCRR_QI_SHIFT 32 +#define MFC_LSCRR_QI_MASK ((~0ull) << MFC_LSCRR_QI_SHIFT) + u8 pad_0x818_0x820[0x8]; /* 0x818 */ + u64 mfc_tclass_id_RW; /* 0x820 */ +#define MFC_TCLASS_ID_ENABLE (1L << 0L) +#define MFC_TCLASS_SLOT2_ENABLE (1L << 5L) +#define MFC_TCLASS_SLOT1_ENABLE (1L << 6L) +#define MFC_TCLASS_SLOT0_ENABLE (1L << 7L) +#define MFC_TCLASS_QUOTA_2_SHIFT 8L +#define MFC_TCLASS_QUOTA_1_SHIFT 
16L +#define MFC_TCLASS_QUOTA_0_SHIFT 24L +#define MFC_TCLASS_QUOTA_2_MASK (0x1FL << MFC_TCLASS_QUOTA_2_SHIFT) +#define MFC_TCLASS_QUOTA_1_MASK (0x1FL << MFC_TCLASS_QUOTA_1_SHIFT) +#define MFC_TCLASS_QUOTA_0_MASK (0x1FL << MFC_TCLASS_QUOTA_0_SHIFT) + u8 pad_0x828_0x900[0x900 - 0x828]; /* 0x828 */ + + /* Real Mode Support Registers */ + u64 mfc_rm_boundary; /* 0x900 */ + u8 pad_0x908_0x938[0x30]; /* 0x908 */ + u64 smf_dma_signal_sel; /* 0x938 */ +#define mfc_dma1_mask_lsb 41 +#define mfc_dma1_shift (63 - mfc_dma1_mask_lsb) +#define mfc_dma1_mask (0x3LL << mfc_dma1_shift) +#define mfc_dma1_bits (0x1LL << mfc_dma1_shift) +#define mfc_dma2_mask_lsb 43 +#define mfc_dma2_shift (63 - mfc_dma2_mask_lsb) +#define mfc_dma2_mask (0x3LL << mfc_dma2_shift) +#define mfc_dma2_bits (0x1LL << mfc_dma2_shift) + u8 pad_0x940_0xa38[0xf8]; /* 0x940 */ + u64 smm_signal_sel; /* 0xa38 */ +#define smm_sig_mask_lsb 12 +#define smm_sig_shift (63 - smm_sig_mask_lsb) +#define smm_sig_mask (0x3LL << smm_sig_shift) +#define smm_sig_bus0_bits (0x2LL << smm_sig_shift) +#define smm_sig_bus2_bits (0x1LL << smm_sig_shift) + u8 pad_0xa40_0xc00[0xc00 - 0xa40]; /* 0xa40 */ + + /* DMA Command Error Area */ + u64 mfc_cer_R; /* 0xc00 */ +#define MFC_CER_Q (1 << 31) +#define MFC_CER_SPU_QUEUE MFC_CER_Q + u8 pad_0xc08_0x1000[0x1000 - 0xc08]; /* 0xc08 */ + + /* PV1_ImplRegs: Implementation-dependent privileged-state 1 regs */ + /* DMA Command Error Area */ + u64 spu_ecc_cntl_RW; /* 0x1000 */ +#define SPU_ECC_CNTL_E (1ull << 0ull) +#define SPU_ECC_CNTL_ENABLE SPU_ECC_CNTL_E +#define SPU_ECC_CNTL_DISABLE (~SPU_ECC_CNTL_E & 1L) +#define SPU_ECC_CNTL_S (1ull << 1ull) +#define SPU_ECC_STOP_AFTER_ERROR SPU_ECC_CNTL_S +#define SPU_ECC_CONTINUE_AFTER_ERROR (~SPU_ECC_CNTL_S & 2L) +#define SPU_ECC_CNTL_B (1ull << 2ull) +#define SPU_ECC_BACKGROUND_ENABLE SPU_ECC_CNTL_B +#define SPU_ECC_BACKGROUND_DISABLE (~SPU_ECC_CNTL_B & 4L) +#define SPU_ECC_CNTL_I_SHIFT 3ull +#define SPU_ECC_CNTL_I_MASK (3ull << SPU_ECC_CNTL_I_SHIFT) 
+#define SPU_ECC_WRITE_ALWAYS (~SPU_ECC_CNTL_I & 12L) +#define SPU_ECC_WRITE_CORRECTABLE (1ull << SPU_ECC_CNTL_I_SHIFT) +#define SPU_ECC_WRITE_UNCORRECTABLE (3ull << SPU_ECC_CNTL_I_SHIFT) +#define SPU_ECC_CNTL_D (1ull << 5ull) +#define SPU_ECC_DETECTION_ENABLE SPU_ECC_CNTL_D +#define SPU_ECC_DETECTION_DISABLE (~SPU_ECC_CNTL_D & 32L) + u64 spu_ecc_stat_RW; /* 0x1008 */ +#define SPU_ECC_CORRECTED_ERROR (1ull << 0ul) +#define SPU_ECC_UNCORRECTED_ERROR (1ull << 1ul) +#define SPU_ECC_SCRUB_COMPLETE (1ull << 2ul) +#define SPU_ECC_SCRUB_IN_PROGRESS (1ull << 3ul) +#define SPU_ECC_INSTRUCTION_ERROR (1ull << 4ul) +#define SPU_ECC_DATA_ERROR (1ull << 5ul) +#define SPU_ECC_DMA_ERROR (1ull << 6ul) +#define SPU_ECC_STATUS_CNT_MASK (256ull << 8) + u64 spu_ecc_addr_RW; /* 0x1010 */ + u64 spu_err_mask_RW; /* 0x1018 */ +#define SPU_ERR_ILLEGAL_INSTR (1ull << 0ul) +#define SPU_ERR_ILLEGAL_CHANNEL (1ull << 1ul) + u8 pad_0x1020_0x1028[0x1028 - 0x1020]; /* 0x1020 */ + + /* SPU Debug-Trace Bus (DTB) Selection Registers */ + u64 spu_trig0_sel; /* 0x1028 */ + u64 spu_trig1_sel; /* 0x1030 */ + u64 spu_trig2_sel; /* 0x1038 */ + u64 spu_trig3_sel; /* 0x1040 */ + u64 spu_trace_sel; /* 0x1048 */ +#define spu_trace_sel_mask 0x1f1fLL +#define spu_trace_sel_bus0_bits 0x1000LL +#define spu_trace_sel_bus2_bits 0x0010LL + u64 spu_event0_sel; /* 0x1050 */ + u64 spu_event1_sel; /* 0x1058 */ + u64 spu_event2_sel; /* 0x1060 */ + u64 spu_event3_sel; /* 0x1068 */ + u64 spu_trace_cntl; /* 0x1070 */ +} __attribute__ ((aligned(0x2000))); + +#endif diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h index 0991dfceef1d..9606349855da 100644 --- a/include/asm-powerpc/unistd.h +++ b/include/asm-powerpc/unistd.h @@ -296,6 +296,8 @@ #define __NR_inotify_init 275 #define __NR_inotify_add_watch 276 #define __NR_inotify_rm_watch 277 +#define __NR_spu_run 278 +#define __NR_spu_create 279 #define __NR_syscalls 278 diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 
c7007b1db91d..44fdd48d38e6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -512,4 +512,9 @@ asmlinkage long sys_ioprio_get(int which, int who); asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, unsigned long maxnode); +asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, + __u32 __user *ustatus); +asmlinkage long sys_spu_create(const char __user *name, + unsigned int flags, mode_t mode); + #endif diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 1ab2370e2efa..d4739a475d23 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -90,3 +90,5 @@ cond_syscall(sys_pciconfig_iobase); cond_syscall(sys32_ipc); cond_syscall(sys32_sysctl); cond_syscall(ppc_rtas); +cond_syscall(sys_spu_run); +cond_syscall(sys_spu_create); diff --git a/mm/memory.c b/mm/memory.c index 7197f9bcd384..3944fec38012 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2267,6 +2267,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, return handle_pte_fault(mm, vma, address, pte, pmd, write_access); } +EXPORT_SYMBOL_GPL(__handle_mm_fault); + #ifndef __PAGETABLE_PUD_FOLDED /* * Allocate page upper directory. -- cgit v1.2.3-71-gd317 From 1beb6a7d6cbed3ac03500ce9b5b9bb632c512039 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 14 Dec 2005 13:10:10 +1100 Subject: [PATCH] powerpc: Experimental support for new G5 Macs (#2) This adds some very basic support for the new machines, including the Quad G5 (tested), and other new dual core based machines and iMac G5 iSight (untested). This is still experimental ! There is no thermal control yet, there is no proper handling of MSIs, etc.. but it boots, I have all 4 cores up on my machine. Compared to the previous version of this patch, this one adds DART IOMMU support for the U4 chipset and thus should work fine on setups with more than 2Gb of RAM. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/pci_64.c | 35 ++- arch/powerpc/kernel/prom.c | 26 ++- arch/powerpc/kernel/udbg.c | 2 + arch/powerpc/platforms/maple/setup.c | 4 +- arch/powerpc/platforms/powermac/feature.c | 65 ++++-- arch/powerpc/platforms/powermac/pci.c | 210 +++++++++++++++--- arch/powerpc/platforms/powermac/pic.c | 72 +++--- arch/powerpc/platforms/powermac/setup.c | 13 +- arch/powerpc/platforms/powermac/smp.c | 319 ++++++++++++++------------- arch/powerpc/sysdev/Makefile | 2 +- arch/powerpc/sysdev/dart.h | 41 ++-- arch/powerpc/sysdev/dart_iommu.c | 350 ++++++++++++++++++++++++++++++ arch/powerpc/sysdev/mpic.c | 199 +++++++++++++---- arch/powerpc/sysdev/u3_iommu.c | 327 ---------------------------- drivers/ide/ppc/pmac.c | 2 +- drivers/macintosh/smu.c | 8 +- include/asm-powerpc/iommu.h | 6 +- include/asm-powerpc/mpic.h | 3 +- include/asm-powerpc/pmac_feature.h | 2 + include/linux/pci_regs.h | 1 + 21 files changed, 1059 insertions(+), 629 deletions(-) create mode 100644 arch/powerpc/sysdev/dart_iommu.c delete mode 100644 arch/powerpc/sysdev/u3_iommu.c (limited to 'include/linux') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 773b880d5577..5692edb3491e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -300,6 +300,7 @@ config PPC_PMAC64 bool depends on PPC_PMAC && POWER4 select U3_DART + select MPIC_BROKEN_U3 select GENERIC_TBSYNC default y diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 523f35087e81..f73a16e9867a 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -34,7 +34,7 @@ #ifdef DEBUG #include -#define DBG(fmt...) udbg_printf(fmt) +#define DBG(fmt...) printk(fmt) #else #define DBG(fmt...) 
#endif @@ -323,6 +323,7 @@ static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev) addrs = (u32 *) get_property(node, "assigned-addresses", &proplen); if (!addrs) return; + DBG(" parse addresses (%d bytes) @ %p\n", proplen, addrs); for (; proplen >= 20; proplen -= 20, addrs += 5) { flags = pci_parse_of_flags(addrs[0]); if (!flags) @@ -332,6 +333,9 @@ static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev) if (!size) continue; i = addrs[0] & 0xff; + DBG(" base: %llx, size: %llx, i: %x\n", + (unsigned long long)base, (unsigned long long)size, i); + if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) { res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2]; } else if (i == dev->rom_base_reg) { @@ -362,6 +366,8 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, if (type == NULL) type = ""; + DBG(" create device, devfn: %x, type: %s\n", devfn, type); + memset(dev, 0, sizeof(struct pci_dev)); dev->bus = bus; dev->sysdata = node; @@ -381,6 +387,8 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); dev->class = get_int_prop(node, "class-code", 0); + DBG(" class: 0x%x\n", dev->class); + dev->current_state = 4; /* unknown power state */ if (!strcmp(type, "pci")) { @@ -402,6 +410,8 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, pci_parse_of_addrs(node, dev); + DBG(" adding to system ...\n"); + pci_device_add(dev, bus); /* XXX pci_scan_msi_device(dev); */ @@ -418,15 +428,21 @@ void __devinit of_scan_bus(struct device_node *node, int reglen, devfn; struct pci_dev *dev; + DBG("of_scan_bus(%s) bus no %d... 
\n", node->full_name, bus->number); + while ((child = of_get_next_child(node, child)) != NULL) { + DBG(" * %s\n", child->full_name); reg = (u32 *) get_property(child, "reg", &reglen); if (reg == NULL || reglen < 20) continue; devfn = (reg[0] >> 8) & 0xff; + /* create a new pci_dev for this device */ dev = of_create_pci_dev(child, bus, devfn); if (!dev) continue; + DBG("dev header type: %x\n", dev->hdr_type); + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) of_scan_pci_bridge(child, dev); @@ -446,16 +462,18 @@ void __devinit of_scan_pci_bridge(struct device_node *node, unsigned int flags; u64 size; + DBG("of_scan_pci_bridge(%s)\n", node->full_name); + /* parse bus-range property */ busrange = (u32 *) get_property(node, "bus-range", &len); if (busrange == NULL || len != 8) { - printk(KERN_ERR "Can't get bus-range for PCI-PCI bridge %s\n", + printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n", node->full_name); return; } ranges = (u32 *) get_property(node, "ranges", &len); if (ranges == NULL) { - printk(KERN_ERR "Can't get ranges for PCI-PCI bridge %s\n", + printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n", node->full_name); return; } @@ -509,10 +527,13 @@ void __devinit of_scan_pci_bridge(struct device_node *node, } sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), bus->number); + DBG(" bus name: %s\n", bus->name); mode = PCI_PROBE_NORMAL; if (ppc_md.pci_probe_mode) mode = ppc_md.pci_probe_mode(bus); + DBG(" probe mode: %d\n", mode); + if (mode == PCI_PROBE_DEVTREE) of_scan_bus(node, bus); else if (mode == PCI_PROBE_NORMAL) @@ -528,6 +549,8 @@ void __devinit scan_phb(struct pci_controller *hose) int i, mode; struct resource *res; + DBG("Scanning PHB %s\n", node ? 
node->full_name : ""); + bus = pci_create_bus(NULL, hose->first_busno, hose->ops, node); if (bus == NULL) { printk(KERN_ERR "Failed to create bus for PCI domain %04x\n", @@ -552,8 +575,9 @@ void __devinit scan_phb(struct pci_controller *hose) mode = PCI_PROBE_NORMAL; #ifdef CONFIG_PPC_MULTIPLATFORM - if (ppc_md.pci_probe_mode) + if (node && ppc_md.pci_probe_mode) mode = ppc_md.pci_probe_mode(bus); + DBG(" probe mode: %d\n", mode); if (mode == PCI_PROBE_DEVTREE) { bus->subordinate = hose->last_busno; of_scan_bus(node, bus); @@ -842,8 +866,7 @@ pgprot_t pci_phys_mem_access_prot(struct file *file, * Returns a negative error code on failure, zero on success. */ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, - int write_combine) + enum pci_mmap_state mmap_state, int write_combine) { unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; struct resource *rp; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 1b97e13657e5..977ee3adaf2d 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -298,6 +298,16 @@ static int __devinit finish_node_interrupts(struct device_node *np, int i, j, n, sense; unsigned int *irq, virq; struct device_node *ic; + int trace = 0; + + //#define TRACE(fmt...) do { if (trace) { printk(fmt); mdelay(1000); } } while(0) +#define TRACE(fmt...) + + if (!strcmp(np->name, "smu-doorbell")) + trace = 1; + + TRACE("Finishing SMU doorbell ! 
num_interrupt_controllers = %d\n", + num_interrupt_controllers); if (num_interrupt_controllers == 0) { /* @@ -332,11 +342,12 @@ static int __devinit finish_node_interrupts(struct device_node *np, } ints = (unsigned int *) get_property(np, "interrupts", &intlen); + TRACE("ints=%p, intlen=%d\n", ints, intlen); if (ints == NULL) return 0; intrcells = prom_n_intr_cells(np); intlen /= intrcells * sizeof(unsigned int); - + TRACE("intrcells=%d, new intlen=%d\n", intrcells, intlen); np->intrs = prom_alloc(intlen * sizeof(*(np->intrs)), mem_start); if (!np->intrs) return -ENOMEM; @@ -347,6 +358,7 @@ static int __devinit finish_node_interrupts(struct device_node *np, intrcount = 0; for (i = 0; i < intlen; ++i, ints += intrcells) { n = map_interrupt(&irq, &ic, np, ints, intrcells); + TRACE("map, irq=%d, ic=%p, n=%d\n", irq, ic, n); if (n <= 0) continue; @@ -357,6 +369,7 @@ static int __devinit finish_node_interrupts(struct device_node *np, np->intrs[intrcount].sense = map_isa_senses[sense]; } else { virq = virt_irq_create_mapping(irq[0]); + TRACE("virq=%d\n", virq); #ifdef CONFIG_PPC64 if (virq == NO_IRQ) { printk(KERN_CRIT "Could not allocate interrupt" @@ -366,6 +379,12 @@ static int __devinit finish_node_interrupts(struct device_node *np, #endif np->intrs[intrcount].line = irq_offset_up(virq); sense = (n > 1)? 
(irq[1] & 3): 1; + + /* Apple uses bits in there in a different way, let's + * only keep the real sense bit on macs + */ + if (_machine == PLATFORM_POWERMAC) + sense &= 0x1; np->intrs[intrcount].sense = map_mpic_senses[sense]; } @@ -375,12 +394,13 @@ static int __devinit finish_node_interrupts(struct device_node *np, char *name = get_property(ic->parent, "name", NULL); if (name && !strcmp(name, "u3")) np->intrs[intrcount].line += 128; - else if (!(name && !strcmp(name, "mac-io"))) + else if (!(name && (!strcmp(name, "mac-io") || + !strcmp(name, "u4")))) /* ignore other cascaded controllers, such as the k2-sata-root */ break; } -#endif +#endif /* CONFIG_PPC64 */ if (n > 2) { printk("hmmm, got %d intr cells for %s:", n, np->full_name); diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index a058285a70e7..9567d9474c80 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -110,10 +110,12 @@ static int early_console_initialized; void __init disable_early_printk(void) { +#if 1 if (!early_console_initialized) return; unregister_console(&udbg_console); early_console_initialized = 0; +#endif } /* called by setup_system */ diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 65fe4c166a68..dd73e38bfb7d 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -195,7 +195,7 @@ static void __init maple_init_early(void) /* Setup interrupt mapping options */ ppc64_interrupt_controller = IC_OPEN_PIC; - iommu_init_early_u3(); + iommu_init_early_dart(); DBG(" <- maple_init_early\n"); } @@ -257,7 +257,7 @@ static int __init maple_probe(int platform) * occupies having to be broken up so the DART itself is not * part of the cacheable linar mapping */ - alloc_u3_dart_table(); + alloc_dart_table(); return 1; } diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index b1f896952b1b..d2915d64d45e 100644 --- 
a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -101,7 +101,8 @@ static const char *macio_names[] = "Keylargo", "Pangea", "Intrepid", - "K2" + "K2", + "Shasta", }; @@ -119,7 +120,7 @@ static const char *macio_names[] = static struct device_node *uninorth_node; static u32 __iomem *uninorth_base; static u32 uninorth_rev; -static int uninorth_u3; +static int uninorth_maj; static void __iomem *u3_ht; /* @@ -1399,8 +1400,15 @@ static long g5_fw_enable(struct device_node *node, long param, long value) static long g5_mpic_enable(struct device_node *node, long param, long value) { unsigned long flags; + struct device_node *parent = of_get_parent(node); + int is_u3; - if (node->parent == NULL || strcmp(node->parent->name, "u3")) + if (parent == NULL) + return 0; + is_u3 = strcmp(parent->name, "u3") == 0 || + strcmp(parent->name, "u4") == 0; + of_node_put(parent); + if (!is_u3) return 0; LOCK(flags); @@ -1464,7 +1472,7 @@ static long g5_i2s_enable(struct device_node *node, long param, long value) }, }; - if (macio->type != macio_keylargo2 /* && macio->type != macio_shasta*/) + if (macio->type != macio_keylargo2 && macio->type != macio_shasta) return -ENODEV; if (strncmp(node->name, "i2s-", 4)) return -ENODEV; @@ -1473,11 +1481,9 @@ static long g5_i2s_enable(struct device_node *node, long param, long value) case 0: case 1: break; -#if 0 case 2: if (macio->type == macio_shasta) break; -#endif default: return -ENODEV; } @@ -1508,7 +1514,7 @@ static long g5_reset_cpu(struct device_node *node, long param, long value) struct device_node *np; macio = &macio_chips[0]; - if (macio->type != macio_keylargo2) + if (macio->type != macio_keylargo2 && macio->type != macio_shasta) return -ENODEV; np = find_path_device("/cpus"); @@ -1547,7 +1553,8 @@ static long g5_reset_cpu(struct device_node *node, long param, long value) */ void g5_phy_disable_cpu1(void) { - UN_OUT(U3_API_PHY_CONFIG_1, 0); + if (uninorth_maj == 3) + 
UN_OUT(U3_API_PHY_CONFIG_1, 0); } #endif /* CONFIG_POWER4 */ @@ -2462,6 +2469,14 @@ static struct pmac_mb_def pmac_mb_defs[] = { PMAC_TYPE_POWERMAC_G5_U3L, g5_features, 0, }, + { "PowerMac11,2", "PowerMac G5 Dual Core", + PMAC_TYPE_POWERMAC_G5_U3L, g5_features, + 0, + }, + { "PowerMac12,1", "iMac G5 (iSight)", + PMAC_TYPE_POWERMAC_G5_U3L, g5_features, + 0, + }, { "RackMac3,1", "XServe G5", PMAC_TYPE_XSERVE_G5, g5_features, 0, @@ -2574,6 +2589,11 @@ static int __init probe_motherboard(void) pmac_mb.model_name = "Unknown K2-based"; pmac_mb.features = g5_features; break; + case macio_shasta: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_SHASTA; + pmac_mb.model_name = "Unknown Shasta-based"; + pmac_mb.features = g5_features; + break; #endif /* CONFIG_POWER4 */ default: return -ENODEV; @@ -2651,7 +2671,12 @@ static void __init probe_uninorth(void) /* Locate G5 u3 */ if (uninorth_node == NULL) { uninorth_node = of_find_node_by_name(NULL, "u3"); - uninorth_u3 = 1; + uninorth_maj = 3; + } + /* Locate G5 u4 */ + if (uninorth_node == NULL) { + uninorth_node = of_find_node_by_name(NULL, "u4"); + uninorth_maj = 4; } if (uninorth_node == NULL) return; @@ -2664,12 +2689,13 @@ static void __init probe_uninorth(void) return; uninorth_base = ioremap(address, 0x40000); uninorth_rev = in_be32(UN_REG(UNI_N_VERSION)); - if (uninorth_u3) + if (uninorth_maj == 3 || uninorth_maj == 4) u3_ht = ioremap(address + U3_HT_CONFIG_BASE, 0x1000); - printk(KERN_INFO "Found %s memory controller & host bridge," - " revision: %d\n", uninorth_u3 ? "U3" : "UniNorth", - uninorth_rev); + printk(KERN_INFO "Found %s memory controller & host bridge" + " @ 0x%08x revision: 0x%02x\n", uninorth_maj == 3 ? "U3" : + uninorth_maj == 4 ? 
"U4" : "UniNorth", + (unsigned int)address, uninorth_rev); printk(KERN_INFO "Mapped at 0x%08lx\n", (unsigned long)uninorth_base); /* Set the arbitrer QAck delay according to what Apple does @@ -2677,7 +2703,8 @@ static void __init probe_uninorth(void) if (uninorth_rev < 0x11) { actrl = UN_IN(UNI_N_ARB_CTRL) & ~UNI_N_ARB_CTRL_QACK_DELAY_MASK; actrl |= ((uninorth_rev < 3) ? UNI_N_ARB_CTRL_QACK_DELAY105 : - UNI_N_ARB_CTRL_QACK_DELAY) << UNI_N_ARB_CTRL_QACK_DELAY_SHIFT; + UNI_N_ARB_CTRL_QACK_DELAY) << + UNI_N_ARB_CTRL_QACK_DELAY_SHIFT; UN_OUT(UNI_N_ARB_CTRL, actrl); } @@ -2685,7 +2712,8 @@ static void __init probe_uninorth(void) * revs 1.5 to 2.O and Pangea. Seem to toggle the UniN Maxbus/PCI * memory timeout */ - if ((uninorth_rev >= 0x11 && uninorth_rev <= 0x24) || uninorth_rev == 0xc0) + if ((uninorth_rev >= 0x11 && uninorth_rev <= 0x24) || + uninorth_rev == 0xc0) UN_OUT(0x2160, UN_IN(0x2160) & 0x00ffffff); } @@ -2736,12 +2764,14 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ node->full_name); return; } - if (type == macio_keylargo) { + if (type == macio_keylargo || type == macio_keylargo2) { u32 *did = (u32 *)get_property(node, "device-id", NULL); if (*did == 0x00000025) type = macio_pangea; if (*did == 0x0000003e) type = macio_intrepid; + if (*did == 0x0000004f) + type = macio_shasta; } macio_chips[i].of_node = node; macio_chips[i].type = type; @@ -2840,7 +2870,8 @@ set_initial_features(void) } #ifdef CONFIG_POWER4 - if (macio_chips[0].type == macio_keylargo2) { + if (macio_chips[0].type == macio_keylargo2 || + macio_chips[0].type == macio_shasta) { #ifndef CONFIG_SMP /* On SMP machines running UP, we have the second CPU eating * bus cycles. We need to take it off the bus. 
This is done diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index 5aab261075de..f671ed253901 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -1,7 +1,7 @@ /* * Support for PCI bridges found on Power Macintoshes. * - * Copyright (C) 2003 Benjamin Herrenschmuidt (benh@kernel.crashing.org) + * Copyright (C) 2003-2005 Benjamin Herrenschmuidt (benh@kernel.crashing.org) * Copyright (C) 1997 Paul Mackerras (paulus@samba.org) * * This program is free software; you can redistribute it and/or @@ -25,7 +25,7 @@ #include #include #ifdef CONFIG_PPC64 -#include +//#include #include #endif @@ -44,6 +44,7 @@ static int add_bridge(struct device_node *dev); static int has_uninorth; #ifdef CONFIG_PPC64 static struct pci_controller *u3_agp; +static struct pci_controller *u4_pcie; static struct pci_controller *u3_ht; #endif /* CONFIG_PPC64 */ @@ -97,11 +98,8 @@ static void __init fixup_bus_range(struct device_node *bridge) /* Lookup the "bus-range" property for the hose */ bus_range = (int *) get_property(bridge, "bus-range", &len); - if (bus_range == NULL || len < 2 * sizeof(int)) { - printk(KERN_WARNING "Can't get bus-range for %s\n", - bridge->full_name); + if (bus_range == NULL || len < 2 * sizeof(int)) return; - } bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]); } @@ -128,14 +126,14 @@ static void __init fixup_bus_range(struct device_node *bridge) */ #define MACRISC_CFA0(devfn, off) \ - ((1 << (unsigned long)PCI_SLOT(dev_fn)) \ - | (((unsigned long)PCI_FUNC(dev_fn)) << 8) \ - | (((unsigned long)(off)) & 0xFCUL)) + ((1 << (unsigned int)PCI_SLOT(dev_fn)) \ + | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \ + | (((unsigned int)(off)) & 0xFCUL)) #define MACRISC_CFA1(bus, devfn, off) \ - ((((unsigned long)(bus)) << 16) \ - |(((unsigned long)(devfn)) << 8) \ - |(((unsigned long)(off)) & 0xFCUL) \ + ((((unsigned int)(bus)) << 16) \ + |(((unsigned int)(devfn)) << 8) \ + 
|(((unsigned int)(off)) & 0xFCUL) \ |1UL) static unsigned long macrisc_cfg_access(struct pci_controller* hose, @@ -168,7 +166,8 @@ static int macrisc_read_config(struct pci_bus *bus, unsigned int devfn, hose = pci_bus_to_host(bus); if (hose == NULL) return PCIBIOS_DEVICE_NOT_FOUND; - + if (offset >= 0x100) + return PCIBIOS_BAD_REGISTER_NUMBER; addr = macrisc_cfg_access(hose, bus->number, devfn, offset); if (!addr) return PCIBIOS_DEVICE_NOT_FOUND; @@ -199,7 +198,8 @@ static int macrisc_write_config(struct pci_bus *bus, unsigned int devfn, hose = pci_bus_to_host(bus); if (hose == NULL) return PCIBIOS_DEVICE_NOT_FOUND; - + if (offset >= 0x100) + return PCIBIOS_BAD_REGISTER_NUMBER; addr = macrisc_cfg_access(hose, bus->number, devfn, offset); if (!addr) return PCIBIOS_DEVICE_NOT_FOUND; @@ -234,12 +234,13 @@ static struct pci_ops macrisc_pci_ops = /* * Verify that a specific (bus, dev_fn) exists on chaos */ -static int -chaos_validate_dev(struct pci_bus *bus, int devfn, int offset) +static int chaos_validate_dev(struct pci_bus *bus, int devfn, int offset) { struct device_node *np; u32 *vendor, *device; + if (offset >= 0x100) + return PCIBIOS_BAD_REGISTER_NUMBER; np = pci_busdev_to_OF_node(bus, devfn); if (np == NULL) return PCIBIOS_DEVICE_NOT_FOUND; @@ -341,10 +342,10 @@ static int u3_ht_skip_device(struct pci_controller *hose, } #define U3_HT_CFA0(devfn, off) \ - ((((unsigned long)devfn) << 8) | offset) + ((((unsigned int)devfn) << 8) | offset) #define U3_HT_CFA1(bus, devfn, off) \ (U3_HT_CFA0(devfn, off) \ - + (((unsigned long)bus) << 16) \ + + (((unsigned int)bus) << 16) \ + 0x01000000UL) static unsigned long u3_ht_cfg_access(struct pci_controller* hose, @@ -370,7 +371,8 @@ static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, hose = pci_bus_to_host(bus); if (hose == NULL) return PCIBIOS_DEVICE_NOT_FOUND; - + if (offset >= 0x100) + return PCIBIOS_BAD_REGISTER_NUMBER; addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); if (!addr) return 
PCIBIOS_DEVICE_NOT_FOUND; @@ -419,7 +421,8 @@ static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn, hose = pci_bus_to_host(bus); if (hose == NULL) return PCIBIOS_DEVICE_NOT_FOUND; - + if (offset >= 0x100) + return PCIBIOS_BAD_REGISTER_NUMBER; addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); if (!addr) return PCIBIOS_DEVICE_NOT_FOUND; @@ -459,6 +462,112 @@ static struct pci_ops u3_ht_pci_ops = u3_ht_read_config, u3_ht_write_config }; + +#define U4_PCIE_CFA0(devfn, off) \ + ((1 << ((unsigned int)PCI_SLOT(dev_fn))) \ + | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \ + | ((((unsigned int)(off)) >> 8) << 28) \ + | (((unsigned int)(off)) & 0xfcU)) + +#define U4_PCIE_CFA1(bus, devfn, off) \ + ((((unsigned int)(bus)) << 16) \ + |(((unsigned int)(devfn)) << 8) \ + | ((((unsigned int)(off)) >> 8) << 28) \ + |(((unsigned int)(off)) & 0xfcU) \ + |1UL) + +static unsigned long u4_pcie_cfg_access(struct pci_controller* hose, + u8 bus, u8 dev_fn, int offset) +{ + unsigned int caddr; + + if (bus == hose->first_busno) { + caddr = U4_PCIE_CFA0(dev_fn, offset); + } else + caddr = U4_PCIE_CFA1(bus, dev_fn, offset); + + /* Uninorth will return garbage if we don't read back the value ! */ + do { + out_le32(hose->cfg_addr, caddr); + } while (in_le32(hose->cfg_addr) != caddr); + + offset &= 0x03; + return ((unsigned long)hose->cfg_data) + offset; +} + +static int u4_pcie_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + unsigned long addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset >= 0x1000) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. 
+ */ + switch (len) { + case 1: + *val = in_8((u8 *)addr); + break; + case 2: + *val = in_le16((u16 *)addr); + break; + default: + *val = in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int u4_pcie_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + unsigned long addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset >= 0x1000) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8((u8 *)addr, val); + (void) in_8((u8 *)addr); + break; + case 2: + out_le16((u16 *)addr, val); + (void) in_le16((u16 *)addr); + break; + default: + out_le32((u32 *)addr, val); + (void) in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops u4_pcie_pci_ops = +{ + u4_pcie_read_config, + u4_pcie_write_config +}; + #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC32 @@ -628,15 +737,36 @@ static void __init setup_u3_agp(struct pci_controller* hose) hose->ops = &macrisc_pci_ops; hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); - u3_agp = hose; } +static void __init setup_u4_pcie(struct pci_controller* hose) +{ + /* We currently only implement the "non-atomic" config space, to + * be optimised later. + */ + hose->ops = &u4_pcie_pci_ops; + hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); + hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); + + /* The bus contains a bridge from root -> device, we need to + * make it visible on bus 0 so that we pick the right type + * of config cycles. If we didn't, we would have to force all + * config cycles to be type 1. 
So we override the "bus-range" + * property here + */ + hose->first_busno = 0x00; + hose->last_busno = 0xff; + u4_pcie = hose; +} + static void __init setup_u3_ht(struct pci_controller* hose) { struct device_node *np = (struct device_node *)hose->arch_data; + struct pci_controller *other = NULL; int i, cur; + hose->ops = &u3_ht_pci_ops; /* We hard code the address because of the different size of @@ -670,11 +800,20 @@ static void __init setup_u3_ht(struct pci_controller* hose) u3_ht = hose; - if (u3_agp == NULL) { - DBG("U3 has no AGP, using full resource range\n"); + if (u3_agp != NULL) + other = u3_agp; + else if (u4_pcie != NULL) + other = u4_pcie; + + if (other == NULL) { + DBG("U3/4 has no AGP/PCIE, using full resource range\n"); return; } + /* Fixup bus range vs. PCIE */ + if (u4_pcie) + hose->last_busno = u4_pcie->first_busno - 1; + /* We "remove" the AGP resources from the resources allocated to HT, * that is we create "holes". However, that code does assumptions * that so far happen to be true (cross fingers...), typically that @@ -682,7 +821,7 @@ static void __init setup_u3_ht(struct pci_controller* hose) */ cur = 0; for (i=0; i<3; i++) { - struct resource *res = &u3_agp->mem_resources[i]; + struct resource *res = &other->mem_resources[i]; if (res->flags != IORESOURCE_MEM) continue; /* We don't care about "fine" resources */ @@ -777,9 +916,13 @@ static int __init add_bridge(struct device_node *dev) setup_u3_ht(hose); disp_name = "U3-HT"; primary = 1; + } else if (device_is_compatible(dev, "u4-pcie")) { + setup_u4_pcie(hose); + disp_name = "U4-PCIE"; + primary = 0; } - printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n", - disp_name, hose->first_busno, hose->last_busno); + printk(KERN_INFO "Found %s PCI host bridge. 
Firmware bus number:" + " %d->%d\n", disp_name, hose->first_busno, hose->last_busno); #endif /* CONFIG_PPC64 */ /* 32 bits only bridges */ @@ -900,6 +1043,8 @@ void __init pmac_pci_init(void) pci_setup_phb_io(u3_ht, 1); if (u3_agp) pci_setup_phb_io(u3_agp, 0); + if (u4_pcie) + pci_setup_phb_io(u4_pcie, 0); /* * On ppc64, fixup the IO resources on our host bridges as @@ -912,7 +1057,8 @@ void __init pmac_pci_init(void) /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We * assume there is no P2P bridge on the AGP bus, which should be a - * safe assumptions hopefully. + * safe assumptions for now. We should do something better in the + * future though */ if (u3_agp) { struct device_node *np = u3_agp->arch_data; @@ -920,7 +1066,6 @@ void __init pmac_pci_init(void) for (np = np->child; np; np = np->sibling) PCI_DN(np)->busno = 0xf0; } - /* pmac_check_ht_link(); */ /* Tell pci.c to not use the common resource allocation mechanism */ @@ -1127,7 +1272,8 @@ void pmac_pci_fixup_pciata(struct pci_dev* dev) good: pci_read_config_byte(dev, PCI_CLASS_PROG, &progif); if ((progif & 5) != 5) { - printk(KERN_INFO "Forcing PCI IDE into native mode: %s\n", pci_name(dev)); + printk(KERN_INFO "Forcing PCI IDE into native mode: %s\n", + pci_name(dev)); (void) pci_write_config_byte(dev, PCI_CLASS_PROG, progif|5); if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) || (progif & 5) != 5) @@ -1153,7 +1299,8 @@ static void fixup_k2_sata(struct pci_dev* dev) for (i = 0; i < 6; i++) { dev->resource[i].start = dev->resource[i].end = 0; dev->resource[i].flags = 0; - pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, 0); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, + 0); } } else { pci_read_config_word(dev, PCI_COMMAND, &cmd); @@ -1162,7 +1309,8 @@ static void fixup_k2_sata(struct pci_dev* dev) for (i = 0; i < 5; i++) { dev->resource[i].start = dev->resource[i].end = 0; dev->resource[i].flags = 0; - pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, 0); + 
pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, + 0); } } } diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index dbb524a851aa..18bf3011d1e3 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -524,18 +524,56 @@ static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic) #endif /* defined(CONFIG_XMON) && defined(CONFIG_PPC32) */ } +static struct mpic * __init pmac_setup_one_mpic(struct device_node *np, + int master) +{ + unsigned char senses[128]; + int offset = master ? 0 : 128; + int count = master ? 128 : 124; + const char *name = master ? " MPIC 1 " : " MPIC 2 "; + struct resource r; + struct mpic *mpic; + unsigned int flags = master ? MPIC_PRIMARY : 0; + int rc; + + rc = of_address_to_resource(np, 0, &r); + if (rc) + return NULL; + + pmac_call_feature(PMAC_FTR_ENABLE_MPIC, np, 0, 0); + + prom_get_irq_senses(senses, offset, offset + count); + + flags |= MPIC_WANTS_RESET; + if (get_property(np, "big-endian", NULL)) + flags |= MPIC_BIG_ENDIAN; + + /* Primary Big Endian means HT interrupts. This is quite dodgy + * but works until I find a better way + */ + if (master && (flags & MPIC_BIG_ENDIAN)) + flags |= MPIC_BROKEN_U3; + + mpic = mpic_alloc(r.start, flags, 0, offset, count, master ? 
252 : 0, + senses, count, name); + if (mpic == NULL) + return NULL; + + mpic_init(mpic); + + return mpic; + } + static int __init pmac_pic_probe_mpic(void) { struct mpic *mpic1, *mpic2; struct device_node *np, *master = NULL, *slave = NULL; - unsigned char senses[128]; - struct resource r; /* We can have up to 2 MPICs cascaded */ for (np = NULL; (np = of_find_node_by_type(np, "open-pic")) != NULL;) { if (master == NULL && - get_property(np, "interrupt-parent", NULL) != NULL) + get_property(np, "interrupts", NULL) == NULL) master = of_node_get(np); else if (slave == NULL) slave = of_node_get(np); @@ -557,13 +595,8 @@ static int __init pmac_pic_probe_mpic(void) ppc_md.get_irq = mpic_get_irq; /* Setup master */ - BUG_ON(of_address_to_resource(master, 0, &r)); - pmac_call_feature(PMAC_FTR_ENABLE_MPIC, master, 0, 0); - prom_get_irq_senses(senses, 0, 128); - mpic1 = mpic_alloc(r.start, MPIC_PRIMARY | MPIC_WANTS_RESET, - 0, 0, 128, 252, senses, 128, " OpenPIC "); + mpic1 = pmac_setup_one_mpic(master, 1); BUG_ON(mpic1 == NULL); - mpic_init(mpic1); /* Install NMI if any */ pmac_pic_setup_mpic_nmi(mpic1); @@ -574,27 +607,12 @@ static int __init pmac_pic_probe_mpic(void) if (slave == NULL || slave->n_intrs < 1) return 0; - /* Setup slave, failures are non-fatal */ - if (of_address_to_resource(slave, 0, &r)) { - printk(KERN_ERR "Can't get address of MPIC %s\n", - slave->full_name); - return 0; - } - pmac_call_feature(PMAC_FTR_ENABLE_MPIC, slave, 0, 0); - prom_get_irq_senses(senses, 128, 128 + 124); - - /* We don't need to set MPIC_BROKEN_U3 here since we don't have - * hypertransport interrupts routed to it, at least not on currently - * supported machines, that may change. 
- */ - mpic2 = mpic_alloc(r.start, MPIC_BIG_ENDIAN | MPIC_WANTS_RESET, - 0, 128, 124, 0, senses, 124, " U3-MPIC "); + mpic2 = pmac_setup_one_mpic(slave, 0); if (mpic2 == NULL) { - printk(KERN_ERR "Can't create slave MPIC %s\n", - slave->full_name); + printk(KERN_ERR "Failed to setup slave MPIC\n"); + of_node_put(slave); return 0; } - mpic_init(mpic2); mpic_setup_cascade(slave->intrs[0].line, pmac_u3_cascade, mpic2); of_node_put(slave); diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 18c5620f87fa..1daa5a06e9ea 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -345,7 +345,7 @@ void __init pmac_setup_arch(void) #ifdef CONFIG_SMP /* Check for Core99 */ - if (find_devices("uni-n") || find_devices("u3")) + if (find_devices("uni-n") || find_devices("u3") || find_devices("u4")) smp_ops = &core99_smp_ops; #ifdef CONFIG_PPC32 else @@ -635,7 +635,7 @@ static void __init pmac_init_early(void) /* Setup interrupt mapping options */ ppc64_interrupt_controller = IC_OPEN_PIC; - iommu_init_early_u3(); + iommu_init_early_dart(); #endif } @@ -711,7 +711,7 @@ static int __init pmac_probe(int platform) * occupies having to be broken up so the DART itself is not * part of the cacheable linar mapping */ - alloc_u3_dart_table(); + alloc_dart_table(); #endif #ifdef CONFIG_PMAC_SMU @@ -733,10 +733,11 @@ static int pmac_pci_probe_mode(struct pci_bus *bus) struct device_node *node = bus->sysdata; /* We need to use normal PCI probing for the AGP bus, - since the device for the AGP bridge isn't in the tree. */ - if (bus->self == NULL && device_is_compatible(node, "u3-agp")) + * since the device for the AGP bridge isn't in the tree. 
+ */ + if (bus->self == NULL && (device_is_compatible(node, "u3-agp") || + device_is_compatible(node, "u4-pcie"))) return PCI_PROBE_NORMAL; - return PCI_PROBE_DEVTREE; } #endif diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 862f1e985c19..df01bb8feb16 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -361,7 +361,6 @@ static void __init psurge_dual_sync_tb(int cpu_nr) set_dec(tb_ticks_per_jiffy); /* XXX fixme */ set_tb(0, 0); - last_jiffy_stamp(cpu_nr) = 0; if (cpu_nr > 0) { mb(); @@ -429,15 +428,62 @@ struct smp_ops_t psurge_smp_ops = { }; #endif /* CONFIG_PPC32 - actually powersurge support */ +/* + * Core 99 and later support + */ + +static void (*pmac_tb_freeze)(int freeze); +static unsigned long timebase; +static int tb_req; + +static void smp_core99_give_timebase(void) +{ + unsigned long flags; + + local_irq_save(flags); + + while(!tb_req) + barrier(); + tb_req = 0; + (*pmac_tb_freeze)(1); + mb(); + timebase = get_tb(); + mb(); + while (timebase) + barrier(); + mb(); + (*pmac_tb_freeze)(0); + mb(); + + local_irq_restore(flags); +} + + +static void __devinit smp_core99_take_timebase(void) +{ + unsigned long flags; + + local_irq_save(flags); + + tb_req = 1; + mb(); + while (!timebase) + barrier(); + mb(); + set_tb(timebase >> 32, timebase & 0xffffffff); + timebase = 0; + mb(); + set_dec(tb_ticks_per_jiffy/2); + + local_irq_restore(flags); +} + #ifdef CONFIG_PPC64 /* * G5s enable/disable the timebase via an i2c-connected clock chip. */ static struct device_node *pmac_tb_clock_chip_host; static u8 pmac_tb_pulsar_addr; -static void (*pmac_tb_freeze)(int freeze); -static DEFINE_SPINLOCK(timebase_lock); -static unsigned long timebase; static void smp_core99_cypress_tb_freeze(int freeze) { @@ -447,7 +493,8 @@ static void smp_core99_cypress_tb_freeze(int freeze) /* Strangely, the device-tree says address is 0xd2, but darwin * accesses 0xd0 ... 
*/ - pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_combined); + pmac_low_i2c_setmode(pmac_tb_clock_chip_host, + pmac_low_i2c_mode_combined); rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host, 0xd0 | pmac_low_i2c_read, 0x81, &data, 1); @@ -475,7 +522,8 @@ static void smp_core99_pulsar_tb_freeze(int freeze) u8 data; int rc; - pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_combined); + pmac_low_i2c_setmode(pmac_tb_clock_chip_host, + pmac_low_i2c_mode_combined); rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host, pmac_tb_pulsar_addr | pmac_low_i2c_read, 0x2e, &data, 1); @@ -496,54 +544,14 @@ static void smp_core99_pulsar_tb_freeze(int freeze) } } - -static void smp_core99_give_timebase(void) -{ - /* Open i2c bus for synchronous access */ - if (pmac_low_i2c_open(pmac_tb_clock_chip_host, 0)) - panic("Can't open i2c for TB sync !\n"); - - spin_lock(&timebase_lock); - (*pmac_tb_freeze)(1); - mb(); - timebase = get_tb(); - spin_unlock(&timebase_lock); - - while (timebase) - barrier(); - - spin_lock(&timebase_lock); - (*pmac_tb_freeze)(0); - spin_unlock(&timebase_lock); - - /* Close i2c bus */ - pmac_low_i2c_close(pmac_tb_clock_chip_host); -} - - -static void __devinit smp_core99_take_timebase(void) -{ - while (!timebase) - barrier(); - spin_lock(&timebase_lock); - set_tb(timebase >> 32, timebase & 0xffffffff); - timebase = 0; - spin_unlock(&timebase_lock); -} - -static void __init smp_core99_setup(int ncpus) +static void __init smp_core99_setup_i2c_hwsync(int ncpus) { struct device_node *cc = NULL; struct device_node *p; + const char *name = NULL; u32 *reg; int ok; - /* HW sync only on these platforms */ - if (!machine_is_compatible("PowerMac7,2") && - !machine_is_compatible("PowerMac7,3") && - !machine_is_compatible("RackMac3,1")) - return; - /* Look for the clock chip */ while ((cc = of_find_node_by_name(cc, "i2c-hwclock")) != NULL) { p = of_get_parent(cc); @@ -561,114 +569,64 @@ static void __init smp_core99_setup(int ncpus) if 
(device_is_compatible(cc, "pulsar-legacy-slewing")) { pmac_tb_freeze = smp_core99_pulsar_tb_freeze; pmac_tb_pulsar_addr = 0xd2; - printk(KERN_INFO "Timebase clock is Pulsar chip\n"); + name = "Pulsar"; } else if (device_is_compatible(cc, "cy28508")) { pmac_tb_freeze = smp_core99_cypress_tb_freeze; - printk(KERN_INFO "Timebase clock is Cypress chip\n"); + name = "Cypress"; } break; case 0xd4: pmac_tb_freeze = smp_core99_pulsar_tb_freeze; pmac_tb_pulsar_addr = 0xd4; - printk(KERN_INFO "Timebase clock is Pulsar chip\n"); + name = "Pulsar"; break; } - if (pmac_tb_freeze != NULL) { - pmac_tb_clock_chip_host = of_get_parent(cc); - of_node_put(cc); + if (pmac_tb_freeze != NULL) break; - } } - if (pmac_tb_freeze == NULL) { - smp_ops->give_timebase = smp_generic_give_timebase; - smp_ops->take_timebase = smp_generic_take_timebase; + if (pmac_tb_freeze != NULL) { + struct device_node *p = of_get_parent(cc); + of_node_put(cc); + while(p && strcmp(p->type, "i2c")) { + cc = of_get_parent(p); + of_node_put(p); + p = cc; + } + if (p == NULL) + goto no_i2c_sync; + /* Open i2c bus for synchronous access */ + if (pmac_low_i2c_open(p, 0)) { + printk(KERN_ERR "Failed top open i2c bus %s for clock" + " sync, fallback to software sync !\n", + p->full_name); + of_node_put(p); + goto no_i2c_sync; + } + pmac_tb_clock_chip_host = p; + printk(KERN_INFO "Processor timebase sync using %s i2c clock\n", + name); + return; } + no_i2c_sync: + pmac_tb_freeze = NULL; } -/* nothing to do here, caches are already set up by service processor */ -static inline void __devinit core99_init_caches(int cpu) -{ -} +#endif /* CONFIG_PPC64 */ -#else /* CONFIG_PPC64 */ /* - * SMP G4 powermacs use a GPIO to enable/disable the timebase. + * SMP G4 and newer G5 use a GPIO to enable/disable the timebase. 
*/ static unsigned int core99_tb_gpio; /* Timebase freeze GPIO */ -static unsigned int pri_tb_hi, pri_tb_lo; -static unsigned int pri_tb_stamp; - -/* not __init, called in sleep/wakeup code */ -void smp_core99_give_timebase(void) +static void smp_core99_gpio_tb_freeze(int freeze) { - unsigned long flags; - unsigned int t; - - /* wait for the secondary to be in take_timebase */ - for (t = 100000; t > 0 && !sec_tb_reset; --t) - udelay(10); - if (!sec_tb_reset) { - printk(KERN_WARNING "Timeout waiting sync on second CPU\n"); - return; - } - - /* freeze the timebase and read it */ - /* disable interrupts so the timebase is disabled for the - shortest possible time */ - local_irq_save(flags); - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 4); + if (freeze) + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 4); + else + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 0); pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); - mb(); - pri_tb_hi = get_tbu(); - pri_tb_lo = get_tbl(); - pri_tb_stamp = last_jiffy_stamp(smp_processor_id()); - mb(); - - /* tell the secondary we're ready */ - sec_tb_reset = 2; - mb(); - - /* wait for the secondary to have taken it */ - /* note: can't use udelay here, since it needs the timebase running */ - for (t = 10000000; t > 0 && sec_tb_reset; --t) - barrier(); - if (sec_tb_reset) - /* XXX BUG_ON here? 
*/ - printk(KERN_WARNING "Timeout waiting sync(2) on second CPU\n"); - - /* Now, restart the timebase by leaving the GPIO to an open collector */ - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 0); - pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); - local_irq_restore(flags); -} - -/* not __init, called in sleep/wakeup code */ -void smp_core99_take_timebase(void) -{ - unsigned long flags; - - /* tell the primary we're here */ - sec_tb_reset = 1; - mb(); - - /* wait for the primary to set pri_tb_hi/lo */ - while (sec_tb_reset < 2) - mb(); - - /* set our stuff the same as the primary */ - local_irq_save(flags); - set_dec(1); - set_tb(pri_tb_hi, pri_tb_lo); - last_jiffy_stamp(smp_processor_id()) = pri_tb_stamp; - mb(); - - /* tell the primary we're done */ - sec_tb_reset = 0; - mb(); - local_irq_restore(flags); } /* L2 and L3 cache settings to pass from CPU0 to CPU1 on G4 cpus */ @@ -677,6 +635,7 @@ volatile static long int core99_l3_cache; static void __devinit core99_init_caches(int cpu) { +#ifndef CONFIG_PPC64 if (!cpu_has_feature(CPU_FTR_L2CR)) return; @@ -702,30 +661,80 @@ static void __devinit core99_init_caches(int cpu) _set_L3CR(core99_l3_cache); printk("CPU%d: L3CR set to %lx\n", cpu, core99_l3_cache); } +#endif /* !CONFIG_PPC64 */ } static void __init smp_core99_setup(int ncpus) { - struct device_node *cpu; - u32 *tbprop = NULL; - int i; +#ifdef CONFIG_PPC64 - core99_tb_gpio = KL_GPIO_TB_ENABLE; /* default value */ - cpu = of_find_node_by_type(NULL, "cpu"); - if (cpu != NULL) { - tbprop = (u32 *)get_property(cpu, "timebase-enable", NULL); - if (tbprop) - core99_tb_gpio = *tbprop; - of_node_put(cpu); + /* i2c based HW sync on some G5s */ + if (machine_is_compatible("PowerMac7,2") || + machine_is_compatible("PowerMac7,3") || + machine_is_compatible("RackMac3,1")) + smp_core99_setup_i2c_hwsync(ncpus); + + /* GPIO based HW sync on recent G5s */ + if (pmac_tb_freeze == NULL) { + struct device_node *np = + of_find_node_by_name(NULL, 
"timebase-enable"); + u32 *reg = (u32 *)get_property(np, "reg", NULL); + + if (np && reg && !strcmp(np->type, "gpio")) { + core99_tb_gpio = *reg; + if (core99_tb_gpio < 0x50) + core99_tb_gpio += 0x50; + pmac_tb_freeze = smp_core99_gpio_tb_freeze; + printk(KERN_INFO "Processor timebase sync using" + " GPIO 0x%02x\n", core99_tb_gpio); + } } - /* XXX should get this from reg properties */ - for (i = 1; i < ncpus; ++i) - smp_hw_index[i] = i; - powersave_nap = 0; -} +#else /* CONFIG_PPC64 */ + + /* GPIO based HW sync on ppc32 Core99 */ + if (pmac_tb_freeze == NULL && !machine_is_compatible("MacRISC4")) { + struct device_node *cpu; + u32 *tbprop = NULL; + + core99_tb_gpio = KL_GPIO_TB_ENABLE; /* default value */ + cpu = of_find_node_by_type(NULL, "cpu"); + if (cpu != NULL) { + tbprop = (u32 *)get_property(cpu, "timebase-enable", + NULL); + if (tbprop) + core99_tb_gpio = *tbprop; + of_node_put(cpu); + } + pmac_tb_freeze = smp_core99_gpio_tb_freeze; + printk(KERN_INFO "Processor timebase sync using" + " GPIO 0x%02x\n", core99_tb_gpio); + } + +#endif /* CONFIG_PPC64 */ + + /* No timebase sync, fallback to software */ + if (pmac_tb_freeze == NULL) { + smp_ops->give_timebase = smp_generic_give_timebase; + smp_ops->take_timebase = smp_generic_take_timebase; + printk(KERN_INFO "Processor timebase sync using software\n"); + } + +#ifndef CONFIG_PPC64 + { + int i; + + /* XXX should get this from reg properties */ + for (i = 1; i < ncpus; ++i) + smp_hw_index[i] = i; + } #endif + /* 32 bits SMP can't NAP */ + if (!machine_is_compatible("MacRISC4")) + powersave_nap = 0; +} + static int __init smp_core99_probe(void) { struct device_node *cpus; @@ -803,17 +812,25 @@ static void __devinit smp_core99_setup_cpu(int cpu_nr) mpic_setup_this_cpu(); if (cpu_nr == 0) { -#ifdef CONFIG_POWER4 +#ifdef CONFIG_PPC64 extern void g5_phy_disable_cpu1(void); + /* Close i2c bus if it was used for tb sync */ + if (pmac_tb_clock_chip_host) { + pmac_low_i2c_close(pmac_tb_clock_chip_host); + 
pmac_tb_clock_chip_host = NULL; + } + /* If we didn't start the second CPU, we must take * it off the bus */ if (machine_is_compatible("MacRISC4") && num_online_cpus() < 2) g5_phy_disable_cpu1(); -#endif /* CONFIG_POWER4 */ - if (ppc_md.progress) ppc_md.progress("core99_setup_cpu 0 done", 0x349); +#endif /* CONFIG_PPC64 */ + + if (ppc_md.progress) + ppc_md.progress("core99_setup_cpu 0 done", 0x349); } } diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index b3e3636a57b0..14b9abde2d27 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -4,6 +4,6 @@ obj-$(CONFIG_PPC_I8259) += i8259.o obj-$(CONFIG_PPC_MPC106) += grackle.o obj-$(CONFIG_BOOKE) += dcr.o obj-$(CONFIG_40x) += dcr.o -obj-$(CONFIG_U3_DART) += u3_iommu.o +obj-$(CONFIG_U3_DART) += dart_iommu.o obj-$(CONFIG_MMIO_NVRAM) += mmio_nvram.o obj-$(CONFIG_83xx) += ipic.o diff --git a/arch/powerpc/sysdev/dart.h b/arch/powerpc/sysdev/dart.h index 33ed9ed7fc1e..c2d05763ccbe 100644 --- a/arch/powerpc/sysdev/dart.h +++ b/arch/powerpc/sysdev/dart.h @@ -20,29 +20,44 @@ #define _POWERPC_SYSDEV_DART_H -/* physical base of DART registers */ -#define DART_BASE 0xf8033000UL - /* Offset from base to control register */ -#define DARTCNTL 0 +#define DART_CNTL 0 + /* Offset from base to exception register */ -#define DARTEXCP 0x10 +#define DART_EXCP_U3 0x10 /* Offset from base to TLB tag registers */ -#define DARTTAG 0x1000 +#define DART_TAGS_U3 0x1000 +/* U4 registers */ +#define DART_BASE_U4 0x10 +#define DART_SIZE_U4 0x20 +#define DART_EXCP_U4 0x30 +#define DART_TAGS_U4 0x1000 /* Control Register fields */ -/* base address of table (pfn) */ -#define DARTCNTL_BASE_MASK 0xfffff -#define DARTCNTL_BASE_SHIFT 12 +/* U3 registers */ +#define DART_CNTL_U3_BASE_MASK 0xfffff +#define DART_CNTL_U3_BASE_SHIFT 12 +#define DART_CNTL_U3_FLUSHTLB 0x400 +#define DART_CNTL_U3_ENABLE 0x200 +#define DART_CNTL_U3_SIZE_MASK 0x1ff +#define DART_CNTL_U3_SIZE_SHIFT 0 + +/* U4 registers */ +#define 
DART_BASE_U4_BASE_MASK 0xffffff +#define DART_BASE_U4_BASE_SHIFT 0 +#define DART_CNTL_U4_FLUSHTLB 0x20000000 +#define DART_CNTL_U4_ENABLE 0x80000000 +#define DART_SIZE_U4_SIZE_MASK 0x1fff +#define DART_SIZE_U4_SIZE_SHIFT 0 + +#define DART_REG(r) (dart + ((r) >> 2)) +#define DART_IN(r) (in_be32(DART_REG(r))) +#define DART_OUT(r,v) (out_be32(DART_REG(r), (v))) -#define DARTCNTL_FLUSHTLB 0x400 -#define DARTCNTL_ENABLE 0x200 /* size of table in pages */ -#define DARTCNTL_SIZE_MASK 0x1ff -#define DARTCNTL_SIZE_SHIFT 0 /* DART table fields */ diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c new file mode 100644 index 000000000000..df0dbdee762a --- /dev/null +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -0,0 +1,350 @@ +/* + * arch/powerpc/sysdev/dart_iommu.c + * + * Copyright (C) 2004 Olof Johansson , IBM Corporation + * Copyright (C) 2005 Benjamin Herrenschmidt , + * IBM Corporation + * + * Based on pSeries_iommu.c: + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation + * Copyright (C) 2004 Olof Johansson , IBM Corporation + * + * Dynamic DMA mapping support, Apple U3, U4 & IBM CPC925 "DART" iommu. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dart.h" + +extern int iommu_force_on; + +/* Physical base address and size of the DART table */ +unsigned long dart_tablebase; /* exported to htab_initialize */ +static unsigned long dart_tablesize; + +/* Virtual base address of the DART table */ +static u32 *dart_vbase; + +/* Mapped base address for the dart */ +static unsigned int *__iomem dart; + +/* Dummy val that entries are set to when unused */ +static unsigned int dart_emptyval; + +static struct iommu_table iommu_table_dart; +static int iommu_table_dart_inited; +static int dart_dirty; +static int dart_is_u4; + +#define DBG(...) + +static inline void dart_tlb_invalidate_all(void) +{ + unsigned long l = 0; + unsigned int reg, inv_bit; + unsigned long limit; + + DBG("dart: flush\n"); + + /* To invalidate the DART, set the DARTCNTL_FLUSHTLB bit in the + * control register and wait for it to clear. + * + * Gotcha: Sometimes, the DART won't detect that the bit gets + * set. If so, clear it and set it again. + */ + + limit = 0; + + inv_bit = dart_is_u4 ? DART_CNTL_U4_FLUSHTLB : DART_CNTL_U3_FLUSHTLB; +retry: + l = 0; + reg = DART_IN(DART_CNTL); + reg |= inv_bit; + DART_OUT(DART_CNTL, reg); + + while ((DART_IN(DART_CNTL) & inv_bit) && l < (1L << limit)) + l++; + if (l == (1L << limit)) { + if (limit < 4) { + limit++; + reg = DART_IN(DART_CNTL); + reg &= ~inv_bit; + DART_OUT(DART_CNTL, reg); + goto retry; + } else + panic("DART: TLB did not flush after waiting a long " + "time. 
Buggy U3 ?"); + } +} + +static void dart_flush(struct iommu_table *tbl) +{ + if (dart_dirty) + dart_tlb_invalidate_all(); + dart_dirty = 0; +} + +static void dart_build(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction) +{ + unsigned int *dp; + unsigned int rpn; + + DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr); + + index <<= DART_PAGE_FACTOR; + npages <<= DART_PAGE_FACTOR; + + dp = ((unsigned int*)tbl->it_base) + index; + + /* On U3, all memory is contigous, so we can move this + * out of the loop. + */ + while (npages--) { + rpn = virt_to_abs(uaddr) >> DART_PAGE_SHIFT; + + *(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK); + + rpn++; + uaddr += DART_PAGE_SIZE; + } + + dart_dirty = 1; +} + + +static void dart_free(struct iommu_table *tbl, long index, long npages) +{ + unsigned int *dp; + + /* We don't worry about flushing the TLB cache. The only drawback of + * not doing it is that we won't catch buggy device drivers doing + * bad DMAs, but then no 32-bit architecture ever does either. + */ + + DBG("dart: free at: %lx, %lx\n", index, npages); + + index <<= DART_PAGE_FACTOR; + npages <<= DART_PAGE_FACTOR; + + dp = ((unsigned int *)tbl->it_base) + index; + + while (npages--) + *(dp++) = dart_emptyval; +} + + +static int dart_init(struct device_node *dart_node) +{ + unsigned int i; + unsigned long tmp, base, size; + struct resource r; + + if (dart_tablebase == 0 || dart_tablesize == 0) { + printk(KERN_INFO "DART: table not allocated, using " + "direct DMA\n"); + return -ENODEV; + } + + if (of_address_to_resource(dart_node, 0, &r)) + panic("DART: can't get register base ! "); + + /* Make sure nothing from the DART range remains in the CPU cache + * from a previous mapping that existed before the kernel took + * over + */ + flush_dcache_phys_range(dart_tablebase, + dart_tablebase + dart_tablesize); + + /* Allocate a spare page to map all invalid DART pages. 
We need to do + * that to work around what looks like a problem with the HT bridge + * prefetching into invalid pages and corrupting data + */ + tmp = lmb_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE); + if (!tmp) + panic("DART: Cannot allocate spare page!"); + dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) & + DARTMAP_RPNMASK); + + /* Map in DART registers */ + dart = ioremap(r.start, r.end - r.start + 1); + if (dart == NULL) + panic("DART: Cannot map registers!"); + + /* Map in DART table */ + dart_vbase = ioremap(virt_to_abs(dart_tablebase), dart_tablesize); + + /* Fill initial table */ + for (i = 0; i < dart_tablesize/4; i++) + dart_vbase[i] = dart_emptyval; + + /* Initialize DART with table base and enable it. */ + base = dart_tablebase >> DART_PAGE_SHIFT; + size = dart_tablesize >> DART_PAGE_SHIFT; + if (dart_is_u4) { + BUG_ON(size & ~DART_SIZE_U4_SIZE_MASK); + DART_OUT(DART_BASE_U4, base); + DART_OUT(DART_SIZE_U4, size); + DART_OUT(DART_CNTL, DART_CNTL_U4_ENABLE); + } else { + BUG_ON(size & ~DART_CNTL_U3_SIZE_MASK); + DART_OUT(DART_CNTL, + DART_CNTL_U3_ENABLE | + (base << DART_CNTL_U3_BASE_SHIFT) | + (size << DART_CNTL_U3_SIZE_SHIFT)); + } + + /* Invalidate DART to get rid of possible stale TLBs */ + dart_tlb_invalidate_all(); + + printk(KERN_INFO "DART IOMMU initialized for %s type chipset\n", + dart_is_u4 ? 
"U4" : "U3"); + + return 0; +} + +static void iommu_table_dart_setup(void) +{ + iommu_table_dart.it_busno = 0; + iommu_table_dart.it_offset = 0; + /* it_size is in number of entries */ + iommu_table_dart.it_size = (dart_tablesize / sizeof(u32)) >> DART_PAGE_FACTOR; + + /* Initialize the common IOMMU code */ + iommu_table_dart.it_base = (unsigned long)dart_vbase; + iommu_table_dart.it_index = 0; + iommu_table_dart.it_blocksize = 1; + iommu_init_table(&iommu_table_dart); + + /* Reserve the last page of the DART to avoid possible prefetch + * past the DART mapped area + */ + set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map); +} + +static void iommu_dev_setup_dart(struct pci_dev *dev) +{ + struct device_node *dn; + + /* We only have one iommu table on the mac for now, which makes + * things simple. Setup all PCI devices to point to this table + * + * We must use pci_device_to_OF_node() to make sure that + * we get the real "final" pointer to the device in the + * pci_dev sysdata and not the temporary PHB one + */ + dn = pci_device_to_OF_node(dev); + + if (dn) + PCI_DN(dn)->iommu_table = &iommu_table_dart; +} + +static void iommu_bus_setup_dart(struct pci_bus *bus) +{ + struct device_node *dn; + + if (!iommu_table_dart_inited) { + iommu_table_dart_inited = 1; + iommu_table_dart_setup(); + } + + dn = pci_bus_to_OF_node(bus); + + if (dn) + PCI_DN(dn)->iommu_table = &iommu_table_dart; +} + +static void iommu_dev_setup_null(struct pci_dev *dev) { } +static void iommu_bus_setup_null(struct pci_bus *bus) { } + +void iommu_init_early_dart(void) +{ + struct device_node *dn; + + /* Find the DART in the device-tree */ + dn = of_find_compatible_node(NULL, "dart", "u3-dart"); + if (dn == NULL) { + dn = of_find_compatible_node(NULL, "dart", "u4-dart"); + if (dn == NULL) + goto bail; + dart_is_u4 = 1; + } + + /* Setup low level TCE operations for the core IOMMU code */ + ppc_md.tce_build = dart_build; + ppc_md.tce_free = dart_free; + ppc_md.tce_flush = dart_flush; + + /* 
Initialize the DART HW */ + if (dart_init(dn) == 0) { + ppc_md.iommu_dev_setup = iommu_dev_setup_dart; + ppc_md.iommu_bus_setup = iommu_bus_setup_dart; + + /* Setup pci_dma ops */ + pci_iommu_init(); + + return; + } + + bail: + /* If init failed, use direct iommu and null setup functions */ + ppc_md.iommu_dev_setup = iommu_dev_setup_null; + ppc_md.iommu_bus_setup = iommu_bus_setup_null; + + /* Setup pci_dma ops */ + pci_direct_iommu_init(); +} + + +void __init alloc_dart_table(void) +{ + /* Only reserve DART space if machine has more than 2GB of RAM + * or if requested with iommu=on on cmdline. + */ + if (lmb_end_of_DRAM() <= 0x80000000ull && !iommu_force_on) + return; + + /* 512 pages (2MB) is max DART tablesize. */ + dart_tablesize = 1UL << 21; + /* 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we + * will blow up an entire large page anyway in the kernel mapping + */ + dart_tablebase = (unsigned long) + abs_to_virt(lmb_alloc_base(1UL<<24, 1UL<<24, 0x80000000L)); + + printk(KERN_INFO "DART table allocated at: %lx\n", dart_tablebase); +} diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 9513ea78e6c1..4f26304d0263 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -13,6 +13,9 @@ */ #undef DEBUG +#undef DEBUG_IPI +#undef DEBUG_IRQ +#undef DEBUG_LOW #include #include @@ -168,35 +171,86 @@ static void __init mpic_test_broken_ipi(struct mpic *mpic) /* Test if an interrupt is sourced from HyperTransport (used on broken U3s) * to force the edge setting on the MPIC and do the ack workaround. 
*/ -static inline int mpic_is_ht_interrupt(struct mpic *mpic, unsigned int source_no) +static inline int mpic_is_ht_interrupt(struct mpic *mpic, unsigned int source) { - if (source_no >= 128 || !mpic->fixups) + if (source >= 128 || !mpic->fixups) return 0; - return mpic->fixups[source_no].base != NULL; + return mpic->fixups[source].base != NULL; } -static inline void mpic_apic_end_irq(struct mpic *mpic, unsigned int source_no) +static inline void mpic_ht_end_irq(struct mpic *mpic, unsigned int source) { - struct mpic_irq_fixup *fixup = &mpic->fixups[source_no]; + struct mpic_irq_fixup *fixup = &mpic->fixups[source]; - spin_lock(&mpic->fixup_lock); - writeb(0x11 + 2 * fixup->irq, fixup->base + 2); - writel(fixup->data, fixup->base + 4); - spin_unlock(&mpic->fixup_lock); + if (fixup->applebase) { + unsigned int soff = (fixup->index >> 3) & ~3; + unsigned int mask = 1U << (fixup->index & 0x1f); + writel(mask, fixup->applebase + soff); + } else { + spin_lock(&mpic->fixup_lock); + writeb(0x11 + 2 * fixup->index, fixup->base + 2); + writel(fixup->data, fixup->base + 4); + spin_unlock(&mpic->fixup_lock); + } } +static void mpic_startup_ht_interrupt(struct mpic *mpic, unsigned int source, + unsigned int irqflags) +{ + struct mpic_irq_fixup *fixup = &mpic->fixups[source]; + unsigned long flags; + u32 tmp; + + if (fixup->base == NULL) + return; + + DBG("startup_ht_interrupt(%u, %u) index: %d\n", + source, irqflags, fixup->index); + spin_lock_irqsave(&mpic->fixup_lock, flags); + /* Enable and configure */ + writeb(0x10 + 2 * fixup->index, fixup->base + 2); + tmp = readl(fixup->base + 4); + tmp &= ~(0x23U); + if (irqflags & IRQ_LEVEL) + tmp |= 0x22; + writel(tmp, fixup->base + 4); + spin_unlock_irqrestore(&mpic->fixup_lock, flags); +} + +static void mpic_shutdown_ht_interrupt(struct mpic *mpic, unsigned int source, + unsigned int irqflags) +{ + struct mpic_irq_fixup *fixup = &mpic->fixups[source]; + unsigned long flags; + u32 tmp; + + if (fixup->base == NULL) + return; + + 
DBG("shutdown_ht_interrupt(%u, %u)\n", source, irqflags); + + /* Disable */ + spin_lock_irqsave(&mpic->fixup_lock, flags); + writeb(0x10 + 2 * fixup->index, fixup->base + 2); + tmp = readl(fixup->base + 4); + tmp &= ~1U; + writel(tmp, fixup->base + 4); + spin_unlock_irqrestore(&mpic->fixup_lock, flags); +} -static void __init mpic_scan_ioapic(struct mpic *mpic, u8 __iomem *devbase) +static void __init mpic_scan_ht_pic(struct mpic *mpic, u8 __iomem *devbase, + unsigned int devfn, u32 vdid) { int i, irq, n; + u8 __iomem *base; u32 tmp; u8 pos; - for (pos = readb(devbase + 0x34); pos; pos = readb(devbase + pos + 1)) { - u8 id = readb(devbase + pos); - - if (id == 0x08) { + for (pos = readb(devbase + PCI_CAPABILITY_LIST); pos != 0; + pos = readb(devbase + pos + PCI_CAP_LIST_NEXT)) { + u8 id = readb(devbase + pos + PCI_CAP_LIST_ID); + if (id == PCI_CAP_ID_HT_IRQCONF) { id = readb(devbase + pos + 3); if (id == 0x80) break; @@ -205,33 +259,41 @@ static void __init mpic_scan_ioapic(struct mpic *mpic, u8 __iomem *devbase) if (pos == 0) return; - printk(KERN_INFO "mpic: - Workarounds @ %p, pos = 0x%02x\n", devbase, pos); + base = devbase + pos; + writeb(0x01, base + 2); + n = (readl(base + 4) >> 16) & 0xff; - devbase += pos; - - writeb(0x01, devbase + 2); - n = (readl(devbase + 4) >> 16) & 0xff; + printk(KERN_INFO "mpic: - HT:%02x.%x [0x%02x] vendor %04x device %04x" + " has %d irqs\n", + devfn >> 3, devfn & 0x7, pos, vdid & 0xffff, vdid >> 16, n + 1); for (i = 0; i <= n; i++) { - writeb(0x10 + 2 * i, devbase + 2); - tmp = readl(devbase + 4); - if ((tmp & 0x21) != 0x20) - continue; + writeb(0x10 + 2 * i, base + 2); + tmp = readl(base + 4); irq = (tmp >> 16) & 0xff; - mpic->fixups[irq].irq = i; - mpic->fixups[irq].base = devbase; - writeb(0x11 + 2 * i, devbase + 2); - mpic->fixups[irq].data = readl(devbase + 4) | 0x80000000; + DBG("HT PIC index 0x%x, irq 0x%x, tmp: %08x\n", i, irq, tmp); + /* mask it , will be unmasked later */ + tmp |= 0x1; + writel(tmp, base + 4); + 
mpic->fixups[irq].index = i; + mpic->fixups[irq].base = base; + /* Apple HT PIC has a non-standard way of doing EOIs */ + if ((vdid & 0xffff) == 0x106b) + mpic->fixups[irq].applebase = devbase + 0x60; + else + mpic->fixups[irq].applebase = NULL; + writeb(0x11 + 2 * i, base + 2); + mpic->fixups[irq].data = readl(base + 4) | 0x80000000; } } -static void __init mpic_scan_ioapics(struct mpic *mpic) +static void __init mpic_scan_ht_pics(struct mpic *mpic) { unsigned int devfn; u8 __iomem *cfgspace; - printk(KERN_INFO "mpic: Setting up IO-APICs workarounds for U3\n"); + printk(KERN_INFO "mpic: Setting up HT PICs workarounds for U3/U4\n"); /* Allocate fixups array */ mpic->fixups = alloc_bootmem(128 * sizeof(struct mpic_irq_fixup)); @@ -247,13 +309,14 @@ static void __init mpic_scan_ioapics(struct mpic *mpic) cfgspace = ioremap(0xf2000000, 0x10000); BUG_ON(cfgspace == NULL); - /* Now we scan all slots. We do a very quick scan, we read the header type, - * vendor ID and device ID only, that's plenty enough + /* Now we scan all slots. 
We do a very quick scan, we read the header + * type, vendor ID and device ID only, that's plenty enough */ for (devfn = 0; devfn < 0x100; devfn++) { u8 __iomem *devbase = cfgspace + (devfn << 8); u8 hdr_type = readb(devbase + PCI_HEADER_TYPE); u32 l = readl(devbase + PCI_VENDOR_ID); + u16 s; DBG("devfn %x, l: %x\n", devfn, l); @@ -261,8 +324,12 @@ static void __init mpic_scan_ioapics(struct mpic *mpic) if (l == 0xffffffff || l == 0x00000000 || l == 0x0000ffff || l == 0xffff0000) goto next; + /* Check if is supports capability lists */ + s = readw(devbase + PCI_STATUS); + if (!(s & PCI_STATUS_CAP_LIST)) + goto next; - mpic_scan_ioapic(mpic, devbase); + mpic_scan_ht_pic(mpic, devbase, devfn, l); next: /* next device, if function 0 */ @@ -363,6 +430,31 @@ static void mpic_enable_irq(unsigned int irq) break; } } while(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK); + +#ifdef CONFIG_MPIC_BROKEN_U3 + if (mpic->flags & MPIC_BROKEN_U3) { + unsigned int src = irq - mpic->irq_offset; + if (mpic_is_ht_interrupt(mpic, src) && + (irq_desc[irq].status & IRQ_LEVEL)) + mpic_ht_end_irq(mpic, src); + } +#endif /* CONFIG_MPIC_BROKEN_U3 */ +} + +static unsigned int mpic_startup_irq(unsigned int irq) +{ +#ifdef CONFIG_MPIC_BROKEN_U3 + struct mpic *mpic = mpic_from_irq(irq); + unsigned int src = irq - mpic->irq_offset; + + if (mpic_is_ht_interrupt(mpic, src)) + mpic_startup_ht_interrupt(mpic, src, irq_desc[irq].status); + +#endif /* CONFIG_MPIC_BROKEN_U3 */ + + mpic_enable_irq(irq); + + return 0; } static void mpic_disable_irq(unsigned int irq) @@ -386,12 +478,27 @@ static void mpic_disable_irq(unsigned int irq) } while(!(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK)); } +static void mpic_shutdown_irq(unsigned int irq) +{ +#ifdef CONFIG_MPIC_BROKEN_U3 + struct mpic *mpic = mpic_from_irq(irq); + unsigned int src = irq - mpic->irq_offset; + + if (mpic_is_ht_interrupt(mpic, src)) + mpic_shutdown_ht_interrupt(mpic, src, irq_desc[irq].status); + +#endif /* 
CONFIG_MPIC_BROKEN_U3 */ + + mpic_disable_irq(irq); +} + static void mpic_end_irq(unsigned int irq) { struct mpic *mpic = mpic_from_irq(irq); +#ifdef DEBUG_IRQ DBG("%s: end_irq: %d\n", mpic->name, irq); - +#endif /* We always EOI on end_irq() even for edge interrupts since that * should only lower the priority, the MPIC should have properly * latched another edge interrupt coming in anyway @@ -400,8 +507,9 @@ static void mpic_end_irq(unsigned int irq) #ifdef CONFIG_MPIC_BROKEN_U3 if (mpic->flags & MPIC_BROKEN_U3) { unsigned int src = irq - mpic->irq_offset; - if (mpic_is_ht_interrupt(mpic, src)) - mpic_apic_end_irq(mpic, src); + if (mpic_is_ht_interrupt(mpic, src) && + (irq_desc[irq].status & IRQ_LEVEL)) + mpic_ht_end_irq(mpic, src); } #endif /* CONFIG_MPIC_BROKEN_U3 */ @@ -482,6 +590,8 @@ struct mpic * __init mpic_alloc(unsigned long phys_addr, mpic->name = name; mpic->hc_irq.typename = name; + mpic->hc_irq.startup = mpic_startup_irq; + mpic->hc_irq.shutdown = mpic_shutdown_irq; mpic->hc_irq.enable = mpic_enable_irq; mpic->hc_irq.disable = mpic_disable_irq; mpic->hc_irq.end = mpic_end_irq; @@ -650,10 +760,10 @@ void __init mpic_init(struct mpic *mpic) mpic->irq_count = mpic->num_sources; #ifdef CONFIG_MPIC_BROKEN_U3 - /* Do the ioapic fixups on U3 broken mpic */ + /* Do the HT PIC fixups on U3 broken mpic */ DBG("MPIC flags: %x\n", mpic->flags); if ((mpic->flags & MPIC_BROKEN_U3) && (mpic->flags & MPIC_PRIMARY)) - mpic_scan_ioapics(mpic); + mpic_scan_ht_pics(mpic); #endif /* CONFIG_MPIC_BROKEN_U3 */ for (i = 0; i < mpic->num_sources; i++) { @@ -840,7 +950,9 @@ void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask) BUG_ON(mpic == NULL); +#ifdef DEBUG_IPI DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, ipi_no); +#endif mpic_cpu_write(MPIC_CPU_IPI_DISPATCH_0 + ipi_no * 0x10, mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0])); @@ -851,19 +963,28 @@ int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs) u32 irq; irq = mpic_cpu_read(MPIC_CPU_INTACK) & 
MPIC_VECPRI_VECTOR_MASK; +#ifdef DEBUG_LOW DBG("%s: get_one_irq(): %d\n", mpic->name, irq); - +#endif if (mpic->cascade && irq == mpic->cascade_vec) { +#ifdef DEBUG_LOW DBG("%s: cascading ...\n", mpic->name); +#endif irq = mpic->cascade(regs, mpic->cascade_data); mpic_eoi(mpic); return irq; } if (unlikely(irq == MPIC_VEC_SPURRIOUS)) return -1; - if (irq < MPIC_VEC_IPI_0) + if (irq < MPIC_VEC_IPI_0) { +#ifdef DEBUG_IRQ + DBG("%s: irq %d\n", mpic->name, irq + mpic->irq_offset); +#endif return irq + mpic->irq_offset; + } +#ifdef DEBUG_IPI DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0); +#endif return irq - MPIC_VEC_IPI_0 + mpic->ipi_offset; } diff --git a/arch/powerpc/sysdev/u3_iommu.c b/arch/powerpc/sysdev/u3_iommu.c deleted file mode 100644 index 5c1a26a6d00c..000000000000 --- a/arch/powerpc/sysdev/u3_iommu.c +++ /dev/null @@ -1,327 +0,0 @@ -/* - * arch/powerpc/sysdev/u3_iommu.c - * - * Copyright (C) 2004 Olof Johansson , IBM Corporation - * - * Based on pSeries_iommu.c: - * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation - * Copyright (C) 2004 Olof Johansson , IBM Corporation - * - * Dynamic DMA mapping support, Apple U3 & IBM CPC925 "DART" iommu. - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "dart.h" - -extern int iommu_force_on; - -/* Physical base address and size of the DART table */ -unsigned long dart_tablebase; /* exported to htab_initialize */ -static unsigned long dart_tablesize; - -/* Virtual base address of the DART table */ -static u32 *dart_vbase; - -/* Mapped base address for the dart */ -static unsigned int *dart; - -/* Dummy val that entries are set to when unused */ -static unsigned int dart_emptyval; - -static struct iommu_table iommu_table_u3; -static int iommu_table_u3_inited; -static int dart_dirty; - -#define DBG(...) - -static inline void dart_tlb_invalidate_all(void) -{ - unsigned long l = 0; - unsigned int reg; - unsigned long limit; - - DBG("dart: flush\n"); - - /* To invalidate the DART, set the DARTCNTL_FLUSHTLB bit in the - * control register and wait for it to clear. - * - * Gotcha: Sometimes, the DART won't detect that the bit gets - * set. If so, clear it and set it again. - */ - - limit = 0; - -retry: - reg = in_be32((unsigned int *)dart+DARTCNTL); - reg |= DARTCNTL_FLUSHTLB; - out_be32((unsigned int *)dart+DARTCNTL, reg); - - l = 0; - while ((in_be32((unsigned int *)dart+DARTCNTL) & DARTCNTL_FLUSHTLB) && - l < (1L<it_base) + index; - - /* On U3, all memory is contigous, so we can move this - * out of the loop. 
- */ - while (npages--) { - rpn = virt_to_abs(uaddr) >> DART_PAGE_SHIFT; - - *(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK); - - rpn++; - uaddr += DART_PAGE_SIZE; - } - - dart_dirty = 1; -} - - -static void dart_free(struct iommu_table *tbl, long index, long npages) -{ - unsigned int *dp; - - /* We don't worry about flushing the TLB cache. The only drawback of - * not doing it is that we won't catch buggy device drivers doing - * bad DMAs, but then no 32-bit architecture ever does either. - */ - - DBG("dart: free at: %lx, %lx\n", index, npages); - - index <<= DART_PAGE_FACTOR; - npages <<= DART_PAGE_FACTOR; - - dp = ((unsigned int *)tbl->it_base) + index; - - while (npages--) - *(dp++) = dart_emptyval; -} - - -static int dart_init(struct device_node *dart_node) -{ - unsigned int regword; - unsigned int i; - unsigned long tmp; - - if (dart_tablebase == 0 || dart_tablesize == 0) { - printk(KERN_INFO "U3-DART: table not allocated, using direct DMA\n"); - return -ENODEV; - } - - /* Make sure nothing from the DART range remains in the CPU cache - * from a previous mapping that existed before the kernel took - * over - */ - flush_dcache_phys_range(dart_tablebase, dart_tablebase + dart_tablesize); - - /* Allocate a spare page to map all invalid DART pages. We need to do - * that to work around what looks like a problem with the HT bridge - * prefetching into invalid pages and corrupting data - */ - tmp = lmb_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE); - if (!tmp) - panic("U3-DART: Cannot allocate spare page!"); - dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) & DARTMAP_RPNMASK); - - /* Map in DART registers. 
FIXME: Use device node to get base address */ - dart = ioremap(DART_BASE, 0x7000); - if (dart == NULL) - panic("U3-DART: Cannot map registers!"); - - /* Set initial control register contents: table base, - * table size and enable bit - */ - regword = DARTCNTL_ENABLE | - ((dart_tablebase >> DART_PAGE_SHIFT) << DARTCNTL_BASE_SHIFT) | - (((dart_tablesize >> DART_PAGE_SHIFT) & DARTCNTL_SIZE_MASK) - << DARTCNTL_SIZE_SHIFT); - dart_vbase = ioremap(virt_to_abs(dart_tablebase), dart_tablesize); - - /* Fill initial table */ - for (i = 0; i < dart_tablesize/4; i++) - dart_vbase[i] = dart_emptyval; - - /* Initialize DART with table base and enable it. */ - out_be32((unsigned int *)dart, regword); - - /* Invalidate DART to get rid of possible stale TLBs */ - dart_tlb_invalidate_all(); - - printk(KERN_INFO "U3/CPC925 DART IOMMU initialized\n"); - - return 0; -} - -static void iommu_table_u3_setup(void) -{ - iommu_table_u3.it_busno = 0; - iommu_table_u3.it_offset = 0; - /* it_size is in number of entries */ - iommu_table_u3.it_size = (dart_tablesize / sizeof(u32)) >> DART_PAGE_FACTOR; - - /* Initialize the common IOMMU code */ - iommu_table_u3.it_base = (unsigned long)dart_vbase; - iommu_table_u3.it_index = 0; - iommu_table_u3.it_blocksize = 1; - iommu_init_table(&iommu_table_u3); - - /* Reserve the last page of the DART to avoid possible prefetch - * past the DART mapped area - */ - set_bit(iommu_table_u3.it_size - 1, iommu_table_u3.it_map); -} - -static void iommu_dev_setup_u3(struct pci_dev *dev) -{ - struct device_node *dn; - - /* We only have one iommu table on the mac for now, which makes - * things simple. 
Setup all PCI devices to point to this table - * - * We must use pci_device_to_OF_node() to make sure that - * we get the real "final" pointer to the device in the - * pci_dev sysdata and not the temporary PHB one - */ - dn = pci_device_to_OF_node(dev); - - if (dn) - PCI_DN(dn)->iommu_table = &iommu_table_u3; -} - -static void iommu_bus_setup_u3(struct pci_bus *bus) -{ - struct device_node *dn; - - if (!iommu_table_u3_inited) { - iommu_table_u3_inited = 1; - iommu_table_u3_setup(); - } - - dn = pci_bus_to_OF_node(bus); - - if (dn) - PCI_DN(dn)->iommu_table = &iommu_table_u3; -} - -static void iommu_dev_setup_null(struct pci_dev *dev) { } -static void iommu_bus_setup_null(struct pci_bus *bus) { } - -void iommu_init_early_u3(void) -{ - struct device_node *dn; - - /* Find the DART in the device-tree */ - dn = of_find_compatible_node(NULL, "dart", "u3-dart"); - if (dn == NULL) - return; - - /* Setup low level TCE operations for the core IOMMU code */ - ppc_md.tce_build = dart_build; - ppc_md.tce_free = dart_free; - ppc_md.tce_flush = dart_flush; - - /* Initialize the DART HW */ - if (dart_init(dn)) { - /* If init failed, use direct iommu and null setup functions */ - ppc_md.iommu_dev_setup = iommu_dev_setup_null; - ppc_md.iommu_bus_setup = iommu_bus_setup_null; - - /* Setup pci_dma ops */ - pci_direct_iommu_init(); - } else { - ppc_md.iommu_dev_setup = iommu_dev_setup_u3; - ppc_md.iommu_bus_setup = iommu_bus_setup_u3; - - /* Setup pci_dma ops */ - pci_iommu_init(); - } -} - - -void __init alloc_u3_dart_table(void) -{ - /* Only reserve DART space if machine has more than 2GB of RAM - * or if requested with iommu=on on cmdline. - */ - if (lmb_end_of_DRAM() <= 0x80000000ull && !iommu_force_on) - return; - - /* 512 pages (2MB) is max DART tablesize. */ - dart_tablesize = 1UL << 21; - /* 16MB (1 << 24) alignment. 
We allocate a full 16Mb chuck since we - * will blow up an entire large page anyway in the kernel mapping - */ - dart_tablebase = (unsigned long) - abs_to_virt(lmb_alloc_base(1UL<<24, 1UL<<24, 0x80000000L)); - - printk(KERN_INFO "U3-DART allocated at: %lx\n", dart_tablebase); -} diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index a8d3bc0a9c5c..5013b1285e22 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -1686,7 +1686,7 @@ pmac_ide_probe(void) #else macio_register_driver(&pmac_ide_macio_driver); pci_register_driver(&pmac_ide_pci_driver); -#endif +#endif } #ifdef CONFIG_BLK_DEV_IDEDMA_PMAC diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index e8378274d710..96226116a646 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -53,7 +53,7 @@ #undef DEBUG_SMU #ifdef DEBUG_SMU -#define DPRINTK(fmt, args...) do { udbg_printf(KERN_DEBUG fmt , ##args); } while (0) +#define DPRINTK(fmt, args...) do { printk(KERN_DEBUG fmt , ##args); } while (0) #else #define DPRINTK(fmt, args...) do { } while (0) #endif @@ -909,10 +909,13 @@ static struct smu_sdbp_header *smu_create_sdb_partition(int id) struct property *prop; /* First query the partition info */ + DPRINTK("SMU: Query partition infos ... 
(irq=%d)\n", smu->db_irq); smu_queue_simple(&cmd, SMU_CMD_PARTITION_COMMAND, 2, smu_done_complete, &comp, SMU_CMD_PARTITION_LATEST, id); wait_for_completion(&comp); + DPRINTK("SMU: done, status: %d, reply_len: %d\n", + cmd.cmd.status, cmd.cmd.reply_len); /* Partition doesn't exist (or other error) */ if (cmd.cmd.status != 0 || cmd.cmd.reply_len != 6) @@ -975,6 +978,8 @@ struct smu_sdbp_header *__smu_get_sdb_partition(int id, unsigned int *size, sprintf(pname, "sdb-partition-%02x", id); + DPRINTK("smu_get_sdb_partition(%02x)\n", id); + if (interruptible) { int rc; rc = down_interruptible(&smu_part_access); @@ -986,6 +991,7 @@ struct smu_sdbp_header *__smu_get_sdb_partition(int id, unsigned int *size, part = (struct smu_sdbp_header *)get_property(smu->of_node, pname, size); if (part == NULL) { + DPRINTK("trying to extract from SMU ...\n"); part = smu_create_sdb_partition(id); if (part != NULL && size) *size = part->len << 2; diff --git a/include/asm-powerpc/iommu.h b/include/asm-powerpc/iommu.h index f89f06050893..59f062668997 100644 --- a/include/asm-powerpc/iommu.h +++ b/include/asm-powerpc/iommu.h @@ -56,7 +56,7 @@ struct device_node; /* Walks all buses and creates iommu tables */ extern void iommu_setup_pSeries(void); -extern void iommu_setup_u3(void); +extern void iommu_setup_dart(void); /* Frees table for an individual device node */ extern void iommu_free_table(struct device_node *dn); @@ -104,7 +104,7 @@ extern void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle, extern void iommu_init_early_pSeries(void); extern void iommu_init_early_iSeries(void); -extern void iommu_init_early_u3(void); +extern void iommu_init_early_dart(void); #ifdef CONFIG_PCI extern void pci_iommu_init(void); @@ -113,6 +113,6 @@ extern void pci_direct_iommu_init(void); static inline void pci_iommu_init(void) { } #endif -extern void alloc_u3_dart_table(void); +extern void alloc_dart_table(void); #endif /* _ASM_IOMMU_H */ diff --git a/include/asm-powerpc/mpic.h 
b/include/asm-powerpc/mpic.h index 6ce27e1b5646..bf7e71793205 100644 --- a/include/asm-powerpc/mpic.h +++ b/include/asm-powerpc/mpic.h @@ -117,8 +117,9 @@ typedef int (*mpic_cascade_t)(struct pt_regs *regs, void *data); struct mpic_irq_fixup { u8 __iomem *base; + u8 __iomem *applebase; u32 data; - unsigned int irq; + unsigned int index; }; #endif /* CONFIG_MPIC_BROKEN_U3 */ diff --git a/include/asm-powerpc/pmac_feature.h b/include/asm-powerpc/pmac_feature.h index e9683bcff19b..f6997ed5179e 100644 --- a/include/asm-powerpc/pmac_feature.h +++ b/include/asm-powerpc/pmac_feature.h @@ -121,6 +121,7 @@ #define PMAC_TYPE_IMAC_G5 0x152 /* iMac G5 */ #define PMAC_TYPE_XSERVE_G5 0x153 /* Xserve G5 */ #define PMAC_TYPE_UNKNOWN_K2 0x19f /* Any other K2 based */ +#define PMAC_TYPE_UNKNOWN_SHASTA 0x19e /* Any other Shasta based */ /* * Motherboard flags @@ -341,6 +342,7 @@ enum { macio_pangea, macio_intrepid, macio_keylargo2, + macio_shasta, }; struct macio_chip diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index e2a089b051ed..d27a78b71297 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -196,6 +196,7 @@ #define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */ #define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */ #define PCI_CAP_ID_PCIX 0x07 /* PCI-X */ +#define PCI_CAP_ID_HT_IRQCONF 0x08 /* HyperTransport IRQ Configuration */ #define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ #define PCI_CAP_ID_EXP 0x10 /* PCI Express */ #define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ -- cgit v1.2.3-71-gd317 From b792de39d892e06b18ddea85be076bae123d6bf6 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Sun, 8 Jan 2006 01:00:32 -0800 Subject: [PATCH] Fix compilation with CONFIG_MEMORY_HOTPLUG=y and gcc41. Fix compilation with CONFIG_MEMORY_HOTPLUG=y and gcc41. Also remove unneeded declations, add a public function. 
drivers/base/memory.c:53: error: static declaration of 'register_memory_notifier' follows non-static declaration include/linux/memory.h:85: error: previous declaration of 'register_memory_notifier' was here drivers/base/memory.c:58: error: static declaration of 'unregister_memory_notifier' follows non-static declaration include/linux/memory.h:86: error: previous declaration of 'unregister_memory_notifier' was here drivers/base/memory.c:68: error: static declaration of 'register_memory' follows non-static declaration include/linux/memory.h:73: error: previous declaration of 'register_memory' was here Signed-off-by: Olaf Hering Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memory.h b/include/linux/memory.h index dc4081b6f161..e251dc43d0f5 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -70,21 +70,15 @@ static inline void unregister_memory_notifier(struct notifier_block *nb) { } #else -extern int register_memory(struct memory_block *, struct mem_section *section, struct node *); extern int register_new_memory(struct mem_section *); extern int unregister_memory_section(struct mem_section *); extern int memory_dev_init(void); -extern int register_memory_notifier(struct notifier_block *nb); -extern void unregister_memory_notifier(struct notifier_block *nb); +extern int remove_memory_block(unsigned long, struct mem_section *, int); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION< Date: Sun, 8 Jan 2006 01:00:33 -0800 Subject: [PATCH] slab: remove unused align parameter from alloc_percpu __alloc_percpu and alloc_percpu both take an 'align' argument which is completely ignored. snmp6_mib_init() in net/ipv6/af_inet6.c attempts to use it, but it will be ignored. Therefore, remove the 'align' argument and fixup the lone caller. 
Signed-off-by: Matthew Dobson Acked-by: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 7 +++---- mm/slab.c | 3 +-- net/ipv6/af_inet6.c | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index fb8d2d24e4bb..20317d88deba 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -33,14 +33,14 @@ struct percpu_data { (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) -extern void *__alloc_percpu(size_t size, size_t align); +extern void *__alloc_percpu(size_t size); extern void free_percpu(const void *); #else /* CONFIG_SMP */ #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) -static inline void *__alloc_percpu(size_t size, size_t align) +static inline void *__alloc_percpu(size_t size) { void *ret = kmalloc(size, GFP_KERNEL); if (ret) @@ -55,7 +55,6 @@ static inline void free_percpu(const void *ptr) #endif /* CONFIG_SMP */ /* Simple wrapper for the common case: zeros memory. */ -#define alloc_percpu(type) \ - ((type *)(__alloc_percpu(sizeof(type), __alignof__(type)))) +#define alloc_percpu(type) ((type *)(__alloc_percpu(sizeof(type)))) #endif /* __LINUX_PERCPU_H */ diff --git a/mm/slab.c b/mm/slab.c index e5ec26e0c460..eb70fddf2059 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2944,9 +2944,8 @@ EXPORT_SYMBOL(__kmalloc); * Objects should be dereferenced using the per_cpu_ptr macro only. * * @size: how many bytes of memory are required. - * @align: the alignment, which can't be greater than SMP_CACHE_BYTES. 
*/ -void *__alloc_percpu(size_t size, size_t align) +void *__alloc_percpu(size_t size) { int i; struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 68afc53be662..25c3fe5005d9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -689,11 +689,11 @@ snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) if (ptr == NULL) return -EINVAL; - ptr[0] = __alloc_percpu(mibsize, mibalign); + ptr[0] = __alloc_percpu(mibsize); if (!ptr[0]) goto err0; - ptr[1] = __alloc_percpu(mibsize, mibalign); + ptr[1] = __alloc_percpu(mibsize); if (!ptr[1]) goto err1; -- cgit v1.2.3-71-gd317 From 9d0243bca345d5ce25d3f4b74b7facb3a6df1232 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 8 Jan 2006 01:00:39 -0800 Subject: [PATCH] drop-pagecache Add /proc/sys/vm/drop_caches. When written to, this will cause the kernel to discard as much pagecache and/or reclaimable slab objects as it can. This operation requires root permissions. It won't drop dirty data, so the user should run `sync' first. Caveats: a) Holds inode_lock for exorbitant amounts of time. b) Needs to be taught about NUMA nodes: propagate these all the way through so the discarding can be controlled on a per-node basis. This is a debugging feature: useful for getting consistent results between filesystem benchmarks. We could possibly put it under a config option, but it's less than 300 bytes. 
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 17 ++++++++++ Documentation/sysctl/vm.txt | 3 +- fs/Makefile | 2 +- fs/drop_caches.c | 68 ++++++++++++++++++++++++++++++++++++++ include/linux/mm.h | 7 ++++ include/linux/sysctl.h | 1 + kernel/sysctl.c | 10 ++++++ mm/truncate.c | 1 - mm/vmscan.c | 3 +- 9 files changed, 107 insertions(+), 5 deletions(-) create mode 100644 fs/drop_caches.c (limited to 'include/linux') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index d4773565ea2f..a4dcf42c2fd9 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1302,6 +1302,23 @@ VM has token based thrashing control mechanism and uses the token to prevent unnecessary page faults in thrashing situation. The unit of the value is second. The value would be useful to tune thrashing behavior. +drop_caches +----------- + +Writing to this will cause the kernel to drop clean caches, dentries and +inodes from memory, causing that memory to become free. + +To free pagecache: + echo 1 > /proc/sys/vm/drop_caches +To free dentries and inodes: + echo 2 > /proc/sys/vm/drop_caches +To free pagecache, dentries and inodes: + echo 3 > /proc/sys/vm/drop_caches + +As this is a non-destructive operation and dirty objects are not freeable, the +user should run `sync' first. 
+ + 2.5 /proc/sys/dev - Device specific parameters ---------------------------------------------- diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 2f1aae32a5d9..89ba1a42a17d 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -26,12 +26,13 @@ Currently, these files are in /proc/sys/vm: - min_free_kbytes - laptop_mode - block_dump +- drop-caches ============================================================== dirty_ratio, dirty_background_ratio, dirty_expire_centisecs, dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode, -block_dump, swap_token_timeout: +block_dump, swap_token_timeout, drop-caches: See Documentation/filesystems/proc.txt diff --git a/fs/Makefile b/fs/Makefile index 73676111ebbe..35e9aec608e4 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \ - ioprio.o pnode.o + ioprio.o pnode.o drop_caches.o obj-$(CONFIG_INOTIFY) += inotify.o obj-$(CONFIG_EPOLL) += eventpoll.o diff --git a/fs/drop_caches.c b/fs/drop_caches.c new file mode 100644 index 000000000000..4e4762389bdc --- /dev/null +++ b/fs/drop_caches.c @@ -0,0 +1,68 @@ +/* + * Implement the manual drop-all-pagecache function + */ + +#include +#include +#include +#include +#include +#include + +/* A global variable is a bit ugly, but it keeps the code simple */ +int sysctl_drop_caches; + +static void drop_pagecache_sb(struct super_block *sb) +{ + struct inode *inode; + + spin_lock(&inode_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + if (inode->i_state & (I_FREEING|I_WILL_FREE)) + continue; + invalidate_inode_pages(inode->i_mapping); + } + spin_unlock(&inode_lock); +} + +void drop_pagecache(void) +{ + struct super_block *sb; + + spin_lock(&sb_lock); +restart: + 
list_for_each_entry(sb, &super_blocks, s_list) { + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + if (sb->s_root) + drop_pagecache_sb(sb); + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; + } + spin_unlock(&sb_lock); +} + +void drop_slab(void) +{ + int nr_objects; + + do { + nr_objects = shrink_slab(1000, GFP_KERNEL, 1000); + } while (nr_objects > 10); +} + +int drop_caches_sysctl_handler(ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *length, loff_t *ppos) +{ + proc_dointvec_minmax(table, write, file, buffer, length, ppos); + if (write) { + if (sysctl_drop_caches & 1) + drop_pagecache(); + if (sysctl_drop_caches & 2) + drop_slab(); + } + return 0; +} diff --git a/include/linux/mm.h b/include/linux/mm.h index bc01fff3aa01..83c651f25188 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1036,5 +1036,12 @@ int in_gate_area_no_task(unsigned long addr); /* /proc//oom_adj set to -17 protects from the oom-killer */ #define OOM_DISABLE -17 +int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); +int shrink_slab(unsigned long scanned, gfp_t gfp_mask, + unsigned long lru_pages); +void drop_pagecache(void); +void drop_slab(void); + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index a9b80fc7f0f3..4cd267fe87ec 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -180,6 +180,7 @@ enum VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */ VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ + VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ }; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a85047bb5739..8dcf6fd5b0f9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -68,6 +68,7 @@ extern int min_free_kbytes; extern 
int printk_ratelimit_jiffies; extern int printk_ratelimit_burst; extern int pid_max_min, pid_max_max; +extern int sysctl_drop_caches; #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) int unknown_nmi_panic; @@ -774,6 +775,15 @@ static ctl_table vm_table[] = { .proc_handler = &lowmem_reserve_ratio_sysctl_handler, .strategy = &sysctl_intvec, }, + { + .ctl_name = VM_DROP_PAGECACHE, + .procname = "drop_caches", + .data = &sysctl_drop_caches, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = drop_caches_sysctl_handler, + .strategy = &sysctl_intvec, + }, { .ctl_name = VM_MIN_FREE_KBYTES, .procname = "min_free_kbytes", diff --git a/mm/truncate.c b/mm/truncate.c index 7dee32745901..b1a463d0fe71 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -249,7 +249,6 @@ unlock: break; } pagevec_release(&pvec); - cond_resched(); } return ret; } diff --git a/mm/vmscan.c b/mm/vmscan.c index be8235fb1939..428c5801d4b4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -180,8 +180,7 @@ EXPORT_SYMBOL(remove_shrinker); * * Returns the number of slab objects which we shrunk. */ -static int shrink_slab(unsigned long scanned, gfp_t gfp_mask, - unsigned long lru_pages) +int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages) { struct shrinker *shrinker; int ret = 0; -- cgit v1.2.3-71-gd317 From 8ad4b1fb8205340dba16b63467bb23efc27264d6 Mon Sep 17 00:00:00 2001 From: Rohit Seth Date: Sun, 8 Jan 2006 01:00:40 -0800 Subject: [PATCH] Make high and batch sizes of per_cpu_pagelists configurable As recently there has been lot of traffic on the right values for batch and high water marks for per_cpu_pagelists. This patch makes these two variables configurable through /proc interface. A new tunable /proc/sys/vm/percpu_pagelist_fraction is added. This entry controls the fraction of pages at most in each zone that are allocated for each per cpu page list. The min value for this is 8. 
It means that we don't allow more than 1/8th of pages in each zone to be allocated in any single per_cpu_pagelist. The batch value of each per cpu pagelist is also updated as a result. It is set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8) Signed-off-by: Rohit Seth Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 17 ++++++++++++++++ include/linux/mmzone.h | 2 ++ include/linux/sysctl.h | 1 + kernel/sysctl.c | 12 +++++++++++ mm/page_alloc.c | 49 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 81 insertions(+) (limited to 'include/linux') diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 89ba1a42a17d..6910c0136f8d 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -103,3 +103,20 @@ This is used to force the Linux VM to keep a minimum number of kilobytes free. The VM uses this number to compute a pages_min value for each lowmem zone in the system. Each lowmem zone gets a number of reserved free pages based proportionally on its size. + +============================================================== + +percpu_pagelist_fraction + +This is the fraction of pages at most (high mark pcp->high) in each zone that +are allocated for each per cpu page list. The min value for this is 8. It +means that we don't allow more than 1/8th of pages in each zone to be +allocated in any single per_cpu_pagelist. This entry only changes the value +of hot per cpu pagelists. User can specify a number like 100 to allocate +1/100th of each zone to each per cpu page list. + +The batch value of each per cpu pagelist is also updated as a result. It is +set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8) + +The initial value is zero. Kernel does not use this value at boot time to set +the high water marks for each per cpu page list. 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c34f4a2c62f8..2a89c132ba9c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -437,6 +437,8 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *, extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, + void __user *, size_t *, loff_t *); #include /* Returns the number of the current Node. */ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 4cd267fe87ec..7f472127b7b5 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -181,6 +181,7 @@ enum VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */ VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ + VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ }; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8dcf6fd5b0f9..03b0598f2369 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -69,6 +69,7 @@ extern int printk_ratelimit_jiffies; extern int printk_ratelimit_burst; extern int pid_max_min, pid_max_max; extern int sysctl_drop_caches; +extern int percpu_pagelist_fraction; #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) int unknown_nmi_panic; @@ -79,6 +80,7 @@ extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *, /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ static int maxolduid = 65535; static int minolduid; +static int min_percpu_pagelist_fract = 8; static int ngroups_max = NGROUPS_MAX; @@ -794,6 +796,16 @@ static ctl_table vm_table[] = { .strategy = &sysctl_intvec, .extra1 = &zero, }, + { + .ctl_name = VM_PERCPU_PAGELIST_FRACTION, + .procname = "percpu_pagelist_fraction", + .data = 
&percpu_pagelist_fraction, + .maxlen = sizeof(percpu_pagelist_fraction), + .mode = 0644, + .proc_handler = &percpu_pagelist_fraction_sysctl_handler, + .strategy = &sysctl_intvec, + .extra1 = &min_percpu_pagelist_fract, + }, #ifdef CONFIG_MMU { .ctl_name = VM_MAX_MAP_COUNT, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5eeeadd9f66a..2c46f697e8ff 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -53,6 +53,7 @@ struct pglist_data *pgdat_list __read_mostly; unsigned long totalram_pages __read_mostly; unsigned long totalhigh_pages __read_mostly; long nr_swap_pages; +int percpu_pagelist_fraction; static void fastcall free_hot_cold_page(struct page *page, int cold); @@ -1831,6 +1832,24 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) INIT_LIST_HEAD(&pcp->list); } +/* + * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist + * to the value high for the pageset p. + */ + +static void setup_pagelist_highmark(struct per_cpu_pageset *p, + unsigned long high) +{ + struct per_cpu_pages *pcp; + + pcp = &p->pcp[0]; /* hot list */ + pcp->high = high; + pcp->batch = max(1UL, high/4); + if ((high/4) > (PAGE_SHIFT * 8)) + pcp->batch = PAGE_SHIFT * 8; +} + + #ifdef CONFIG_NUMA /* * Boot pageset table. One per cpu which is going to be used for all @@ -1868,6 +1887,10 @@ static int __devinit process_zones(int cpu) goto bad; setup_pageset(zone->pageset[cpu], zone_batchsize(zone)); + + if (percpu_pagelist_fraction) + setup_pagelist_highmark(zone_pcp(zone, cpu), + (zone->present_pages / percpu_pagelist_fraction)); } return 0; @@ -2567,6 +2590,32 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, return 0; } +/* + * percpu_pagelist_fraction - changes the pcp->high for each zone on each + * cpu. It is the fraction of total pages in each zone that a hot per cpu pagelist + * can have before it gets flushed back to buddy allocator. 
+ */ + +int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *length, loff_t *ppos) +{ + struct zone *zone; + unsigned int cpu; + int ret; + + ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + if (!write || (ret == -EINVAL)) + return ret; + for_each_zone(zone) { + for_each_online_cpu(cpu) { + unsigned long high; + high = zone->present_pages / percpu_pagelist_fraction; + setup_pagelist_highmark(zone_pcp(zone, cpu), high); + } + } + return 0; +} + __initdata int hashdist = HASHDIST_DEFAULT; #ifdef CONFIG_NUMA -- cgit v1.2.3-71-gd317 From 15316ba81aee6775d6079fb46c66c801989e7d10 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 8 Jan 2006 01:00:43 -0800 Subject: [PATCH] add schedule_on_each_cpu() swap migration's isolate_lru_page() currently uses an IPI to notify other processors that the lru caches need to be drained if the page cannot be found on the LRU. The IPI interrupt may interrupt a processor that is just processing lru requests and cause a race condition. This patch introduces a new function run_on_each_cpu() that uses the keventd() to run the LRU draining on each processor. Processors disable preemption when dealing the LRU caches (these are per processor) and thus executing LRU draining from another process is safe. Thanks to Lee Schermerhorn for finding this race condition. 
Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 1 + kernel/workqueue.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ac39d04d027c..86b111300231 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -65,6 +65,7 @@ extern int FASTCALL(schedule_work(struct work_struct *work)); extern int FASTCALL(schedule_delayed_work(struct work_struct *work, unsigned long delay)); extern int schedule_delayed_work_on(int cpu, struct work_struct *work, unsigned long delay); +extern int schedule_on_each_cpu(void (*func)(void *info), void *info); extern void flush_scheduled_work(void); extern int current_is_keventd(void); extern int keventd_up(void); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2bd5aee1c736..62d47220696a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -419,6 +419,25 @@ int schedule_delayed_work_on(int cpu, return ret; } +int schedule_on_each_cpu(void (*func) (void *info), void *info) +{ + int cpu; + struct work_struct *work; + + work = kmalloc(NR_CPUS * sizeof(struct work_struct), GFP_KERNEL); + + if (!work) + return -ENOMEM; + for_each_online_cpu(cpu) { + INIT_WORK(work + cpu, func, info); + __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), + work + cpu); + } + flush_workqueue(keventd_wq); + kfree(work); + return 0; +} + void flush_scheduled_work(void) { flush_workqueue(keventd_wq); -- cgit v1.2.3-71-gd317 From 21eac81f252fe31c3cf64b805a1e8652192f3a3b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 8 Jan 2006 01:00:45 -0800 Subject: [PATCH] Swap Migration V5: LRU operations This is the start of the `swap migration' patch series. Swap migration allows the moving of the physical location of pages between nodes in a numa system while the process is running. 
This means that the virtual addresses that the process sees do not change. However, the system rearranges the physical location of those pages. The main intent of page migration patches here is to reduce the latency of memory access by moving pages near to the processor where the process accessing that memory is running. The patchset allows a process to manually relocate the node on which its pages are located through the MF_MOVE and MF_MOVE_ALL options while setting a new memory policy. The pages of process can also be relocated from another process using the sys_migrate_pages() function call. Requires CAP_SYS_ADMIN. The migrate_pages function call takes two sets of nodes and moves pages of a process that are located on the from nodes to the destination nodes. Manual migration is very useful if for example the scheduler has relocated a process to a processor on a distant node. A batch scheduler or an administrator can detect the situation and move the pages of the process nearer to the new processor. sys_migrate_pages() could be used on non-numa machines as well, to force all of a particualr process's pages out to swap, if someone thinks that's useful. Larger installations usually partition the system using cpusets into sections of nodes. Paul has equipped cpusets with the ability to move pages when a task is moved to another cpuset. This allows automatic control over locality of a process. If a task is moved to a new cpuset then also all its pages are moved with it so that the performance of the process does not sink dramatically (as is the case today). Swap migration works by simply evicting the page. The pages must be faulted back in. The pages are then typically reallocated by the system near the node where the process is executing. For swap migration the destination of the move is controlled by the allocation policy. Cpusets set the allocation policy before calling sys_migrate_pages() in order to move the pages as intended. 
No allocation policy changes are performed for sys_migrate_pages(). This means that the pages may not faulted in to the specified nodes if no allocation policy was set by other means. The pages will just end up near the node where the fault occurred. There's another patch series in the pipeline which implements "direct migration". The direct migration patchset extends the migration functionality to avoid going through swap. The destination node of the relation is controllable during the actual moving of pages. The crutch of using the allocation policy to relocate is not necessary and the pages are moved directly to the target. Its also faster since swap is not used. And sys_migrate_pages() can then move pages directly to the specified node. Implement functions to isolate pages from the LRU and put them back later. This patch: An earlier implementation was provided by Hirokazu Takahashi and IWAMOTO Toshihiro for the memory hotplug project. From: Magnus This breaks out isolate_lru_page() and putpack_lru_page(). Needed for swap migration. Signed-off-by: Magnus Damm Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 22 ++++++++++ include/linux/swap.h | 3 ++ mm/vmscan.c | 100 ++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 112 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 47762ca695a5..49cc68af01f8 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -38,3 +38,25 @@ del_page_from_lru(struct zone *zone, struct page *page) zone->nr_inactive--; } } + +/* + * Isolate one page from the LRU lists. 
+ * + * - zone->lru_lock must be held + */ +static inline int __isolate_lru_page(struct page *page) +{ + if (unlikely(!TestClearPageLRU(page))) + return 0; + + if (get_page_testone(page)) { + /* + * It is being freed elsewhere + */ + __put_page(page); + SetPageLRU(page); + return -ENOENT; + } + + return 1; +} diff --git a/include/linux/swap.h b/include/linux/swap.h index 556617bcf7ac..a49112536c02 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -175,6 +175,9 @@ extern int try_to_free_pages(struct zone **, gfp_t); extern int shrink_all_memory(int); extern int vm_swappiness; +extern int isolate_lru_page(struct page *p); +extern int putback_lru_pages(struct list_head *l); + #ifdef CONFIG_MMU /* linux/mm/shmem.c */ extern int shmem_unuse(swp_entry_t entry, struct page *page); diff --git a/mm/vmscan.c b/mm/vmscan.c index 428c5801d4b4..261a56ee11b6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -593,20 +593,18 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src, page = lru_to_page(src); prefetchw_prev_lru_page(page, src, flags); - if (!TestClearPageLRU(page)) - BUG(); - list_del(&page->lru); - if (get_page_testone(page)) { - /* - * It is being freed elsewhere - */ - __put_page(page); - SetPageLRU(page); - list_add(&page->lru, src); - continue; - } else { - list_add(&page->lru, dst); + switch (__isolate_lru_page(page)) { + case 1: + /* Succeeded to isolate page */ + list_move(&page->lru, dst); nr_taken++; + break; + case -ENOENT: + /* Not possible to isolate */ + list_move(&page->lru, src); + break; + default: + BUG(); } } @@ -614,6 +612,48 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src, return nr_taken; } +static void lru_add_drain_per_cpu(void *dummy) +{ + lru_add_drain(); +} + +/* + * Isolate one page from the LRU lists and put it on the + * indicated list. Do necessary cache draining if the + * page is not on the LRU lists yet. 
+ * + * Result: + * 0 = page not on LRU list + * 1 = page removed from LRU list and added to the specified list. + * -ENOENT = page is being freed elsewhere. + */ +int isolate_lru_page(struct page *page) +{ + int rc = 0; + struct zone *zone = page_zone(page); + +redo: + spin_lock_irq(&zone->lru_lock); + rc = __isolate_lru_page(page); + if (rc == 1) { + if (PageActive(page)) + del_page_from_active_list(zone, page); + else + del_page_from_inactive_list(zone, page); + } + spin_unlock_irq(&zone->lru_lock); + if (rc == 0) { + /* + * Maybe this page is still waiting for a cpu to drain it + * from one of the lru lists? + */ + rc = schedule_on_each_cpu(lru_add_drain_per_cpu, NULL); + if (rc == 0 && PageLRU(page)) + goto redo; + } + return rc; +} + /* * shrink_cache() adds the number of pages reclaimed to sc->nr_reclaimed */ @@ -679,6 +719,40 @@ done: pagevec_release(&pvec); } +static inline void move_to_lru(struct page *page) +{ + list_del(&page->lru); + if (PageActive(page)) { + /* + * lru_cache_add_active checks that + * the PG_active bit is off. + */ + ClearPageActive(page); + lru_cache_add_active(page); + } else { + lru_cache_add(page); + } + put_page(page); +} + +/* + * Add isolated pages on the list back to the LRU + * + * returns the number of pages put back. + */ +int putback_lru_pages(struct list_head *l) +{ + struct page *page; + struct page *page2; + int count = 0; + + list_for_each_entry_safe(page, page2, l, lru) { + move_to_lru(page); + count++; + } + return count; +} + /* * This moves pages from the active list to the inactive list. * -- cgit v1.2.3-71-gd317 From 930d915252edda7042c944ed3c30194a2f9fe163 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 8 Jan 2006 01:00:47 -0800 Subject: [PATCH] Swap Migration V5: PF_SWAPWRITE to allow writing to swap Add PF_SWAPWRITE to control a processes permission to write to swap. 
- Use PF_SWAPWRITE in may_write_to_queue() instead of checking for kswapd and pdflush - Set PF_SWAPWRITE flag for kswapd and pdflush Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + mm/pdflush.c | 2 +- mm/vmscan.c | 6 ++---- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7da33619d5d0..a74662077d60 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -895,6 +895,7 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) #define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */ #define PF_BORROWED_MM 0x00400000 /* I am a kthread doing use_mm */ #define PF_RANDOMIZE 0x00800000 /* randomize virtual address space */ +#define PF_SWAPWRITE 0x01000000 /* Allowed to write to swap */ /* * Only the _current_ task can read/write to tsk->flags, but other diff --git a/mm/pdflush.c b/mm/pdflush.c index 52822c98c489..c4b6d0afd736 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -90,7 +90,7 @@ struct pdflush_work { static int __pdflush(struct pdflush_work *my_work) { - current->flags |= PF_FLUSHER; + current->flags |= PF_FLUSHER | PF_SWAPWRITE; my_work->fn = NULL; my_work->who = current; INIT_LIST_HEAD(&my_work->list); diff --git a/mm/vmscan.c b/mm/vmscan.c index 261a56ee11b6..6c30a8c59795 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -268,9 +268,7 @@ static inline int is_page_cache_freeable(struct page *page) static int may_write_to_queue(struct backing_dev_info *bdi) { - if (current_is_kswapd()) - return 1; - if (current_is_pdflush()) /* This is unlikely, but why not... */ + if (current->flags & PF_SWAPWRITE) return 1; if (!bdi_write_congested(bdi)) return 1; @@ -1299,7 +1297,7 @@ static int kswapd(void *p) * us from recursively trying to free more memory as we're * trying to free the first piece of memory in the first place). 
*/ - tsk->flags |= PF_MEMALLOC|PF_KSWAPD; + tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; order = 0; for ( ; ; ) { -- cgit v1.2.3-71-gd317 From 49d2e9cc4544369635cd6f4ef6d5bb0f757079a7 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 8 Jan 2006 01:00:48 -0800 Subject: [PATCH] Swap Migration V5: migrate_pages() function This adds the basic page migration function with a minimal implementation that only allows the eviction of pages to swap space. Page eviction and migration may be useful to migrate pages, to suspend programs or for remapping single pages (useful for faulty pages or pages with soft ECC failures) The process is as follows: The function wanting to migrate pages must first build a list of pages to be migrated or evicted and take them off the lru lists via isolate_lru_page(). isolate_lru_page determines that a page is freeable based on the LRU bit set. Then the actual migration or swapout can happen by calling migrate_pages(). migrate_pages does its best to migrate or swapout the pages and does multiple passes over the list. Some pages may only be swappable if they are not dirty. migrate_pages may start writing out dirty pages in the initial passes over the pages. However, migrate_pages may not be able to migrate or evict all pages for a variety of reasons. The remaining pages may be returned to the LRU lists using putback_lru_pages(). Changelog V4->V5: - Use the lru caches to return pages to the LRU Changelog V3->V4: - Restructure code so that applying patches to support full migration does require minimal changes. Rename swapout_pages() to migrate_pages(). 
Changelog V2->V3: - Extract common code from shrink_list() and swapout_pages() Signed-off-by: Mike Kravetz Signed-off-by: Christoph Lameter Cc: "Michael Kerrisk" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 + mm/vmscan.c | 214 +++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 182 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index a49112536c02..893096e67bdb 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -178,6 +178,8 @@ extern int vm_swappiness; extern int isolate_lru_page(struct page *p); extern int putback_lru_pages(struct list_head *l); +extern int migrate_pages(struct list_head *l, struct list_head *t); + #ifdef CONFIG_MMU /* linux/mm/shmem.c */ extern int shmem_unuse(swp_entry_t entry, struct page *page); diff --git a/mm/vmscan.c b/mm/vmscan.c index 6c30a8c59795..a537a7f16357 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -373,6 +373,43 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) return PAGE_CLEAN; } +static int remove_mapping(struct address_space *mapping, struct page *page) +{ + if (!mapping) + return 0; /* truncate got there first */ + + write_lock_irq(&mapping->tree_lock); + + /* + * The non-racy check for busy page. It is critical to check + * PageDirty _after_ making sure that the page is freeable and + * not in use by anybody. 
(pagecache + us == 2) + */ + if (unlikely(page_count(page) != 2)) + goto cannot_free; + smp_rmb(); + if (unlikely(PageDirty(page))) + goto cannot_free; + + if (PageSwapCache(page)) { + swp_entry_t swap = { .val = page_private(page) }; + __delete_from_swap_cache(page); + write_unlock_irq(&mapping->tree_lock); + swap_free(swap); + __put_page(page); /* The pagecache ref */ + return 1; + } + + __remove_from_page_cache(page); + write_unlock_irq(&mapping->tree_lock); + __put_page(page); + return 1; + +cannot_free: + write_unlock_irq(&mapping->tree_lock); + return 0; +} + /* * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed */ @@ -504,36 +541,8 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) goto free_it; } - if (!mapping) - goto keep_locked; /* truncate got there first */ - - write_lock_irq(&mapping->tree_lock); - - /* - * The non-racy check for busy page. It is critical to check - * PageDirty _after_ making sure that the page is freeable and - * not in use by anybody. 
(pagecache + us == 2) - */ - if (unlikely(page_count(page) != 2)) - goto cannot_free; - smp_rmb(); - if (unlikely(PageDirty(page))) - goto cannot_free; - -#ifdef CONFIG_SWAP - if (PageSwapCache(page)) { - swp_entry_t swap = { .val = page_private(page) }; - __delete_from_swap_cache(page); - write_unlock_irq(&mapping->tree_lock); - swap_free(swap); - __put_page(page); /* The pagecache ref */ - goto free_it; - } -#endif /* CONFIG_SWAP */ - - __remove_from_page_cache(page); - write_unlock_irq(&mapping->tree_lock); - __put_page(page); + if (!remove_mapping(mapping, page)) + goto keep_locked; free_it: unlock_page(page); @@ -542,10 +551,6 @@ free_it: __pagevec_release_nonlru(&freed_pvec); continue; -cannot_free: - write_unlock_irq(&mapping->tree_lock); - goto keep_locked; - activate_locked: SetPageActive(page); pgactivate++; @@ -563,6 +568,147 @@ keep: return reclaimed; } +/* + * swapout a single page + * page is locked upon entry, unlocked on exit + * + * return codes: + * 0 = complete + * 1 = retry + */ +static int swap_page(struct page *page) +{ + struct address_space *mapping = page_mapping(page); + + if (page_mapped(page) && mapping) + if (try_to_unmap(page) != SWAP_SUCCESS) + goto unlock_retry; + + if (PageDirty(page)) { + /* Page is dirty, try to write it out here */ + switch(pageout(page, mapping)) { + case PAGE_KEEP: + case PAGE_ACTIVATE: + goto unlock_retry; + + case PAGE_SUCCESS: + goto retry; + + case PAGE_CLEAN: + ; /* try to free the page below */ + } + } + + if (PagePrivate(page)) { + if (!try_to_release_page(page, GFP_KERNEL) || + (!mapping && page_count(page) == 1)) + goto unlock_retry; + } + + if (remove_mapping(mapping, page)) { + /* Success */ + unlock_page(page); + return 0; + } + +unlock_retry: + unlock_page(page); + +retry: + return 1; +} +/* + * migrate_pages + * + * Two lists are passed to this function. The first list + * contains the pages isolated from the LRU to be migrated. 
+ * The second list contains new pages that the pages isolated + * can be moved to. If the second list is NULL then all + * pages are swapped out. + * + * The function returns after 10 attempts or if no pages + * are movable anymore because t has become empty + * or no retryable pages exist anymore. + * + * SIMPLIFIED VERSION: This implementation of migrate_pages + * is only swapping out pages and never touches the second + * list. The direct migration patchset + * extends this function to avoid the use of swap. + */ +int migrate_pages(struct list_head *l, struct list_head *t) +{ + int retry; + LIST_HEAD(failed); + int nr_failed = 0; + int pass = 0; + struct page *page; + struct page *page2; + int swapwrite = current->flags & PF_SWAPWRITE; + + if (!swapwrite) + current->flags |= PF_SWAPWRITE; + +redo: + retry = 0; + + list_for_each_entry_safe(page, page2, l, lru) { + cond_resched(); + + /* + * Skip locked pages during the first two passes to give the + * functions holding the lock time to release the page. Later we use + * lock_page to have a higher chance of acquiring the lock. + */ + if (pass > 2) + lock_page(page); + else + if (TestSetPageLocked(page)) + goto retry_later; + + /* + * Only wait on writeback if we have already done a pass where + * we we may have triggered writeouts for lots of pages. + */ + if (pass > 0) + wait_on_page_writeback(page); + else + if (PageWriteback(page)) { + unlock_page(page); + goto retry_later; + } + +#ifdef CONFIG_SWAP + if (PageAnon(page) && !PageSwapCache(page)) { + if (!add_to_swap(page)) { + unlock_page(page); + list_move(&page->lru, &failed); + nr_failed++; + continue; + } + } +#endif /* CONFIG_SWAP */ + + /* + * Page is properly locked and writeback is complete. + * Try to migrate the page. 
+ */ + if (swap_page(page)) { +retry_later: + retry++; + } + } + if (retry && pass++ < 10) + goto redo; + + if (!swapwrite) + current->flags &= ~PF_SWAPWRITE; + + if (!list_empty(&failed)) + list_splice(&failed, l); + + return nr_failed + retry; +} + /* * zone->lru_lock is heavily contended. Some of the functions that * shrink the lists perform better by taking out a batch of pages -- cgit v1.2.3-71-gd317 From 7cbe34cf86c673503b177ff47cfa2c7030dabb50 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 8 Jan 2006 01:00:49 -0800 Subject: [PATCH] Swap Migration V5: Add CONFIG_MIGRATION for page migration support Include page migration if the system is NUMA or having a memory model that allows distinct areas of memory (SPARSEMEM, DISCONTIGMEM). And: - Only include lru_add_drain_per_cpu if building for an SMP system. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 ++ mm/Kconfig | 7 +++++++ mm/vmscan.c | 20 +++++++++++--------- 3 files changed, 20 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 893096e67bdb..117add066f00 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -178,7 +178,9 @@ extern int vm_swappiness; extern int isolate_lru_page(struct page *p); extern int putback_lru_pages(struct list_head *l); +#ifdef CONFIG_MIGRATION extern int migrate_pages(struct list_head *l, struct list_head *t); +#endif #ifdef CONFIG_MMU /* linux/mm/shmem.c */ diff --git a/mm/Kconfig b/mm/Kconfig index b3db11f137e0..a9cb80ae6409 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -132,3 +132,10 @@ config SPLIT_PTLOCK_CPUS default "4096" if ARM && !CPU_CACHE_VIPT default "4096" if PARISC && !PA20 default "4" + +# +# support for page migration +# +config MIGRATION + def_bool y if NUMA || SPARSEMEM || DISCONTIGMEM + depends on SWAP diff --git a/mm/vmscan.c b/mm/vmscan.c index a537a7f16357..58270aea669a 100644 --- a/mm/vmscan.c +++ 
b/mm/vmscan.c @@ -568,6 +568,7 @@ keep: return reclaimed; } +#ifdef CONFIG_MIGRATION /* * swapout a single page * page is locked upon entry, unlocked on exit @@ -656,8 +657,9 @@ redo: /* * Skip locked pages during the first two passes to give the - * functions holding the lock time to release the page. Later we use - * lock_page to have a higher chance of acquiring the lock. + * functions holding the lock time to release the page. Later we + * use lock_page() to have a higher chance of acquiring the + * lock. */ if (pass > 2) lock_page(page); @@ -669,15 +671,15 @@ redo: * Only wait on writeback if we have already done a pass where * we we may have triggered writeouts for lots of pages. */ - if (pass > 0) + if (pass > 0) { wait_on_page_writeback(page); - else + } else { if (PageWriteback(page)) { unlock_page(page); goto retry_later; } + } -#ifdef CONFIG_SWAP if (PageAnon(page) && !PageSwapCache(page)) { if (!add_to_swap(page)) { unlock_page(page); @@ -686,16 +688,15 @@ redo: continue; } } -#endif /* CONFIG_SWAP */ /* * Page is properly locked and writeback is complete. * Try to migrate the page. */ - if (swap_page(page)) { + if (!swap_page(page)) + continue; retry_later: - retry++; - } + retry++; } if (retry && pass++ < 10) goto redo; @@ -708,6 +709,7 @@ retry_later: return nr_failed + retry; } +#endif /* * zone->lru_lock is heavily contended. Some of the functions that -- cgit v1.2.3-71-gd317 From dc9aa5b9d65fd11b1f5246b46ec610ee8b83c6dd Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 8 Jan 2006 01:00:50 -0800 Subject: [PATCH] Swap Migration V5: MPOL_MF_MOVE interface Add page migration support via swap to the NUMA policy layer This patch adds page migration support to the NUMA policy layer. An additional flag MPOL_MF_MOVE is introduced for mbind. If MPOL_MF_MOVE is specified then pages that do not conform to the memory policy will be evicted from memory. When they get pages back in new pages will be allocated following the numa policy. 
Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 3 + mm/mempolicy.c | 155 ++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 138 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index ed00b278cb93..05443a766cb8 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -22,6 +22,9 @@ /* Flags for mbind */ #define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */ +#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */ +#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */ +#define MPOL_MF_INTERNAL (1<<3) /* Internal flags start here */ #ifdef __KERNEL__ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0f1d2b8a952b..9cc6d962831d 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -83,9 +83,14 @@ #include #include #include +#include + #include #include +/* Internal MPOL_MF_xxx flags */ +#define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0) /* Skip checks for continuous vmas */ + static kmem_cache_t *policy_cache; static kmem_cache_t *sn_cache; @@ -174,9 +179,59 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes) return policy; } +/* Check if we are the only process mapping the page in question */ +static inline int single_mm_mapping(struct mm_struct *mm, + struct address_space *mapping) +{ + struct vm_area_struct *vma; + struct prio_tree_iter iter; + int rc = 1; + + spin_lock(&mapping->i_mmap_lock); + vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) + if (mm != vma->vm_mm) { + rc = 0; + goto out; + } + list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) + if (mm != vma->vm_mm) { + rc = 0; + goto out; + } +out: + spin_unlock(&mapping->i_mmap_lock); + return rc; +} + +/* + * Add a page to be migrated to the pagelist + */ +static void migrate_page_add(struct 
vm_area_struct *vma, + struct page *page, struct list_head *pagelist, unsigned long flags) +{ + /* + * Avoid migrating a page that is shared by others and not writable. + */ + if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) || + mapping_writably_mapped(page->mapping) || + single_mm_mapping(vma->vm_mm, page->mapping)) { + int rc = isolate_lru_page(page); + + if (rc == 1) + list_add(&page->lru, pagelist); + /* + * If the isolate attempt was not successful then we just + * encountered an unswappable page. Something must be wrong. + */ + WARN_ON(rc == 0); + } +} + /* Ensure all existing pages follow the policy. */ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, nodemask_t *nodes) + unsigned long addr, unsigned long end, + const nodemask_t *nodes, unsigned long flags, + struct list_head *pagelist) { pte_t *orig_pte; pte_t *pte; @@ -193,15 +248,21 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, if (!page) continue; nid = page_to_nid(page); - if (!node_isset(nid, *nodes)) - break; + if (!node_isset(nid, *nodes)) { + if (pagelist) + migrate_page_add(vma, page, pagelist, flags); + else + break; + } } while (pte++, addr += PAGE_SIZE, addr != end); pte_unmap_unlock(orig_pte, ptl); return addr != end; } static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, - unsigned long addr, unsigned long end, nodemask_t *nodes) + unsigned long addr, unsigned long end, + const nodemask_t *nodes, unsigned long flags, + struct list_head *pagelist) { pmd_t *pmd; unsigned long next; @@ -211,14 +272,17 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, next = pmd_addr_end(addr, end); if (pmd_none_or_clear_bad(pmd)) continue; - if (check_pte_range(vma, pmd, addr, next, nodes)) + if (check_pte_range(vma, pmd, addr, next, nodes, + flags, pagelist)) return -EIO; } while (pmd++, addr = next, addr != end); return 0; } static inline int 
check_pud_range(struct vm_area_struct *vma, pgd_t *pgd, - unsigned long addr, unsigned long end, nodemask_t *nodes) + unsigned long addr, unsigned long end, + const nodemask_t *nodes, unsigned long flags, + struct list_head *pagelist) { pud_t *pud; unsigned long next; @@ -228,14 +292,17 @@ static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd, next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; - if (check_pmd_range(vma, pud, addr, next, nodes)) + if (check_pmd_range(vma, pud, addr, next, nodes, + flags, pagelist)) return -EIO; } while (pud++, addr = next, addr != end); return 0; } static inline int check_pgd_range(struct vm_area_struct *vma, - unsigned long addr, unsigned long end, nodemask_t *nodes) + unsigned long addr, unsigned long end, + const nodemask_t *nodes, unsigned long flags, + struct list_head *pagelist) { pgd_t *pgd; unsigned long next; @@ -245,16 +312,31 @@ static inline int check_pgd_range(struct vm_area_struct *vma, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - if (check_pud_range(vma, pgd, addr, next, nodes)) + if (check_pud_range(vma, pgd, addr, next, nodes, + flags, pagelist)) return -EIO; } while (pgd++, addr = next, addr != end); return 0; } -/* Step 1: check the range */ +/* Check if a vma is migratable */ +static inline int vma_migratable(struct vm_area_struct *vma) +{ + if (vma->vm_flags & ( + VM_LOCKED|VM_IO|VM_HUGETLB|VM_PFNMAP)) + return 0; + return 1; +} + +/* + * Check if all pages in a range are on a set of nodes. + * If pagelist != NULL then isolate pages from the LRU and + * put them on the pagelist. 
+ */ static struct vm_area_struct * check_range(struct mm_struct *mm, unsigned long start, unsigned long end, - nodemask_t *nodes, unsigned long flags) + const nodemask_t *nodes, unsigned long flags, + struct list_head *pagelist) { int err; struct vm_area_struct *first, *vma, *prev; @@ -264,17 +346,24 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, return ERR_PTR(-EFAULT); prev = NULL; for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { - if (!vma->vm_next && vma->vm_end < end) - return ERR_PTR(-EFAULT); - if (prev && prev->vm_end < vma->vm_start) - return ERR_PTR(-EFAULT); - if ((flags & MPOL_MF_STRICT) && !is_vm_hugetlb_page(vma)) { + if (!(flags & MPOL_MF_DISCONTIG_OK)) { + if (!vma->vm_next && vma->vm_end < end) + return ERR_PTR(-EFAULT); + if (prev && prev->vm_end < vma->vm_start) + return ERR_PTR(-EFAULT); + } + if (!is_vm_hugetlb_page(vma) && + ((flags & MPOL_MF_STRICT) || + ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) && + vma_migratable(vma)))) { unsigned long endvma = vma->vm_end; + if (endvma > end) endvma = end; if (vma->vm_start > start) start = vma->vm_start; - err = check_pgd_range(vma, start, endvma, nodes); + err = check_pgd_range(vma, start, endvma, nodes, + flags, pagelist); if (err) { first = ERR_PTR(err); break; @@ -348,33 +437,59 @@ long do_mbind(unsigned long start, unsigned long len, struct mempolicy *new; unsigned long end; int err; + LIST_HEAD(pagelist); - if ((flags & ~(unsigned long)(MPOL_MF_STRICT)) || mode > MPOL_MAX) + if ((flags & ~(unsigned long)(MPOL_MF_STRICT|MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) + || mode > MPOL_MAX) return -EINVAL; + if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE)) + return -EPERM; + if (start & ~PAGE_MASK) return -EINVAL; + if (mode == MPOL_DEFAULT) flags &= ~MPOL_MF_STRICT; + len = (len + PAGE_SIZE - 1) & PAGE_MASK; end = start + len; + if (end < start) return -EINVAL; if (end == start) return 0; + if (mpol_check_policy(mode, nmask)) return -EINVAL; + new = 
mpol_new(mode, nmask); if (IS_ERR(new)) return PTR_ERR(new); + /* + * If we are using the default policy then operation + * on discontinuous address spaces is okay after all + */ + if (!new) + flags |= MPOL_MF_DISCONTIG_OK; + PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len, mode,nodes_addr(nodes)[0]); down_write(&mm->mmap_sem); - vma = check_range(mm, start, end, nmask, flags); + vma = check_range(mm, start, end, nmask, flags, + (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ? &pagelist : NULL); err = PTR_ERR(vma); - if (!IS_ERR(vma)) + if (!IS_ERR(vma)) { err = mbind_range(vma, start, end, new); + if (!list_empty(&pagelist)) + migrate_pages(&pagelist, NULL); + if (!err && !list_empty(&pagelist) && (flags & MPOL_MF_STRICT)) + err = -EIO; + } + if (!list_empty(&pagelist)) + putback_lru_pages(&pagelist); + up_write(&mm->mmap_sem); mpol_free(new); return err; -- cgit v1.2.3-71-gd317 From 39743889aaf76725152f16aa90ca3c45f6d52da3 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 8 Jan 2006 01:00:51 -0800 Subject: [PATCH] Swap Migration V5: sys_migrate_pages interface sys_migrate_pages implementation using swap based page migration This is the original API proposed by Ray Bryant in his posts during the first half of 2005 on linux-mm@kvack.org and linux-kernel@vger.kernel.org. The intent of sys_migrate is to migrate memory of a process. A process may have migrated to another node. Memory was allocated optimally for the prior context. sys_migrate_pages allows to shift the memory to the new node. sys_migrate_pages is also useful if the processes available memory nodes have changed through cpuset operations to manually move the processes memory. Paul Jackson is working on an automated mechanism that will allow an automatic migration if the cpuset of a process is changed. However, a user may decide to manually control the migration. 
This implementation is put into the policy layer since it uses concepts and functions that are also needed for mbind and friends. The patch also provides a do_migrate_pages function that may be useful for cpusets to automatically move memory. sys_migrate_pages does not modify policies in contrast to Ray's implementation. The current code here is based on the swap based page migration capability and thus is not able to preserve the physical layout relative to its containing nodeset (which may be a cpuset). When direct page migration becomes available then the implementation needs to be changed to do an isomorphic move of pages between different nodesets. The current implementation simply evicts all pages in source nodeset that are not in the target nodeset. Patch supports ia64, i386 and x86_64. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/syscall_table.S | 1 + arch/ia64/kernel/entry.S | 1 + arch/x86_64/ia32/ia32entry.S | 1 + include/asm-i386/unistd.h | 3 +- include/asm-ia64/unistd.h | 3 +- include/asm-x86_64/ia32_unistd.h | 3 +- include/asm-x86_64/unistd.h | 4 +- include/linux/mempolicy.h | 3 ++ include/linux/syscalls.h | 2 + kernel/sys_ni.c | 1 + mm/mempolicy.c | 94 +++++++++++++++++++++++++++++++++++++++- 11 files changed, 111 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index f7ba4acc20ec..6ff3e5243226 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -293,3 +293,4 @@ ENTRY(sys_call_table) .long sys_inotify_init .long sys_inotify_add_watch .long sys_inotify_rm_watch + .long sys_migrate_pages diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 0741b066b98f..7a6ffd613789 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1600,5 +1600,6 @@ sys_call_table: data8 sys_inotify_init data8 sys_inotify_add_watch data8
sys_inotify_rm_watch + data8 sys_migrate_pages // 1280 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index df0773c9bdbe..1f0ff5adc80e 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -643,6 +643,7 @@ ia32_sys_call_table: .quad sys_inotify_init .quad sys_inotify_add_watch .quad sys_inotify_rm_watch + .quad sys_migrate_pages ia32_syscall_end: .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 .quad ni_syscall diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index fe38b9a96233..481c3c0ea720 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -299,8 +299,9 @@ #define __NR_inotify_init 291 #define __NR_inotify_add_watch 292 #define __NR_inotify_rm_watch 293 +#define __NR_migrate_pages 294 -#define NR_syscalls 294 +#define NR_syscalls 295 /* * user-visible error numbers are in the range -1 - -128: see diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index 2bf543493cb8..962f9bd1bdff 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h @@ -269,12 +269,13 @@ #define __NR_inotify_init 1277 #define __NR_inotify_add_watch 1278 #define __NR_inotify_rm_watch 1279 +#define __NR_migrate_pages 1280 #ifdef __KERNEL__ #include -#define NR_syscalls 256 /* length of syscall table */ +#define NR_syscalls 270 /* length of syscall table */ #define __ARCH_WANT_SYS_RT_SIGACTION diff --git a/include/asm-x86_64/ia32_unistd.h b/include/asm-x86_64/ia32_unistd.h index d5166ec3868d..e8843362a6cc 100644 --- a/include/asm-x86_64/ia32_unistd.h +++ b/include/asm-x86_64/ia32_unistd.h @@ -299,7 +299,8 @@ #define __NR_ia32_inotify_init 291 #define __NR_ia32_inotify_add_watch 292 #define __NR_ia32_inotify_rm_watch 293 +#define __NR_ia32_migrate_pages 294 -#define IA32_NR_syscalls 294 /* must be > than biggest syscall! 
*/ +#define IA32_NR_syscalls 295 /* must be > than biggest syscall! */ #endif /* _ASM_X86_64_IA32_UNISTD_H_ */ diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index 2c42150bce0c..e6f896161c11 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -571,8 +571,10 @@ __SYSCALL(__NR_inotify_init, sys_inotify_init) __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) #define __NR_inotify_rm_watch 255 __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) +#define __NR_migrate_pages 256 +__SYSCALL(__NR_migrate_pages, sys_migrate_pages) -#define __NR_syscall_max __NR_inotify_rm_watch +#define __NR_syscall_max __NR_migrate_pages #ifndef __NO_STUBS /* user-visible error numbers are in the range -1 - -4095 */ diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 05443a766cb8..3e61e829681d 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -162,6 +162,9 @@ static inline void check_highest_zone(int k) policy_zone = k; } +int do_migrate_pages(struct mm_struct *mm, + const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags); + #else struct mempolicy {}; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c7007b1db91d..e910d1a481df 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -511,5 +511,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio); asmlinkage long sys_ioprio_get(int which, int who); asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, unsigned long maxnode); +asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, + const unsigned long __user *from, const unsigned long __user *to); #endif diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 1ab2370e2efa..7a8bc7f60d91 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -82,6 +82,7 @@ cond_syscall(compat_sys_socketcall); cond_syscall(sys_inotify_init); cond_syscall(sys_inotify_add_watch); cond_syscall(sys_inotify_rm_watch); 
+cond_syscall(sys_migrate_pages); /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9cc6d962831d..20d5ad39fa41 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -614,12 +614,42 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask, return err; } +/* + * For now migrate_pages simply swaps out the pages from nodes that are in + * the source set but not in the target set. In the future, we would + * want a function that moves pages between the two nodesets in such + * a way as to preserve the physical layout as much as possible. + * + * Returns the number of page that could not be moved. + */ +int do_migrate_pages(struct mm_struct *mm, + const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) +{ + LIST_HEAD(pagelist); + int count = 0; + nodemask_t nodes; + + nodes_andnot(nodes, *from_nodes, *to_nodes); + nodes_complement(nodes, nodes); + + down_read(&mm->mmap_sem); + check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes, + flags | MPOL_MF_DISCONTIG_OK, &pagelist); + if (!list_empty(&pagelist)) { + migrate_pages(&pagelist, NULL); + if (!list_empty(&pagelist)) + count = putback_lru_pages(&pagelist); + } + up_read(&mm->mmap_sem); + return count; +} + /* * User space interface with variable sized bitmaps for nodelists. */ /* Copy a node mask from user space. 
*/ -static int get_nodes(nodemask_t *nodes, unsigned long __user *nmask, +static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask, unsigned long maxnode) { unsigned long k; @@ -708,6 +738,68 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, return do_set_mempolicy(mode, &nodes); } +/* Macro needed until Paul implements this function in kernel/cpusets.c */ +#define cpuset_mems_allowed(task) node_online_map + +asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, + const unsigned long __user *old_nodes, + const unsigned long __user *new_nodes) +{ + struct mm_struct *mm; + struct task_struct *task; + nodemask_t old; + nodemask_t new; + nodemask_t task_nodes; + int err; + + err = get_nodes(&old, old_nodes, maxnode); + if (err) + return err; + + err = get_nodes(&new, new_nodes, maxnode); + if (err) + return err; + + /* Find the mm_struct */ + read_lock(&tasklist_lock); + task = pid ? find_task_by_pid(pid) : current; + if (!task) { + read_unlock(&tasklist_lock); + return -ESRCH; + } + mm = get_task_mm(task); + read_unlock(&tasklist_lock); + + if (!mm) + return -EINVAL; + + /* + * Check if this process has the right to modify the specified + * process. The right exists if the process has administrative + * capabilities, superuser priviledges or the same + * userid as the target process. + */ + if ((current->euid != task->suid) && (current->euid != task->uid) && + (current->uid != task->suid) && (current->uid != task->uid) && + !capable(CAP_SYS_ADMIN)) { + err = -EPERM; + goto out; + } + + task_nodes = cpuset_mems_allowed(task); + /* Is the user allowed to access the target nodes? 
*/ + if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_ADMIN)) { + err = -EPERM; + goto out; + } + + err = do_migrate_pages(mm, &old, &new, MPOL_MF_MOVE); +out: + mmput(mm); + return err; +} + + /* Retrieve NUMA policy */ asmlinkage long sys_get_mempolicy(int __user *policy, unsigned long __user *nmask, -- cgit v1.2.3-71-gd317 From 8419c3181086c86664e8246bc997afc2e4ffba4f Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 8 Jan 2006 01:00:52 -0800 Subject: [PATCH] SwapMig: CONFIG_MIGRATION fixes Move move_to_lru, putback_lru_pages and isolate_lru in section surrounded by CONFIG_MIGRATION saving some codesize for single processor kernels. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 +- mm/vmscan.c | 152 +++++++++++++++++++++++++-------------------------- 2 files changed, 77 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 117add066f00..997d838f0e70 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -175,10 +175,9 @@ extern int try_to_free_pages(struct zone **, gfp_t); extern int shrink_all_memory(int); extern int vm_swappiness; +#ifdef CONFIG_MIGRATION extern int isolate_lru_page(struct page *p); extern int putback_lru_pages(struct list_head *l); - -#ifdef CONFIG_MIGRATION extern int migrate_pages(struct list_head *l, struct list_head *t); #endif diff --git a/mm/vmscan.c b/mm/vmscan.c index 58270aea669a..daed4a73b761 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -569,6 +569,40 @@ keep: } #ifdef CONFIG_MIGRATION +static inline void move_to_lru(struct page *page) +{ + list_del(&page->lru); + if (PageActive(page)) { + /* + * lru_cache_add_active checks that + * the PG_active bit is off. 
+ */ + ClearPageActive(page); + lru_cache_add_active(page); + } else { + lru_cache_add(page); + } + put_page(page); +} + +/* + * Add isolated pages on the list back to the LRU + * + * returns the number of pages put back. + */ +int putback_lru_pages(struct list_head *l) +{ + struct page *page; + struct page *page2; + int count = 0; + + list_for_each_entry_safe(page, page2, l, lru) { + move_to_lru(page); + count++; + } + return count; +} + /* * swapout a single page * page is locked upon entry, unlocked on exit @@ -709,6 +743,48 @@ retry_later: return nr_failed + retry; } + +static void lru_add_drain_per_cpu(void *dummy) +{ + lru_add_drain(); +} + +/* + * Isolate one page from the LRU lists and put it on the + * indicated list. Do necessary cache draining if the + * page is not on the LRU lists yet. + * + * Result: + * 0 = page not on LRU list + * 1 = page removed from LRU list and added to the specified list. + * -ENOENT = page is being freed elsewhere. + */ +int isolate_lru_page(struct page *page) +{ + int rc = 0; + struct zone *zone = page_zone(page); + +redo: + spin_lock_irq(&zone->lru_lock); + rc = __isolate_lru_page(page); + if (rc == 1) { + if (PageActive(page)) + del_page_from_active_list(zone, page); + else + del_page_from_inactive_list(zone, page); + } + spin_unlock_irq(&zone->lru_lock); + if (rc == 0) { + /* + * Maybe this page is still waiting for a cpu to drain it + * from one of the lru lists? + */ + rc = schedule_on_each_cpu(lru_add_drain_per_cpu, NULL); + if (rc == 0 && PageLRU(page)) + goto redo; + } + return rc; +} #endif /* @@ -758,48 +834,6 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src, return nr_taken; } -static void lru_add_drain_per_cpu(void *dummy) -{ - lru_add_drain(); -} - -/* - * Isolate one page from the LRU lists and put it on the - * indicated list. Do necessary cache draining if the - * page is not on the LRU lists yet. 
- * - * Result: - * 0 = page not on LRU list - * 1 = page removed from LRU list and added to the specified list. - * -ENOENT = page is being freed elsewhere. - */ -int isolate_lru_page(struct page *page) -{ - int rc = 0; - struct zone *zone = page_zone(page); - -redo: - spin_lock_irq(&zone->lru_lock); - rc = __isolate_lru_page(page); - if (rc == 1) { - if (PageActive(page)) - del_page_from_active_list(zone, page); - else - del_page_from_inactive_list(zone, page); - } - spin_unlock_irq(&zone->lru_lock); - if (rc == 0) { - /* - * Maybe this page is still waiting for a cpu to drain it - * from one of the lru lists? - */ - rc = schedule_on_each_cpu(lru_add_drain_per_cpu, NULL); - if (rc == 0 && PageLRU(page)) - goto redo; - } - return rc; -} - /* * shrink_cache() adds the number of pages reclaimed to sc->nr_reclaimed */ @@ -865,40 +899,6 @@ done: pagevec_release(&pvec); } -static inline void move_to_lru(struct page *page) -{ - list_del(&page->lru); - if (PageActive(page)) { - /* - * lru_cache_add_active checks that - * the PG_active bit is off. - */ - ClearPageActive(page); - lru_cache_add_active(page); - } else { - lru_cache_add(page); - } - put_page(page); -} - -/* - * Add isolated pages on the list back to the LRU - * - * returns the number of pages put back. - */ -int putback_lru_pages(struct list_head *l) -{ - struct page *page; - struct page *page2; - int count = 0; - - list_for_each_entry_safe(page, page2, l, lru) { - move_to_lru(page); - count++; - } - return count; -} - /* * This moves pages from the active list to the inactive list. * -- cgit v1.2.3-71-gd317 From 1480a540c98525640174a7eadd712378fcd6fd63 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 8 Jan 2006 01:00:53 -0800 Subject: [PATCH] SwapMig: add_to_swap() avoid atomic allocations Add gfp_mask to add_to_swap add_to_swap does allocations with GFP_ATOMIC in order not to interfere with swapping. During migration we may have to use add_to_swap extensively which may lead to out of memory errors.
This patch makes add_to_swap take a parameter that specifies the gfp mask. The page migration code can then make add_to_swap use GFP_KERNEL. Signed-off-by: Hirokazu Takahashi Signed-off-by: Dave Hansen Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- mm/swap_state.c | 4 ++-- mm/vmscan.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 997d838f0e70..eb591eaad1b7 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -198,7 +198,7 @@ extern int rw_swap_page_sync(int, swp_entry_t, struct page *); extern struct address_space swapper_space; #define total_swapcache_pages swapper_space.nrpages extern void show_swap_cache_info(void); -extern int add_to_swap(struct page *); +extern int add_to_swap(struct page *, gfp_t); extern void __delete_from_swap_cache(struct page *); extern void delete_from_swap_cache(struct page *); extern int move_to_swap_cache(struct page *, swp_entry_t); diff --git a/mm/swap_state.c b/mm/swap_state.c index fc2aecb70a95..7b09ac503fec 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -141,7 +141,7 @@ void __delete_from_swap_cache(struct page *page) * Allocate swap space for the page and add the page to the * swap cache. Caller needs to hold the page lock. 
*/ -int add_to_swap(struct page * page) +int add_to_swap(struct page * page, gfp_t gfp_mask) { swp_entry_t entry; int err; @@ -166,7 +166,7 @@ int add_to_swap(struct page * page) * Add it to the swap cache and mark it dirty */ err = __add_to_swap_cache(page, entry, - GFP_ATOMIC|__GFP_NOMEMALLOC|__GFP_NOWARN); + gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN); switch (err) { case 0: /* Success */ diff --git a/mm/vmscan.c b/mm/vmscan.c index daed4a73b761..5393b093a87b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -458,7 +458,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) * Try to allocate it some swap space here. */ if (PageAnon(page) && !PageSwapCache(page)) { - if (!add_to_swap(page)) + if (!add_to_swap(page, GFP_ATOMIC)) goto activate_locked; } #endif /* CONFIG_SWAP */ @@ -715,7 +715,7 @@ redo: } if (PageAnon(page) && !PageSwapCache(page)) { - if (!add_to_swap(page)) { + if (!add_to_swap(page, GFP_KERNEL)) { unlock_page(page); list_move(&page->lru, &failed); nr_failed++; -- cgit v1.2.3-71-gd317 From d498471133ff1f9586a06820beaeebc575fe2814 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 8 Jan 2006 01:00:55 -0800 Subject: [PATCH] SwapMig: Extend parameters for migrate_pages() Extend the parameters of migrate_pages() to allow the caller control over the fate of successfully migrated or impossible to migrate pages. Swap migration and direct migration will have the same interface after this patch so that patches can be independently applied to the policy layer and the core migration code. 
Signed-off-by: Christoph Lameter Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 ++- mm/mempolicy.c | 27 ++++++++++++++++++++++----- mm/vmscan.c | 17 ++++++++--------- 3 files changed, 32 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index eb591eaad1b7..389d1c382e20 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -178,7 +178,8 @@ extern int vm_swappiness; #ifdef CONFIG_MIGRATION extern int isolate_lru_page(struct page *p); extern int putback_lru_pages(struct list_head *l); -extern int migrate_pages(struct list_head *l, struct list_head *t); +extern int migrate_pages(struct list_head *l, struct list_head *t, + struct list_head *moved, struct list_head *failed); #endif #ifdef CONFIG_MMU diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 20d5ad39fa41..30bdafba52d8 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -429,6 +429,19 @@ static int contextualize_policy(int mode, nodemask_t *nodes) return mpol_check_policy(mode, nodes); } +static int swap_pages(struct list_head *pagelist) +{ + LIST_HEAD(moved); + LIST_HEAD(failed); + int n; + + n = migrate_pages(pagelist, NULL, &moved, &failed); + putback_lru_pages(&failed); + putback_lru_pages(&moved); + + return n; +} + long do_mbind(unsigned long start, unsigned long len, unsigned long mode, nodemask_t *nmask, unsigned long flags) { @@ -481,10 +494,13 @@ long do_mbind(unsigned long start, unsigned long len, (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ? 
&pagelist : NULL); err = PTR_ERR(vma); if (!IS_ERR(vma)) { + int nr_failed = 0; + err = mbind_range(vma, start, end, new); if (!list_empty(&pagelist)) - migrate_pages(&pagelist, NULL); - if (!err && !list_empty(&pagelist) && (flags & MPOL_MF_STRICT)) + nr_failed = swap_pages(&pagelist); + + if (!err && nr_failed && (flags & MPOL_MF_STRICT)) err = -EIO; } if (!list_empty(&pagelist)) @@ -635,11 +651,12 @@ int do_migrate_pages(struct mm_struct *mm, down_read(&mm->mmap_sem); check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes, flags | MPOL_MF_DISCONTIG_OK, &pagelist); + if (!list_empty(&pagelist)) { - migrate_pages(&pagelist, NULL); - if (!list_empty(&pagelist)) - count = putback_lru_pages(&pagelist); + count = swap_pages(&pagelist); + putback_lru_pages(&pagelist); } + up_read(&mm->mmap_sem); return count; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 73ba4046ed27..5eecb514ccea 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -670,10 +670,10 @@ retry: * list. The direct migration patchset * extends this function to avoid the use of swap. */ -int migrate_pages(struct list_head *l, struct list_head *t) +int migrate_pages(struct list_head *from, struct list_head *to, + struct list_head *moved, struct list_head *failed) { int retry; - LIST_HEAD(failed); int nr_failed = 0; int pass = 0; struct page *page; @@ -686,12 +686,12 @@ int migrate_pages(struct list_head *l, struct list_head *t) redo: retry = 0; - list_for_each_entry_safe(page, page2, l, lru) { + list_for_each_entry_safe(page, page2, from, lru) { cond_resched(); if (page_count(page) == 1) { /* page was freed from under us. So we are done. */ - move_to_lru(page); + list_move(&page->lru, moved); continue; } /* @@ -722,7 +722,7 @@ redo: if (PageAnon(page) && !PageSwapCache(page)) { if (!add_to_swap(page, GFP_KERNEL)) { unlock_page(page); - list_move(&page->lru, &failed); + list_move(&page->lru, failed); nr_failed++; continue; } @@ -732,8 +732,10 @@ redo: * Page is properly locked and writeback is complete. 
* Try to migrate the page. */ - if (!swap_page(page)) + if (!swap_page(page)) { + list_move(&page->lru, moved); continue; + } retry_later: retry++; } @@ -743,9 +745,6 @@ retry_later: if (!swapwrite) current->flags &= ~PF_SWAPWRITE; - if (!list_empty(&failed)) - list_splice(&failed, l); - return nr_failed + retry; } -- cgit v1.2.3-71-gd317 From 45b07ef31d1182d2cfde7711327e3afb268bb1ac Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Sun, 8 Jan 2006 01:00:56 -0800 Subject: [PATCH] cpusets: swap migration interface Add a boolean "memory_migrate" to each cpuset, represented by a file containing "0" or "1" in each directory below /dev/cpuset. It defaults to false (file contains "0"). It can be set true by writing "1" to the file. If true, then anytime that a task is attached to the cpuset so marked, the pages of that task will be moved to that cpuset, preserving, to the extent practical, the cpuset-relative placement of the pages. Also anytime that a cpuset so marked has its memory placement changed (by writing to its "mems" file), the tasks in that cpuset will have their pages moved to the cpusets new nodes, preserving, to the extent practical, the cpuset-relative placement of the moved pages. Signed-off-by: Paul Jackson Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cpusets.txt | 25 +++++++++++++++++++++++++ include/linux/mempolicy.h | 7 +++++++ kernel/cpuset.c | 38 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 68 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index a09a8eb80665..e2d9afc30d2d 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt @@ -192,6 +192,7 @@ containing the following files describing that cpuset: - cpus: list of CPUs in that cpuset - mems: list of Memory Nodes in that cpuset + - memory_migrate flag: if set, move pages to cpusets nodes - cpu_exclusive flag: is cpu placement exclusive? 
- mem_exclusive flag: is memory placement exclusive? - tasks: list of tasks (by pid) attached to that cpuset @@ -277,6 +278,30 @@ rewritten to the 'tasks' file of its cpuset. This is done to avoid impacting the scheduler code in the kernel with a check for changes in a tasks processor placement. +Normally, once a page is allocated (given a physical page +of main memory) then that page stays on whatever node it +was allocated, so long as it remains allocated, even if the +cpusets memory placement policy 'mems' subsequently changes. +If the cpuset flag file 'memory_migrate' is set true, then when +tasks are attached to that cpuset, any pages that task had +allocated to it on nodes in its previous cpuset are migrated +to the tasks new cpuset. Depending on the implementation, +this migration may either be done by swapping the page out, +so that the next time the page is referenced, it will be paged +into the tasks new cpuset, usually on the node where it was +referenced, or this migration may be done by directly copying +the pages from the tasks previous cpuset to the new cpuset, +where possible to the same node, relative to the new cpuset, +as the node that held the page, relative to the old cpuset. +Also if 'memory_migrate' is set true, then if that cpusets +'mems' file is modified, pages allocated to tasks in that +cpuset, that were on nodes in the previous setting of 'mems', +will be moved to nodes in the new setting of 'mems.' Again, +depending on the implementation, this might be done by swapping, +or by direct copying. In either case, pages that were not in +the tasks prior cpuset, or in the cpusets prior 'mems' setting, +will not be moved. + There is an exception to the above. 
If hotplug functionality is used to remove all the CPUs that are currently assigned to a cpuset, then the kernel will automatically update the cpus_allowed of all diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 3e61e829681d..66247eff24a0 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -235,6 +235,13 @@ static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER); } +static inline int do_migrate_pages(struct mm_struct *mm, + const nodemask_t *from_nodes, + const nodemask_t *to_nodes, int flags) +{ + return 0; +} + static inline void check_highest_zone(int k) { } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 7430640f9816..f63383e01ec7 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -87,6 +87,7 @@ struct cpuset { typedef enum { CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, + CS_MEMORY_MIGRATE, CS_REMOVED, CS_NOTIFY_ON_RELEASE } cpuset_flagbits_t; @@ -112,6 +113,11 @@ static inline int notify_on_release(const struct cpuset *cs) return !!test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); } +static inline int is_memory_migrate(const struct cpuset *cs) +{ + return !!test_bit(CS_MEMORY_MIGRATE, &cs->flags); +} + /* * Increment this atomic integer everytime any cpuset changes its * mems_allowed value. 
Users of cpusets can track this generation @@ -602,16 +608,24 @@ static void refresh_mems(void) if (current->cpuset_mems_generation != my_cpusets_mem_gen) { struct cpuset *cs; nodemask_t oldmem = current->mems_allowed; + int migrate; down(&callback_sem); task_lock(current); cs = current->cpuset; + migrate = is_memory_migrate(cs); guarantee_online_mems(cs, &current->mems_allowed); current->cpuset_mems_generation = cs->mems_generation; task_unlock(current); up(&callback_sem); - if (!nodes_equal(oldmem, current->mems_allowed)) + if (!nodes_equal(oldmem, current->mems_allowed)) { numa_policy_rebind(&oldmem, &current->mems_allowed); + if (migrate) { + do_migrate_pages(current->mm, &oldmem, + &current->mems_allowed, + MPOL_MF_MOVE_ALL); + } + } } } @@ -795,7 +809,7 @@ static int update_nodemask(struct cpuset *cs, char *buf) /* * update_flag - read a 0 or a 1 in a file and update associated flag * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, - * CS_NOTIFY_ON_RELEASE) + * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE) * cs: the cpuset to update * buf: the buffer where we read the 0 or 1 * @@ -848,6 +862,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) struct task_struct *tsk; struct cpuset *oldcs; cpumask_t cpus; + nodemask_t from, to; if (sscanf(pidbuf, "%d", &pid) != 1) return -EIO; @@ -893,7 +908,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) guarantee_online_cpus(cs, &cpus); set_cpus_allowed(tsk, cpus); + from = oldcs->mems_allowed; + to = cs->mems_allowed; + up(&callback_sem); + if (is_memory_migrate(cs)) + do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL); put_task_struct(tsk); if (atomic_dec_and_test(&oldcs->count)) check_for_release(oldcs, ppathbuf); @@ -905,6 +925,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) typedef enum { FILE_ROOT, FILE_DIR, + FILE_MEMORY_MIGRATE, FILE_CPULIST, FILE_MEMLIST, FILE_CPU_EXCLUSIVE, @@ -960,6 +981,9 @@ static ssize_t
cpuset_common_file_write(struct file *file, const char __user *us case FILE_NOTIFY_ON_RELEASE: retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer); break; + case FILE_MEMORY_MIGRATE: + retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); + break; case FILE_TASKLIST: retval = attach_task(cs, buffer, &pathbuf); break; @@ -1060,6 +1084,9 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, case FILE_NOTIFY_ON_RELEASE: *s++ = notify_on_release(cs) ? '1' : '0'; break; + case FILE_MEMORY_MIGRATE: + *s++ = is_memory_migrate(cs) ? '1' : '0'; + break; default: retval = -EINVAL; goto out; @@ -1408,6 +1435,11 @@ static struct cftype cft_notify_on_release = { .private = FILE_NOTIFY_ON_RELEASE, }; +static struct cftype cft_memory_migrate = { + .name = "memory_migrate", + .private = FILE_MEMORY_MIGRATE, +}; + static int cpuset_populate_dir(struct dentry *cs_dentry) { int err; @@ -1422,6 +1454,8 @@ static int cpuset_populate_dir(struct dentry *cs_dentry) return err; if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0) return err; + if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0) + return err; if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0) return err; return 0; -- cgit v1.2.3-71-gd317 From 152194aaa6266d71dfee57882a23def339ef17a4 Mon Sep 17 00:00:00 2001 From: Avishay Traeger Date: Sun, 8 Jan 2006 01:00:58 -0800 Subject: [PATCH] set_page_count() macro safety Fix set_page_count() macro to handle complex arguments. 
Signed-off-by: Avishay Traeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 83c651f25188..7ff54242c5d7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -308,7 +308,7 @@ struct page { */ #define get_page_testone(p) atomic_inc_and_test(&(p)->_count) -#define set_page_count(p,v) atomic_set(&(p)->_count, v - 1) +#define set_page_count(p,v) atomic_set(&(p)->_count, (v) - 1) #define __put_page(p) atomic_dec(&(p)->_count) extern void FASTCALL(__page_cache_release(struct page *)); -- cgit v1.2.3-71-gd317 From 48fce3429df84a94766fbbc845fa8450d0715b48 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 8 Jan 2006 01:01:03 -0800 Subject: [PATCH] mempolicies: unexport get_vma_policy() Since the numa_maps functionality is now in mempolicy.c we no longer need to export get_vma_policy(). Signed-off-by: Christoph Lameter Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 3 --- mm/mempolicy.c | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 66247eff24a0..05fddd5bee5d 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -144,9 +144,6 @@ void mpol_free_shared_policy(struct shared_policy *p); struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx); -struct mempolicy *get_vma_policy(struct task_struct *task, - struct vm_area_struct *vma, unsigned long addr); - extern void numa_default_policy(void); extern void numa_policy_init(void); extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 4c0510e9e7f6..4b077ec6c005 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -935,8 +935,8 @@ asmlinkage long 
compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, #endif /* Return effective policy for a VMA */ -struct mempolicy * -get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr) +static struct mempolicy * get_vma_policy(struct task_struct *task, + struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = task->mempolicy; -- cgit v1.2.3-71-gd317 From 22fc6eccbf4ce4eb6265e6ada7b50a7b9cc57d05 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Sun, 8 Jan 2006 01:01:27 -0800 Subject: [PATCH] Change maxaligned_in_smp alignemnt macros to internodealigned_in_smp macros ____cacheline_maxaligned_in_smp is currently used to align critical structures and avoid false sharing. It uses per-arch L1_CACHE_SHIFT_MAX and people find L1_CACHE_SHIFT_MAX useless. However, we have been using ____cacheline_maxaligned_in_smp to align structures on the internode cacheline size. As per Andi's suggestion, following patch kills ____cacheline_maxaligned_in_smp and introduces INTERNODE_CACHE_SHIFT, which defaults to L1_CACHE_SHIFT for all arches. Arches needing L3/Internode cacheline alignment can define INTERNODE_CACHE_SHIFT in the arch asm/cache.h. 
Patch replaces ____cacheline_maxaligned_in_smp with ____cacheline_internodealigned_in_smp With this patch, L1_CACHE_SHIFT_MAX can be killed Signed-off-by: Ravikiran Thirumalai Signed-off-by: Shai Fultheim Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/init_task.c | 2 +- arch/i386/kernel/irq.c | 2 +- arch/x86_64/kernel/init_task.c | 2 +- include/linux/cache.h | 17 +++++++++++++---- include/linux/ide.h | 2 +- include/linux/mmzone.h | 4 ++-- include/linux/rcupdate.h | 2 +- kernel/rcupdate.c | 4 ++-- mm/sparse.c | 4 ++-- 9 files changed, 24 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c index 9caa8e8db80c..cff95d10a4d8 100644 --- a/arch/i386/kernel/init_task.c +++ b/arch/i386/kernel/init_task.c @@ -42,5 +42,5 @@ EXPORT_SYMBOL(init_task); * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. */ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS; +DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 1a201a932865..f3a9c78c4a24 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -19,7 +19,7 @@ #include #include -DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp; +DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; EXPORT_PER_CPU_SYMBOL(irq_stat); #ifndef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86_64/kernel/init_task.c b/arch/x86_64/kernel/init_task.c index e0ba5c1043fd..ce31d904d601 100644 --- a/arch/x86_64/kernel/init_task.c +++ b/arch/x86_64/kernel/init_task.c @@ -44,6 +44,6 @@ EXPORT_SYMBOL(init_task); * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. 
*/ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS; +DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; #define ALIGN_TO_4K __attribute__((section(".data.init_task"))) diff --git a/include/linux/cache.h b/include/linux/cache.h index 0b7ecf3af78a..ffe52210fc4f 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -45,12 +45,21 @@ #endif /* CONFIG_SMP */ #endif -#if !defined(____cacheline_maxaligned_in_smp) +/* + * The maximum alignment needed for some critical structures + * These could be inter-node cacheline sizes/L3 cacheline + * size etc. Define this in asm/cache.h for your arch + */ +#ifndef INTERNODE_CACHE_SHIFT +#define INTERNODE_CACHE_SHIFT L1_CACHE_SHIFT +#endif + +#if !defined(____cacheline_internodealigned_in_smp) #if defined(CONFIG_SMP) -#define ____cacheline_maxaligned_in_smp \ - __attribute__((__aligned__(1 << (L1_CACHE_SHIFT_MAX)))) +#define ____cacheline_internodealigned_in_smp \ + __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) #else -#define ____cacheline_maxaligned_in_smp +#define ____cacheline_internodealigned_in_smp #endif #endif diff --git a/include/linux/ide.h b/include/linux/ide.h index 7b6a6a58e465..4dd6694963c0 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -801,7 +801,7 @@ typedef struct hwif_s { unsigned dma; void (*led_act)(void *data, int rw); -} ____cacheline_maxaligned_in_smp ide_hwif_t; +} ____cacheline_internodealigned_in_smp ide_hwif_t; /* * internal ide interrupt handler type diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2a89c132ba9c..7e4ae6ab1977 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -38,7 +38,7 @@ struct pglist_data; #if defined(CONFIG_SMP) struct zone_padding { char x[0]; -} ____cacheline_maxaligned_in_smp; +} ____cacheline_internodealigned_in_smp; #define ZONE_PADDING(name) struct zone_padding name; #else #define ZONE_PADDING(name) @@ -233,7 +233,7 @@ struct 
zone { * rarely used fields: */ char *name; -} ____cacheline_maxaligned_in_smp; +} ____cacheline_internodealigned_in_smp; /* diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a471f3bb713e..51747cd88d1a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -65,7 +65,7 @@ struct rcu_ctrlblk { long cur; /* Current batch number. */ long completed; /* Number of the last completed batch */ int next_pending; /* Is the next batch already waiting? */ -} ____cacheline_maxaligned_in_smp; +} ____cacheline_internodealigned_in_smp; /* Is batch a before batch b ? */ static inline int rcu_batch_before(long a, long b) diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 48d3bce465b8..c9afc61240e4 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -61,9 +61,9 @@ struct rcu_state { /* for current batch to proceed. */ }; -static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp = +static struct rcu_state rcu_state ____cacheline_internodealigned_in_smp = {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE }; -static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp = +static struct rcu_state rcu_bh_state ____cacheline_internodealigned_in_smp = {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE }; DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L }; diff --git a/mm/sparse.c b/mm/sparse.c index 72079b538e2d..0a51f36ba3a1 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -18,10 +18,10 @@ */ #ifdef CONFIG_SPARSEMEM_EXTREME struct mem_section *mem_section[NR_SECTION_ROOTS] - ____cacheline_maxaligned_in_smp; + ____cacheline_internodealigned_in_smp; #else struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] - ____cacheline_maxaligned_in_smp; + ____cacheline_internodealigned_in_smp; #endif EXPORT_SYMBOL(mem_section); -- cgit v1.2.3-71-gd317 From e56d090310d7625ecb43a1eeebd479f04affb48b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 8 Jan 2006 01:01:37 -0800 Subject: [PATCH] RCU signal handling RCU 
tasklist_lock and RCU signal handling: send signals RCU-read-locked instead of tasklist_lock read-locked. This is a scalability improvement on SMP and a preemption-latency improvement under PREEMPT_RCU. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar Acked-by: William Irwin Cc: Roland McGrath Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 4 +-- include/linux/sched.h | 32 +++++++++++++++-- kernel/exit.c | 1 - kernel/fork.c | 10 +++++- kernel/pid.c | 22 ++++++------ kernel/rcupdate.c | 1 + kernel/sched.c | 7 ++++ kernel/signal.c | 97 +++++++++++++++++++++++++++++++++++++++++++-------- 8 files changed, 143 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index e75a9548da8e..e9650cd22a3b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -760,7 +760,7 @@ no_thread_group: spin_lock(&oldsighand->siglock); spin_lock(&newsighand->siglock); - current->sighand = newsighand; + rcu_assign_pointer(current->sighand, newsighand); recalc_sigpending(); spin_unlock(&newsighand->siglock); @@ -768,7 +768,7 @@ no_thread_group: write_unlock_irq(&tasklist_lock); if (atomic_dec_and_test(&oldsighand->count)) - kmem_cache_free(sighand_cachep, oldsighand); + sighand_free(oldsighand); } BUG_ON(!thread_group_leader(current)); diff --git a/include/linux/sched.h b/include/linux/sched.h index a74662077d60..a6af77e9b4cf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -34,6 +34,7 @@ #include #include #include +#include #include /* For AT_VECTOR_SIZE */ @@ -350,8 +351,16 @@ struct sighand_struct { atomic_t count; struct k_sigaction action[_NSIG]; spinlock_t siglock; + struct rcu_head rcu; }; +extern void sighand_free_cb(struct rcu_head *rhp); + +static inline void sighand_free(struct sighand_struct *sp) +{ + call_rcu(&sp->rcu, sighand_free_cb); +} + /* * NOTE! 
"signal_struct" does not have it's own * locking, because a shared signal_struct always @@ -844,6 +853,7 @@ struct task_struct { int cpuset_mems_generation; #endif atomic_t fs_excl; /* holding fs exclusive resources */ + struct rcu_head rcu; }; static inline pid_t process_group(struct task_struct *tsk) @@ -867,8 +877,26 @@ static inline int pid_alive(struct task_struct *p) extern void free_task(struct task_struct *tsk); extern void __put_task_struct(struct task_struct *tsk); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) -#define put_task_struct(tsk) \ -do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) + +static inline int get_task_struct_rcu(struct task_struct *t) +{ + int oldusage; + + do { + oldusage = atomic_read(&t->usage); + if (oldusage == 0) + return 0; + } while (cmpxchg(&t->usage.counter, oldusage, oldusage+1) != oldusage); + return 1; +} + +extern void __put_task_struct_cb(struct rcu_head *rhp); + +static inline void put_task_struct(struct task_struct *t) +{ + if (atomic_dec_and_test(&t->usage)) + call_rcu(&t->rcu, __put_task_struct_cb); +} /* * Per process flags diff --git a/kernel/exit.c b/kernel/exit.c index ee515683b92d..c73a7eb26de3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -72,7 +72,6 @@ repeat: __ptrace_unlink(p); BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); __exit_signal(p); - __exit_sighand(p); /* * Note that the fastpath in sys_times depends on __exit_signal having * updated the counters before a task is removed from the tasklist of diff --git a/kernel/fork.c b/kernel/fork.c index fb8572a42297..7fe3adfa65cb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -743,6 +743,14 @@ int unshare_files(void) EXPORT_SYMBOL(unshare_files); +void sighand_free_cb(struct rcu_head *rhp) +{ + struct sighand_struct *sp; + + sp = container_of(rhp, struct sighand_struct, rcu); + kmem_cache_free(sighand_cachep, sp); +} + static inline int copy_sighand(unsigned long 
clone_flags, struct task_struct * tsk) { struct sighand_struct *sig; @@ -752,7 +760,7 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t return 0; } sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); - tsk->sighand = sig; + rcu_assign_pointer(tsk->sighand, sig); if (!sig) return -ENOMEM; spin_lock_init(&sig->siglock); diff --git a/kernel/pid.c b/kernel/pid.c index edba31c681ac..1acc07246991 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -136,7 +136,7 @@ struct pid * fastcall find_pid(enum pid_type type, int nr) struct hlist_node *elem; struct pid *pid; - hlist_for_each_entry(pid, elem, + hlist_for_each_entry_rcu(pid, elem, &pid_hash[type][pid_hashfn(nr)], pid_chain) { if (pid->nr == nr) return pid; @@ -150,15 +150,15 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr) task_pid = &task->pids[type]; pid = find_pid(type, nr); + task_pid->nr = nr; if (pid == NULL) { - hlist_add_head(&task_pid->pid_chain, - &pid_hash[type][pid_hashfn(nr)]); INIT_LIST_HEAD(&task_pid->pid_list); + hlist_add_head_rcu(&task_pid->pid_chain, + &pid_hash[type][pid_hashfn(nr)]); } else { INIT_HLIST_NODE(&task_pid->pid_chain); - list_add_tail(&task_pid->pid_list, &pid->pid_list); + list_add_tail_rcu(&task_pid->pid_list, &pid->pid_list); } - task_pid->nr = nr; return 0; } @@ -170,20 +170,20 @@ static fastcall int __detach_pid(task_t *task, enum pid_type type) pid = &task->pids[type]; if (!hlist_unhashed(&pid->pid_chain)) { - hlist_del(&pid->pid_chain); - if (list_empty(&pid->pid_list)) + if (list_empty(&pid->pid_list)) { nr = pid->nr; - else { + hlist_del_rcu(&pid->pid_chain); + } else { pid_next = list_entry(pid->pid_list.next, struct pid, pid_list); /* insert next pid from pid_list to hash */ - hlist_add_head(&pid_next->pid_chain, - &pid_hash[type][pid_hashfn(pid_next->nr)]); + hlist_replace_rcu(&pid->pid_chain, + &pid_next->pid_chain); } } - list_del(&pid->pid_list); + list_del_rcu(&pid->pid_list); pid->nr = 0; return nr; diff --git 
a/kernel/rcupdate.c b/kernel/rcupdate.c index c9afc61240e4..0a669bd2f6d1 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched.c b/kernel/sched.c index 6f46c94cc29e..92733091154c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -176,6 +176,13 @@ static unsigned int task_timeslice(task_t *p) #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ < (long long) (sd)->cache_hot_time) +void __put_task_struct_cb(struct rcu_head *rhp) +{ + __put_task_struct(container_of(rhp, struct task_struct, rcu)); +} + +EXPORT_SYMBOL_GPL(__put_task_struct_cb); + /* * These are the runqueue data structures: */ diff --git a/kernel/signal.c b/kernel/signal.c index d7611f189ef7..64737c72dadd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -329,13 +329,20 @@ void __exit_sighand(struct task_struct *tsk) /* Ok, we're done with the signal handlers */ tsk->sighand = NULL; if (atomic_dec_and_test(&sighand->count)) - kmem_cache_free(sighand_cachep, sighand); + sighand_free(sighand); } void exit_sighand(struct task_struct *tsk) { write_lock_irq(&tasklist_lock); - __exit_sighand(tsk); + rcu_read_lock(); + if (tsk->sighand != NULL) { + struct sighand_struct *sighand = rcu_dereference(tsk->sighand); + spin_lock(&sighand->siglock); + __exit_sighand(tsk); + spin_unlock(&sighand->siglock); + } + rcu_read_unlock(); write_unlock_irq(&tasklist_lock); } @@ -345,12 +352,14 @@ void exit_sighand(struct task_struct *tsk) void __exit_signal(struct task_struct *tsk) { struct signal_struct * sig = tsk->signal; - struct sighand_struct * sighand = tsk->sighand; + struct sighand_struct * sighand; if (!sig) BUG(); if (!atomic_read(&sig->count)) BUG(); + rcu_read_lock(); + sighand = rcu_dereference(tsk->sighand); spin_lock(&sighand->siglock); posix_cpu_timers_exit(tsk); if (atomic_dec_and_test(&sig->count)) { @@ -358,6 +367,7 @@ void __exit_signal(struct task_struct *tsk) if (tsk == 
sig->curr_target) sig->curr_target = next_thread(tsk); tsk->signal = NULL; + __exit_sighand(tsk); spin_unlock(&sighand->siglock); flush_sigqueue(&sig->shared_pending); } else { @@ -389,9 +399,11 @@ void __exit_signal(struct task_struct *tsk) sig->nvcsw += tsk->nvcsw; sig->nivcsw += tsk->nivcsw; sig->sched_time += tsk->sched_time; + __exit_sighand(tsk); spin_unlock(&sighand->siglock); sig = NULL; /* Marker for below. */ } + rcu_read_unlock(); clear_tsk_thread_flag(tsk,TIF_SIGPENDING); flush_sigqueue(&tsk->pending); if (sig) { @@ -1080,18 +1092,28 @@ void zap_other_threads(struct task_struct *p) } /* - * Must be called with the tasklist_lock held for reading! + * Must be called under rcu_read_lock() or with tasklist_lock read-held. */ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) { unsigned long flags; + struct sighand_struct *sp; int ret; +retry: ret = check_kill_permission(sig, info, p); - if (!ret && sig && p->sighand) { - spin_lock_irqsave(&p->sighand->siglock, flags); + if (!ret && sig && (sp = p->sighand)) { + if (!get_task_struct_rcu(p)) + return -ESRCH; + spin_lock_irqsave(&sp->siglock, flags); + if (p->sighand != sp) { + spin_unlock_irqrestore(&sp->siglock, flags); + put_task_struct(p); + goto retry; + } ret = __group_send_sig_info(sig, info, p); - spin_unlock_irqrestore(&p->sighand->siglock, flags); + spin_unlock_irqrestore(&sp->siglock, flags); + put_task_struct(p); } return ret; @@ -1136,14 +1158,21 @@ int kill_proc_info(int sig, struct siginfo *info, pid_t pid) { int error; + int acquired_tasklist_lock = 0; struct task_struct *p; - read_lock(&tasklist_lock); + rcu_read_lock(); + if (unlikely(sig_kernel_stop(sig) || sig == SIGCONT)) { + read_lock(&tasklist_lock); + acquired_tasklist_lock = 1; + } p = find_task_by_pid(pid); error = -ESRCH; if (p) error = group_send_sig_info(sig, info, p); - read_unlock(&tasklist_lock); + if (unlikely(acquired_tasklist_lock)) + read_unlock(&tasklist_lock); + rcu_read_unlock(); return error; } 
@@ -1355,16 +1384,54 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) { unsigned long flags; int ret = 0; + struct sighand_struct *sh; BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); - read_lock(&tasklist_lock); + + /* + * The rcu based delayed sighand destroy makes it possible to + * run this without tasklist lock held. The task struct itself + * cannot go away as create_timer did get_task_struct(). + * + * We return -1, when the task is marked exiting, so + * posix_timer_event can redirect it to the group leader + */ + rcu_read_lock(); if (unlikely(p->flags & PF_EXITING)) { ret = -1; goto out_err; } - spin_lock_irqsave(&p->sighand->siglock, flags); +retry: + sh = rcu_dereference(p->sighand); + + spin_lock_irqsave(&sh->siglock, flags); + if (p->sighand != sh) { + /* We raced with exec() in a multithreaded process... */ + spin_unlock_irqrestore(&sh->siglock, flags); + goto retry; + } + + /* + * We do the check here again to handle the following scenario: + * + * CPU 0 CPU 1 + * send_sigqueue + * check PF_EXITING + * interrupt exit code running + * __exit_signal + * lock sighand->siglock + * unlock sighand->siglock + * lock sh->siglock + * add(tsk->pending) flush_sigqueue(tsk->pending) + * + */ + + if (unlikely(p->flags & PF_EXITING)) { + ret = -1; + goto out; + } if (unlikely(!list_empty(&q->list))) { /* @@ -1388,9 +1455,9 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) signal_wake_up(p, sig == SIGKILL); out: - spin_unlock_irqrestore(&p->sighand->siglock, flags); + spin_unlock_irqrestore(&sh->siglock, flags); out_err: - read_unlock(&tasklist_lock); + rcu_read_unlock(); return ret; } @@ -1402,7 +1469,9 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) int ret = 0; BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); + read_lock(&tasklist_lock); + /* Since it_lock is held, p->sighand cannot be NULL. 
*/ spin_lock_irqsave(&p->sighand->siglock, flags); handle_stop_signal(sig, p); @@ -1436,7 +1505,7 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p) out: spin_unlock_irqrestore(&p->sighand->siglock, flags); read_unlock(&tasklist_lock); - return(ret); + return ret; } /* -- cgit v1.2.3-71-gd317 From d4829cd5b4bd1ea58ba1bebad44d562f4027c290 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 8 Jan 2006 01:01:39 -0800 Subject: [PATCH] remove get_task_struct_rcu() The latest set of signal-RCU patches does not use get_task_struct_rcu(). Attached is a patch that removes it. Signed-off-by: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a6af77e9b4cf..20bd70749104 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -878,18 +878,6 @@ extern void free_task(struct task_struct *tsk); extern void __put_task_struct(struct task_struct *tsk); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) -static inline int get_task_struct_rcu(struct task_struct *t) -{ - int oldusage; - - do { - oldusage = atomic_read(&t->usage); - if (oldusage == 0) - return 0; - } while (cmpxchg(&t->usage.counter, oldusage, oldusage+1) != oldusage); - return 1; -} - extern void __put_task_struct_cb(struct rcu_head *rhp); static inline void put_task_struct(struct task_struct *t) -- cgit v1.2.3-71-gd317 From 10cef6029502915bdb3cf0821d425cf9dc30c817 Mon Sep 17 00:00:00 2001 From: Matt Mackall Date: Sun, 8 Jan 2006 01:01:45 -0800 Subject: [PATCH] slob: introduce the SLOB allocator configurable replacement for slab allocator This adds a CONFIG_SLAB option under CONFIG_EMBEDDED. When CONFIG_SLAB is disabled, the kernel falls back to using the 'SLOB' allocator. 
SLOB is a traditional K&R/UNIX allocator with a SLAB emulation layer, similar to the original Linux kmalloc allocator that SLAB replaced. It's significantly smaller code and is more memory efficient. But like all similar allocators, it scales poorly and suffers from fragmentation more than SLAB, so it's only appropriate for small systems. It's been tested extensively in the Linux-tiny tree. I've also stress-tested it with make -j 8 compiles on a 3G SMP+PREEMPT box (not recommended). Here's a comparison for otherwise identical builds, showing SLOB saving nearly half a megabyte of RAM: $ size vmlinux* text data bss dec hex filename 3336372 529360 190812 4056544 3de5e0 vmlinux-slab 3323208 527948 190684 4041840 3dac70 vmlinux-slob $ size mm/{slab,slob}.o text data bss dec hex filename 13221 752 48 14021 36c5 mm/slab.o 1896 52 8 1956 7a4 mm/slob.o /proc/meminfo: SLAB SLOB delta MemTotal: 27964 kB 27980 kB +16 kB MemFree: 24596 kB 25092 kB +496 kB Buffers: 36 kB 36 kB 0 kB Cached: 1188 kB 1188 kB 0 kB SwapCached: 0 kB 0 kB 0 kB Active: 608 kB 600 kB -8 kB Inactive: 808 kB 812 kB +4 kB HighTotal: 0 kB 0 kB 0 kB HighFree: 0 kB 0 kB 0 kB LowTotal: 27964 kB 27980 kB +16 kB LowFree: 24596 kB 25092 kB +496 kB SwapTotal: 0 kB 0 kB 0 kB SwapFree: 0 kB 0 kB 0 kB Dirty: 4 kB 12 kB +8 kB Writeback: 0 kB 0 kB 0 kB Mapped: 560 kB 556 kB -4 kB Slab: 1756 kB 0 kB -1756 kB CommitLimit: 13980 kB 13988 kB +8 kB Committed_AS: 4208 kB 4208 kB 0 kB PageTables: 28 kB 28 kB 0 kB VmallocTotal: 1007312 kB 1007312 kB 0 kB VmallocUsed: 48 kB 48 kB 0 kB VmallocChunk: 1007264 kB 1007264 kB 0 kB (this work has been sponsored in part by CELF) From: Ingo Molnar Fix 32-bitness bugs in mm/slob.c.
Signed-off-by: Matt Mackall Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 4 + include/linux/slab.h | 35 +++++ init/Kconfig | 13 ++ mm/Makefile | 4 +- mm/slob.c | 385 +++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 440 insertions(+), 1 deletion(-) create mode 100644 mm/slob.c (limited to 'include/linux') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 5b6b0b6038a7..63bf6c00fa0c 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -323,6 +323,7 @@ static struct file_operations proc_modules_operations = { }; #endif +#ifdef CONFIG_SLAB extern struct seq_operations slabinfo_op; extern ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *); static int slabinfo_open(struct inode *inode, struct file *file) @@ -336,6 +337,7 @@ static struct file_operations proc_slabinfo_operations = { .llseek = seq_lseek, .release = seq_release, }; +#endif static int show_stat(struct seq_file *p, void *v) { @@ -600,7 +602,9 @@ void __init proc_misc_init(void) create_seq_entry("partitions", 0, &proc_partitions_operations); create_seq_entry("stat", 0, &proc_stat_operations); create_seq_entry("interrupts", 0, &proc_interrupts_operations); +#ifdef CONFIG_SLAB create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); +#endif create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); diff --git a/include/linux/slab.h b/include/linux/slab.h index d1ea4051b996..1fb77a9cc148 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -53,6 +53,8 @@ typedef struct kmem_cache kmem_cache_t; #define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */ #define SLAB_CTOR_VERIFY 0x004UL /* tell constructor it's a verify call */ +#ifndef CONFIG_SLOB + /* prototypes */ extern void __init 
kmem_cache_init(void); @@ -134,6 +136,39 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) extern int FASTCALL(kmem_cache_reap(int)); extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)); +#else /* CONFIG_SLOB */ + +/* SLOB allocator routines */ + +void kmem_cache_init(void); +struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags); +struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t, + unsigned long, + void (*)(void *, struct kmem_cache *, unsigned long), + void (*)(void *, struct kmem_cache *, unsigned long)); +int kmem_cache_destroy(struct kmem_cache *c); +void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags); +void kmem_cache_free(struct kmem_cache *c, void *b); +const char *kmem_cache_name(struct kmem_cache *); +void *kmalloc(size_t size, gfp_t flags); +void *kzalloc(size_t size, gfp_t flags); +void kfree(const void *m); +unsigned int ksize(const void *m); +unsigned int kmem_cache_size(struct kmem_cache *c); + +static inline void *kcalloc(size_t n, size_t size, gfp_t flags) +{ + return kzalloc(n * size, flags); +} + +#define kmem_cache_shrink(d) (0) +#define kmem_cache_reap(a) +#define kmem_ptr_validate(a, b) (0) +#define kmem_cache_alloc_node(c, f, n) kmem_cache_alloc(c, f) +#define kmalloc_node(s, f, n) kmalloc(s, f) + +#endif /* CONFIG_SLOB */ + /* System wide caches */ extern kmem_cache_t *vm_area_cachep; extern kmem_cache_t *names_cachep; diff --git a/init/Kconfig b/init/Kconfig index ba42f3793a84..0c9932f9f06b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -380,6 +380,15 @@ config CC_ALIGN_JUMPS no dummy operations need be executed. Zero means use compiler's default. +config SLAB + default y + bool "Use full SLAB allocator" if EMBEDDED + help + Disabling this replaces the advanced SLAB allocator and + kmalloc support with the drastically simpler SLOB allocator. + SLOB is more space efficient but does not scale well and is + more susceptible to fragmentation. 
+ endmenu # General setup config TINY_SHMEM @@ -391,6 +400,10 @@ config BASE_SMALL default 0 if BASE_FULL default 1 if !BASE_FULL +config SLOB + default !SLAB + bool + menu "Loadable module support" config MODULES diff --git a/mm/Makefile b/mm/Makefile index 74c85ddc9176..9aa03fa1dcc3 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -9,7 +9,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ page_alloc.o page-writeback.o pdflush.o \ - readahead.o slab.o swap.o truncate.o vmscan.o \ + readahead.o swap.o truncate.o vmscan.o \ prio_tree.o util.o $(mmu-y) obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o @@ -18,5 +18,7 @@ obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o obj-$(CONFIG_SHMEM) += shmem.o obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o +obj-$(CONFIG_SLOB) += slob.o +obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o diff --git a/mm/slob.c b/mm/slob.c new file mode 100644 index 000000000000..1c240c4b71d9 --- /dev/null +++ b/mm/slob.c @@ -0,0 +1,385 @@ +/* + * SLOB Allocator: Simple List Of Blocks + * + * Matt Mackall 12/30/03 + * + * How SLOB works: + * + * The core of SLOB is a traditional K&R style heap allocator, with + * support for returning aligned objects. The granularity of this + * allocator is 8 bytes on x86, though it's perhaps possible to reduce + * this to 4 if it's deemed worth the effort. The slob heap is a + * singly-linked list of pages from __get_free_page, grown on demand + * and allocation from the heap is currently first-fit. + * + * Above this is an implementation of kmalloc/kfree. Blocks returned + * from kmalloc are 8-byte aligned and prepended with a 8-byte header. 
+ * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls + * __get_free_pages directly so that it can return page-aligned blocks + * and keeps a linked list of such pages and their orders. These + * objects are detected in kfree() by their page alignment. + * + * SLAB is emulated on top of SLOB by simply calling constructors and + * destructors for every SLAB allocation. Objects are returned with + * the 8-byte alignment unless the SLAB_MUST_HWCACHE_ALIGN flag is + * set, in which case the low-level allocator will fragment blocks to + * create the proper alignment. Again, objects of page-size or greater + * are allocated by calling __get_free_pages. As SLAB objects know + * their size, no separate size bookkeeping is necessary and there is + * essentially no allocation space overhead. + */ + +#include +#include +#include +#include +#include +#include +#include + +struct slob_block { + int units; + struct slob_block *next; +}; +typedef struct slob_block slob_t; + +#define SLOB_UNIT sizeof(slob_t) +#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT) +#define SLOB_ALIGN L1_CACHE_BYTES + +struct bigblock { + int order; + void *pages; + struct bigblock *next; +}; +typedef struct bigblock bigblock_t; + +static slob_t arena = { .next = &arena, .units = 1 }; +static slob_t *slobfree = &arena; +static bigblock_t *bigblocks; +static DEFINE_SPINLOCK(slob_lock); +static DEFINE_SPINLOCK(block_lock); + +static void slob_free(void *b, int size); + +static void *slob_alloc(size_t size, gfp_t gfp, int align) +{ + slob_t *prev, *cur, *aligned = 0; + int delta = 0, units = SLOB_UNITS(size); + unsigned long flags; + + spin_lock_irqsave(&slob_lock, flags); + prev = slobfree; + for (cur = prev->next; ; prev = cur, cur = cur->next) { + if (align) { + aligned = (slob_t *)ALIGN((unsigned long)cur, align); + delta = aligned - cur; + } + if (cur->units >= units + delta) { /* room enough? */ + if (delta) { /* need to fragment head to align? 
*/ + aligned->units = cur->units - delta; + aligned->next = cur->next; + cur->next = aligned; + cur->units = delta; + prev = cur; + cur = aligned; + } + + if (cur->units == units) /* exact fit? */ + prev->next = cur->next; /* unlink */ + else { /* fragment */ + prev->next = cur + units; + prev->next->units = cur->units - units; + prev->next->next = cur->next; + cur->units = units; + } + + slobfree = prev; + spin_unlock_irqrestore(&slob_lock, flags); + return cur; + } + if (cur == slobfree) { + spin_unlock_irqrestore(&slob_lock, flags); + + if (size == PAGE_SIZE) /* trying to shrink arena? */ + return 0; + + cur = (slob_t *)__get_free_page(gfp); + if (!cur) + return 0; + + slob_free(cur, PAGE_SIZE); + spin_lock_irqsave(&slob_lock, flags); + cur = slobfree; + } + } +} + +static void slob_free(void *block, int size) +{ + slob_t *cur, *b = (slob_t *)block; + unsigned long flags; + + if (!block) + return; + + if (size) + b->units = SLOB_UNITS(size); + + /* Find reinsertion point */ + spin_lock_irqsave(&slob_lock, flags); + for (cur = slobfree; !(b > cur && b < cur->next); cur = cur->next) + if (cur >= cur->next && (b > cur || b < cur->next)) + break; + + if (b + b->units == cur->next) { + b->units += cur->next->units; + b->next = cur->next->next; + } else + b->next = cur->next; + + if (cur + cur->units == b) { + cur->units += b->units; + cur->next = b->next; + } else + cur->next = b; + + slobfree = cur; + + spin_unlock_irqrestore(&slob_lock, flags); +} + +static int FASTCALL(find_order(int size)); +static int fastcall find_order(int size) +{ + int order = 0; + for ( ; size > 4096 ; size >>=1) + order++; + return order; +} + +void *kmalloc(size_t size, gfp_t gfp) +{ + slob_t *m; + bigblock_t *bb; + unsigned long flags; + + if (size < PAGE_SIZE - SLOB_UNIT) { + m = slob_alloc(size + SLOB_UNIT, gfp, 0); + return m ? 
(void *)(m + 1) : 0; + } + + bb = slob_alloc(sizeof(bigblock_t), gfp, 0); + if (!bb) + return 0; + + bb->order = find_order(size); + bb->pages = (void *)__get_free_pages(gfp, bb->order); + + if (bb->pages) { + spin_lock_irqsave(&block_lock, flags); + bb->next = bigblocks; + bigblocks = bb; + spin_unlock_irqrestore(&block_lock, flags); + return bb->pages; + } + + slob_free(bb, sizeof(bigblock_t)); + return 0; +} + +EXPORT_SYMBOL(kmalloc); + +void kfree(const void *block) +{ + bigblock_t *bb, **last = &bigblocks; + unsigned long flags; + + if (!block) + return; + + if (!((unsigned long)block & (PAGE_SIZE-1))) { + /* might be on the big block list */ + spin_lock_irqsave(&block_lock, flags); + for (bb = bigblocks; bb; last = &bb->next, bb = bb->next) { + if (bb->pages == block) { + *last = bb->next; + spin_unlock_irqrestore(&block_lock, flags); + free_pages((unsigned long)block, bb->order); + slob_free(bb, sizeof(bigblock_t)); + return; + } + } + spin_unlock_irqrestore(&block_lock, flags); + } + + slob_free((slob_t *)block - 1, 0); + return; +} + +EXPORT_SYMBOL(kfree); + +unsigned int ksize(const void *block) +{ + bigblock_t *bb; + unsigned long flags; + + if (!block) + return 0; + + if (!((unsigned long)block & (PAGE_SIZE-1))) { + spin_lock_irqsave(&block_lock, flags); + for (bb = bigblocks; bb; bb = bb->next) + if (bb->pages == block) { + spin_unlock_irqrestore(&slob_lock, flags); + return PAGE_SIZE << bb->order; + } + spin_unlock_irqrestore(&block_lock, flags); + } + + return ((slob_t *)block - 1)->units * SLOB_UNIT; +} + +struct kmem_cache { + unsigned int size, align; + const char *name; + void (*ctor)(void *, struct kmem_cache *, unsigned long); + void (*dtor)(void *, struct kmem_cache *, unsigned long); +}; + +struct kmem_cache *kmem_cache_create(const char *name, size_t size, + size_t align, unsigned long flags, + void (*ctor)(void*, struct kmem_cache *, unsigned long), + void (*dtor)(void*, struct kmem_cache *, unsigned long)) +{ + struct kmem_cache *c; + + c 
= slob_alloc(sizeof(struct kmem_cache), flags, 0); + + if (c) { + c->name = name; + c->size = size; + c->ctor = ctor; + c->dtor = dtor; + /* ignore alignment unless it's forced */ + c->align = (flags & SLAB_MUST_HWCACHE_ALIGN) ? SLOB_ALIGN : 0; + if (c->align < align) + c->align = align; + } + + return c; +} +EXPORT_SYMBOL(kmem_cache_create); + +int kmem_cache_destroy(struct kmem_cache *c) +{ + slob_free(c, sizeof(struct kmem_cache)); + return 0; +} +EXPORT_SYMBOL(kmem_cache_destroy); + +void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags) +{ + void *b; + + if (c->size < PAGE_SIZE) + b = slob_alloc(c->size, flags, c->align); + else + b = (void *)__get_free_pages(flags, find_order(c->size)); + + if (c->ctor) + c->ctor(b, c, SLAB_CTOR_CONSTRUCTOR); + + return b; +} +EXPORT_SYMBOL(kmem_cache_alloc); + +void kmem_cache_free(struct kmem_cache *c, void *b) +{ + if (c->dtor) + c->dtor(b, c, 0); + + if (c->size < PAGE_SIZE) + slob_free(b, c->size); + else + free_pages((unsigned long)b, find_order(c->size)); +} +EXPORT_SYMBOL(kmem_cache_free); + +unsigned int kmem_cache_size(struct kmem_cache *c) +{ + return c->size; +} +EXPORT_SYMBOL(kmem_cache_size); + +const char *kmem_cache_name(struct kmem_cache *c) +{ + return c->name; +} +EXPORT_SYMBOL(kmem_cache_name); + +static struct timer_list slob_timer = TIMER_INITIALIZER( + (void (*)(unsigned long))kmem_cache_init, 0, 0); + +void kmem_cache_init(void) +{ + void *p = slob_alloc(PAGE_SIZE, 0, PAGE_SIZE-1); + + if (p) + free_page((unsigned long)p); + + mod_timer(&slob_timer, jiffies + HZ); +} + +atomic_t slab_reclaim_pages = ATOMIC_INIT(0); +EXPORT_SYMBOL(slab_reclaim_pages); + +#ifdef CONFIG_SMP + +void *__alloc_percpu(size_t size, size_t align) +{ + int i; + struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL); + + if (!pdata) + return NULL; + + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_possible(i)) + continue; + pdata->ptrs[i] = kmalloc(size, GFP_KERNEL); + if (!pdata->ptrs[i]) + goto unwind_oom; + 
memset(pdata->ptrs[i], 0, size); + } + + /* Catch derefs w/o wrappers */ + return (void *) (~(unsigned long) pdata); + +unwind_oom: + while (--i >= 0) { + if (!cpu_possible(i)) + continue; + kfree(pdata->ptrs[i]); + } + kfree(pdata); + return NULL; +} +EXPORT_SYMBOL(__alloc_percpu); + +void +free_percpu(const void *objp) +{ + int i; + struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp); + + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_possible(i)) + continue; + kfree(p->ptrs[i]); + } + kfree(p); +} +EXPORT_SYMBOL(free_percpu); + +#endif -- cgit v1.2.3-71-gd317 From 5966514db662fb24c9bb43226a80106bcffd51f8 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Sun, 8 Jan 2006 01:01:47 -0800 Subject: [PATCH] cpuset: mempolicy one more nodemask conversion Finish converting mm/mempolicy.c from bitmaps to nodemasks. The previous conversion had left one routine using bitmaps, since it involved a corresponding change to kernel/cpuset.c Fix that interface by replacing with a simple macro that calls nodes_subset(), or if !CONFIG_CPUSET, returns (1). 
Signed-off-by: Paul Jackson Cc: Christoph Lameter Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 5 +++-- kernel/cpuset.c | 10 ---------- mm/mempolicy.c | 5 ++--- 3 files changed, 5 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 6e2deef96b34..8b21786490ee 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -21,7 +21,8 @@ extern void cpuset_exit(struct task_struct *p); extern cpumask_t cpuset_cpus_allowed(const struct task_struct *p); void cpuset_init_current_mems_allowed(void); void cpuset_update_current_mems_allowed(void); -void cpuset_restrict_to_mems_allowed(unsigned long *nodes); +#define cpuset_nodes_subset_current_mems_allowed(nodes) \ + nodes_subset((nodes), current->mems_allowed) int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); extern int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask); extern int cpuset_excl_nodes_overlap(const struct task_struct *p); @@ -42,7 +43,7 @@ static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p) static inline void cpuset_init_current_mems_allowed(void) {} static inline void cpuset_update_current_mems_allowed(void) {} -static inline void cpuset_restrict_to_mems_allowed(unsigned long *nodes) {} +#define cpuset_nodes_subset_current_mems_allowed(nodes) (1) static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f63383e01ec7..6503c6da4c4f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1749,16 +1749,6 @@ done: refresh_mems(); } -/** - * cpuset_restrict_to_mems_allowed - limit nodes to current mems_allowed - * @nodes: pointer to a node bitmap that is and-ed with mems_allowed - */ -void cpuset_restrict_to_mems_allowed(unsigned long *nodes) -{ - bitmap_and(nodes, nodes, nodes_addr(current->mems_allowed), - MAX_NUMNODES); -} - /** * cpuset_zonelist_valid_mems_allowed - check zonelist 
vs. curremt mems_allowed * @zl: the zonelist to be checked diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 7051fe450e96..9dea2b8a7d48 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -387,10 +387,9 @@ static int contextualize_policy(int mode, nodemask_t *nodes) if (!nodes) return 0; - /* Update current mems_allowed */ cpuset_update_current_mems_allowed(); - /* Ignore nodes not set in current->mems_allowed */ - cpuset_restrict_to_mems_allowed(nodes->bits); + if (!cpuset_nodes_subset_current_mems_allowed(*nodes)) + return -EINVAL; return mpol_check_policy(mode, nodes); } -- cgit v1.2.3-71-gd317 From 3e0d98b9f1eb757fc98efc84e74e54a08308aa73 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Sun, 8 Jan 2006 01:01:49 -0800 Subject: [PATCH] cpuset: memory pressure meter Provide a simple per-cpuset metric of memory pressure, tracking the -rate- that the tasks in a cpuset call try_to_free_pages(), the synchronous (direct) memory reclaim code. This enables batch managers monitoring jobs running in dedicated cpusets to efficiently detect what level of memory pressure that job is causing. This is useful both on tightly managed systems running a wide mix of submitted jobs, which may choose to terminate or reprioritize jobs that are trying to use more memory than allowed on the nodes assigned them, and with tightly coupled, long running, massively parallel scientific computing jobs that will dramatically fail to meet required performance goals if they start to use more memory than allowed to them. This patch just provides a very economical way for the batch manager to monitor a cpuset for signs of memory pressure. It's up to the batch manager or other user code to decide what to do about it and take action. ==> Unless this feature is enabled by writing "1" to the special file /dev/cpuset/memory_pressure_enabled, the hook in the rebalance code of __alloc_pages() for this metric reduces to simply noticing that the cpuset_memory_pressure_enabled flag is zero. 
So only systems that enable this feature will compute the metric. Why a per-cpuset running average: Because this meter is per-cpuset, rather than per-task or mm, the system load imposed by a batch scheduler monitoring this metric is sharply reduced on large systems, because a scan of the tasklist can be avoided on each set of queries. Because this meter is a running average, instead of an accumulating counter, a batch scheduler can detect memory pressure with a single read, instead of having to read and accumulate results for a period of time. Because this meter is per-cpuset rather than per-task or mm, the batch scheduler can obtain the key information, memory pressure in a cpuset, with a single read, rather than having to query and accumulate results over the entire (dynamically changing) set of tasks in the cpuset. A per-cpuset simple digital filter (requires a spinlock and 3 words of data per-cpuset) is kept, and updated by any task attached to that cpuset, if it enters the synchronous (direct) page reclaim code. A per-cpuset file provides an integer number representing the recent (half-life of 10 seconds) rate of direct page reclaims caused by the tasks in the cpuset, in units of reclaims attempted per second, times 1000. 
Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 11 +++ kernel/cpuset.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++- mm/page_alloc.c | 1 + 3 files changed, 203 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 8b21786490ee..736d73801cb6 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -26,6 +26,15 @@ void cpuset_update_current_mems_allowed(void); int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); extern int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask); extern int cpuset_excl_nodes_overlap(const struct task_struct *p); + +#define cpuset_memory_pressure_bump() \ + do { \ + if (cpuset_memory_pressure_enabled) \ + __cpuset_memory_pressure_bump(); \ + } while (0) +extern int cpuset_memory_pressure_enabled; +extern void __cpuset_memory_pressure_bump(void); + extern struct file_operations proc_cpuset_operations; extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); @@ -60,6 +69,8 @@ static inline int cpuset_excl_nodes_overlap(const struct task_struct *p) return 1; } +static inline void cpuset_memory_pressure_bump(void) {} + static inline char *cpuset_task_status_allowed(struct task_struct *task, char *buffer) { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 6503c6da4c4f..5a06fef669f8 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -56,6 +56,15 @@ #define CPUSET_SUPER_MAGIC 0x27e0eb +/* See "Frequency meter" comments, below. 
*/ + +struct fmeter { + int cnt; /* unprocessed events count */ + int val; /* most recent output value */ + time_t time; /* clock (secs) when val computed */ + spinlock_t lock; /* guards read or write of above */ +}; + struct cpuset { unsigned long flags; /* "unsigned long" so bitops work */ cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ @@ -80,7 +89,9 @@ struct cpuset { * Copy of global cpuset_mems_generation as of the most * recent time this cpuset changed its mems_allowed. */ - int mems_generation; + int mems_generation; + + struct fmeter fmeter; /* memory_pressure filter */ }; /* bits in struct cpuset flags field */ @@ -149,7 +160,7 @@ static struct cpuset top_cpuset = { }; static struct vfsmount *cpuset_mount; -static struct super_block *cpuset_sb = NULL; +static struct super_block *cpuset_sb; /* * We have two global cpuset semaphores below. They can nest. @@ -806,6 +817,19 @@ static int update_nodemask(struct cpuset *cs, char *buf) return retval; } +/* + * Call with manage_sem held. + */ + +static int update_memory_pressure_enabled(struct cpuset *cs, char *buf) +{ + if (simple_strtoul(buf, NULL, 10) != 0) + cpuset_memory_pressure_enabled = 1; + else + cpuset_memory_pressure_enabled = 0; + return 0; +} + /* * update_flag - read a 0 or a 1 in a file and update associated flag * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, @@ -847,6 +871,104 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf) return 0; } +/* + * Frequency meter - How fast is some event occuring? + * + * These routines manage a digitally filtered, constant time based, + * event frequency meter. There are four routines: + * fmeter_init() - initialize a frequency meter. + * fmeter_markevent() - called each time the event happens. + * fmeter_getrate() - returns the recent rate of such events. + * fmeter_update() - internal routine used to update fmeter. 
+ * + * A common data structure is passed to each of these routines, + * which is used to keep track of the state required to manage the + * frequency meter and its digital filter. + * + * The filter works on the number of events marked per unit time. + * The filter is single-pole low-pass recursive (IIR). The time unit + * is 1 second. Arithmetic is done using 32-bit integers scaled to + * simulate 3 decimal digits of precision (multiplied by 1000). + * + * With an FM_COEF of 933, and a time base of 1 second, the filter + * has a half-life of 10 seconds, meaning that if the events quit + * happening, then the rate returned from the fmeter_getrate() + * will be cut in half each 10 seconds, until it converges to zero. + * + * It is not worth doing a real infinitely recursive filter. If more + * than FM_MAXTICKS ticks have elapsed since the last filter event, + * just compute FM_MAXTICKS ticks worth, by which point the level + * will be stable. + * + * Limit the count of unprocessed events to FM_MAXCNT, so as to avoid + * arithmetic overflow in the fmeter_update() routine. + * + * Given the simple 32 bit integer arithmetic used, this meter works + * best for reporting rates between one per millisecond (msec) and + * one per 32 (approx) seconds. At constant rates faster than one + * per msec it maxes out at values just under 1,000,000. At constant + * rates between one per msec, and one per second it will stabilize + * to a value N*1000, where N is the rate of events per second. + * At constant rates between one per second and one per 32 seconds, + * it will be choppy, moving up on the seconds that have an event, + * and then decaying until the next event. At rates slower than + * about one in 32 seconds, it decays all the way back to zero between + * each event. 
+ */ + +#define FM_COEF 933 /* coefficient for half-life of 10 secs */ +#define FM_MAXTICKS ((time_t)99) /* useless computing more ticks than this */ +#define FM_MAXCNT 1000000 /* limit cnt to avoid overflow */ +#define FM_SCALE 1000 /* faux fixed point scale */ + +/* Initialize a frequency meter */ +static void fmeter_init(struct fmeter *fmp) +{ + fmp->cnt = 0; + fmp->val = 0; + fmp->time = 0; + spin_lock_init(&fmp->lock); +} + +/* Internal meter update - process cnt events and update value */ +static void fmeter_update(struct fmeter *fmp) +{ + time_t now = get_seconds(); + time_t ticks = now - fmp->time; + + if (ticks == 0) + return; + + ticks = min(FM_MAXTICKS, ticks); + while (ticks-- > 0) + fmp->val = (FM_COEF * fmp->val) / FM_SCALE; + fmp->time = now; + + fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE; + fmp->cnt = 0; +} + +/* Process any previous ticks, then bump cnt by one (times scale). */ +static void fmeter_markevent(struct fmeter *fmp) +{ + spin_lock(&fmp->lock); + fmeter_update(fmp); + fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE); + spin_unlock(&fmp->lock); +} + +/* Process any previous ticks, then return current value. 
*/ +static int fmeter_getrate(struct fmeter *fmp) +{ + int val; + + spin_lock(&fmp->lock); + fmeter_update(fmp); + val = fmp->val; + spin_unlock(&fmp->lock); + return val; +} + /* * Attack task specified by pid in 'pidbuf' to cpuset 'cs', possibly * writing the path of the old cpuset in 'ppathbuf' if it needs to be @@ -931,6 +1053,8 @@ typedef enum { FILE_CPU_EXCLUSIVE, FILE_MEM_EXCLUSIVE, FILE_NOTIFY_ON_RELEASE, + FILE_MEMORY_PRESSURE_ENABLED, + FILE_MEMORY_PRESSURE, FILE_TASKLIST, } cpuset_filetype_t; @@ -984,6 +1108,12 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us case FILE_MEMORY_MIGRATE: retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); break; + case FILE_MEMORY_PRESSURE_ENABLED: + retval = update_memory_pressure_enabled(cs, buffer); + break; + case FILE_MEMORY_PRESSURE: + retval = -EACCES; + break; case FILE_TASKLIST: retval = attach_task(cs, buffer, &pathbuf); break; @@ -1087,6 +1217,12 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, case FILE_MEMORY_MIGRATE: *s++ = is_memory_migrate(cs) ? '1' : '0'; break; + case FILE_MEMORY_PRESSURE_ENABLED: + *s++ = cpuset_memory_pressure_enabled ? 
'1' : '0'; + break; + case FILE_MEMORY_PRESSURE: + s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter)); + break; default: retval = -EINVAL; goto out; @@ -1440,6 +1576,16 @@ static struct cftype cft_memory_migrate = { .private = FILE_MEMORY_MIGRATE, }; +static struct cftype cft_memory_pressure_enabled = { + .name = "memory_pressure_enabled", + .private = FILE_MEMORY_PRESSURE_ENABLED, +}; + +static struct cftype cft_memory_pressure = { + .name = "memory_pressure", + .private = FILE_MEMORY_PRESSURE, +}; + static int cpuset_populate_dir(struct dentry *cs_dentry) { int err; @@ -1456,6 +1602,8 @@ static int cpuset_populate_dir(struct dentry *cs_dentry) return err; if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0) return err; + if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0) + return err; if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0) return err; return 0; @@ -1491,6 +1639,7 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode) INIT_LIST_HEAD(&cs->children); atomic_inc(&cpuset_mems_generation); cs->mems_generation = atomic_read(&cpuset_mems_generation); + fmeter_init(&cs->fmeter); cs->parent = parent; @@ -1580,6 +1729,7 @@ int __init cpuset_init(void) top_cpuset.cpus_allowed = CPU_MASK_ALL; top_cpuset.mems_allowed = NODE_MASK_ALL; + fmeter_init(&top_cpuset.fmeter); atomic_inc(&cpuset_mems_generation); top_cpuset.mems_generation = atomic_read(&cpuset_mems_generation); @@ -1601,6 +1751,9 @@ int __init cpuset_init(void) top_cpuset.dentry = root; root->d_inode->i_op = &cpuset_dir_inode_operations; err = cpuset_populate_dir(root); + /* memory_pressure_enabled is in root cpuset only */ + if (err == 0) + err = cpuset_add_file(root, &cft_memory_pressure_enabled); out: return err; } @@ -1890,6 +2043,42 @@ done: return overlap; } +/* + * Collection of memory_pressure is suppressed unless + * this flag is enabled by writing "1" to the special + * cpuset file 'memory_pressure_enabled' in the root cpuset. 
+ */ + +int cpuset_memory_pressure_enabled; + +/** + * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims. + * + * Keep a running average of the rate of synchronous (direct) + * page reclaim efforts initiated by tasks in each cpuset. + * + * This represents the rate at which some task in the cpuset + * ran low on memory on all nodes it was allowed to use, and + * had to enter the kernels page reclaim code in an effort to + * create more free memory by tossing clean pages or swapping + * or writing dirty pages. + * + * Display to user space in the per-cpuset read-only file + * "memory_pressure". Value displayed is an integer + * representing the recent rate of entry into the synchronous + * (direct) page reclaim by any task attached to the cpuset. + **/ + +void __cpuset_memory_pressure_bump(void) +{ + struct cpuset *cs; + + task_lock(current); + cs = current->cpuset; + fmeter_markevent(&cs->fmeter); + task_unlock(current); +} + /* * proc_cpuset_show() * - Print tasks cpuset path into seq_file. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ad3d0202cdef..e0e84924171b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -976,6 +976,7 @@ rebalance: cond_resched(); /* We now go into synchronous reclaim */ + cpuset_memory_pressure_bump(); p->flags |= PF_MEMALLOC; reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; -- cgit v1.2.3-71-gd317 From cf2a473c4089aa41c26f653200673f5a4cc25047 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Sun, 8 Jan 2006 01:01:54 -0800 Subject: [PATCH] cpuset: combine refresh_mems and update_mems The important code paths through alloc_pages_current() and alloc_page_vma(), by which most kernel page allocations go, both called cpuset_update_current_mems_allowed(), which in turn called refresh_mems(). -Both- of these latter two routines did a tasklock, got the tasks cpuset pointer, and checked for out of date cpuset->mems_generation. 
That was a silly duplication of code and waste of CPU cycles on an important code path. Consolidated those two routines into a single routine, called cpuset_update_task_memory_state(), since it updates more than just mems_allowed. Changed all callers of either routine to call the new consolidated routine. Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 4 +-- kernel/cpuset.c | 95 ++++++++++++++++++++++---------------------------- mm/mempolicy.c | 10 +++--- 3 files changed, 48 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 736d73801cb6..1feebf16ab08 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -20,7 +20,7 @@ extern void cpuset_fork(struct task_struct *p); extern void cpuset_exit(struct task_struct *p); extern cpumask_t cpuset_cpus_allowed(const struct task_struct *p); void cpuset_init_current_mems_allowed(void); -void cpuset_update_current_mems_allowed(void); +void cpuset_update_task_memory_state(void); #define cpuset_nodes_subset_current_mems_allowed(nodes) \ nodes_subset((nodes), current->mems_allowed) int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); @@ -51,7 +51,7 @@ static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p) } static inline void cpuset_init_current_mems_allowed(void) {} -static inline void cpuset_update_current_mems_allowed(void) {} +static inline void cpuset_update_task_memory_state(void) {} #define cpuset_nodes_subset_current_mems_allowed(nodes) (1) static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index d9349cc48b95..e9917d71628a 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -584,13 +584,26 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) BUG_ON(!nodes_intersects(*pmask, node_online_map)); } -/* - * Refresh current tasks mems_allowed and 
mems_generation from current - * tasks cpuset. +/** + * cpuset_update_task_memory_state - update task memory placement * - * Call without callback_sem or task_lock() held. May be called with - * or without manage_sem held. Will acquire task_lock() and might - * acquire callback_sem during call. + * If the current tasks cpusets mems_allowed changed behind our + * backs, update current->mems_allowed, mems_generation and task NUMA + * mempolicy to the new value. + * + * Task mempolicy is updated by rebinding it relative to the + * current->cpuset if a task has its memory placement changed. + * Do not call this routine if in_interrupt(). + * + * Call without callback_sem or task_lock() held. May be called + * with or without manage_sem held. Except in early boot or + * an exiting task, when tsk->cpuset is NULL, this routine will + * acquire task_lock(). We don't need to use task_lock to guard + * against another task changing a non-NULL cpuset pointer to NULL, + * as that is only done by a task on itself, and if the current task + * is here, it is not simultaneously in the exit code NULL'ing its + * cpuset pointer. This routine also might acquire callback_sem and + * current->mm->mmap_sem during call. * * The task_lock() is required to dereference current->cpuset safely. * Without it, we could pick up the pointer value of current->cpuset @@ -605,32 +618,36 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) * task has been modifying its cpuset. 
*/ -static void refresh_mems(void) +void cpuset_update_task_memory_state() { int my_cpusets_mem_gen; + struct task_struct *tsk = current; + struct cpuset *cs = tsk->cpuset; - task_lock(current); - my_cpusets_mem_gen = current->cpuset->mems_generation; - task_unlock(current); + if (unlikely(!cs)) + return; + + task_lock(tsk); + my_cpusets_mem_gen = cs->mems_generation; + task_unlock(tsk); - if (current->cpuset_mems_generation != my_cpusets_mem_gen) { - struct cpuset *cs; - nodemask_t oldmem = current->mems_allowed; + if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { + nodemask_t oldmem = tsk->mems_allowed; int migrate; down(&callback_sem); - task_lock(current); - cs = current->cpuset; + task_lock(tsk); + cs = tsk->cpuset; /* Maybe changed when task not locked */ migrate = is_memory_migrate(cs); - guarantee_online_mems(cs, &current->mems_allowed); - current->cpuset_mems_generation = cs->mems_generation; - task_unlock(current); + guarantee_online_mems(cs, &tsk->mems_allowed); + tsk->cpuset_mems_generation = cs->mems_generation; + task_unlock(tsk); up(&callback_sem); - if (!nodes_equal(oldmem, current->mems_allowed)) { - numa_policy_rebind(&oldmem, &current->mems_allowed); + numa_policy_rebind(&oldmem, &tsk->mems_allowed); + if (!nodes_equal(oldmem, tsk->mems_allowed)) { if (migrate) { - do_migrate_pages(current->mm, &oldmem, - &current->mems_allowed, + do_migrate_pages(tsk->mm, &oldmem, + &tsk->mems_allowed, MPOL_MF_MOVE_ALL); } } @@ -1630,7 +1647,7 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode) return -ENOMEM; down(&manage_sem); - refresh_mems(); + cpuset_update_task_memory_state(); cs->flags = 0; if (notify_on_release(parent)) set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); @@ -1688,7 +1705,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) /* the vfs holds both inode->i_sem already */ down(&manage_sem); - refresh_mems(); + cpuset_update_task_memory_state(); if (atomic_read(&cs->count) > 0) { up(&manage_sem); return
-EBUSY; @@ -1872,36 +1889,6 @@ void cpuset_init_current_mems_allowed(void) current->mems_allowed = NODE_MASK_ALL; } -/** - * cpuset_update_current_mems_allowed - update mems parameters to new values - * - * If the current tasks cpusets mems_allowed changed behind our backs, - * update current->mems_allowed and mems_generation to the new value. - * Do not call this routine if in_interrupt(). - * - * Call without callback_sem or task_lock() held. May be called - * with or without manage_sem held. Unless exiting, it will acquire - * task_lock(). Also might acquire callback_sem during call to - * refresh_mems(). - */ - -void cpuset_update_current_mems_allowed(void) -{ - struct cpuset *cs; - int need_to_refresh = 0; - - task_lock(current); - cs = current->cpuset; - if (!cs) - goto done; - if (current->cpuset_mems_generation != cs->mems_generation) - need_to_refresh = 1; -done: - task_unlock(current); - if (need_to_refresh) - refresh_mems(); -} - /** * cpuset_zonelist_valid_mems_allowed - check zonelist vs. 
curremt mems_allowed * @zl: the zonelist to be checked diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9dea2b8a7d48..515bfeee027e 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -387,7 +387,7 @@ static int contextualize_policy(int mode, nodemask_t *nodes) if (!nodes) return 0; - cpuset_update_current_mems_allowed(); + cpuset_update_task_memory_state(); if (!cpuset_nodes_subset_current_mems_allowed(*nodes)) return -EINVAL; return mpol_check_policy(mode, nodes); @@ -461,7 +461,7 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask, struct vm_area_struct *vma = NULL; struct mempolicy *pol = current->mempolicy; - cpuset_update_current_mems_allowed(); + cpuset_update_task_memory_state(); if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR)) return -EINVAL; if (flags & MPOL_F_ADDR) { @@ -1089,7 +1089,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = get_vma_policy(current, vma, addr); - cpuset_update_current_mems_allowed(); + cpuset_update_task_memory_state(); if (unlikely(pol->policy == MPOL_INTERLEAVE)) { unsigned nid; @@ -1115,7 +1115,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) * interrupt context and apply the current process NUMA policy. * Returns NULL when no page can be allocated. * - * Don't call cpuset_update_current_mems_allowed() unless + * Don't call cpuset_update_task_memory_state() unless * 1) it's ok to take cpuset_sem (can WAIT), and * 2) allocating for current task (not interrupt). 
*/ @@ -1124,7 +1124,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) struct mempolicy *pol = current->mempolicy; if ((gfp & __GFP_WAIT) && !in_interrupt()) - cpuset_update_current_mems_allowed(); + cpuset_update_task_memory_state(); if (!pol || in_interrupt()) pol = &default_policy; if (pol->policy == MPOL_INTERLEAVE) -- cgit v1.2.3-71-gd317 From 909d75a3b77bdd8baa9429bad3b69a654d2954ce Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Sun, 8 Jan 2006 01:01:55 -0800 Subject: [PATCH] cpuset: implement cpuset_mems_allowed Provide a cpuset_mems_allowed() method, which the sys_migrate_pages() code needed, to obtain the mems_allowed vector of a cpuset, and replaced the workaround in sys_migrate_pages() to call this new method. Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 8 +++++++- kernel/cpuset.c | 29 ++++++++++++++++++++++++++--- mm/mempolicy.c | 3 --- 3 files changed, 33 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 1feebf16ab08..37d2dd7ca3e9 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -18,7 +18,8 @@ extern int cpuset_init(void); extern void cpuset_init_smp(void); extern void cpuset_fork(struct task_struct *p); extern void cpuset_exit(struct task_struct *p); -extern cpumask_t cpuset_cpus_allowed(const struct task_struct *p); +extern cpumask_t cpuset_cpus_allowed(struct task_struct *p); +extern nodemask_t cpuset_mems_allowed(struct task_struct *p); void cpuset_init_current_mems_allowed(void); void cpuset_update_task_memory_state(void); #define cpuset_nodes_subset_current_mems_allowed(nodes) \ @@ -50,6 +51,11 @@ static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p) return cpu_possible_map; } +static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) +{ + return node_possible_map; +} + static inline void cpuset_init_current_mems_allowed(void) {} static inline 
void cpuset_update_task_memory_state(void) {} #define cpuset_nodes_subset_current_mems_allowed(nodes) (1) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index e9917d71628a..0d0dbbd6560a 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1871,14 +1871,14 @@ void cpuset_exit(struct task_struct *tsk) * tasks cpuset. **/ -cpumask_t cpuset_cpus_allowed(const struct task_struct *tsk) +cpumask_t cpuset_cpus_allowed(struct task_struct *tsk) { cpumask_t mask; down(&callback_sem); - task_lock((struct task_struct *)tsk); + task_lock(tsk); guarantee_online_cpus(tsk->cpuset, &mask); - task_unlock((struct task_struct *)tsk); + task_unlock(tsk); up(&callback_sem); return mask; @@ -1889,6 +1889,29 @@ void cpuset_init_current_mems_allowed(void) current->mems_allowed = NODE_MASK_ALL; } +/** + * cpuset_mems_allowed - return mems_allowed mask from a tasks cpuset. + * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed. + * + * Description: Returns the nodemask_t mems_allowed of the cpuset + * attached to the specified @tsk. Guaranteed to return some non-empty + * subset of node_online_map, even if this means going outside the + * tasks cpuset. + **/ + +nodemask_t cpuset_mems_allowed(struct task_struct *tsk) +{ + nodemask_t mask; + + down(&callback_sem); + task_lock(tsk); + guarantee_online_mems(tsk->cpuset, &mask); + task_unlock(tsk); + up(&callback_sem); + + return mask; +} + /** * cpuset_zonelist_valid_mems_allowed - check zonelist vs. 
curremt mems_allowed * @zl: the zonelist to be checked diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 515bfeee027e..34d566ac147f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -772,9 +772,6 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, return do_set_mempolicy(mode, &nodes); } -/* Macro needed until Paul implements this function in kernel/cpusets.c */ -#define cpuset_mems_allowed(task) node_online_map - asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, const unsigned long __user *old_nodes, const unsigned long __user *new_nodes) -- cgit v1.2.3-71-gd317 From 74cb21553f4bf244185b9bec4c26e4e3169ad55e Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Sun, 8 Jan 2006 01:01:56 -0800 Subject: [PATCH] cpuset: numa_policy_rebind cleanup Cleanup, reorganize and make more robust the mempolicy.c code to rebind mempolicies relative to the containing cpuset after a tasks memory placement changes. The real motivator for this cleanup patch is to lay more groundwork for the upcoming patch to correctly rebind NUMA mempolicies that are attached to vma's after the containing cpuset memory placement changes. NUMA mempolicies are constrained by the cpuset their task is a member of. When either (1) a task is moved to a different cpuset, or (2) the 'mems' mems_allowed of a cpuset is changed, then the NUMA mempolicies have embedded node numbers (for MPOL_BIND, MPOL_INTERLEAVE and MPOL_PREFERRED) that need to be recalculated, relative to their new cpuset placement. The old code used an unreliable method of determining what was the old mems_allowed constraining the mempolicy. It just looked at the tasks mems_allowed value. This sort of worked with the present code, that just rebinds the -task- mempolicy, and leaves any -vma- mempolicies broken, referring to the old nodes. But in an upcoming patch, the vma mempolicies will be rebound as well. 
Then the order in which the various task and vma mempolicies are updated will no longer be deterministic, and one can no longer count on the task->mems_allowed holding the old value for as long as needed. It's not even clear if the current code was guaranteed to work reliably for task mempolicies. So I added a mems_allowed field to each mempolicy, stating exactly what mems_allowed the policy is relative to, and updated synchronously and reliably anytime that the mempolicy is rebound. Also removed a useless wrapper routine, numa_policy_rebind(), and had its caller, cpuset_update_task_memory_state(), call directly to the rewritten policy_rebind() routine, and made that rebind routine extern instead of static, and added a "mpol_" prefix to its name, making it mpol_rebind_policy(). Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 12 ++++++++++-- kernel/cpuset.c | 2 +- mm/mempolicy.c | 31 +++++++++++++++++++------------ 3 files changed, 30 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 05fddd5bee5d..74357cb9bc7c 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -68,6 +68,7 @@ struct mempolicy { nodemask_t nodes; /* interleave */ /* undefined for default */ } v; + nodemask_t cpuset_mems_allowed; /* mempolicy relative to these nodes */ }; /* @@ -146,7 +147,9 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, extern void numa_default_policy(void); extern void numa_policy_init(void); -extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new); +extern void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *new); +extern void mpol_rebind_task(struct task_struct *tsk, + const nodemask_t *new); extern struct mempolicy default_policy; extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr); @@ -221,7 +224,12 @@ static 
inline void numa_default_policy(void) { } -static inline void numa_policy_rebind(const nodemask_t *old, +static inline void mpol_rebind_policy(struct mempolicy *pol, + const nodemask_t *new) +{ +} + +static inline void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) { } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 0d0dbbd6560a..8f764de3a9e7 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -643,7 +643,7 @@ void cpuset_update_task_memory_state() tsk->cpuset_mems_generation = cs->mems_generation; task_unlock(tsk); up(&callback_sem); - numa_policy_rebind(&oldmem, &tsk->mems_allowed); + mpol_rebind_task(tsk, &tsk->mems_allowed); if (!nodes_equal(oldmem, tsk->mems_allowed)) { if (migrate) { do_migrate_pages(tsk->mm, &oldmem, diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 34d566ac147f..c39bd86f4ea0 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -180,6 +180,7 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes) break; } policy->policy = mode; + policy->cpuset_mems_allowed = cpuset_mems_allowed(current); return policy; } @@ -1411,25 +1412,31 @@ void numa_default_policy(void) } /* Migrate a policy to a different set of nodes */ -static void rebind_policy(struct mempolicy *pol, const nodemask_t *old, - const nodemask_t *new) +void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask) { + nodemask_t *mpolmask; nodemask_t tmp; if (!pol) return; + mpolmask = &pol->cpuset_mems_allowed; + if (nodes_equal(*mpolmask, *newmask)) + return; switch (pol->policy) { case MPOL_DEFAULT: break; case MPOL_INTERLEAVE: - nodes_remap(tmp, pol->v.nodes, *old, *new); + nodes_remap(tmp, pol->v.nodes, *mpolmask, *newmask); pol->v.nodes = tmp; - current->il_next = node_remap(current->il_next, *old, *new); + *mpolmask = *newmask; + current->il_next = node_remap(current->il_next, + *mpolmask, *newmask); break; case MPOL_PREFERRED: pol->v.preferred_node = node_remap(pol->v.preferred_node, - *old, *new); + *mpolmask, *newmask); + 
*mpolmask = *newmask; break; case MPOL_BIND: { nodemask_t nodes; @@ -1439,7 +1446,7 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old, nodes_clear(nodes); for (z = pol->v.zonelist->zones; *z; z++) node_set((*z)->zone_pgdat->node_id, nodes); - nodes_remap(tmp, nodes, *old, *new); + nodes_remap(tmp, nodes, *mpolmask, *newmask); nodes = tmp; zonelist = bind_zonelist(&nodes); @@ -1454,6 +1461,7 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old, kfree(pol->v.zonelist); pol->v.zonelist = zonelist; } + *mpolmask = *newmask; break; } default: @@ -1463,14 +1471,13 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old, } /* - * Someone moved this task to different nodes. Fixup mempolicies. - * - * TODO - fixup current->mm->vma and shmfs/tmpfs/hugetlbfs policies as well, - * once we have a cpuset mechanism to mark which cpuset subtree is migrating. + * Wrapper for mpol_rebind_policy() that just requires task + * pointer, and updates task mempolicy. */ -void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new) + +void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) { - rebind_policy(current->mempolicy, old, new); + mpol_rebind_policy(tsk->mempolicy, new); } /* -- cgit v1.2.3-71-gd317 From 202f72d5d1b5c2c084f63ef996c736d208b447b5 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Sun, 8 Jan 2006 01:01:57 -0800 Subject: [PATCH] cpuset: number_of_cpusets optimization Easy little optimization hack to avoid actually having to call cpuset_zone_allowed() and check mems_allowed, in the main page allocation routine, __alloc_pages(). This saves several CPU cycles per page allocation on systems not using cpusets. A counter is updated each time a cpuset is created or removed, and whenever there is only one cpuset in the system, it must be the root cpuset, which contains all CPUs and all Memory Nodes. In that case, when the counter is one, all allocations are allowed. 
Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 10 +++++++++- kernel/cpuset.c | 12 +++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 37d2dd7ca3e9..34081c168af5 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -14,6 +14,8 @@ #ifdef CONFIG_CPUSETS +extern int number_of_cpusets; /* How many cpusets are defined in system? */ + extern int cpuset_init(void); extern void cpuset_init_smp(void); extern void cpuset_fork(struct task_struct *p); @@ -25,7 +27,13 @@ void cpuset_update_task_memory_state(void); #define cpuset_nodes_subset_current_mems_allowed(nodes) \ nodes_subset((nodes), current->mems_allowed) int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); -extern int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask); + +extern int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask); +static int inline cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) +{ + return number_of_cpusets <= 1 || __cpuset_zone_allowed(z, gfp_mask); +} + extern int cpuset_excl_nodes_overlap(const struct task_struct *p); #define cpuset_memory_pressure_bump() \ diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 8f764de3a9e7..6004719f26ee 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -56,6 +56,13 @@ #define CPUSET_SUPER_MAGIC 0x27e0eb +/* + * Tracks how many cpusets are currently defined in system. + * When there is only one cpuset (the root cpuset) we can + * short circuit some hooks. + */ +int number_of_cpusets; + /* See "Frequency meter" comments, below. 
*/ struct fmeter { @@ -1664,6 +1671,7 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode) down(&callback_sem); list_add(&cs->sibling, &cs->parent->children); + number_of_cpusets++; up(&callback_sem); err = cpuset_create_dir(cs, name, mode); @@ -1726,6 +1734,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) spin_unlock(&d->d_lock); cpuset_d_remove_dir(d); dput(d); + number_of_cpusets--; up(&callback_sem); if (list_empty(&parent->children)) check_for_release(parent, &pathbuf); @@ -1769,6 +1778,7 @@ int __init cpuset_init(void) root->d_inode->i_nlink++; top_cpuset.dentry = root; root->d_inode->i_op = &cpuset_dir_inode_operations; + number_of_cpusets = 1; err = cpuset_populate_dir(root); /* memory_pressure_enabled is in root cpuset only */ if (err == 0) @@ -1982,7 +1992,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * GFP_USER - only nodes in current tasks mems allowed ok. **/ -int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) +int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { int node; /* node that zone z is on */ const struct cpuset *cs; /* current cpuset ancestors */ -- cgit v1.2.3-71-gd317 From 4225399a66b315d4d1fb1cb61b75dda201c832e3 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Sun, 8 Jan 2006 01:01:59 -0800 Subject: [PATCH] cpuset: rebind vma mempolicies fix Fix more of longstanding bug in cpuset/mempolicy interaction. NUMA mempolicies (mm/mempolicy.c) are constrained by the current tasks cpuset to just the Memory Nodes allowed by that cpuset. The kernel maintains internal state for each mempolicy, tracking what nodes are used for the MPOL_INTERLEAVE, MPOL_BIND or MPOL_PREFERRED policies. 
When a task's cpuset memory placement changes, whether because the cpuset changed, or because the task was attached to a different cpuset, then the task's mempolicies have to be rebound to the new cpuset placement, so as to preserve the cpuset-relative numbering of the nodes in that policy. An earlier fix handled such mempolicy rebinding for mempolicies attached to a task. This fix rebinds mempolicies attached to vma's (address ranges in a task's address space). Due to the need to hold the task->mm->mmap_sem semaphore while updating vma's, the rebinding of vma mempolicies has to be done when the cpuset memory placement is changed, at which time mmap_sem can be safely acquired. The task's mempolicy is rebound later, when the task next attempts to allocate memory and notices that its task->cpuset_mems_generation is out-of-date with its cpuset's mems_generation. Because walking the tasklist to find all tasks attached to a changing cpuset requires holding tasklist_lock, a spinlock, one cannot update the vma's of the affected tasks while doing the tasklist scan. In general, one cannot acquire a semaphore (which can sleep) while already holding a spinlock (such as tasklist_lock). So a list of mm references has to be built up during the tasklist scan, then the tasklist lock dropped, then for each mm, its mmap_sem acquired, and the vma's in that mm rebound. Once the tasklist lock is dropped, affected tasks may fork new tasks, before their mm's are rebound. A kernel global 'cpuset_being_rebound' is set to point to the cpuset being rebound (there can only be one; cpuset modifications are done under a global 'manage_sem' semaphore), and the mpol_copy code that is used to copy a task's mempolicies during fork catches such forking tasks, and ensures their children are also rebound. When a task is moved to a different cpuset, it is easier, as there is only one task involved. Its mm->vma's are scanned, using the same mpol_rebind_policy() as used above.
It may happen that both the mpol_copy hook and the update done via the tasklist scan update the same mm twice. This is ok, as the mempolicies of each vma in an mm keep track of what mems_allowed they are relative to, and safely no-op a second request to rebind to the same nodes. Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 18 ++++++++++ kernel/cpuset.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++ mm/mempolicy.c | 29 +++++++++++++++ 3 files changed, 137 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 74357cb9bc7c..c7ac77e873b3 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -150,6 +150,16 @@ extern void numa_policy_init(void); extern void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *new); extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new); +extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); +#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x)) + +#ifdef CONFIG_CPUSET +#define current_cpuset_is_being_rebound() \ + (cpuset_being_rebound == current->cpuset) +#else +#define current_cpuset_is_being_rebound() 0 +#endif + extern struct mempolicy default_policy; extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr); @@ -165,6 +175,8 @@ static inline void check_highest_zone(int k) int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags); +extern void *cpuset_being_rebound; /* Trigger mpol_copy vma rebind */ + #else struct mempolicy {}; @@ -234,6 +246,12 @@ static inline void mpol_rebind_task(struct task_struct *tsk, { } +static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) +{ +} + +#define set_cpuset_being_rebound(x) do {} while (0) + static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr) { 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 6004719f26ee..19f87565be17 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -812,12 +812,24 @@ static int update_cpumask(struct cpuset *cs, char *buf) } /* + * Handle user request to change the 'mems' memory placement + * of a cpuset. Needs to validate the request, update the + * cpusets mems_allowed and mems_generation, and for each + * task in the cpuset, rebind any vma mempolicies. + * * Call with manage_sem held. May take callback_sem during call. + * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, + * lock each such tasks mm->mmap_sem, scan its vma's and rebind + * their mempolicies to the cpusets new mems_allowed. */ static int update_nodemask(struct cpuset *cs, char *buf) { struct cpuset trialcs; + struct task_struct *g, *p; + struct mm_struct **mmarray; + int i, n, ntasks; + int fudge; int retval; trialcs = *cs; @@ -839,6 +851,76 @@ static int update_nodemask(struct cpuset *cs, char *buf) cs->mems_generation = atomic_read(&cpuset_mems_generation); up(&callback_sem); + set_cpuset_being_rebound(cs); /* causes mpol_copy() rebind */ + + fudge = 10; /* spare mmarray[] slots */ + fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */ + retval = -ENOMEM; + + /* + * Allocate mmarray[] to hold mm reference for each task + * in cpuset cs. Can't kmalloc GFP_KERNEL while holding + * tasklist_lock. We could use GFP_ATOMIC, but with a + * few more lines of code, we can retry until we get a big + * enough mmarray[] w/o using GFP_ATOMIC. + */ + while (1) { + ntasks = atomic_read(&cs->count); /* guess */ + ntasks += fudge; + mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL); + if (!mmarray) + goto done; + write_lock_irq(&tasklist_lock); /* block fork */ + if (atomic_read(&cs->count) <= ntasks) + break; /* got enough */ + write_unlock_irq(&tasklist_lock); /* try again */ + kfree(mmarray); + } + + n = 0; + + /* Load up mmarray[] with mm reference for each task in cpuset. 
*/ + do_each_thread(g, p) { + struct mm_struct *mm; + + if (n >= ntasks) { + printk(KERN_WARNING + "Cpuset mempolicy rebind incomplete.\n"); + continue; + } + if (p->cpuset != cs) + continue; + mm = get_task_mm(p); + if (!mm) + continue; + mmarray[n++] = mm; + } while_each_thread(g, p); + write_unlock_irq(&tasklist_lock); + + /* + * Now that we've dropped the tasklist spinlock, we can + * rebind the vma mempolicies of each mm in mmarray[] to their + * new cpuset, and release that mm. The mpol_rebind_mm() + * call takes mmap_sem, which we couldn't take while holding + * tasklist_lock. Forks can happen again now - the mpol_copy() + * cpuset_being_rebound check will catch such forks, and rebind + * their vma mempolicies too. Because we still hold the global + * cpuset manage_sem, we know that no other rebind effort will + * be contending for the global variable cpuset_being_rebound. + * It's ok if we rebind the same mm twice; mpol_rebind_mm() + * is idempotent. + */ + for (i = 0; i < n; i++) { + struct mm_struct *mm = mmarray[i]; + + mpol_rebind_mm(mm, &cs->mems_allowed); + mmput(mm); + } + + /* We're done rebinding vma's to this cpusets new mems_allowed. 
*/ + kfree(mmarray); + set_cpuset_being_rebound(NULL); + retval = 0; done: return retval; } @@ -1011,6 +1093,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) struct cpuset *oldcs; cpumask_t cpus; nodemask_t from, to; + struct mm_struct *mm; if (sscanf(pidbuf, "%d", &pid) != 1) return -EIO; @@ -1060,6 +1143,13 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) to = cs->mems_allowed; up(&callback_sem); + + mm = get_task_mm(tsk); + if (mm) { + mpol_rebind_mm(mm, &to); + mmput(mm); + } + if (is_memory_migrate(cs)) do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL); put_task_struct(tsk); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c39bd86f4ea0..1850d0aef4ac 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1131,6 +1131,15 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) } EXPORT_SYMBOL(alloc_pages_current); +/* + * If mpol_copy() sees current->cpuset == cpuset_being_rebound, then it + * rebinds the mempolicy its copying by calling mpol_rebind_policy() + * with the mems_allowed returned by cpuset_mems_allowed(). This + * keeps mempolicies cpuset relative after its cpuset moves. See + * further kernel/cpuset.c update_nodemask(). + */ +void *cpuset_being_rebound; + /* Slow path of a mempolicy copy */ struct mempolicy *__mpol_copy(struct mempolicy *old) { @@ -1138,6 +1147,10 @@ struct mempolicy *__mpol_copy(struct mempolicy *old) if (!new) return ERR_PTR(-ENOMEM); + if (current_cpuset_is_being_rebound()) { + nodemask_t mems = cpuset_mems_allowed(current); + mpol_rebind_policy(old, &mems); + } *new = *old; atomic_set(&new->refcnt, 1); if (new->policy == MPOL_BIND) { @@ -1480,6 +1493,22 @@ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) mpol_rebind_policy(tsk->mempolicy, new); } +/* + * Rebind each vma in mm to new nodemask. + * + * Call holding a reference to mm. Takes mm->mmap_sem during call. 
+ */ + +void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) +{ + struct vm_area_struct *vma; + + down_write(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) + mpol_rebind_policy(vma->vm_policy, new); + up_write(&mm->mmap_sem); +} + /* * Display pages allocated per node and memory policy via /proc. */ -- cgit v1.2.3-71-gd317 From c417f0242ebe578924a30d4e53d35b5059fed4e7 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Sun, 8 Jan 2006 01:02:01 -0800 Subject: [PATCH] cpuset: remove test for null cpuset from alloc code path Remove a couple of more lines of code from the cpuset hooks in the page allocation code path. There was a check for a NULL cpuset pointer in the routine cpuset_update_task_memory_state() that was only needed during system boot, after the memory subsystem was initialized, before the cpuset subsystem was initialized, to catch a NULL task->cpuset pointer. Add a cpuset_init_early() routine, just before the mem_init() call in init/main.c, that sets up just enough of the init tasks cpuset structure to render cpuset_update_task_memory_state() calls harmless. Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 2 ++ init/main.c | 1 + kernel/cpuset.c | 22 ++++++++++++++++------ 3 files changed, 19 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 34081c168af5..c472f972bd6d 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -16,6 +16,7 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? 
*/ +extern int cpuset_init_early(void); extern int cpuset_init(void); extern void cpuset_init_smp(void); extern void cpuset_fork(struct task_struct *p); @@ -49,6 +50,7 @@ extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); #else /* !CONFIG_CPUSETS */ +static inline int cpuset_init_early(void) { return 0; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} static inline void cpuset_fork(struct task_struct *p) {} diff --git a/init/main.c b/init/main.c index 2ed3638deec7..afe5eb84ad52 100644 --- a/init/main.c +++ b/init/main.c @@ -512,6 +512,7 @@ asmlinkage void __init start_kernel(void) } #endif vfs_caches_init_early(); + cpuset_init_early(); mem_init(); kmem_cache_init(); setup_per_cpu_pageset(); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index cf8203a5fa71..fc949e4a625c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -603,9 +603,7 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) * Do not call this routine if in_interrupt(). * * Call without callback_sem or task_lock() held. May be called - * with or without manage_sem held. Except in early boot or - * an exiting task, when tsk->cpuset is NULL, this routine will - * acquire task_lock(). We don't need to use task_lock to guard + * with or without manage_sem held. 
Doesn't need task_lock to guard * against another task changing a non-NULL cpuset pointer to NULL, * as that is only done by a task on itself, and if the current task * is here, it is not simultaneously in the exit code NULL'ing its @@ -631,9 +629,6 @@ void cpuset_update_task_memory_state() struct task_struct *tsk = current; struct cpuset *cs = tsk->cpuset; - if (unlikely(!cs)) - return; - task_lock(tsk); my_cpusets_mem_gen = cs->mems_generation; task_unlock(tsk); @@ -1836,6 +1831,21 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) return 0; } +/* + * cpuset_init_early - just enough so that the calls to + * cpuset_update_task_memory_state() in early init code + * are harmless. + */ + +int __init cpuset_init_early(void) +{ + struct task_struct *tsk = current; + + tsk->cpuset = &top_cpuset; + tsk->cpuset->mems_generation = atomic_read(&cpuset_mems_generation); + return 0; +} + /** * cpuset_init - initialize cpusets at system boot * -- cgit v1.2.3-71-gd317 From de25968cc87cc5b76d09de8b4cbddc8f24fcf5f7 Mon Sep 17 00:00:00 2001 From: Tim Schmielau Date: Sun, 8 Jan 2006 01:02:05 -0800 Subject: [PATCH] fix more missing includes Include fixes for 2.6.14-git11. Should allow sched.h to be removed from module.h on i386, x86_64, arm, ia64, ppc, ppc64, and s390. Probably more to come since I haven't yet checked the other archs.
Signed-off-by: Tim Schmielau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/common/scoop.c | 1 + arch/arm/mach-realview/localtimer.c | 1 + arch/mips/sgi-ip27/ip27-berr.c | 1 + drivers/char/agp/sworks-agp.c | 1 + drivers/infiniband/hw/mthca/mthca_dev.h | 1 + drivers/infiniband/ulp/srp/ib_srp.c | 1 + drivers/macintosh/windfarm_smu_controls.c | 1 + drivers/macintosh/windfarm_smu_sensors.c | 1 + drivers/mtd/onenand/generic.c | 1 + drivers/mtd/rfd_ftl.c | 1 + drivers/pci/hotplug/pciehp.h | 1 + drivers/pci/hotplug/pciehp_hpc.c | 3 +++ drivers/rapidio/rio-scan.c | 2 ++ drivers/rapidio/rio-sysfs.c | 1 + drivers/rapidio/rio.c | 1 + drivers/usb/host/ohci-au1xxx.c | 1 + drivers/usb/host/ohci-lh7a404.c | 1 + drivers/usb/host/ohci-ppc-soc.c | 1 + include/linux/rio_drv.h | 1 + 19 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/arch/arm/common/scoop.c b/arch/arm/common/scoop.c index b6de43e73699..a2dfe0b0f1ec 100644 --- a/arch/arm/common/scoop.c +++ b/arch/arm/common/scoop.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c index c9d7c596b200..caf6b8bb6c95 100644 --- a/arch/arm/mach-realview/localtimer.c +++ b/arch/arm/mach-realview/localtimer.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index 07631a97670b..ce907eda221b 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -11,6 +11,7 @@ #include #include #include /* for SIGBUS */ +#include /* schow_regs(), force_sig() */ #include #include diff --git a/drivers/char/agp/sworks-agp.c b/drivers/char/agp/sworks-agp.c index 3f8f7fa6b0ff..268f78d926d3 100644 --- a/drivers/char/agp/sworks-agp.c +++ b/drivers/char/agp/sworks-agp.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "agp.h" diff --git 
a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 497ff794ef6a..795b379260bf 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -43,6 +43,7 @@ #include #include #include +#include #include #include "mthca_provider.h" diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index ee9fe226ae99..dd488d3cffa9 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -39,6 +39,7 @@ #include #include #include +#include #include diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c index 2c3158c81ff2..4d811600bdab 100644 --- a/drivers/macintosh/windfarm_smu_controls.c +++ b/drivers/macintosh/windfarm_smu_controls.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/macintosh/windfarm_smu_sensors.c b/drivers/macintosh/windfarm_smu_sensors.c index b558cc209d49..1a00d9c75a23 100644 --- a/drivers/macintosh/windfarm_smu_sensors.c +++ b/drivers/macintosh/windfarm_smu_sensors.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c index 45c077d0f063..af06a80f44de 100644 --- a/drivers/mtd/onenand/generic.c +++ b/drivers/mtd/onenand/generic.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c index 20ce212638fc..a3e00a4635a5 100644 --- a/drivers/mtd/rfd_ftl.c +++ b/drivers/mtd/rfd_ftl.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 6a61b9f286e1..0aac6a61337d 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -32,6 +32,7 @@ #include #include #include +#include /* signal_pending() */ #include #include "pci_hotplug.h" 
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 0b8b26beb163..ac1e495c314e 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -30,6 +30,9 @@ #include #include #include +#include +#include +#include #include #include diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 4f7ed4bd3be9..94e30fe4b8f3 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include "rio.h" diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index 30a11436e241..bef9316e95df 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -15,6 +15,7 @@ #include #include #include +#include /* for capable() */ #include "rio.h" diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 3ca1011ceaac..5e382470faa2 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "rio.h" diff --git a/drivers/usb/host/ohci-au1xxx.c b/drivers/usb/host/ohci-au1xxx.c index d9cf3b327d96..77cd6ac07e3c 100644 --- a/drivers/usb/host/ohci-au1xxx.c +++ b/drivers/usb/host/ohci-au1xxx.c @@ -19,6 +19,7 @@ */ #include +#include #include diff --git a/drivers/usb/host/ohci-lh7a404.c b/drivers/usb/host/ohci-lh7a404.c index 3959ccc88332..0020ed7a39d0 100644 --- a/drivers/usb/host/ohci-lh7a404.c +++ b/drivers/usb/host/ohci-lh7a404.c @@ -17,6 +17,7 @@ */ #include +#include #include diff --git a/drivers/usb/host/ohci-ppc-soc.c b/drivers/usb/host/ohci-ppc-soc.c index 2ec6a78bd65e..b2a8dfa48870 100644 --- a/drivers/usb/host/ohci-ppc-soc.c +++ b/drivers/usb/host/ohci-ppc-soc.c @@ -15,6 +15,7 @@ */ #include +#include /* configure so an HC device and id are always provided */ /* always called with process context; sleeping is OK */ diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 3bd7cce19e26..157d7e3236b5 100644 --- 
a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -21,6 +21,7 @@ #include #include #include +#include #include extern int __rio_local_read_config_32(struct rio_mport *port, u32 offset, -- cgit v1.2.3-71-gd317 From 705b6c7b34f2621f95f606d0e683daa10cdb8eb9 Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Sun, 8 Jan 2006 01:02:06 -0800 Subject: [PATCH] new driver synclink_gt New character device driver for the SyncLink GT and SyncLink AC families of synchronous and asynchronous serial adapters Signed-off-by: Paul Fulghum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/Kconfig | 8 + drivers/char/Makefile | 1 + drivers/char/synclink_gt.c | 4501 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/synclink.h | 9 +- 4 files changed, 4518 insertions(+), 1 deletion(-) create mode 100644 drivers/char/synclink_gt.c (limited to 'include/linux') diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index b1b34edcd70c..dd7e6901c575 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -220,6 +220,14 @@ config SYNCLINKMP The module will be called synclinkmp. If you want to do that, say M here. +config SYNCLINK_GT + tristate "SyncLink GT/AC support" + depends on SERIAL_NONSTANDARD + help + Support for SyncLink GT and SyncLink AC families of + synchronous and asynchronous serial adapters + manufactured by Microgate Systems, Ltd. 
(www.microgate.com) + config N_HDLC tristate "HDLC line discipline support" depends on SERIAL_NONSTANDARD diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 4aeae687e88a..d973d14d8f7f 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_RISCOM8) += riscom8.o obj-$(CONFIG_ISI) += isicom.o obj-$(CONFIG_SYNCLINK) += synclink.o obj-$(CONFIG_SYNCLINKMP) += synclinkmp.o +obj-$(CONFIG_SYNCLINK_GT) += synclink_gt.o obj-$(CONFIG_N_HDLC) += n_hdlc.o obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o obj-$(CONFIG_SX) += sx.o generic_serial.o diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c new file mode 100644 index 000000000000..2b9cde94e2f7 --- /dev/null +++ b/drivers/char/synclink_gt.c @@ -0,0 +1,4501 @@ +/* + * $Id: synclink_gt.c,v 4.20 2005/11/08 19:51:55 paulkf Exp $ + * + * Device driver for Microgate SyncLink GT serial adapters. + * + * written by Paul Fulghum for Microgate Corporation + * paulkf@microgate.com + * + * Microgate and SyncLink are trademarks of Microgate Corporation + * + * This code is released under the GNU General Public License (GPL) + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * DEBUG OUTPUT DEFINITIONS + * + * uncomment lines below to enable specific types of debug output + * + * DBGINFO information - most verbose output + * DBGERR serious errors + * DBGBH bottom half service routine debugging + * DBGISR interrupt service routine debugging + * DBGDATA output receive and transmit data + * DBGTBUF output transmit DMA buffers and registers + * DBGRBUF output receive DMA buffers and registers + */ + +#define DBGINFO(fmt) if (debug_level >= DEBUG_LEVEL_INFO) printk fmt +#define DBGERR(fmt) if (debug_level >= DEBUG_LEVEL_ERROR) printk fmt +#define DBGBH(fmt) if (debug_level >= DEBUG_LEVEL_BH) printk fmt +#define DBGISR(fmt) if (debug_level >= DEBUG_LEVEL_ISR) printk fmt +#define DBGDATA(info, buf, size, label) if (debug_level >= DEBUG_LEVEL_DATA) trace_block((info), (buf), (size), (label)) +//#define DBGTBUF(info) dump_tbufs(info) +//#define DBGRBUF(info) dump_rbufs(info) + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "linux/synclink.h" + +#ifdef CONFIG_HDLC_MODULE +#define CONFIG_HDLC 1 +#endif + +/* + * module identification + */ +static char *driver_name = "SyncLink GT"; +static char *driver_version = "$Revision: 4.20 $"; +static char *tty_driver_name = "synclink_gt"; +static char *tty_dev_prefix = "ttySLG"; +MODULE_LICENSE("GPL"); +#define MGSL_MAGIC 0x5401 +#define MAX_DEVICES 12 + +static struct pci_device_id pci_table[] = { + {PCI_VENDOR_ID_MICROGATE, SYNCLINK_GT_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, + {PCI_VENDOR_ID_MICROGATE, SYNCLINK_GT4_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, + {PCI_VENDOR_ID_MICROGATE, SYNCLINK_AC_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, + {0,}, /* terminate list */ +}; 
+MODULE_DEVICE_TABLE(pci, pci_table); + +static int init_one(struct pci_dev *dev,const struct pci_device_id *ent); +static void remove_one(struct pci_dev *dev); +static struct pci_driver pci_driver = { + .name = "synclink_gt", + .id_table = pci_table, + .probe = init_one, + .remove = __devexit_p(remove_one), +}; + +static int pci_registered; + +/* + * module configuration and status + */ +static struct slgt_info *slgt_device_list; +static int slgt_device_count; + +static int ttymajor; +static int debug_level; +static int maxframe[MAX_DEVICES]; +static int dosyncppp[MAX_DEVICES]; + +module_param(ttymajor, int, 0); +module_param(debug_level, int, 0); +module_param_array(maxframe, int, NULL, 0); +module_param_array(dosyncppp, int, NULL, 0); + +MODULE_PARM_DESC(ttymajor, "TTY major device number override: 0=auto assigned"); +MODULE_PARM_DESC(debug_level, "Debug syslog output: 0=disabled, 1 to 5=increasing detail"); +MODULE_PARM_DESC(maxframe, "Maximum frame size used by device (4096 to 65535)"); +MODULE_PARM_DESC(dosyncppp, "Enable synchronous net device, 0=disable 1=enable"); + +/* + * tty support and callbacks + */ +#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK)) + +static struct tty_driver *serial_driver; + +static int open(struct tty_struct *tty, struct file * filp); +static void close(struct tty_struct *tty, struct file * filp); +static void hangup(struct tty_struct *tty); +static void set_termios(struct tty_struct *tty, struct termios *old_termios); + +static int write(struct tty_struct *tty, const unsigned char *buf, int count); +static void put_char(struct tty_struct *tty, unsigned char ch); +static void send_xchar(struct tty_struct *tty, char ch); +static void wait_until_sent(struct tty_struct *tty, int timeout); +static int write_room(struct tty_struct *tty); +static void flush_chars(struct tty_struct *tty); +static void flush_buffer(struct tty_struct *tty); +static void tx_hold(struct tty_struct *tty); +static void 
tx_release(struct tty_struct *tty); + +static int ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); +static int read_proc(char *page, char **start, off_t off, int count,int *eof, void *data); +static int chars_in_buffer(struct tty_struct *tty); +static void throttle(struct tty_struct * tty); +static void unthrottle(struct tty_struct * tty); +static void set_break(struct tty_struct *tty, int break_state); + +/* + * generic HDLC support and callbacks + */ +#ifdef CONFIG_HDLC +#define dev_to_port(D) (dev_to_hdlc(D)->priv) +static void hdlcdev_tx_done(struct slgt_info *info); +static void hdlcdev_rx(struct slgt_info *info, char *buf, int size); +static int hdlcdev_init(struct slgt_info *info); +static void hdlcdev_exit(struct slgt_info *info); +#endif + + +/* + * device specific structures, macros and functions + */ + +#define SLGT_MAX_PORTS 4 +#define SLGT_REG_SIZE 256 + +/* + * DMA buffer descriptor and access macros + */ +struct slgt_desc +{ + unsigned short count; + unsigned short status; + unsigned int pbuf; /* physical address of data buffer */ + unsigned int next; /* physical address of next descriptor */ + + /* driver book keeping */ + char *buf; /* virtual address of data buffer */ + unsigned int pdesc; /* physical address of this descriptor */ + dma_addr_t buf_dma_addr; +}; + +#define set_desc_buffer(a,b) (a).pbuf = cpu_to_le32((unsigned int)(b)) +#define set_desc_next(a,b) (a).next = cpu_to_le32((unsigned int)(b)) +#define set_desc_count(a,b)(a).count = cpu_to_le16((unsigned short)(b)) +#define set_desc_eof(a,b) (a).status = cpu_to_le16((b) ? 
(le16_to_cpu((a).status) | BIT0) : (le16_to_cpu((a).status) & ~BIT0)) +#define desc_count(a) (le16_to_cpu((a).count)) +#define desc_status(a) (le16_to_cpu((a).status)) +#define desc_complete(a) (le16_to_cpu((a).status) & BIT15) +#define desc_eof(a) (le16_to_cpu((a).status) & BIT2) +#define desc_crc_error(a) (le16_to_cpu((a).status) & BIT1) +#define desc_abort(a) (le16_to_cpu((a).status) & BIT0) +#define desc_residue(a) ((le16_to_cpu((a).status) & 0x38) >> 3) + +struct _input_signal_events { + int ri_up; + int ri_down; + int dsr_up; + int dsr_down; + int dcd_up; + int dcd_down; + int cts_up; + int cts_down; +}; + +/* + * device instance data structure + */ +struct slgt_info { + void *if_ptr; /* General purpose pointer (used by SPPP) */ + + struct slgt_info *next_device; /* device list link */ + + int magic; + int flags; + + char device_name[25]; + struct pci_dev *pdev; + + int port_count; /* count of ports on adapter */ + int adapter_num; /* adapter instance number */ + int port_num; /* port instance number */ + + /* array of pointers to port contexts on this adapter */ + struct slgt_info *port_array[SLGT_MAX_PORTS]; + + int count; /* count of opens */ + int line; /* tty line instance number */ + unsigned short close_delay; + unsigned short closing_wait; /* time to wait before closing */ + + struct mgsl_icount icount; + + struct tty_struct *tty; + int timeout; + int x_char; /* xon/xoff character */ + int blocked_open; /* # of blocked opens */ + unsigned int read_status_mask; + unsigned int ignore_status_mask; + + wait_queue_head_t open_wait; + wait_queue_head_t close_wait; + + wait_queue_head_t status_event_wait_q; + wait_queue_head_t event_wait_q; + struct timer_list tx_timer; + struct timer_list rx_timer; + + spinlock_t lock; /* spinlock for synchronizing with ISR */ + + struct work_struct task; + u32 pending_bh; + int bh_requested; + int bh_running; + + int isr_overflow; + int irq_requested; /* nonzero if IRQ requested */ + int irq_occurred; /* for diagnostics 
use */ + + /* device configuration */ + + unsigned int bus_type; + unsigned int irq_level; + unsigned long irq_flags; + + unsigned char __iomem * reg_addr; /* memory mapped registers address */ + u32 phys_reg_addr; + u32 reg_offset; + int reg_addr_requested; + + MGSL_PARAMS params; /* communications parameters */ + u32 idle_mode; + u32 max_frame_size; /* as set by device config */ + + unsigned int raw_rx_size; + unsigned int if_mode; + + /* device status */ + + int rx_enabled; + int rx_restart; + + int tx_enabled; + int tx_active; + + unsigned char signals; /* serial signal states */ + unsigned int init_error; /* initialization error */ + + unsigned char *tx_buf; + int tx_count; + + char flag_buf[MAX_ASYNC_BUFFER_SIZE]; + char char_buf[MAX_ASYNC_BUFFER_SIZE]; + BOOLEAN drop_rts_on_tx_done; + struct _input_signal_events input_signal_events; + + int dcd_chkcount; /* check counts to prevent */ + int cts_chkcount; /* too many IRQs if a signal */ + int dsr_chkcount; /* is floating */ + int ri_chkcount; + + char *bufs; /* virtual address of DMA buffer lists */ + dma_addr_t bufs_dma_addr; /* physical address of buffer descriptors */ + + unsigned int rbuf_count; + struct slgt_desc *rbufs; + unsigned int rbuf_current; + unsigned int rbuf_index; + + unsigned int tbuf_count; + struct slgt_desc *tbufs; + unsigned int tbuf_current; + unsigned int tbuf_start; + + unsigned char *tmp_rbuf; + unsigned int tmp_rbuf_count; + + /* SPPP/Cisco HDLC device parts */ + + int netcount; + int dosyncppp; + spinlock_t netlock; +#ifdef CONFIG_HDLC + struct net_device *netdev; +#endif + +}; + +static MGSL_PARAMS default_params = { + .mode = MGSL_MODE_HDLC, + .loopback = 0, + .flags = HDLC_FLAG_UNDERRUN_ABORT15, + .encoding = HDLC_ENCODING_NRZI_SPACE, + .clock_speed = 0, + .addr_filter = 0xff, + .crc_type = HDLC_CRC_16_CCITT, + .preamble_length = HDLC_PREAMBLE_LENGTH_8BITS, + .preamble = HDLC_PREAMBLE_PATTERN_NONE, + .data_rate = 9600, + .data_bits = 8, + .stop_bits = 1, + .parity = 
ASYNC_PARITY_NONE +}; + + +#define BH_RECEIVE 1 +#define BH_TRANSMIT 2 +#define BH_STATUS 4 +#define IO_PIN_SHUTDOWN_LIMIT 100 + +#define DMABUFSIZE 256 +#define DESC_LIST_SIZE 4096 + +#define MASK_PARITY BIT1 +#define MASK_FRAMING BIT2 +#define MASK_BREAK BIT3 +#define MASK_OVERRUN BIT4 + +#define GSR 0x00 /* global status */ +#define TDR 0x80 /* tx data */ +#define RDR 0x80 /* rx data */ +#define TCR 0x82 /* tx control */ +#define TIR 0x84 /* tx idle */ +#define TPR 0x85 /* tx preamble */ +#define RCR 0x86 /* rx control */ +#define VCR 0x88 /* V.24 control */ +#define CCR 0x89 /* clock control */ +#define BDR 0x8a /* baud divisor */ +#define SCR 0x8c /* serial control */ +#define SSR 0x8e /* serial status */ +#define RDCSR 0x90 /* rx DMA control/status */ +#define TDCSR 0x94 /* tx DMA control/status */ +#define RDDAR 0x98 /* rx DMA descriptor address */ +#define TDDAR 0x9c /* tx DMA descriptor address */ + +#define RXIDLE BIT14 +#define RXBREAK BIT14 +#define IRQ_TXDATA BIT13 +#define IRQ_TXIDLE BIT12 +#define IRQ_TXUNDER BIT11 /* HDLC */ +#define IRQ_RXDATA BIT10 +#define IRQ_RXIDLE BIT9 /* HDLC */ +#define IRQ_RXBREAK BIT9 /* async */ +#define IRQ_RXOVER BIT8 +#define IRQ_DSR BIT7 +#define IRQ_CTS BIT6 +#define IRQ_DCD BIT5 +#define IRQ_RI BIT4 +#define IRQ_ALL 0x3ff0 +#define IRQ_MASTER BIT0 + +#define slgt_irq_on(info, mask) \ + wr_reg16((info), SCR, (unsigned short)(rd_reg16((info), SCR) | (mask))) +#define slgt_irq_off(info, mask) \ + wr_reg16((info), SCR, (unsigned short)(rd_reg16((info), SCR) & ~(mask))) + +static __u8 rd_reg8(struct slgt_info *info, unsigned int addr); +static void wr_reg8(struct slgt_info *info, unsigned int addr, __u8 value); +static __u16 rd_reg16(struct slgt_info *info, unsigned int addr); +static void wr_reg16(struct slgt_info *info, unsigned int addr, __u16 value); +static __u32 rd_reg32(struct slgt_info *info, unsigned int addr); +static void wr_reg32(struct slgt_info *info, unsigned int addr, __u32 value); + +static void 
msc_set_vcr(struct slgt_info *info); + +static int startup(struct slgt_info *info); +static int block_til_ready(struct tty_struct *tty, struct file * filp,struct slgt_info *info); +static void shutdown(struct slgt_info *info); +static void program_hw(struct slgt_info *info); +static void change_params(struct slgt_info *info); + +static int register_test(struct slgt_info *info); +static int irq_test(struct slgt_info *info); +static int loopback_test(struct slgt_info *info); +static int adapter_test(struct slgt_info *info); + +static void reset_adapter(struct slgt_info *info); +static void reset_port(struct slgt_info *info); +static void async_mode(struct slgt_info *info); +static void hdlc_mode(struct slgt_info *info); + +static void rx_stop(struct slgt_info *info); +static void rx_start(struct slgt_info *info); +static void reset_rbufs(struct slgt_info *info); +static void free_rbufs(struct slgt_info *info, unsigned int first, unsigned int last); +static void rdma_reset(struct slgt_info *info); +static int rx_get_frame(struct slgt_info *info); +static int rx_get_buf(struct slgt_info *info); + +static void tx_start(struct slgt_info *info); +static void tx_stop(struct slgt_info *info); +static void tx_set_idle(struct slgt_info *info); +static unsigned int free_tbuf_count(struct slgt_info *info); +static void reset_tbufs(struct slgt_info *info); +static void tdma_reset(struct slgt_info *info); +static void tx_load(struct slgt_info *info, const char *buf, unsigned int count); + +static void get_signals(struct slgt_info *info); +static void set_signals(struct slgt_info *info); +static void enable_loopback(struct slgt_info *info); +static void set_rate(struct slgt_info *info, u32 data_rate); + +static int bh_action(struct slgt_info *info); +static void bh_handler(void* context); +static void bh_transmit(struct slgt_info *info); +static void isr_serial(struct slgt_info *info); +static void isr_rdma(struct slgt_info *info); +static void isr_txeom(struct slgt_info *info, 
unsigned short status); +static void isr_tdma(struct slgt_info *info); +static irqreturn_t slgt_interrupt(int irq, void *dev_id, struct pt_regs * regs); + +static int alloc_dma_bufs(struct slgt_info *info); +static void free_dma_bufs(struct slgt_info *info); +static int alloc_desc(struct slgt_info *info); +static void free_desc(struct slgt_info *info); +static int alloc_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count); +static void free_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count); + +static int alloc_tmp_rbuf(struct slgt_info *info); +static void free_tmp_rbuf(struct slgt_info *info); + +static void tx_timeout(unsigned long context); +static void rx_timeout(unsigned long context); + +/* + * ioctl handlers + */ +static int get_stats(struct slgt_info *info, struct mgsl_icount __user *user_icount); +static int get_params(struct slgt_info *info, MGSL_PARAMS __user *params); +static int set_params(struct slgt_info *info, MGSL_PARAMS __user *params); +static int get_txidle(struct slgt_info *info, int __user *idle_mode); +static int set_txidle(struct slgt_info *info, int idle_mode); +static int tx_enable(struct slgt_info *info, int enable); +static int tx_abort(struct slgt_info *info); +static int rx_enable(struct slgt_info *info, int enable); +static int modem_input_wait(struct slgt_info *info,int arg); +static int wait_mgsl_event(struct slgt_info *info, int __user *mask_ptr); +static int tiocmget(struct tty_struct *tty, struct file *file); +static int tiocmset(struct tty_struct *tty, struct file *file, + unsigned int set, unsigned int clear); +static void set_break(struct tty_struct *tty, int break_state); +static int get_interface(struct slgt_info *info, int __user *if_mode); +static int set_interface(struct slgt_info *info, int if_mode); + +/* + * driver functions + */ +static void add_device(struct slgt_info *info); +static void device_init(int adapter_num, struct pci_dev *pdev); +static int claim_resources(struct slgt_info *info); 
+static void release_resources(struct slgt_info *info); + +/* + * DEBUG OUTPUT CODE + */ +#ifndef DBGINFO +#define DBGINFO(fmt) +#endif +#ifndef DBGERR +#define DBGERR(fmt) +#endif +#ifndef DBGBH +#define DBGBH(fmt) +#endif +#ifndef DBGISR +#define DBGISR(fmt) +#endif + +#ifdef DBGDATA +static void trace_block(struct slgt_info *info, const char *data, int count, const char *label) +{ + int i; + int linecount; + printk("%s %s data:\n",info->device_name, label); + while(count) { + linecount = (count > 16) ? 16 : count; + for(i=0; i < linecount; i++) + printk("%02X ",(unsigned char)data[i]); + for(;i<17;i++) + printk(" "); + for(i=0;i=040 && data[i]<=0176) + printk("%c",data[i]); + else + printk("."); + } + printk("\n"); + data += linecount; + count -= linecount; + } +} +#else +#define DBGDATA(info, buf, size, label) +#endif + +#ifdef DBGTBUF +static void dump_tbufs(struct slgt_info *info) +{ + int i; + printk("tbuf_current=%d\n", info->tbuf_current); + for (i=0 ; i < info->tbuf_count ; i++) { + printk("%d: count=%04X status=%04X\n", + i, le16_to_cpu(info->tbufs[i].count), le16_to_cpu(info->tbufs[i].status)); + } +} +#else +#define DBGTBUF(info) +#endif + +#ifdef DBGRBUF +static void dump_rbufs(struct slgt_info *info) +{ + int i; + printk("rbuf_current=%d\n", info->rbuf_current); + for (i=0 ; i < info->rbuf_count ; i++) { + printk("%d: count=%04X status=%04X\n", + i, le16_to_cpu(info->rbufs[i].count), le16_to_cpu(info->rbufs[i].status)); + } +} +#else +#define DBGRBUF(info) +#endif + +static inline int sanity_check(struct slgt_info *info, char *devname, const char *name) +{ +#ifdef SANITY_CHECK + if (!info) { + printk("null struct slgt_info for (%s) in %s\n", devname, name); + return 1; + } + if (info->magic != MGSL_MAGIC) { + printk("bad magic number struct slgt_info (%s) in %s\n", devname, name); + return 1; + } +#else + if (!info) + return 1; +#endif + return 0; +} + +/** + * line discipline callback wrappers + * + * The wrappers maintain line discipline references 
+ * while calling into the line discipline. + * + * ldisc_receive_buf - pass receive data to line discipline + */ +static void ldisc_receive_buf(struct tty_struct *tty, + const __u8 *data, char *flags, int count) +{ + struct tty_ldisc *ld; + if (!tty) + return; + ld = tty_ldisc_ref(tty); + if (ld) { + if (ld->receive_buf) + ld->receive_buf(tty, data, flags, count); + tty_ldisc_deref(ld); + } +} + +/* tty callbacks */ + +static int open(struct tty_struct *tty, struct file *filp) +{ + struct slgt_info *info; + int retval, line; + unsigned long flags; + + line = tty->index; + if ((line < 0) || (line >= slgt_device_count)) { + DBGERR(("%s: open with invalid line #%d.\n", driver_name, line)); + return -ENODEV; + } + + info = slgt_device_list; + while(info && info->line != line) + info = info->next_device; + if (sanity_check(info, tty->name, "open")) + return -ENODEV; + if (info->init_error) { + DBGERR(("%s init error=%d\n", info->device_name, info->init_error)); + return -ENODEV; + } + + tty->driver_data = info; + info->tty = tty; + + DBGINFO(("%s open, old ref count = %d\n", info->device_name, info->count)); + + /* If port is closing, signal caller to try again */ + if (tty_hung_up_p(filp) || info->flags & ASYNC_CLOSING){ + if (info->flags & ASYNC_CLOSING) + interruptible_sleep_on(&info->close_wait); + retval = ((info->flags & ASYNC_HUP_NOTIFY) ? + -EAGAIN : -ERESTARTSYS); + goto cleanup; + } + + info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 
1 : 0; + + spin_lock_irqsave(&info->netlock, flags); + if (info->netcount) { + retval = -EBUSY; + spin_unlock_irqrestore(&info->netlock, flags); + goto cleanup; + } + info->count++; + spin_unlock_irqrestore(&info->netlock, flags); + + if (info->count == 1) { + /* 1st open on this device, init hardware */ + retval = startup(info); + if (retval < 0) + goto cleanup; + } + + retval = block_til_ready(tty, filp, info); + if (retval) { + DBGINFO(("%s block_til_ready rc=%d\n", info->device_name, retval)); + goto cleanup; + } + + retval = 0; + +cleanup: + if (retval) { + if (tty->count == 1) + info->tty = NULL; /* tty layer will release tty struct */ + if(info->count) + info->count--; + } + + DBGINFO(("%s open rc=%d\n", info->device_name, retval)); + return retval; +} + +static void close(struct tty_struct *tty, struct file *filp) +{ + struct slgt_info *info = tty->driver_data; + + if (sanity_check(info, tty->name, "close")) + return; + DBGINFO(("%s close entry, count=%d\n", info->device_name, info->count)); + + if (!info->count) + return; + + if (tty_hung_up_p(filp)) + goto cleanup; + + if ((tty->count == 1) && (info->count != 1)) { + /* + * tty->count is 1 and the tty structure will be freed. + * info->count should be one in this case. + * if it's not, correct it so that the port is shutdown. + */ + DBGERR(("%s close: bad refcount; tty->count=1, " + "info->count=%d\n", info->device_name, info->count)); + info->count = 1; + } + + info->count--; + + /* if at least one open remaining, leave hardware active */ + if (info->count) + goto cleanup; + + info->flags |= ASYNC_CLOSING; + + /* set tty->closing to notify line discipline to + * only process XON/XOFF characters. Only the N_TTY + * discipline appears to use this (ppp does not). 
+ */ + tty->closing = 1; + + /* wait for transmit data to clear all layers */ + + if (info->closing_wait != ASYNC_CLOSING_WAIT_NONE) { + DBGINFO(("%s call tty_wait_until_sent\n", info->device_name)); + tty_wait_until_sent(tty, info->closing_wait); + } + + if (info->flags & ASYNC_INITIALIZED) + wait_until_sent(tty, info->timeout); + if (tty->driver->flush_buffer) + tty->driver->flush_buffer(tty); + tty_ldisc_flush(tty); + + shutdown(info); + + tty->closing = 0; + info->tty = NULL; + + if (info->blocked_open) { + if (info->close_delay) { + msleep_interruptible(jiffies_to_msecs(info->close_delay)); + } + wake_up_interruptible(&info->open_wait); + } + + info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); + + wake_up_interruptible(&info->close_wait); + +cleanup: + DBGINFO(("%s close exit, count=%d\n", tty->driver->name, info->count)); +} + +static void hangup(struct tty_struct *tty) +{ + struct slgt_info *info = tty->driver_data; + + if (sanity_check(info, tty->name, "hangup")) + return; + DBGINFO(("%s hangup\n", info->device_name)); + + flush_buffer(tty); + shutdown(info); + + info->count = 0; + info->flags &= ~ASYNC_NORMAL_ACTIVE; + info->tty = NULL; + + wake_up_interruptible(&info->open_wait); +} + +static void set_termios(struct tty_struct *tty, struct termios *old_termios) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + DBGINFO(("%s set_termios\n", tty->driver->name)); + + /* just return if nothing has changed */ + if ((tty->termios->c_cflag == old_termios->c_cflag) + && (RELEVANT_IFLAG(tty->termios->c_iflag) + == RELEVANT_IFLAG(old_termios->c_iflag))) + return; + + change_params(info); + + /* Handle transition to B0 status */ + if (old_termios->c_cflag & CBAUD && + !(tty->termios->c_cflag & CBAUD)) { + info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR); + spin_lock_irqsave(&info->lock,flags); + set_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + } + + /* Handle transition away from B0 status */ + if 
(!(old_termios->c_cflag & CBAUD) && + tty->termios->c_cflag & CBAUD) { + info->signals |= SerialSignal_DTR; + if (!(tty->termios->c_cflag & CRTSCTS) || + !test_bit(TTY_THROTTLED, &tty->flags)) { + info->signals |= SerialSignal_RTS; + } + spin_lock_irqsave(&info->lock,flags); + set_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + } + + /* Handle turning off CRTSCTS */ + if (old_termios->c_cflag & CRTSCTS && + !(tty->termios->c_cflag & CRTSCTS)) { + tty->hw_stopped = 0; + tx_release(tty); + } +} + +static int write(struct tty_struct *tty, + const unsigned char *buf, int count) +{ + int ret = 0; + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "write")) + goto cleanup; + DBGINFO(("%s write count=%d\n", info->device_name, count)); + + if (!tty || !info->tx_buf) + goto cleanup; + + if (count > info->max_frame_size) { + ret = -EIO; + goto cleanup; + } + + if (!count) + goto cleanup; + + if (info->params.mode == MGSL_MODE_RAW) { + unsigned int bufs_needed = (count/DMABUFSIZE); + unsigned int bufs_free = free_tbuf_count(info); + if (count % DMABUFSIZE) + ++bufs_needed; + if (bufs_needed > bufs_free) + goto cleanup; + } else { + if (info->tx_active) + goto cleanup; + if (info->tx_count) { + /* send accumulated data from send_char() calls */ + /* as frame and wait before accepting more data. 
*/ + tx_load(info, info->tx_buf, info->tx_count); + goto start; + } + } + + ret = info->tx_count = count; + tx_load(info, buf, count); + goto start; + +start: + if (info->tx_count && !tty->stopped && !tty->hw_stopped) { + spin_lock_irqsave(&info->lock,flags); + if (!info->tx_active) + tx_start(info); + spin_unlock_irqrestore(&info->lock,flags); + } + +cleanup: + DBGINFO(("%s write rc=%d\n", info->device_name, ret)); + return ret; +} + +static void put_char(struct tty_struct *tty, unsigned char ch) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "put_char")) + return; + DBGINFO(("%s put_char(%d)\n", info->device_name, ch)); + if (!tty || !info->tx_buf) + return; + spin_lock_irqsave(&info->lock,flags); + if (!info->tx_active && (info->tx_count < info->max_frame_size)) + info->tx_buf[info->tx_count++] = ch; + spin_unlock_irqrestore(&info->lock,flags); +} + +static void send_xchar(struct tty_struct *tty, char ch) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "send_xchar")) + return; + DBGINFO(("%s send_xchar(%d)\n", info->device_name, ch)); + info->x_char = ch; + if (ch) { + spin_lock_irqsave(&info->lock,flags); + if (!info->tx_enabled) + tx_start(info); + spin_unlock_irqrestore(&info->lock,flags); + } +} + +static void wait_until_sent(struct tty_struct *tty, int timeout) +{ + struct slgt_info *info = tty->driver_data; + unsigned long orig_jiffies, char_time; + + if (!info ) + return; + if (sanity_check(info, tty->name, "wait_until_sent")) + return; + DBGINFO(("%s wait_until_sent entry\n", info->device_name)); + if (!(info->flags & ASYNC_INITIALIZED)) + goto exit; + + orig_jiffies = jiffies; + + /* Set check interval to 1/5 of estimated time to + * send a character, and make it at least 1. The check + * interval should also be less than the timeout. + * Note: use tight timings here to satisfy the NIST-PCTS. 
+ */ + + if (info->params.data_rate) { + char_time = info->timeout/(32 * 5); + if (!char_time) + char_time++; + } else + char_time = 1; + + if (timeout) + char_time = min_t(unsigned long, char_time, timeout); + + while (info->tx_active) { + msleep_interruptible(jiffies_to_msecs(char_time)); + if (signal_pending(current)) + break; + if (timeout && time_after(jiffies, orig_jiffies + timeout)) + break; + } + +exit: + DBGINFO(("%s wait_until_sent exit\n", info->device_name)); +} + +static int write_room(struct tty_struct *tty) +{ + struct slgt_info *info = tty->driver_data; + int ret; + + if (sanity_check(info, tty->name, "write_room")) + return 0; + ret = (info->tx_active) ? 0 : HDLC_MAX_FRAME_SIZE; + DBGINFO(("%s write_room=%d\n", info->device_name, ret)); + return ret; +} + +static void flush_chars(struct tty_struct *tty) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "flush_chars")) + return; + DBGINFO(("%s flush_chars entry tx_count=%d\n", info->device_name, info->tx_count)); + + if (info->tx_count <= 0 || tty->stopped || + tty->hw_stopped || !info->tx_buf) + return; + + DBGINFO(("%s flush_chars start transmit\n", info->device_name)); + + spin_lock_irqsave(&info->lock,flags); + if (!info->tx_active && info->tx_count) { + tx_load(info, info->tx_buf,info->tx_count); + tx_start(info); + } + spin_unlock_irqrestore(&info->lock,flags); +} + +static void flush_buffer(struct tty_struct *tty) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "flush_buffer")) + return; + DBGINFO(("%s flush_buffer\n", info->device_name)); + + spin_lock_irqsave(&info->lock,flags); + if (!info->tx_active) + info->tx_count = 0; + spin_unlock_irqrestore(&info->lock,flags); + + wake_up_interruptible(&tty->write_wait); + tty_wakeup(tty); +} + +/* + * throttle (stop) transmitter + */ +static void tx_hold(struct tty_struct *tty) +{ + struct slgt_info *info = tty->driver_data; + 
unsigned long flags; + + if (sanity_check(info, tty->name, "tx_hold")) + return; + DBGINFO(("%s tx_hold\n", info->device_name)); + spin_lock_irqsave(&info->lock,flags); + if (info->tx_enabled && info->params.mode == MGSL_MODE_ASYNC) + tx_stop(info); + spin_unlock_irqrestore(&info->lock,flags); +} + +/* + * release (start) transmitter + */ +static void tx_release(struct tty_struct *tty) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "tx_release")) + return; + DBGINFO(("%s tx_release\n", info->device_name)); + spin_lock_irqsave(&info->lock,flags); + if (!info->tx_active && info->tx_count) { + tx_load(info, info->tx_buf, info->tx_count); + tx_start(info); + } + spin_unlock_irqrestore(&info->lock,flags); +} + +/* + * Service an IOCTL request + * + * Arguments + * + * tty pointer to tty instance data + * file pointer to associated file object for device + * cmd IOCTL command code + * arg command argument/context + * + * Return 0 if success, otherwise error code + */ +static int ioctl(struct tty_struct *tty, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct slgt_info *info = tty->driver_data; + struct mgsl_icount cnow; /* kernel counter temps */ + struct serial_icounter_struct __user *p_cuser; /* user space */ + unsigned long flags; + void __user *argp = (void __user *)arg; + + if (sanity_check(info, tty->name, "ioctl")) + return -ENODEV; + DBGINFO(("%s ioctl() cmd=%08X\n", info->device_name, cmd)); + + if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) && + (cmd != TIOCMIWAIT) && (cmd != TIOCGICOUNT)) { + if (tty->flags & (1 << TTY_IO_ERROR)) + return -EIO; + } + + switch (cmd) { + case MGSL_IOCGPARAMS: + return get_params(info, argp); + case MGSL_IOCSPARAMS: + return set_params(info, argp); + case MGSL_IOCGTXIDLE: + return get_txidle(info, argp); + case MGSL_IOCSTXIDLE: + return set_txidle(info, (int)arg); + case MGSL_IOCTXENABLE: + return tx_enable(info, (int)arg); + case MGSL_IOCRXENABLE: 
+ return rx_enable(info, (int)arg); + case MGSL_IOCTXABORT: + return tx_abort(info); + case MGSL_IOCGSTATS: + return get_stats(info, argp); + case MGSL_IOCWAITEVENT: + return wait_mgsl_event(info, argp); + case TIOCMIWAIT: + return modem_input_wait(info,(int)arg); + case MGSL_IOCGIF: + return get_interface(info, argp); + case MGSL_IOCSIF: + return set_interface(info,(int)arg); + case TIOCGICOUNT: + spin_lock_irqsave(&info->lock,flags); + cnow = info->icount; + spin_unlock_irqrestore(&info->lock,flags); + p_cuser = argp; + if (put_user(cnow.cts, &p_cuser->cts) || + put_user(cnow.dsr, &p_cuser->dsr) || + put_user(cnow.rng, &p_cuser->rng) || + put_user(cnow.dcd, &p_cuser->dcd) || + put_user(cnow.rx, &p_cuser->rx) || + put_user(cnow.tx, &p_cuser->tx) || + put_user(cnow.frame, &p_cuser->frame) || + put_user(cnow.overrun, &p_cuser->overrun) || + put_user(cnow.parity, &p_cuser->parity) || + put_user(cnow.brk, &p_cuser->brk) || + put_user(cnow.buf_overrun, &p_cuser->buf_overrun)) + return -EFAULT; + return 0; + default: + return -ENOIOCTLCMD; + } + return 0; +} + +/* + * proc fs support + */ +static inline int line_info(char *buf, struct slgt_info *info) +{ + char stat_buf[30]; + int ret; + unsigned long flags; + + ret = sprintf(buf, "%s: IO=%08X IRQ=%d MaxFrameSize=%u\n", + info->device_name, info->phys_reg_addr, + info->irq_level, info->max_frame_size); + + /* output current serial signal states */ + spin_lock_irqsave(&info->lock,flags); + get_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + + stat_buf[0] = 0; + stat_buf[1] = 0; + if (info->signals & SerialSignal_RTS) + strcat(stat_buf, "|RTS"); + if (info->signals & SerialSignal_CTS) + strcat(stat_buf, "|CTS"); + if (info->signals & SerialSignal_DTR) + strcat(stat_buf, "|DTR"); + if (info->signals & SerialSignal_DSR) + strcat(stat_buf, "|DSR"); + if (info->signals & SerialSignal_DCD) + strcat(stat_buf, "|CD"); + if (info->signals & SerialSignal_RI) + strcat(stat_buf, "|RI"); + + if (info->params.mode != 
MGSL_MODE_ASYNC) { + ret += sprintf(buf+ret, "\tHDLC txok:%d rxok:%d", + info->icount.txok, info->icount.rxok); + if (info->icount.txunder) + ret += sprintf(buf+ret, " txunder:%d", info->icount.txunder); + if (info->icount.txabort) + ret += sprintf(buf+ret, " txabort:%d", info->icount.txabort); + if (info->icount.rxshort) + ret += sprintf(buf+ret, " rxshort:%d", info->icount.rxshort); + if (info->icount.rxlong) + ret += sprintf(buf+ret, " rxlong:%d", info->icount.rxlong); + if (info->icount.rxover) + ret += sprintf(buf+ret, " rxover:%d", info->icount.rxover); + if (info->icount.rxcrc) + ret += sprintf(buf+ret, " rxcrc:%d", info->icount.rxcrc); + } else { + ret += sprintf(buf+ret, "\tASYNC tx:%d rx:%d", + info->icount.tx, info->icount.rx); + if (info->icount.frame) + ret += sprintf(buf+ret, " fe:%d", info->icount.frame); + if (info->icount.parity) + ret += sprintf(buf+ret, " pe:%d", info->icount.parity); + if (info->icount.brk) + ret += sprintf(buf+ret, " brk:%d", info->icount.brk); + if (info->icount.overrun) + ret += sprintf(buf+ret, " oe:%d", info->icount.overrun); + } + + /* Append serial signal status to end */ + ret += sprintf(buf+ret, " %s\n", stat_buf+1); + + ret += sprintf(buf+ret, "\ttxactive=%d bh_req=%d bh_run=%d pending_bh=%x\n", + info->tx_active,info->bh_requested,info->bh_running, + info->pending_bh); + + return ret; +} + +/* Called to print information about devices + */ +static int read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int len = 0, l; + off_t begin = 0; + struct slgt_info *info; + + len += sprintf(page, "synclink_gt driver:%s\n", driver_version); + + info = slgt_device_list; + while( info ) { + l = line_info(page + len, info); + len += l; + if (len+begin > off+count) + goto done; + if (len+begin < off) { + begin += len; + len = 0; + } + info = info->next_device; + } + + *eof = 1; +done: + if (off >= len+begin) + return 0; + *start = page + (off-begin); + return ((count < begin+len-off) ? 
count : begin+len-off); +} + +/* + * return count of bytes in transmit buffer + */ +static int chars_in_buffer(struct tty_struct *tty) +{ + struct slgt_info *info = tty->driver_data; + if (sanity_check(info, tty->name, "chars_in_buffer")) + return 0; + DBGINFO(("%s chars_in_buffer()=%d\n", info->device_name, info->tx_count)); + return info->tx_count; +} + +/* + * signal remote device to throttle send data (our receive data) + */ +static void throttle(struct tty_struct * tty) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "throttle")) + return; + DBGINFO(("%s throttle\n", info->device_name)); + if (I_IXOFF(tty)) + send_xchar(tty, STOP_CHAR(tty)); + if (tty->termios->c_cflag & CRTSCTS) { + spin_lock_irqsave(&info->lock,flags); + info->signals &= ~SerialSignal_RTS; + set_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + } +} + +/* + * signal remote device to stop throttling send data (our receive data) + */ +static void unthrottle(struct tty_struct * tty) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + if (sanity_check(info, tty->name, "unthrottle")) + return; + DBGINFO(("%s unthrottle\n", info->device_name)); + if (I_IXOFF(tty)) { + if (info->x_char) + info->x_char = 0; + else + send_xchar(tty, START_CHAR(tty)); + } + if (tty->termios->c_cflag & CRTSCTS) { + spin_lock_irqsave(&info->lock,flags); + info->signals |= SerialSignal_RTS; + set_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + } +} + +/* + * set or clear transmit break condition + * break_state -1=set break condition, 0=clear + */ +static void set_break(struct tty_struct *tty, int break_state) +{ + struct slgt_info *info = tty->driver_data; + unsigned short value; + unsigned long flags; + + if (sanity_check(info, tty->name, "set_break")) + return; + DBGINFO(("%s set_break(%d)\n", info->device_name, break_state)); + + spin_lock_irqsave(&info->lock,flags); + value = rd_reg16(info, TCR); + if 
(break_state == -1) + value |= BIT6; + else + value &= ~BIT6; + wr_reg16(info, TCR, value); + spin_unlock_irqrestore(&info->lock,flags); +} + +#ifdef CONFIG_HDLC + +/** + * called by generic HDLC layer when protocol selected (PPP, frame relay, etc.) + * set encoding and frame check sequence (FCS) options + * + * dev pointer to network device structure + * encoding serial encoding setting + * parity FCS setting + * + * returns 0 if success, otherwise error code + */ +static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, + unsigned short parity) +{ + struct slgt_info *info = dev_to_port(dev); + unsigned char new_encoding; + unsigned short new_crctype; + + /* return error if TTY interface open */ + if (info->count) + return -EBUSY; + + DBGINFO(("%s hdlcdev_attach\n", info->device_name)); + + switch (encoding) + { + case ENCODING_NRZ: new_encoding = HDLC_ENCODING_NRZ; break; + case ENCODING_NRZI: new_encoding = HDLC_ENCODING_NRZI_SPACE; break; + case ENCODING_FM_MARK: new_encoding = HDLC_ENCODING_BIPHASE_MARK; break; + case ENCODING_FM_SPACE: new_encoding = HDLC_ENCODING_BIPHASE_SPACE; break; + case ENCODING_MANCHESTER: new_encoding = HDLC_ENCODING_BIPHASE_LEVEL; break; + default: return -EINVAL; + } + + switch (parity) + { + case PARITY_NONE: new_crctype = HDLC_CRC_NONE; break; + case PARITY_CRC16_PR1_CCITT: new_crctype = HDLC_CRC_16_CCITT; break; + case PARITY_CRC32_PR1_CCITT: new_crctype = HDLC_CRC_32_CCITT; break; + default: return -EINVAL; + } + + info->params.encoding = new_encoding; + info->params.crc_type = new_crctype;; + + /* if network interface up, reprogram hardware */ + if (info->netcount) + program_hw(info); + + return 0; +} + +/** + * called by generic HDLC layer to send frame + * + * skb socket buffer containing HDLC frame + * dev pointer to network device structure + * + * returns 0 if success, otherwise error code + */ +static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct slgt_info *info = 
dev_to_port(dev); + struct net_device_stats *stats = hdlc_stats(dev); + unsigned long flags; + + DBGINFO(("%s hdlc_xmit\n", dev->name)); + + /* stop sending until this frame completes */ + netif_stop_queue(dev); + + /* copy data to device buffers */ + info->tx_count = skb->len; + tx_load(info, skb->data, skb->len); + + /* update network statistics */ + stats->tx_packets++; + stats->tx_bytes += skb->len; + + /* done with socket buffer, so free it */ + dev_kfree_skb(skb); + + /* save start time for transmit timeout detection */ + dev->trans_start = jiffies; + + /* start hardware transmitter if necessary */ + spin_lock_irqsave(&info->lock,flags); + if (!info->tx_active) + tx_start(info); + spin_unlock_irqrestore(&info->lock,flags); + + return 0; +} + +/** + * called by network layer when interface enabled + * claim resources and initialize hardware + * + * dev pointer to network device structure + * + * returns 0 if success, otherwise error code + */ +static int hdlcdev_open(struct net_device *dev) +{ + struct slgt_info *info = dev_to_port(dev); + int rc; + unsigned long flags; + + DBGINFO(("%s hdlcdev_open\n", dev->name)); + + /* generic HDLC layer open processing */ + if ((rc = hdlc_open(dev))) + return rc; + + /* arbitrate between network and tty opens */ + spin_lock_irqsave(&info->netlock, flags); + if (info->count != 0 || info->netcount != 0) { + DBGINFO(("%s hdlc_open busy\n", dev->name)); + spin_unlock_irqrestore(&info->netlock, flags); + return -EBUSY; + } + info->netcount=1; + spin_unlock_irqrestore(&info->netlock, flags); + + /* claim resources and init adapter */ + if ((rc = startup(info)) != 0) { + spin_lock_irqsave(&info->netlock, flags); + info->netcount=0; + spin_unlock_irqrestore(&info->netlock, flags); + return rc; + } + + /* assert DTR and RTS, apply hardware settings */ + info->signals |= SerialSignal_RTS + SerialSignal_DTR; + program_hw(info); + + /* enable network layer transmit */ + dev->trans_start = jiffies; + netif_start_queue(dev); + + /* 
inform generic HDLC layer of current DCD status */ + spin_lock_irqsave(&info->lock, flags); + get_signals(info); + spin_unlock_irqrestore(&info->lock, flags); + hdlc_set_carrier(info->signals & SerialSignal_DCD, dev); + + return 0; +} + +/** + * called by network layer when interface is disabled + * shutdown hardware and release resources + * + * dev pointer to network device structure + * + * returns 0 if success, otherwise error code + */ +static int hdlcdev_close(struct net_device *dev) +{ + struct slgt_info *info = dev_to_port(dev); + unsigned long flags; + + DBGINFO(("%s hdlcdev_close\n", dev->name)); + + netif_stop_queue(dev); + + /* shutdown adapter and release resources */ + shutdown(info); + + hdlc_close(dev); + + spin_lock_irqsave(&info->netlock, flags); + info->netcount=0; + spin_unlock_irqrestore(&info->netlock, flags); + + return 0; +} + +/** + * called by network layer to process IOCTL call to network device + * + * dev pointer to network device structure + * ifr pointer to network interface request structure + * cmd IOCTL command code + * + * returns 0 if success, otherwise error code + */ +static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + const size_t size = sizeof(sync_serial_settings); + sync_serial_settings new_line; + sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync; + struct slgt_info *info = dev_to_port(dev); + unsigned int flags; + + DBGINFO(("%s hdlcdev_ioctl\n", dev->name)); + + /* return error if TTY interface open */ + if (info->count) + return -EBUSY; + + if (cmd != SIOCWANDEV) + return hdlc_ioctl(dev, ifr, cmd); + + switch(ifr->ifr_settings.type) { + case IF_GET_IFACE: /* return current sync_serial_settings */ + + ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL; + if (ifr->ifr_settings.size < size) { + ifr->ifr_settings.size = size; /* data size wanted */ + return -ENOBUFS; + } + + flags = info->params.flags & (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_RXC_DPLL | + HDLC_FLAG_RXC_BRG | 
HDLC_FLAG_RXC_TXCPIN | + HDLC_FLAG_TXC_TXCPIN | HDLC_FLAG_TXC_DPLL | + HDLC_FLAG_TXC_BRG | HDLC_FLAG_TXC_RXCPIN); + + switch (flags){ + case (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_TXCPIN): new_line.clock_type = CLOCK_EXT; break; + case (HDLC_FLAG_RXC_BRG | HDLC_FLAG_TXC_BRG): new_line.clock_type = CLOCK_INT; break; + case (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_BRG): new_line.clock_type = CLOCK_TXINT; break; + case (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_RXCPIN): new_line.clock_type = CLOCK_TXFROMRX; break; + default: new_line.clock_type = CLOCK_DEFAULT; + } + + new_line.clock_rate = info->params.clock_speed; + new_line.loopback = info->params.loopback ? 1:0; + + if (copy_to_user(line, &new_line, size)) + return -EFAULT; + return 0; + + case IF_IFACE_SYNC_SERIAL: /* set sync_serial_settings */ + + if(!capable(CAP_NET_ADMIN)) + return -EPERM; + if (copy_from_user(&new_line, line, size)) + return -EFAULT; + + switch (new_line.clock_type) + { + case CLOCK_EXT: flags = HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_TXCPIN; break; + case CLOCK_TXFROMRX: flags = HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_RXCPIN; break; + case CLOCK_INT: flags = HDLC_FLAG_RXC_BRG | HDLC_FLAG_TXC_BRG; break; + case CLOCK_TXINT: flags = HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_BRG; break; + case CLOCK_DEFAULT: flags = info->params.flags & + (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_RXC_DPLL | + HDLC_FLAG_RXC_BRG | HDLC_FLAG_RXC_TXCPIN | + HDLC_FLAG_TXC_TXCPIN | HDLC_FLAG_TXC_DPLL | + HDLC_FLAG_TXC_BRG | HDLC_FLAG_TXC_RXCPIN); break; + default: return -EINVAL; + } + + if (new_line.loopback != 0 && new_line.loopback != 1) + return -EINVAL; + + info->params.flags &= ~(HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_RXC_DPLL | + HDLC_FLAG_RXC_BRG | HDLC_FLAG_RXC_TXCPIN | + HDLC_FLAG_TXC_TXCPIN | HDLC_FLAG_TXC_DPLL | + HDLC_FLAG_TXC_BRG | HDLC_FLAG_TXC_RXCPIN); + info->params.flags |= flags; + + info->params.loopback = new_line.loopback; + + if (flags & (HDLC_FLAG_RXC_BRG | HDLC_FLAG_TXC_BRG)) + info->params.clock_speed = new_line.clock_rate; 
+ else + info->params.clock_speed = 0; + + /* if network interface up, reprogram hardware */ + if (info->netcount) + program_hw(info); + return 0; + + default: + return hdlc_ioctl(dev, ifr, cmd); + } +} + +/** + * called by network layer when transmit timeout is detected + * + * dev pointer to network device structure + */ +static void hdlcdev_tx_timeout(struct net_device *dev) +{ + struct slgt_info *info = dev_to_port(dev); + struct net_device_stats *stats = hdlc_stats(dev); + unsigned long flags; + + DBGINFO(("%s hdlcdev_tx_timeout\n", dev->name)); + + stats->tx_errors++; + stats->tx_aborted_errors++; + + spin_lock_irqsave(&info->lock,flags); + tx_stop(info); + spin_unlock_irqrestore(&info->lock,flags); + + netif_wake_queue(dev); +} + +/** + * called by device driver when transmit completes + * reenable network layer transmit if stopped + * + * info pointer to device instance information + */ +static void hdlcdev_tx_done(struct slgt_info *info) +{ + if (netif_queue_stopped(info->netdev)) + netif_wake_queue(info->netdev); +} + +/** + * called by device driver when frame received + * pass frame to network layer + * + * info pointer to device instance information + * buf pointer to buffer contianing frame data + * size count of data bytes in buf + */ +static void hdlcdev_rx(struct slgt_info *info, char *buf, int size) +{ + struct sk_buff *skb = dev_alloc_skb(size); + struct net_device *dev = info->netdev; + struct net_device_stats *stats = hdlc_stats(dev); + + DBGINFO(("%s hdlcdev_rx\n", dev->name)); + + if (skb == NULL) { + DBGERR(("%s: can't alloc skb, drop packet\n", dev->name)); + stats->rx_dropped++; + return; + } + + memcpy(skb_put(skb, size),buf,size); + + skb->protocol = hdlc_type_trans(skb, info->netdev); + + stats->rx_packets++; + stats->rx_bytes += size; + + netif_rx(skb); + + info->netdev->last_rx = jiffies; +} + +/** + * called by device driver when adding device instance + * do generic HDLC initialization + * + * info pointer to device instance 
information + * + * returns 0 if success, otherwise error code + */ +static int hdlcdev_init(struct slgt_info *info) +{ + int rc; + struct net_device *dev; + hdlc_device *hdlc; + + /* allocate and initialize network and HDLC layer objects */ + + if (!(dev = alloc_hdlcdev(info))) { + printk(KERN_ERR "%s hdlc device alloc failure\n", info->device_name); + return -ENOMEM; + } + + /* for network layer reporting purposes only */ + dev->mem_start = info->phys_reg_addr; + dev->mem_end = info->phys_reg_addr + SLGT_REG_SIZE - 1; + dev->irq = info->irq_level; + + /* network layer callbacks and settings */ + dev->do_ioctl = hdlcdev_ioctl; + dev->open = hdlcdev_open; + dev->stop = hdlcdev_close; + dev->tx_timeout = hdlcdev_tx_timeout; + dev->watchdog_timeo = 10*HZ; + dev->tx_queue_len = 50; + + /* generic HDLC layer callbacks and settings */ + hdlc = dev_to_hdlc(dev); + hdlc->attach = hdlcdev_attach; + hdlc->xmit = hdlcdev_xmit; + + /* register objects with HDLC layer */ + if ((rc = register_hdlc_device(dev))) { + printk(KERN_WARNING "%s:unable to register hdlc device\n",__FILE__); + free_netdev(dev); + return rc; + } + + info->netdev = dev; + return 0; +} + +/** + * called by device driver when removing device instance + * do generic HDLC cleanup + * + * info pointer to device instance information + */ +static void hdlcdev_exit(struct slgt_info *info) +{ + unregister_hdlc_device(info->netdev); + free_netdev(info->netdev); + info->netdev = NULL; +} + +#endif /* ifdef CONFIG_HDLC */ + +/* + * get async data from rx DMA buffers + */ +static void rx_async(struct slgt_info *info) +{ + struct tty_struct *tty = info->tty; + struct mgsl_icount *icount = &info->icount; + unsigned int start, end; + unsigned char *p; + unsigned char status; + struct slgt_desc *bufs = info->rbufs; + int i, count; + + start = end = info->rbuf_current; + + while(desc_complete(bufs[end])) { + count = desc_count(bufs[end]) - info->rbuf_index; + p = bufs[end].buf + info->rbuf_index; + + DBGISR(("%s rx_async 
count=%d\n", info->device_name, count)); + DBGDATA(info, p, count, "rx"); + + for(i=0 ; i < count; i+=2, p+=2) { + if (tty) { + if (tty->flip.count >= TTY_FLIPBUF_SIZE) + tty_flip_buffer_push(tty); + if (tty->flip.count >= TTY_FLIPBUF_SIZE) + break; + *tty->flip.char_buf_ptr = *p; + *tty->flip.flag_buf_ptr = 0; + } + icount->rx++; + + if ((status = *(p+1) & (BIT9 + BIT8))) { + if (status & BIT9) + icount->parity++; + else if (status & BIT8) + icount->frame++; + /* discard char if tty control flags say so */ + if (status & info->ignore_status_mask) + continue; + if (tty) { + if (status & BIT9) + *tty->flip.flag_buf_ptr = TTY_PARITY; + else if (status & BIT8) + *tty->flip.flag_buf_ptr = TTY_FRAME; + } + } + if (tty) { + tty->flip.flag_buf_ptr++; + tty->flip.char_buf_ptr++; + tty->flip.count++; + } + } + + if (i < count) { + /* receive buffer not completed */ + info->rbuf_index += i; + info->rx_timer.expires = jiffies + 1; + add_timer(&info->rx_timer); + break; + } + + info->rbuf_index = 0; + free_rbufs(info, end, end); + + if (++end == info->rbuf_count) + end = 0; + + /* if entire list searched then no frame available */ + if (end == start) + break; + } + + if (tty && tty->flip.count) + tty_flip_buffer_push(tty); +} + +/* + * return next bottom half action to perform + */ +static int bh_action(struct slgt_info *info) +{ + unsigned long flags; + int rc; + + spin_lock_irqsave(&info->lock,flags); + + if (info->pending_bh & BH_RECEIVE) { + info->pending_bh &= ~BH_RECEIVE; + rc = BH_RECEIVE; + } else if (info->pending_bh & BH_TRANSMIT) { + info->pending_bh &= ~BH_TRANSMIT; + rc = BH_TRANSMIT; + } else if (info->pending_bh & BH_STATUS) { + info->pending_bh &= ~BH_STATUS; + rc = BH_STATUS; + } else { + /* Mark BH routine as complete */ + info->bh_running = 0; + info->bh_requested = 0; + rc = 0; + } + + spin_unlock_irqrestore(&info->lock,flags); + + return rc; +} + +/* + * perform bottom half processing + */ +static void bh_handler(void* context) +{ + struct slgt_info *info 
= context; + int action; + + if (!info) + return; + info->bh_running = 1; + + while((action = bh_action(info))) { + switch (action) { + case BH_RECEIVE: + DBGBH(("%s bh receive\n", info->device_name)); + switch(info->params.mode) { + case MGSL_MODE_ASYNC: + rx_async(info); + break; + case MGSL_MODE_HDLC: + while(rx_get_frame(info)); + break; + case MGSL_MODE_RAW: + while(rx_get_buf(info)); + break; + } + /* restart receiver if rx DMA buffers exhausted */ + if (info->rx_restart) + rx_start(info); + break; + case BH_TRANSMIT: + bh_transmit(info); + break; + case BH_STATUS: + DBGBH(("%s bh status\n", info->device_name)); + info->ri_chkcount = 0; + info->dsr_chkcount = 0; + info->dcd_chkcount = 0; + info->cts_chkcount = 0; + break; + default: + DBGBH(("%s unknown action\n", info->device_name)); + break; + } + } + DBGBH(("%s bh_handler exit\n", info->device_name)); +} + +static void bh_transmit(struct slgt_info *info) +{ + struct tty_struct *tty = info->tty; + + DBGBH(("%s bh_transmit\n", info->device_name)); + if (tty) { + tty_wakeup(tty); + wake_up_interruptible(&tty->write_wait); + } +} + +static void dsr_change(struct slgt_info *info) +{ + get_signals(info); + DBGISR(("dsr_change %s signals=%04X\n", info->device_name, info->signals)); + if ((info->dsr_chkcount)++ == IO_PIN_SHUTDOWN_LIMIT) { + slgt_irq_off(info, IRQ_DSR); + return; + } + info->icount.dsr++; + if (info->signals & SerialSignal_DSR) + info->input_signal_events.dsr_up++; + else + info->input_signal_events.dsr_down++; + wake_up_interruptible(&info->status_event_wait_q); + wake_up_interruptible(&info->event_wait_q); + info->pending_bh |= BH_STATUS; +} + +static void cts_change(struct slgt_info *info) +{ + get_signals(info); + DBGISR(("cts_change %s signals=%04X\n", info->device_name, info->signals)); + if ((info->cts_chkcount)++ == IO_PIN_SHUTDOWN_LIMIT) { + slgt_irq_off(info, IRQ_CTS); + return; + } + info->icount.cts++; + if (info->signals & SerialSignal_CTS) + info->input_signal_events.cts_up++; + else 
+ info->input_signal_events.cts_down++; + wake_up_interruptible(&info->status_event_wait_q); + wake_up_interruptible(&info->event_wait_q); + info->pending_bh |= BH_STATUS; + + if (info->flags & ASYNC_CTS_FLOW) { + if (info->tty) { + if (info->tty->hw_stopped) { + if (info->signals & SerialSignal_CTS) { + info->tty->hw_stopped = 0; + info->pending_bh |= BH_TRANSMIT; + return; + } + } else { + if (!(info->signals & SerialSignal_CTS)) + info->tty->hw_stopped = 1; + } + } + } +} + +static void dcd_change(struct slgt_info *info) +{ + get_signals(info); + DBGISR(("dcd_change %s signals=%04X\n", info->device_name, info->signals)); + if ((info->dcd_chkcount)++ == IO_PIN_SHUTDOWN_LIMIT) { + slgt_irq_off(info, IRQ_DCD); + return; + } + info->icount.dcd++; + if (info->signals & SerialSignal_DCD) { + info->input_signal_events.dcd_up++; + } else { + info->input_signal_events.dcd_down++; + } +#ifdef CONFIG_HDLC + if (info->netcount) + hdlc_set_carrier(info->signals & SerialSignal_DCD, info->netdev); +#endif + wake_up_interruptible(&info->status_event_wait_q); + wake_up_interruptible(&info->event_wait_q); + info->pending_bh |= BH_STATUS; + + if (info->flags & ASYNC_CHECK_CD) { + if (info->signals & SerialSignal_DCD) + wake_up_interruptible(&info->open_wait); + else { + if (info->tty) + tty_hangup(info->tty); + } + } +} + +static void ri_change(struct slgt_info *info) +{ + get_signals(info); + DBGISR(("ri_change %s signals=%04X\n", info->device_name, info->signals)); + if ((info->ri_chkcount)++ == IO_PIN_SHUTDOWN_LIMIT) { + slgt_irq_off(info, IRQ_RI); + return; + } + info->icount.dcd++; + if (info->signals & SerialSignal_RI) { + info->input_signal_events.ri_up++; + } else { + info->input_signal_events.ri_down++; + } + wake_up_interruptible(&info->status_event_wait_q); + wake_up_interruptible(&info->event_wait_q); + info->pending_bh |= BH_STATUS; +} + +static void isr_serial(struct slgt_info *info) +{ + unsigned short status = rd_reg16(info, SSR); + + DBGISR(("%s isr_serial 
status=%04X\n", info->device_name, status)); + + wr_reg16(info, SSR, status); /* clear pending */ + + info->irq_occurred = 1; + + if (info->params.mode == MGSL_MODE_ASYNC) { + if (status & IRQ_TXIDLE) { + if (info->tx_count) + isr_txeom(info, status); + } + if ((status & IRQ_RXBREAK) && (status & RXBREAK)) { + info->icount.brk++; + /* process break detection if tty control allows */ + if (info->tty) { + if (!(status & info->ignore_status_mask)) { + if (info->read_status_mask & MASK_BREAK) { + *info->tty->flip.flag_buf_ptr = TTY_BREAK; + if (info->flags & ASYNC_SAK) + do_SAK(info->tty); + } + } + } + } + } else { + if (status & (IRQ_TXIDLE + IRQ_TXUNDER)) + isr_txeom(info, status); + + if (status & IRQ_RXIDLE) { + if (status & RXIDLE) + info->icount.rxidle++; + else + info->icount.exithunt++; + wake_up_interruptible(&info->event_wait_q); + } + + if (status & IRQ_RXOVER) + rx_start(info); + } + + if (status & IRQ_DSR) + dsr_change(info); + if (status & IRQ_CTS) + cts_change(info); + if (status & IRQ_DCD) + dcd_change(info); + if (status & IRQ_RI) + ri_change(info); +} + +static void isr_rdma(struct slgt_info *info) +{ + unsigned int status = rd_reg32(info, RDCSR); + + DBGISR(("%s isr_rdma status=%08x\n", info->device_name, status)); + + /* RDCSR (rx DMA control/status) + * + * 31..07 reserved + * 06 save status byte to DMA buffer + * 05 error + * 04 eol (end of list) + * 03 eob (end of buffer) + * 02 IRQ enable + * 01 reset + * 00 enable + */ + wr_reg32(info, RDCSR, status); /* clear pending */ + + if (status & (BIT5 + BIT4)) { + DBGISR(("%s isr_rdma rx_restart=1\n", info->device_name)); + info->rx_restart = 1; + } + info->pending_bh |= BH_RECEIVE; +} + +static void isr_tdma(struct slgt_info *info) +{ + unsigned int status = rd_reg32(info, TDCSR); + + DBGISR(("%s isr_tdma status=%08x\n", info->device_name, status)); + + /* TDCSR (tx DMA control/status) + * + * 31..06 reserved + * 05 error + * 04 eol (end of list) + * 03 eob (end of buffer) + * 02 IRQ enable + * 01 
reset + * 00 enable + */ + wr_reg32(info, TDCSR, status); /* clear pending */ + + if (status & (BIT5 + BIT4 + BIT3)) { + // another transmit buffer has completed + // run bottom half to get more send data from user + info->pending_bh |= BH_TRANSMIT; + } +} + +static void isr_txeom(struct slgt_info *info, unsigned short status) +{ + DBGISR(("%s txeom status=%04x\n", info->device_name, status)); + + slgt_irq_off(info, IRQ_TXDATA + IRQ_TXIDLE + IRQ_TXUNDER); + tdma_reset(info); + reset_tbufs(info); + if (status & IRQ_TXUNDER) { + unsigned short val = rd_reg16(info, TCR); + wr_reg16(info, TCR, (unsigned short)(val | BIT2)); /* set reset bit */ + wr_reg16(info, TCR, val); /* clear reset bit */ + } + + if (info->tx_active) { + if (info->params.mode != MGSL_MODE_ASYNC) { + if (status & IRQ_TXUNDER) + info->icount.txunder++; + else if (status & IRQ_TXIDLE) + info->icount.txok++; + } + + info->tx_active = 0; + info->tx_count = 0; + + del_timer(&info->tx_timer); + + if (info->params.mode != MGSL_MODE_ASYNC && info->drop_rts_on_tx_done) { + info->signals &= ~SerialSignal_RTS; + info->drop_rts_on_tx_done = 0; + set_signals(info); + } + +#ifdef CONFIG_HDLC + if (info->netcount) + hdlcdev_tx_done(info); + else +#endif + { + if (info->tty && (info->tty->stopped || info->tty->hw_stopped)) { + tx_stop(info); + return; + } + info->pending_bh |= BH_TRANSMIT; + } + } +} + +/* interrupt service routine + * + * irq interrupt number + * dev_id device ID supplied during interrupt registration + * regs interrupted processor context + */ +static irqreturn_t slgt_interrupt(int irq, void *dev_id, struct pt_regs * regs) +{ + struct slgt_info *info; + unsigned int gsr; + unsigned int i; + + DBGISR(("slgt_interrupt irq=%d entry\n", irq)); + + info = dev_id; + if (!info) + return IRQ_NONE; + + spin_lock(&info->lock); + + while((gsr = rd_reg32(info, GSR) & 0xffffff00)) { + DBGISR(("%s gsr=%08x\n", info->device_name, gsr)); + info->irq_occurred = 1; + for(i=0; i < info->port_count ; i++) { + if 
(info->port_array[i] == NULL) + continue; + if (gsr & (BIT8 << i)) + isr_serial(info->port_array[i]); + if (gsr & (BIT16 << (i*2))) + isr_rdma(info->port_array[i]); + if (gsr & (BIT17 << (i*2))) + isr_tdma(info->port_array[i]); + } + } + + for(i=0; i < info->port_count ; i++) { + struct slgt_info *port = info->port_array[i]; + + if (port && (port->count || port->netcount) && + port->pending_bh && !port->bh_running && + !port->bh_requested) { + DBGISR(("%s bh queued\n", port->device_name)); + schedule_work(&port->task); + port->bh_requested = 1; + } + } + + spin_unlock(&info->lock); + + DBGISR(("slgt_interrupt irq=%d exit\n", irq)); + return IRQ_HANDLED; +} + +static int startup(struct slgt_info *info) +{ + DBGINFO(("%s startup\n", info->device_name)); + + if (info->flags & ASYNC_INITIALIZED) + return 0; + + if (!info->tx_buf) { + info->tx_buf = kmalloc(info->max_frame_size, GFP_KERNEL); + if (!info->tx_buf) { + DBGERR(("%s can't allocate tx buffer\n", info->device_name)); + return -ENOMEM; + } + } + + info->pending_bh = 0; + + memset(&info->icount, 0, sizeof(info->icount)); + + /* program hardware for current parameters */ + change_params(info); + + if (info->tty) + clear_bit(TTY_IO_ERROR, &info->tty->flags); + + info->flags |= ASYNC_INITIALIZED; + + return 0; +} + +/* + * called by close() and hangup() to shutdown hardware + */ +static void shutdown(struct slgt_info *info) +{ + unsigned long flags; + + if (!(info->flags & ASYNC_INITIALIZED)) + return; + + DBGINFO(("%s shutdown\n", info->device_name)); + + /* clear status wait queue because status changes */ + /* can't happen after shutting down the hardware */ + wake_up_interruptible(&info->status_event_wait_q); + wake_up_interruptible(&info->event_wait_q); + + del_timer_sync(&info->tx_timer); + del_timer_sync(&info->rx_timer); + + kfree(info->tx_buf); + info->tx_buf = NULL; + + spin_lock_irqsave(&info->lock,flags); + + tx_stop(info); + rx_stop(info); + + slgt_irq_off(info, IRQ_ALL | IRQ_MASTER); + + if 
(!info->tty || info->tty->termios->c_cflag & HUPCL) { + info->signals &= ~(SerialSignal_DTR + SerialSignal_RTS); + set_signals(info); + } + + spin_unlock_irqrestore(&info->lock,flags); + + if (info->tty) + set_bit(TTY_IO_ERROR, &info->tty->flags); + + info->flags &= ~ASYNC_INITIALIZED; +} + +static void program_hw(struct slgt_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(&info->lock,flags); + + rx_stop(info); + tx_stop(info); + + if (info->params.mode == MGSL_MODE_HDLC || + info->params.mode == MGSL_MODE_RAW || + info->netcount) + hdlc_mode(info); + else + async_mode(info); + + set_signals(info); + + info->dcd_chkcount = 0; + info->cts_chkcount = 0; + info->ri_chkcount = 0; + info->dsr_chkcount = 0; + + slgt_irq_on(info, IRQ_DCD | IRQ_CTS | IRQ_DSR); + get_signals(info); + + if (info->netcount || + (info->tty && info->tty->termios->c_cflag & CREAD)) + rx_start(info); + + spin_unlock_irqrestore(&info->lock,flags); +} + +/* + * reconfigure adapter based on new parameters + */ +static void change_params(struct slgt_info *info) +{ + unsigned cflag; + int bits_per_char; + + if (!info->tty || !info->tty->termios) + return; + DBGINFO(("%s change_params\n", info->device_name)); + + cflag = info->tty->termios->c_cflag; + + /* if B0 rate (hangup) specified then negate DTR and RTS */ + /* otherwise assert DTR and RTS */ + if (cflag & CBAUD) + info->signals |= SerialSignal_RTS + SerialSignal_DTR; + else + info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR); + + /* byte size and parity */ + + switch (cflag & CSIZE) { + case CS5: info->params.data_bits = 5; break; + case CS6: info->params.data_bits = 6; break; + case CS7: info->params.data_bits = 7; break; + case CS8: info->params.data_bits = 8; break; + default: info->params.data_bits = 7; break; + } + + info->params.stop_bits = (cflag & CSTOPB) ? 2 : 1; + + if (cflag & PARENB) + info->params.parity = (cflag & PARODD) ? 
ASYNC_PARITY_ODD : ASYNC_PARITY_EVEN; + else + info->params.parity = ASYNC_PARITY_NONE; + + /* calculate number of jiffies to transmit a full + * FIFO (32 bytes) at specified data rate + */ + bits_per_char = info->params.data_bits + + info->params.stop_bits + 1; + + info->params.data_rate = tty_get_baud_rate(info->tty); + + if (info->params.data_rate) { + info->timeout = (32*HZ*bits_per_char) / + info->params.data_rate; + } + info->timeout += HZ/50; /* Add .02 seconds of slop */ + + if (cflag & CRTSCTS) + info->flags |= ASYNC_CTS_FLOW; + else + info->flags &= ~ASYNC_CTS_FLOW; + + if (cflag & CLOCAL) + info->flags &= ~ASYNC_CHECK_CD; + else + info->flags |= ASYNC_CHECK_CD; + + /* process tty input control flags */ + + info->read_status_mask = IRQ_RXOVER; + if (I_INPCK(info->tty)) + info->read_status_mask |= MASK_PARITY | MASK_FRAMING; + if (I_BRKINT(info->tty) || I_PARMRK(info->tty)) + info->read_status_mask |= MASK_BREAK; + if (I_IGNPAR(info->tty)) + info->ignore_status_mask |= MASK_PARITY | MASK_FRAMING; + if (I_IGNBRK(info->tty)) { + info->ignore_status_mask |= MASK_BREAK; + /* If ignoring parity and break indicators, ignore + * overruns too. (For real raw support). 
+ */ + if (I_IGNPAR(info->tty)) + info->ignore_status_mask |= MASK_OVERRUN; + } + + program_hw(info); +} + +static int get_stats(struct slgt_info *info, struct mgsl_icount __user *user_icount) +{ + DBGINFO(("%s get_stats\n", info->device_name)); + if (!user_icount) { + memset(&info->icount, 0, sizeof(info->icount)); + } else { + if (copy_to_user(user_icount, &info->icount, sizeof(struct mgsl_icount))) + return -EFAULT; + } + return 0; +} + +static int get_params(struct slgt_info *info, MGSL_PARAMS __user *user_params) +{ + DBGINFO(("%s get_params\n", info->device_name)); + if (copy_to_user(user_params, &info->params, sizeof(MGSL_PARAMS))) + return -EFAULT; + return 0; +} + +static int set_params(struct slgt_info *info, MGSL_PARAMS __user *new_params) +{ + unsigned long flags; + MGSL_PARAMS tmp_params; + + DBGINFO(("%s set_params\n", info->device_name)); + if (copy_from_user(&tmp_params, new_params, sizeof(MGSL_PARAMS))) + return -EFAULT; + + spin_lock_irqsave(&info->lock, flags); + memcpy(&info->params, &tmp_params, sizeof(MGSL_PARAMS)); + spin_unlock_irqrestore(&info->lock, flags); + + change_params(info); + + return 0; +} + +static int get_txidle(struct slgt_info *info, int __user *idle_mode) +{ + DBGINFO(("%s get_txidle=%d\n", info->device_name, info->idle_mode)); + if (put_user(info->idle_mode, idle_mode)) + return -EFAULT; + return 0; +} + +static int set_txidle(struct slgt_info *info, int idle_mode) +{ + unsigned long flags; + DBGINFO(("%s set_txidle(%d)\n", info->device_name, idle_mode)); + spin_lock_irqsave(&info->lock,flags); + info->idle_mode = idle_mode; + tx_set_idle(info); + spin_unlock_irqrestore(&info->lock,flags); + return 0; +} + +static int tx_enable(struct slgt_info *info, int enable) +{ + unsigned long flags; + DBGINFO(("%s tx_enable(%d)\n", info->device_name, enable)); + spin_lock_irqsave(&info->lock,flags); + if (enable) { + if (!info->tx_enabled) + tx_start(info); + } else { + if (info->tx_enabled) + tx_stop(info); + } + 
spin_unlock_irqrestore(&info->lock,flags); + return 0; +} + +/* + * abort transmit HDLC frame + */ +static int tx_abort(struct slgt_info *info) +{ + unsigned long flags; + DBGINFO(("%s tx_abort\n", info->device_name)); + spin_lock_irqsave(&info->lock,flags); + tdma_reset(info); + spin_unlock_irqrestore(&info->lock,flags); + return 0; +} + +static int rx_enable(struct slgt_info *info, int enable) +{ + unsigned long flags; + DBGINFO(("%s rx_enable(%d)\n", info->device_name, enable)); + spin_lock_irqsave(&info->lock,flags); + if (enable) { + if (!info->rx_enabled) + rx_start(info); + } else { + if (info->rx_enabled) + rx_stop(info); + } + spin_unlock_irqrestore(&info->lock,flags); + return 0; +} + +/* + * wait for specified event to occur + */ +static int wait_mgsl_event(struct slgt_info *info, int __user *mask_ptr) +{ + unsigned long flags; + int s; + int rc=0; + struct mgsl_icount cprev, cnow; + int events; + int mask; + struct _input_signal_events oldsigs, newsigs; + DECLARE_WAITQUEUE(wait, current); + + if (get_user(mask, mask_ptr)) + return -EFAULT; + + DBGINFO(("%s wait_mgsl_event(%d)\n", info->device_name, mask)); + + spin_lock_irqsave(&info->lock,flags); + + /* return immediately if state matches requested events */ + get_signals(info); + s = info->signals; + + events = mask & + ( ((s & SerialSignal_DSR) ? MgslEvent_DsrActive:MgslEvent_DsrInactive) + + ((s & SerialSignal_DCD) ? MgslEvent_DcdActive:MgslEvent_DcdInactive) + + ((s & SerialSignal_CTS) ? MgslEvent_CtsActive:MgslEvent_CtsInactive) + + ((s & SerialSignal_RI) ? 
MgslEvent_RiActive :MgslEvent_RiInactive) ); + if (events) { + spin_unlock_irqrestore(&info->lock,flags); + goto exit; + } + + /* save current irq counts */ + cprev = info->icount; + oldsigs = info->input_signal_events; + + /* enable hunt and idle irqs if needed */ + if (mask & (MgslEvent_ExitHuntMode+MgslEvent_IdleReceived)) { + unsigned short val = rd_reg16(info, SCR); + if (!(val & IRQ_RXIDLE)) + wr_reg16(info, SCR, (unsigned short)(val | IRQ_RXIDLE)); + } + + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&info->event_wait_q, &wait); + + spin_unlock_irqrestore(&info->lock,flags); + + for(;;) { + schedule(); + if (signal_pending(current)) { + rc = -ERESTARTSYS; + break; + } + + /* get current irq counts */ + spin_lock_irqsave(&info->lock,flags); + cnow = info->icount; + newsigs = info->input_signal_events; + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_irqrestore(&info->lock,flags); + + /* if no change, wait aborted for some reason */ + if (newsigs.dsr_up == oldsigs.dsr_up && + newsigs.dsr_down == oldsigs.dsr_down && + newsigs.dcd_up == oldsigs.dcd_up && + newsigs.dcd_down == oldsigs.dcd_down && + newsigs.cts_up == oldsigs.cts_up && + newsigs.cts_down == oldsigs.cts_down && + newsigs.ri_up == oldsigs.ri_up && + newsigs.ri_down == oldsigs.ri_down && + cnow.exithunt == cprev.exithunt && + cnow.rxidle == cprev.rxidle) { + rc = -EIO; + break; + } + + events = mask & + ( (newsigs.dsr_up != oldsigs.dsr_up ? MgslEvent_DsrActive:0) + + (newsigs.dsr_down != oldsigs.dsr_down ? MgslEvent_DsrInactive:0) + + (newsigs.dcd_up != oldsigs.dcd_up ? MgslEvent_DcdActive:0) + + (newsigs.dcd_down != oldsigs.dcd_down ? MgslEvent_DcdInactive:0) + + (newsigs.cts_up != oldsigs.cts_up ? MgslEvent_CtsActive:0) + + (newsigs.cts_down != oldsigs.cts_down ? MgslEvent_CtsInactive:0) + + (newsigs.ri_up != oldsigs.ri_up ? MgslEvent_RiActive:0) + + (newsigs.ri_down != oldsigs.ri_down ? MgslEvent_RiInactive:0) + + (cnow.exithunt != cprev.exithunt ? 
MgslEvent_ExitHuntMode:0) +
+			  (cnow.rxidle != cprev.rxidle ? MgslEvent_IdleReceived:0) );
+		if (events)
+			break;
+
+		cprev = cnow;
+		oldsigs = newsigs;
+	}
+
+	remove_wait_queue(&info->event_wait_q, &wait);
+	set_current_state(TASK_RUNNING);
+
+
+	if (mask & (MgslEvent_ExitHuntMode + MgslEvent_IdleReceived)) {
+		spin_lock_irqsave(&info->lock,flags);
+		if (!waitqueue_active(&info->event_wait_q)) {
+			/* no other waiter is queued: disable the
+			 * exit hunt mode/idle received IRQs again
+			 */
+			wr_reg16(info, SCR,
+				(unsigned short)(rd_reg16(info, SCR) & ~IRQ_RXIDLE));
+		}
+		spin_unlock_irqrestore(&info->lock,flags);
+	}
+exit:
+	if (rc == 0)
+		rc = put_user(events, mask_ptr);
+	return rc;
+}
+
+/*
+ * copy the current interface mode (info->if_mode) to user space
+ *
+ * returns 0 on success or -EFAULT if the user pointer is bad
+ */
+static int get_interface(struct slgt_info *info, int __user *if_mode)
+{
+	DBGINFO(("%s get_interface=%x\n", info->device_name, info->if_mode));
+	if (put_user(info->if_mode, if_mode))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * store a new interface mode and reprogram the hardware for it
+ *
+ * Under info->lock: saves if_mode, calls msc_set_vcr() and then sets
+ * or clears the RTS driver control bit (BIT7) of TCR depending on
+ * MGSL_INTERFACE_RTS_EN. Always returns 0.
+ */
+static int set_interface(struct slgt_info *info, int if_mode)
+{
+ 	unsigned long flags;
+	unsigned short val;
+
+	DBGINFO(("%s set_interface=%x)\n", info->device_name, if_mode));
+	spin_lock_irqsave(&info->lock,flags);
+	info->if_mode = if_mode;
+
+	msc_set_vcr(info);
+
+	/* TCR (tx control) 07  1=RTS driver control
+	 *
+	 * TCR is accessed as a 16 bit register, so val must be 16 bits
+	 * wide. With an 8 bit temporary the write back below would
+	 * clear bits 15..08 instead of preserving them.
+	 */
+	val = rd_reg16(info, TCR);
+	if (info->if_mode & MGSL_INTERFACE_RTS_EN)
+		val |= BIT7;
+	else
+		val &= ~BIT7;
+	wr_reg16(info, TCR, val);
+
+	spin_unlock_irqrestore(&info->lock,flags);
+	return 0;
+}
+
+static int modem_input_wait(struct slgt_info *info,int arg)
+{
+ 	unsigned long flags;
+	int rc;
+	struct mgsl_icount cprev, cnow;
+	DECLARE_WAITQUEUE(wait, current);
+
+	/* save current irq counts */
+	spin_lock_irqsave(&info->lock,flags);
+	cprev = info->icount;
+	add_wait_queue(&info->status_event_wait_q, &wait);
+	set_current_state(TASK_INTERRUPTIBLE);
+	spin_unlock_irqrestore(&info->lock,flags);
+
+	for(;;) {
+		schedule();
+		if (signal_pending(current)) {
+			rc = -ERESTARTSYS;
+			break;
+		}
+
+		/* get new irq counts */
+		spin_lock_irqsave(&info->lock,flags);
+		cnow = info->icount;
+
set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_irqrestore(&info->lock,flags); + + /* if no change, wait aborted for some reason */ + if (cnow.rng == cprev.rng && cnow.dsr == cprev.dsr && + cnow.dcd == cprev.dcd && cnow.cts == cprev.cts) { + rc = -EIO; + break; + } + + /* check for change in caller specified modem input */ + if ((arg & TIOCM_RNG && cnow.rng != cprev.rng) || + (arg & TIOCM_DSR && cnow.dsr != cprev.dsr) || + (arg & TIOCM_CD && cnow.dcd != cprev.dcd) || + (arg & TIOCM_CTS && cnow.cts != cprev.cts)) { + rc = 0; + break; + } + + cprev = cnow; + } + remove_wait_queue(&info->status_event_wait_q, &wait); + set_current_state(TASK_RUNNING); + return rc; +} + +/* + * return state of serial control and status signals + */ +static int tiocmget(struct tty_struct *tty, struct file *file) +{ + struct slgt_info *info = tty->driver_data; + unsigned int result; + unsigned long flags; + + spin_lock_irqsave(&info->lock,flags); + get_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + + result = ((info->signals & SerialSignal_RTS) ? TIOCM_RTS:0) + + ((info->signals & SerialSignal_DTR) ? TIOCM_DTR:0) + + ((info->signals & SerialSignal_DCD) ? TIOCM_CAR:0) + + ((info->signals & SerialSignal_RI) ? TIOCM_RNG:0) + + ((info->signals & SerialSignal_DSR) ? TIOCM_DSR:0) + + ((info->signals & SerialSignal_CTS) ? 
TIOCM_CTS:0); + + DBGINFO(("%s tiocmget value=%08X\n", info->device_name, result)); + return result; +} + +/* + * set modem control signals (DTR/RTS) + * + * cmd signal command: TIOCMBIS = set bit TIOCMBIC = clear bit + * TIOCMSET = set/clear signal values + * value bit mask for command + */ +static int tiocmset(struct tty_struct *tty, struct file *file, + unsigned int set, unsigned int clear) +{ + struct slgt_info *info = tty->driver_data; + unsigned long flags; + + DBGINFO(("%s tiocmset(%x,%x)\n", info->device_name, set, clear)); + + if (set & TIOCM_RTS) + info->signals |= SerialSignal_RTS; + if (set & TIOCM_DTR) + info->signals |= SerialSignal_DTR; + if (clear & TIOCM_RTS) + info->signals &= ~SerialSignal_RTS; + if (clear & TIOCM_DTR) + info->signals &= ~SerialSignal_DTR; + + spin_lock_irqsave(&info->lock,flags); + set_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + return 0; +} + +/* + * block current process until the device is ready to open + */ +static int block_til_ready(struct tty_struct *tty, struct file *filp, + struct slgt_info *info) +{ + DECLARE_WAITQUEUE(wait, current); + int retval; + int do_clocal = 0, extra_count = 0; + unsigned long flags; + + DBGINFO(("%s block_til_ready\n", tty->driver->name)); + + if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){ + /* nonblock mode is set or port is not enabled */ + info->flags |= ASYNC_NORMAL_ACTIVE; + return 0; + } + + if (tty->termios->c_cflag & CLOCAL) + do_clocal = 1; + + /* Wait for carrier detect and the line to become + * free (i.e., not in use by the callout). While we are in + * this loop, info->count is dropped by one, so that + * close() knows when to free things. We restore it upon + * exit, either normal or abnormal. 
+ */ + + retval = 0; + add_wait_queue(&info->open_wait, &wait); + + spin_lock_irqsave(&info->lock, flags); + if (!tty_hung_up_p(filp)) { + extra_count = 1; + info->count--; + } + spin_unlock_irqrestore(&info->lock, flags); + info->blocked_open++; + + while (1) { + if ((tty->termios->c_cflag & CBAUD)) { + spin_lock_irqsave(&info->lock,flags); + info->signals |= SerialSignal_RTS + SerialSignal_DTR; + set_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + } + + set_current_state(TASK_INTERRUPTIBLE); + + if (tty_hung_up_p(filp) || !(info->flags & ASYNC_INITIALIZED)){ + retval = (info->flags & ASYNC_HUP_NOTIFY) ? + -EAGAIN : -ERESTARTSYS; + break; + } + + spin_lock_irqsave(&info->lock,flags); + get_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + + if (!(info->flags & ASYNC_CLOSING) && + (do_clocal || (info->signals & SerialSignal_DCD)) ) { + break; + } + + if (signal_pending(current)) { + retval = -ERESTARTSYS; + break; + } + + DBGINFO(("%s block_til_ready wait\n", tty->driver->name)); + schedule(); + } + + set_current_state(TASK_RUNNING); + remove_wait_queue(&info->open_wait, &wait); + + if (extra_count) + info->count++; + info->blocked_open--; + + if (!retval) + info->flags |= ASYNC_NORMAL_ACTIVE; + + DBGINFO(("%s block_til_ready ready, rc=%d\n", tty->driver->name, retval)); + return retval; +} + +static int alloc_tmp_rbuf(struct slgt_info *info) +{ + info->tmp_rbuf = kmalloc(info->max_frame_size, GFP_KERNEL); + if (info->tmp_rbuf == NULL) + return -ENOMEM; + return 0; +} + +static void free_tmp_rbuf(struct slgt_info *info) +{ + kfree(info->tmp_rbuf); + info->tmp_rbuf = NULL; +} + +/* + * allocate DMA descriptor lists. 
+ */
+static int alloc_desc(struct slgt_info *info)
+{
+	unsigned int pbufs;
+	unsigned int i;
+
+	/* one coherent allocation holds the rx list followed by the tx list */
+	info->bufs = pci_alloc_consistent(info->pdev, DESC_LIST_SIZE, &info->bufs_dma_addr);
+	if (!info->bufs)
+		return -ENOMEM;
+
+	memset(info->bufs, 0, DESC_LIST_SIZE);
+
+	pbufs = (unsigned int)info->bufs_dma_addr;
+
+	info->rbufs = (struct slgt_desc*)info->bufs;
+	info->tbufs = ((struct slgt_desc*)info->bufs) + info->rbuf_count;
+
+	/*
+	 * Link each list into a ring: every entry records its own
+	 * physical address and that of its successor, and the last
+	 * entry points back to the first entry of the same list.
+	 */
+
+	for (i = 0; i < info->rbuf_count; i++) {
+		/* index of the following rx descriptor, wrapping to 0 */
+		unsigned int next = (i == info->rbuf_count - 1) ? 0 : i + 1;
+
+		info->rbufs[i].pdesc = pbufs + (i * sizeof(struct slgt_desc));
+		info->rbufs[i].next = cpu_to_le32(pbufs + (next * sizeof(struct slgt_desc)));
+		set_desc_count(info->rbufs[i], DMABUFSIZE);
+	}
+
+	for (i = 0; i < info->tbuf_count; i++) {
+		/* index of the following tx descriptor, wrapping to 0 */
+		unsigned int next = (i == info->tbuf_count - 1) ? 0 : i + 1;
+
+		/* tx descriptors sit behind the rbuf_count rx descriptors */
+		info->tbufs[i].pdesc = pbufs + ((info->rbuf_count + i) * sizeof(struct slgt_desc));
+		info->tbufs[i].next = cpu_to_le32(pbufs + ((info->rbuf_count + next) * sizeof(struct slgt_desc)));
+	}
+
+	return 0;
+}
+
+/*
+ * release the descriptor lists, does nothing if none are allocated
+ */
+static void free_desc(struct slgt_info *info)
+{
+	if (info->bufs == NULL)
+		return;
+
+	pci_free_consistent(info->pdev, DESC_LIST_SIZE, info->bufs, info->bufs_dma_addr);
+	info->bufs  = NULL;
+	info->rbufs = NULL;
+	info->tbufs = NULL;
+}
+
+/*
+ * allocate a DMABUFSIZE data buffer for each of the first count
+ * descriptors in bufs and store its bus address in the descriptor
+ *
+ * returns 0 on success, -ENOMEM at the first failed allocation
+ * (buffers allocated so far are left in place)
+ */
+static int alloc_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		struct slgt_desc *d = &bufs[i];
+
+		d->buf = pci_alloc_consistent(info->pdev, DMABUFSIZE, &d->buf_dma_addr);
+		if (d->buf == NULL)
+			return -ENOMEM;
+		d->pbuf = cpu_to_le32((unsigned int)d->buf_dma_addr);
+	}
+	return 0;
+}
+
+/*
+ * free the data buffers of the first count descriptors in bufs,
+ * skipping descriptors that have no buffer
+ */
+static void free_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		if (bufs[i].buf != NULL) {
+			pci_free_consistent(info->pdev, DMABUFSIZE, bufs[i].buf, bufs[i].buf_dma_addr);
+			bufs[i].buf = NULL;
+		}
+	}
+}
+
+/*
+ * allocate everything a port needs for DMA: descriptor lists,
+ * rx and tx data buffers and the temporary rx buffer
+ *
+ * The steps run in this order and stop at the first failure.
+ * returns 0 on success, -ENOMEM otherwise
+ */
+static int alloc_dma_bufs(struct slgt_info *info)
+{
+	info->rbuf_count = 32;
+	info->tbuf_count = 32;
+
+	if (alloc_desc(info) < 0)
+		goto fail;
+	if (alloc_bufs(info, info->rbufs, info->rbuf_count) < 0)
+		goto fail;
+	if (alloc_bufs(info, info->tbufs, info->tbuf_count) < 0)
+		goto fail;
+	if (alloc_tmp_rbuf(info) < 0)
+		goto fail;
+
+	reset_rbufs(info);
+	return 0;
+
+fail:
+	DBGERR(("%s DMA buffer alloc fail\n", info->device_name));
+	return -ENOMEM;
+}
+
+/*
+ * undo alloc_dma_bufs: data buffers and descriptor lists are only
+ * touched when the descriptor area exists, the temporary rx buffer
+ * is always released
+ */
+static void free_dma_bufs(struct slgt_info *info)
+{
+	if (info->bufs) {
+		free_bufs(info, info->rbufs, info->rbuf_count);
+		free_bufs(info, info->tbufs, info->tbuf_count);
+		free_desc(info);
+	}
+	free_tmp_rbuf(info);
+}
+
+/*
+ * reserve and map the adapter register window
+ *
+ * On success reg_addr points at the registers (mapping plus
+ * reg_offset) and 0 is returned. On failure init_error is set,
+ * anything already claimed is released and -ENODEV is returned.
+ */
+static int claim_resources(struct slgt_info *info)
+{
+	if (request_mem_region(info->phys_reg_addr, SLGT_REG_SIZE, "synclink_gt") == NULL) {
+		DBGERR(("%s reg addr conflict, addr=%08X\n",
+			info->device_name, info->phys_reg_addr));
+		info->init_error = DiagStatus_AddressConflict;
+		release_resources(info);
+		return -ENODEV;
+	}
+	info->reg_addr_requested = 1;
+
+	info->reg_addr = ioremap(info->phys_reg_addr, PAGE_SIZE);
+	if (!info->reg_addr) {
+		DBGERR(("%s cant map device registers, addr=%08X\n",
+			info->device_name, info->phys_reg_addr));
+		info->init_error = DiagStatus_CantAssignPciResources;
+		release_resources(info);
+		return -ENODEV;
+	}
+
+	/* step from the start of the mapping to the first register */
+	info->reg_addr += info->reg_offset;
+	return 0;
+}
+
+/*
+ * give back whatever has been claimed so far: the IRQ, the memory
+ * region and the register mapping, each only if currently held
+ */
+static void release_resources(struct slgt_info *info)
+{
+	if (info->irq_requested) {
+		free_irq(info->irq_level, info);
+		info->irq_requested = 0;
+	}
+
+	if (info->reg_addr_requested) {
+		release_mem_region(info->phys_reg_addr, SLGT_REG_SIZE);
+		info->reg_addr_requested = 0;
+	}
+
+	if (info->reg_addr) {
+		/* reg_addr was advanced by reg_offset after ioremap */
+		iounmap(info->reg_addr - info->reg_offset);
+		info->reg_addr = NULL;
+	}
+}
+
+/* Add
the specified device instance data structure to the
+ * global linked list of devices and increment the device count.
+ */
+static void add_device(struct slgt_info *info)
+{
+	char *devstr;
+
+	info->next_device = NULL;
+	info->line = slgt_device_count;
+	sprintf(info->device_name, "%s%d", tty_dev_prefix, info->line);
+
+	/* per device module options only exist for the first MAX_DEVICES lines */
+	if (info->line < MAX_DEVICES) {
+		if (maxframe[info->line])
+			info->max_frame_size = maxframe[info->line];
+		info->dosyncppp = dosyncppp[info->line];
+	}
+
+	slgt_device_count++;
+
+	/* append to the tail of the global device list */
+	if (!slgt_device_list)
+		slgt_device_list = info;
+	else {
+		struct slgt_info *current_dev = slgt_device_list;
+		while(current_dev->next_device)
+			current_dev = current_dev->next_device;
+		current_dev->next_device = info;
+	}
+
+	/* keep the frame size within 4096..65535 */
+	if (info->max_frame_size < 4096)
+		info->max_frame_size = 4096;
+	else if (info->max_frame_size > 65535)
+		info->max_frame_size = 65535;
+
+	switch(info->pdev->device) {
+	case SYNCLINK_GT_DEVICE_ID:
+		devstr = "GT";
+		break;
+	case SYNCLINK_GT4_DEVICE_ID:
+		devstr = "GT4";
+		break;
+	case SYNCLINK_AC_DEVICE_ID:
+		devstr = "AC";
+		info->params.mode = MGSL_MODE_ASYNC;
+		break;
+	default:
+		devstr = "(unknown model)";
+	}
+	printk("SyncLink %s %s IO=%08x IRQ=%d MaxFrameSize=%u\n",
+		devstr, info->device_name, info->phys_reg_addr,
+		info->irq_level, info->max_frame_size);
+
+#ifdef CONFIG_HDLC
+	hdlcdev_init(info);
+#endif
+}
+
+/*
+ *  allocate device instance structure, return NULL on failure
+ *
+ *  The new instance is zeroed and given its defaults (frame size,
+ *  timers, wait queues, default parameters) plus the PCI resource
+ *  information taken from pdev. init_error starts out as -1.
+ */
+static struct slgt_info *alloc_dev(int adapter_num, int port_num, struct pci_dev *pdev)
+{
+	struct slgt_info *info;
+
+	info = kmalloc(sizeof(struct slgt_info), GFP_KERNEL);
+
+	if (!info) {
+		DBGERR(("%s device alloc failed adapter=%d port=%d\n",
+			driver_name, adapter_num, port_num));
+	} else {
+		memset(info, 0, sizeof(struct slgt_info));
+		info->magic = MGSL_MAGIC;
+		INIT_WORK(&info->task, bh_handler, info);
+		info->max_frame_size = 4096;
+		info->raw_rx_size = DMABUFSIZE;
+		info->close_delay = 5*HZ/10;
+		info->closing_wait = 30*HZ;
+		init_waitqueue_head(&info->open_wait);
+		init_waitqueue_head(&info->close_wait);
+		init_waitqueue_head(&info->status_event_wait_q);
+		init_waitqueue_head(&info->event_wait_q);
+		spin_lock_init(&info->netlock);
+		memcpy(&info->params,&default_params,sizeof(MGSL_PARAMS));
+		info->idle_mode = HDLC_TXIDLE_FLAGS;
+		info->adapter_num = adapter_num;
+		info->port_num = port_num;
+
+		init_timer(&info->tx_timer);
+		info->tx_timer.data = (unsigned long)info;
+		info->tx_timer.function = tx_timeout;
+
+		init_timer(&info->rx_timer);
+		info->rx_timer.data = (unsigned long)info;
+		info->rx_timer.function = rx_timeout;
+
+		/* Copy configuration info to device instance data */
+		info->pdev = pdev;
+		info->irq_level = pdev->irq;
+		info->phys_reg_addr = pci_resource_start(pdev,0);
+
+		/* ioremap works on page boundaries
+		 * map full page starting at the page boundary
+		 * and remember the offset of the registers within it
+		 */
+		info->reg_offset    = info->phys_reg_addr & (PAGE_SIZE-1);
+		info->phys_reg_addr &= ~(PAGE_SIZE-1);
+
+		info->bus_type = MGSL_BUS_TYPE_PCI;
+		info->irq_flags = SA_SHIRQ;
+
+		info->init_error = -1; /* assume error, set to 0 on successful init */
+	}
+
+	return info;
+}
+
+/*
+ * create and initialize the device instances for one adapter
+ *
+ * One instance is allocated per port (4 for the GT4, otherwise 1).
+ * The register window and IRQ are claimed through port 0 and the
+ * results are copied to the remaining ports.
+ */
+static void device_init(int adapter_num, struct pci_dev *pdev)
+{
+	/* zeroed so that slots beyond port_count are NULL in the
+	 * copies handed to each port below
+	 */
+	struct slgt_info *port_array[SLGT_MAX_PORTS] = { NULL };
+	int i;
+	int port_count = 1;
+
+	if (pdev->device == SYNCLINK_GT4_DEVICE_ID)
+		port_count = 4;
+
+	/* allocate device instances for all ports */
+	for (i=0; i < port_count; ++i) {
+		port_array[i] = alloc_dev(adapter_num, i, pdev);
+		if (port_array[i] == NULL) {
+			/* free the ports allocated so far and give up */
+			for (--i; i >= 0; --i)
+				kfree(port_array[i]);
+			return;
+		}
+	}
+
+	/* give copy of port_array to all ports and add to device list */
+	for (i=0; i < port_count; ++i) {
+		memcpy(port_array[i]->port_array, port_array, sizeof(port_array));
+		add_device(port_array[i]);
+		port_array[i]->port_count = port_count;
+		spin_lock_init(&port_array[i]->lock);
+	}
+
+	/* Allocate and claim adapter resources */
+	if (!claim_resources(port_array[0])) {
+
+		alloc_dma_bufs(port_array[0]);
+
+		/* copy resource information from first port to others */
+		for (i = 1; i < port_count; ++i) {
+			port_array[i]->lock      = port_array[0]->lock;
+			port_array[i]->irq_level = port_array[0]->irq_level;
+			port_array[i]->reg_addr  = port_array[0]->reg_addr;
+			alloc_dma_bufs(port_array[i]);
+		}
+
+		if (request_irq(port_array[0]->irq_level,
+					slgt_interrupt,
+					port_array[0]->irq_flags,
+					port_array[0]->device_name,
+					port_array[0]) < 0) {
+			DBGERR(("%s request_irq failed IRQ=%d\n",
+				port_array[0]->device_name,
+				port_array[0]->irq_level));
+		} else {
+			port_array[0]->irq_requested = 1;
+			adapter_test(port_array[0]);
+			/* all ports share the test result of port 0 */
+			for (i=1 ; i < port_count ; i++)
+				port_array[i]->init_error = port_array[0]->init_error;
+		}
+	}
+}
+
+/*
+ * PCI probe callback: enable the device, turn on bus mastering
+ * and set up the device instances for this adapter
+ */
+static int __devinit init_one(struct pci_dev *dev,
+			      const struct pci_device_id *ent)
+{
+	if (pci_enable_device(dev)) {
+		printk("error enabling pci device %p\n", dev);
+		return -EIO;
+	}
+	pci_set_master(dev);
+	device_init(slgt_device_count, dev);
+	return 0;
+}
+
+/* PCI remove callback: intentionally empty, devices are released in slgt_cleanup */
+static void __devexit remove_one(struct pci_dev *dev)
+{
+}
+
+static struct tty_operations ops = {
+	.open = open,
+	.close = close,
+	.write = write,
+	.put_char = put_char,
+	.flush_chars = flush_chars,
+	.write_room = write_room,
+	.chars_in_buffer = chars_in_buffer,
+	.flush_buffer = flush_buffer,
+	.ioctl = ioctl,
+	.throttle = throttle,
+	.unthrottle = unthrottle,
+	.send_xchar = send_xchar,
+	.break_ctl = set_break,
+	.wait_until_sent = wait_until_sent,
+ 	.read_proc = read_proc,
+	.set_termios = set_termios,
+	.stop = tx_hold,
+	.start = tx_release,
+	.hangup = hangup,
+	.tiocmget = tiocmget,
+	.tiocmset = tiocmset,
+};
+
+/*
+ * undo everything slgt_init and the PCI probe set up
+ *
+ * Unregisters the tty driver (if registered), resets and frees all
+ * device instances and finally unregisters the PCI driver (if
+ * registered). Also used as the error path of slgt_init.
+ */
+static void slgt_cleanup(void)
+{
+	int rc;
+	struct slgt_info *info;
+	struct slgt_info *tmp;
+
+	printk("unload %s %s\n", driver_name, driver_version);
+
+	if (serial_driver) {
+		if ((rc = tty_unregister_driver(serial_driver)))
+			DBGERR(("tty_unregister_driver error=%d\n", rc));
+		put_tty_driver(serial_driver);
+	}
+
+	/* reset devices */
+	info = slgt_device_list;
+	while(info) {
+		reset_port(info);
+		info = info->next_device;
+	}
+
+	/* release devices */
+	info = slgt_device_list;
+	while(info) {
+#ifdef CONFIG_HDLC
+		hdlcdev_exit(info);
+#endif
+		free_dma_bufs(info);
+		free_tmp_rbuf(info);
+		/* register window and IRQ are owned by the first port */
+		if (info->port_num == 0)
+			release_resources(info);
+		tmp = info;
+		info = info->next_device;
+		kfree(tmp);
+	}
+
+	if (pci_registered)
+		pci_unregister_driver(&pci_driver);
+}
+
+/*
+ *  Driver initialization entry point.
+ *
+ *  Registers the PCI driver (which probes the adapters) and then
+ *  the tty driver. Returns 0 on success or a negative errno; on
+ *  any failure after PCI registration everything is torn down
+ *  again through slgt_cleanup.
+ */
+static int __init slgt_init(void)
+{
+	int rc;
+
+ 	printk("%s %s\n", driver_name, driver_version);
+
+	slgt_device_count = 0;
+	if ((rc = pci_register_driver(&pci_driver)) < 0) {
+		printk("%s pci_register_driver error=%d\n", driver_name, rc);
+		return rc;
+	}
+	pci_registered = 1;
+
+	if (!slgt_device_list) {
+		printk("%s no devices found\n",driver_name);
+		/* the PCI driver is registered at this point, so it must
+		 * be unregistered before init fails
+		 */
+		rc = -ENODEV;
+		goto error;
+	}
+
+	serial_driver = alloc_tty_driver(MAX_DEVICES);
+	if (!serial_driver) {
+		rc = -ENOMEM;
+		goto error;
+	}
+
+	/* Initialize the tty_driver structure */
+
+	serial_driver->owner = THIS_MODULE;
+	serial_driver->driver_name = tty_driver_name;
+	serial_driver->name = tty_dev_prefix;
+	serial_driver->major = ttymajor;
+	serial_driver->minor_start = 64;
+	serial_driver->type = TTY_DRIVER_TYPE_SERIAL;
+	serial_driver->subtype = SERIAL_TYPE_NORMAL;
+	serial_driver->init_termios = tty_std_termios;
+	serial_driver->init_termios.c_cflag =
+		B9600 | CS8 | CREAD | HUPCL | CLOCAL;
+	serial_driver->flags = TTY_DRIVER_REAL_RAW;
+	tty_set_operations(serial_driver, &ops);
+	if ((rc = tty_register_driver(serial_driver)) < 0) {
+		DBGERR(("%s can't register serial driver\n", driver_name));
+		put_tty_driver(serial_driver);
+		/* keep slgt_cleanup from touching the released driver */
+		serial_driver = NULL;
+		goto error;
+	}
+
+ 	printk("%s %s, tty major#%d\n",
+		driver_name, driver_version,
+		serial_driver->major);
+
+	return 0;
+
+error:
+	slgt_cleanup();
+	return rc;
+}
+
+/* module unload entry point */
+static void __exit slgt_exit(void)
+{
+	slgt_cleanup();
+}
+
+module_init(slgt_init);
+module_exit(slgt_exit);
+
+/*
+ * register access routines
+
*/
+
+/*
+ * Declares reg_addr in the calling function as the address of
+ * register addr of the port described by info. Register offsets of
+ * 0x80 and above are additionally displaced by port_num * 32.
+ * Expects info and addr to be in scope.
+ */
+#define CALC_REGADDR() \
+	unsigned long reg_addr = ((unsigned long)info->reg_addr) + addr + \
+		((addr >= 0x80) ? (info->port_num) * 32 : 0);
+
+/* read the 8 bit register at offset addr */
+static __u8 rd_reg8(struct slgt_info *info, unsigned int addr)
+{
+	CALC_REGADDR();
+	return readb((void __iomem *)reg_addr);
+}
+
+/* write value to the 8 bit register at offset addr */
+static void wr_reg8(struct slgt_info *info, unsigned int addr, __u8 value)
+{
+	CALC_REGADDR();
+	writeb(value, (void __iomem *)reg_addr);
+}
+
+/* read the 16 bit register at offset addr */
+static __u16 rd_reg16(struct slgt_info *info, unsigned int addr)
+{
+	CALC_REGADDR();
+	return readw((void __iomem *)reg_addr);
+}
+
+/* write value to the 16 bit register at offset addr */
+static void wr_reg16(struct slgt_info *info, unsigned int addr, __u16 value)
+{
+	CALC_REGADDR();
+	writew(value, (void __iomem *)reg_addr);
+}
+
+/* read the 32 bit register at offset addr */
+static __u32 rd_reg32(struct slgt_info *info, unsigned int addr)
+{
+	CALC_REGADDR();
+	return readl((void __iomem *)reg_addr);
+}
+
+/* write value to the 32 bit register at offset addr */
+static void wr_reg32(struct slgt_info *info, unsigned int addr, __u32 value)
+{
+	CALC_REGADDR();
+	writel(value, (void __iomem *)reg_addr);
+}
+
+/*
+ * reset the rx DMA controller: write the reset bit (BIT1) to RDCSR,
+ * then poll at most 1000 times for the enable bit (BIT0) to clear
+ */
+static void rdma_reset(struct slgt_info *info)
+{
+	unsigned int tries;
+
+	wr_reg32(info, RDCSR, BIT1);
+
+	for (tries = 1000; tries != 0; tries--) {
+		if ((rd_reg32(info, RDCSR) & BIT0) == 0)
+			break;
+	}
+}
+
+/*
+ * reset the tx DMA controller: write the reset bit (BIT1) to TDCSR,
+ * then poll at most 1000 times for the enable bit (BIT0) to clear
+ */
+static void tdma_reset(struct slgt_info *info)
+{
+	unsigned int tries;
+
+	wr_reg32(info, TDCSR, BIT1);
+
+	for (tries = 1000; tries != 0; tries--) {
+		if ((rd_reg32(info, TDCSR) & BIT0) == 0)
+			break;
+	}
+}
+
+/*
+ * enable internal loopback
+ * TxCLK and RxCLK are generated from BRG
+ * and TxD is looped back to RxD internally.
+ */ +static void enable_loopback(struct slgt_info *info) +{ + /* SCR (serial control) BIT2=looopback enable */ + wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) | BIT2)); + + if (info->params.mode != MGSL_MODE_ASYNC) { + /* CCR (clock control) + * 07..05 tx clock source (010 = BRG) + * 04..02 rx clock source (010 = BRG) + * 01 auxclk enable (0 = disable) + * 00 BRG enable (1 = enable) + * + * 0100 1001 + */ + wr_reg8(info, CCR, 0x49); + + /* set speed if available, otherwise use default */ + if (info->params.clock_speed) + set_rate(info, info->params.clock_speed); + else + set_rate(info, 3686400); + } +} + +/* + * set baud rate generator to specified rate + */ +static void set_rate(struct slgt_info *info, u32 rate) +{ + unsigned int div; + static unsigned int osc = 14745600; + + /* div = osc/rate - 1 + * + * Round div up if osc/rate is not integer to + * force to next slowest rate. + */ + + if (rate) { + div = osc/rate; + if (!(osc % rate) && div) + div--; + wr_reg16(info, BDR, (unsigned short)div); + } +} + +static void rx_stop(struct slgt_info *info) +{ + unsigned short val; + + /* disable and reset receiver */ + val = rd_reg16(info, RCR) & ~BIT1; /* clear enable bit */ + wr_reg16(info, RCR, (unsigned short)(val | BIT2)); /* set reset bit */ + wr_reg16(info, RCR, val); /* clear reset bit */ + + slgt_irq_off(info, IRQ_RXOVER + IRQ_RXDATA + IRQ_RXIDLE); + + /* clear pending rx interrupts */ + wr_reg16(info, SSR, IRQ_RXIDLE + IRQ_RXOVER); + + rdma_reset(info); + + info->rx_enabled = 0; + info->rx_restart = 0; +} + +static void rx_start(struct slgt_info *info) +{ + unsigned short val; + + slgt_irq_off(info, IRQ_RXOVER + IRQ_RXDATA); + + /* clear pending rx overrun IRQ */ + wr_reg16(info, SSR, IRQ_RXOVER); + + /* reset and disable receiver */ + val = rd_reg16(info, RCR) & ~BIT1; /* clear enable bit */ + wr_reg16(info, RCR, (unsigned short)(val | BIT2)); /* set reset bit */ + wr_reg16(info, RCR, val); /* clear reset bit */ + + rdma_reset(info); + 
reset_rbufs(info); + + /* set 1st descriptor address */ + wr_reg32(info, RDDAR, info->rbufs[0].pdesc); + + if (info->params.mode != MGSL_MODE_ASYNC) { + /* enable rx DMA and DMA interrupt */ + wr_reg32(info, RDCSR, (BIT2 + BIT0)); + } else { + /* enable saving of rx status, rx DMA and DMA interrupt */ + wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0)); + } + + slgt_irq_on(info, IRQ_RXOVER); + + /* enable receiver */ + wr_reg16(info, RCR, (unsigned short)(rd_reg16(info, RCR) | BIT1)); + + info->rx_restart = 0; + info->rx_enabled = 1; +} + +static void tx_start(struct slgt_info *info) +{ + if (!info->tx_enabled) { + wr_reg16(info, TCR, + (unsigned short)(rd_reg16(info, TCR) | BIT1)); + info->tx_enabled = TRUE; + } + + if (info->tx_count) { + info->drop_rts_on_tx_done = 0; + + if (info->params.mode != MGSL_MODE_ASYNC) { + if (info->params.flags & HDLC_FLAG_AUTO_RTS) { + get_signals(info); + if (!(info->signals & SerialSignal_RTS)) { + info->signals |= SerialSignal_RTS; + set_signals(info); + info->drop_rts_on_tx_done = 1; + } + } + + slgt_irq_off(info, IRQ_TXDATA); + slgt_irq_on(info, IRQ_TXUNDER + IRQ_TXIDLE); + /* clear tx idle and underrun status bits */ + wr_reg16(info, SSR, (unsigned short)(IRQ_TXIDLE + IRQ_TXUNDER)); + + if (!(rd_reg32(info, TDCSR) & BIT0)) { + /* tx DMA stopped, restart tx DMA */ + tdma_reset(info); + /* set 1st descriptor address */ + wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc); + if (info->params.mode == MGSL_MODE_RAW) + wr_reg32(info, TDCSR, BIT2 + BIT0); /* IRQ + DMA enable */ + else + wr_reg32(info, TDCSR, BIT0); /* DMA enable */ + } + + if (info->params.mode != MGSL_MODE_RAW) { + info->tx_timer.expires = jiffies + msecs_to_jiffies(5000); + add_timer(&info->tx_timer); + } + } else { + tdma_reset(info); + /* set 1st descriptor address */ + wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc); + + slgt_irq_off(info, IRQ_TXDATA); + slgt_irq_on(info, IRQ_TXIDLE); + /* clear tx idle status bit */ + wr_reg16(info, SSR, 
IRQ_TXIDLE); + + /* enable tx DMA */ + wr_reg32(info, TDCSR, BIT0); + } + + info->tx_active = 1; + } +} + +static void tx_stop(struct slgt_info *info) +{ + unsigned short val; + + del_timer(&info->tx_timer); + + tdma_reset(info); + + /* reset and disable transmitter */ + val = rd_reg16(info, TCR) & ~BIT1; /* clear enable bit */ + wr_reg16(info, TCR, (unsigned short)(val | BIT2)); /* set reset bit */ + wr_reg16(info, TCR, val); /* clear reset */ + + slgt_irq_off(info, IRQ_TXDATA + IRQ_TXIDLE + IRQ_TXUNDER); + + /* clear tx idle and underrun status bit */ + wr_reg16(info, SSR, (unsigned short)(IRQ_TXIDLE + IRQ_TXUNDER)); + + reset_tbufs(info); + + info->tx_enabled = 0; + info->tx_active = 0; +} + +static void reset_port(struct slgt_info *info) +{ + if (!info->reg_addr) + return; + + tx_stop(info); + rx_stop(info); + + info->signals &= ~(SerialSignal_DTR + SerialSignal_RTS); + set_signals(info); + + slgt_irq_off(info, IRQ_ALL | IRQ_MASTER); +} + +static void reset_adapter(struct slgt_info *info) +{ + int i; + for (i=0; i < info->port_count; ++i) { + if (info->port_array[i]) + reset_port(info->port_array[i]); + } +} + +static void async_mode(struct slgt_info *info) +{ + unsigned short val; + + slgt_irq_off(info, IRQ_ALL | IRQ_MASTER); + tx_stop(info); + rx_stop(info); + + /* TCR (tx control) + * + * 15..13 mode, 010=async + * 12..10 encoding, 000=NRZ + * 09 parity enable + * 08 1=odd parity, 0=even parity + * 07 1=RTS driver control + * 06 1=break enable + * 05..04 character length + * 00=5 bits + * 01=6 bits + * 10=7 bits + * 11=8 bits + * 03 0=1 stop bit, 1=2 stop bits + * 02 reset + * 01 enable + * 00 auto-CTS enable + */ + val = 0x4000; + + if (info->if_mode & MGSL_INTERFACE_RTS_EN) + val |= BIT7; + + if (info->params.parity != ASYNC_PARITY_NONE) { + val |= BIT9; + if (info->params.parity == ASYNC_PARITY_ODD) + val |= BIT8; + } + + switch (info->params.data_bits) + { + case 6: val |= BIT4; break; + case 7: val |= BIT5; break; + case 8: val |= BIT5 + BIT4; break; + 
} + + if (info->params.stop_bits != 1) + val |= BIT3; + + if (info->params.flags & HDLC_FLAG_AUTO_CTS) + val |= BIT0; + + wr_reg16(info, TCR, val); + + /* RCR (rx control) + * + * 15..13 mode, 010=async + * 12..10 encoding, 000=NRZ + * 09 parity enable + * 08 1=odd parity, 0=even parity + * 07..06 reserved, must be 0 + * 05..04 character length + * 00=5 bits + * 01=6 bits + * 10=7 bits + * 11=8 bits + * 03 reserved, must be zero + * 02 reset + * 01 enable + * 00 auto-DCD enable + */ + val = 0x4000; + + if (info->params.parity != ASYNC_PARITY_NONE) { + val |= BIT9; + if (info->params.parity == ASYNC_PARITY_ODD) + val |= BIT8; + } + + switch (info->params.data_bits) + { + case 6: val |= BIT4; break; + case 7: val |= BIT5; break; + case 8: val |= BIT5 + BIT4; break; + } + + if (info->params.flags & HDLC_FLAG_AUTO_DCD) + val |= BIT0; + + wr_reg16(info, RCR, val); + + /* CCR (clock control) + * + * 07..05 011 = tx clock source is BRG/16 + * 04..02 010 = rx clock source is BRG + * 01 0 = auxclk disabled + * 00 1 = BRG enabled + * + * 0110 1001 + */ + wr_reg8(info, CCR, 0x69); + + msc_set_vcr(info); + + tx_set_idle(info); + + /* SCR (serial control) + * + * 15 1=tx req on FIFO half empty + * 14 1=rx req on FIFO half full + * 13 tx data IRQ enable + * 12 tx idle IRQ enable + * 11 rx break on IRQ enable + * 10 rx data IRQ enable + * 09 rx break off IRQ enable + * 08 overrun IRQ enable + * 07 DSR IRQ enable + * 06 CTS IRQ enable + * 05 DCD IRQ enable + * 04 RI IRQ enable + * 03 reserved, must be zero + * 02 1=txd->rxd internal loopback enable + * 01 reserved, must be zero + * 00 1=master IRQ enable + */ + val = BIT15 + BIT14 + BIT0; + wr_reg16(info, SCR, val); + + slgt_irq_on(info, IRQ_RXBREAK | IRQ_RXOVER); + + set_rate(info, info->params.data_rate * 16); + + if (info->params.loopback) + enable_loopback(info); +} + +static void hdlc_mode(struct slgt_info *info) +{ + unsigned short val; + + slgt_irq_off(info, IRQ_ALL | IRQ_MASTER); + tx_stop(info); + rx_stop(info); + + /* 
TCR (tx control) + * + * 15..13 mode, 000=HDLC 001=raw sync + * 12..10 encoding + * 09 CRC enable + * 08 CRC32 + * 07 1=RTS driver control + * 06 preamble enable + * 05..04 preamble length + * 03 share open/close flag + * 02 reset + * 01 enable + * 00 auto-CTS enable + */ + val = 0; + + if (info->params.mode == MGSL_MODE_RAW) + val |= BIT13; + if (info->if_mode & MGSL_INTERFACE_RTS_EN) + val |= BIT7; + + switch(info->params.encoding) + { + case HDLC_ENCODING_NRZB: val |= BIT10; break; + case HDLC_ENCODING_NRZI_MARK: val |= BIT11; break; + case HDLC_ENCODING_NRZI: val |= BIT11 + BIT10; break; + case HDLC_ENCODING_BIPHASE_MARK: val |= BIT12; break; + case HDLC_ENCODING_BIPHASE_SPACE: val |= BIT12 + BIT10; break; + case HDLC_ENCODING_BIPHASE_LEVEL: val |= BIT12 + BIT11; break; + case HDLC_ENCODING_DIFF_BIPHASE_LEVEL: val |= BIT12 + BIT11 + BIT10; break; + } + + switch (info->params.crc_type) + { + case HDLC_CRC_16_CCITT: val |= BIT9; break; + case HDLC_CRC_32_CCITT: val |= BIT9 + BIT8; break; + } + + if (info->params.preamble != HDLC_PREAMBLE_PATTERN_NONE) + val |= BIT6; + + switch (info->params.preamble_length) + { + case HDLC_PREAMBLE_LENGTH_16BITS: val |= BIT5; break; + case HDLC_PREAMBLE_LENGTH_32BITS: val |= BIT4; break; + case HDLC_PREAMBLE_LENGTH_64BITS: val |= BIT5 + BIT4; break; + } + + if (info->params.flags & HDLC_FLAG_AUTO_CTS) + val |= BIT0; + + wr_reg16(info, TCR, val); + + /* TPR (transmit preamble) */ + + switch (info->params.preamble) + { + case HDLC_PREAMBLE_PATTERN_FLAGS: val = 0x7e; break; + case HDLC_PREAMBLE_PATTERN_ONES: val = 0xff; break; + case HDLC_PREAMBLE_PATTERN_ZEROS: val = 0x00; break; + case HDLC_PREAMBLE_PATTERN_10: val = 0x55; break; + case HDLC_PREAMBLE_PATTERN_01: val = 0xaa; break; + default: val = 0x7e; break; + } + wr_reg8(info, TPR, (unsigned char)val); + + /* RCR (rx control) + * + * 15..13 mode, 000=HDLC 001=raw sync + * 12..10 encoding + * 09 CRC enable + * 08 CRC32 + * 07..03 reserved, must be 0 + * 02 reset + * 01 enable + 
* 00 auto-DCD enable + */ + val = 0; + + if (info->params.mode == MGSL_MODE_RAW) + val |= BIT13; + + switch(info->params.encoding) + { + case HDLC_ENCODING_NRZB: val |= BIT10; break; + case HDLC_ENCODING_NRZI_MARK: val |= BIT11; break; + case HDLC_ENCODING_NRZI: val |= BIT11 + BIT10; break; + case HDLC_ENCODING_BIPHASE_MARK: val |= BIT12; break; + case HDLC_ENCODING_BIPHASE_SPACE: val |= BIT12 + BIT10; break; + case HDLC_ENCODING_BIPHASE_LEVEL: val |= BIT12 + BIT11; break; + case HDLC_ENCODING_DIFF_BIPHASE_LEVEL: val |= BIT12 + BIT11 + BIT10; break; + } + + switch (info->params.crc_type) + { + case HDLC_CRC_16_CCITT: val |= BIT9; break; + case HDLC_CRC_32_CCITT: val |= BIT9 + BIT8; break; + } + + if (info->params.flags & HDLC_FLAG_AUTO_DCD) + val |= BIT0; + + wr_reg16(info, RCR, val); + + /* CCR (clock control) + * + * 07..05 tx clock source + * 04..02 rx clock source + * 01 auxclk enable + * 00 BRG enable + */ + val = 0; + + if (info->params.flags & HDLC_FLAG_TXC_BRG) + { + // when RxC source is DPLL, BRG generates 16X DPLL + // reference clock, so take TxC from BRG/16 to get + // transmit clock at actual data rate + if (info->params.flags & HDLC_FLAG_RXC_DPLL) + val |= BIT6 + BIT5; /* 011, txclk = BRG/16 */ + else + val |= BIT6; /* 010, txclk = BRG */ + } + else if (info->params.flags & HDLC_FLAG_TXC_DPLL) + val |= BIT7; /* 100, txclk = DPLL Input */ + else if (info->params.flags & HDLC_FLAG_TXC_RXCPIN) + val |= BIT5; /* 001, txclk = RXC Input */ + + if (info->params.flags & HDLC_FLAG_RXC_BRG) + val |= BIT3; /* 010, rxclk = BRG */ + else if (info->params.flags & HDLC_FLAG_RXC_DPLL) + val |= BIT4; /* 100, rxclk = DPLL */ + else if (info->params.flags & HDLC_FLAG_RXC_TXCPIN) + val |= BIT2; /* 001, rxclk = TXC Input */ + + if (info->params.clock_speed) + val |= BIT1 + BIT0; + + wr_reg8(info, CCR, (unsigned char)val); + + if (info->params.flags & (HDLC_FLAG_TXC_DPLL + HDLC_FLAG_RXC_DPLL)) + { + // program DPLL mode + switch(info->params.encoding) + { + case 
HDLC_ENCODING_BIPHASE_MARK: + case HDLC_ENCODING_BIPHASE_SPACE: + val = BIT7; break; + case HDLC_ENCODING_BIPHASE_LEVEL: + case HDLC_ENCODING_DIFF_BIPHASE_LEVEL: + val = BIT7 + BIT6; break; + default: val = BIT6; // NRZ encodings + } + wr_reg16(info, RCR, (unsigned short)(rd_reg16(info, RCR) | val)); + + // DPLL requires a 16X reference clock from BRG + set_rate(info, info->params.clock_speed * 16); + } + else + set_rate(info, info->params.clock_speed); + + tx_set_idle(info); + + msc_set_vcr(info); + + /* SCR (serial control) + * + * 15 1=tx req on FIFO half empty + * 14 1=rx req on FIFO half full + * 13 tx data IRQ enable + * 12 tx idle IRQ enable + * 11 underrun IRQ enable + * 10 rx data IRQ enable + * 09 rx idle IRQ enable + * 08 overrun IRQ enable + * 07 DSR IRQ enable + * 06 CTS IRQ enable + * 05 DCD IRQ enable + * 04 RI IRQ enable + * 03 reserved, must be zero + * 02 1=txd->rxd internal loopback enable + * 01 reserved, must be zero + * 00 1=master IRQ enable + */ + wr_reg16(info, SCR, BIT15 + BIT14 + BIT0); + + if (info->params.loopback) + enable_loopback(info); +} + +/* + * set transmit idle mode + */ +static void tx_set_idle(struct slgt_info *info) +{ + unsigned char val = 0xff; + + switch(info->idle_mode) + { + case HDLC_TXIDLE_FLAGS: val = 0x7e; break; + case HDLC_TXIDLE_ALT_ZEROS_ONES: val = 0xaa; break; + case HDLC_TXIDLE_ZEROS: val = 0x00; break; + case HDLC_TXIDLE_ONES: val = 0xff; break; + case HDLC_TXIDLE_ALT_MARK_SPACE: val = 0xaa; break; + case HDLC_TXIDLE_SPACE: val = 0x00; break; + case HDLC_TXIDLE_MARK: val = 0xff; break; + } + + wr_reg8(info, TIR, val); +} + +/* + * get state of V24 status (input) signals + */ +static void get_signals(struct slgt_info *info) +{ + unsigned short status = rd_reg16(info, SSR); + + /* clear all serial signals except DTR and RTS */ + info->signals &= SerialSignal_DTR + SerialSignal_RTS; + + if (status & BIT3) + info->signals |= SerialSignal_DSR; + if (status & BIT2) + info->signals |= SerialSignal_CTS; + if (status 
& BIT1) + info->signals |= SerialSignal_DCD; + if (status & BIT0) + info->signals |= SerialSignal_RI; +} + +/* + * set V.24 Control Register based on current configuration + */ +static void msc_set_vcr(struct slgt_info *info) +{ + unsigned char val = 0; + + /* VCR (V.24 control) + * + * 07..04 serial IF select + * 03 DTR + * 02 RTS + * 01 LL + * 00 RL + */ + + switch(info->if_mode & MGSL_INTERFACE_MASK) + { + case MGSL_INTERFACE_RS232: + val |= BIT5; /* 0010 */ + break; + case MGSL_INTERFACE_V35: + val |= BIT7 + BIT6 + BIT5; /* 1110 */ + break; + case MGSL_INTERFACE_RS422: + val |= BIT6; /* 0100 */ + break; + } + + if (info->signals & SerialSignal_DTR) + val |= BIT3; + if (info->signals & SerialSignal_RTS) + val |= BIT2; + if (info->if_mode & MGSL_INTERFACE_LL) + val |= BIT1; + if (info->if_mode & MGSL_INTERFACE_RL) + val |= BIT0; + wr_reg8(info, VCR, val); +} + +/* + * set state of V24 control (output) signals + */ +static void set_signals(struct slgt_info *info) +{ + unsigned char val = rd_reg8(info, VCR); + if (info->signals & SerialSignal_DTR) + val |= BIT3; + else + val &= ~BIT3; + if (info->signals & SerialSignal_RTS) + val |= BIT2; + else + val &= ~BIT2; + wr_reg8(info, VCR, val); +} + +/* + * free range of receive DMA buffers (i to last) + */ +static void free_rbufs(struct slgt_info *info, unsigned int i, unsigned int last) +{ + int done = 0; + + while(!done) { + /* reset current buffer for reuse */ + info->rbufs[i].status = 0; + if (info->params.mode == MGSL_MODE_RAW) + set_desc_count(info->rbufs[i], info->raw_rx_size); + else + set_desc_count(info->rbufs[i], DMABUFSIZE); + + if (i == last) + done = 1; + if (++i == info->rbuf_count) + i = 0; + } + info->rbuf_current = i; +} + +/* + * mark all receive DMA buffers as free + */ +static void reset_rbufs(struct slgt_info *info) +{ + free_rbufs(info, 0, info->rbuf_count - 1); +} + +/* + * pass receive HDLC frame to upper layer + * + * return 1 if frame available, otherwise 0 + */ +static int rx_get_frame(struct 
slgt_info *info) +{ + unsigned int start, end; + unsigned short status; + unsigned int framesize = 0; + int rc = 0; + unsigned long flags; + struct tty_struct *tty = info->tty; + unsigned char addr_field = 0xff; + +check_again: + + framesize = 0; + addr_field = 0xff; + start = end = info->rbuf_current; + + for (;;) { + if (!desc_complete(info->rbufs[end])) + goto cleanup; + + if (framesize == 0 && info->params.addr_filter != 0xff) + addr_field = info->rbufs[end].buf[0]; + + framesize += desc_count(info->rbufs[end]); + + if (desc_eof(info->rbufs[end])) + break; + + if (++end == info->rbuf_count) + end = 0; + + if (end == info->rbuf_current) { + if (info->rx_enabled){ + spin_lock_irqsave(&info->lock,flags); + rx_start(info); + spin_unlock_irqrestore(&info->lock,flags); + } + goto cleanup; + } + } + + /* status + * + * 15 buffer complete + * 14..06 reserved + * 05..04 residue + * 02 eof (end of frame) + * 01 CRC error + * 00 abort + */ + status = desc_status(info->rbufs[end]); + + /* ignore CRC bit if not using CRC (bit is undefined) */ + if (info->params.crc_type == HDLC_CRC_NONE) + status &= ~BIT1; + + if (framesize == 0 || + (addr_field != 0xff && addr_field != info->params.addr_filter)) { + free_rbufs(info, start, end); + goto check_again; + } + + if (framesize < 2 || status & (BIT1+BIT0)) { + if (framesize < 2 || (status & BIT0)) + info->icount.rxshort++; + else + info->icount.rxcrc++; + framesize = 0; + +#ifdef CONFIG_HDLC + { + struct net_device_stats *stats = hdlc_stats(info->netdev); + stats->rx_errors++; + stats->rx_frame_errors++; + } +#endif + } else { + /* adjust frame size for CRC, if any */ + if (info->params.crc_type == HDLC_CRC_16_CCITT) + framesize -= 2; + else if (info->params.crc_type == HDLC_CRC_32_CCITT) + framesize -= 4; + } + + DBGBH(("%s rx frame status=%04X size=%d\n", + info->device_name, status, framesize)); + DBGDATA(info, info->rbufs[start].buf, min_t(int, framesize, DMABUFSIZE), "rx"); + + if (framesize) { + if (framesize > 
info->max_frame_size) + info->icount.rxlong++; + else { + /* copy dma buffer(s) to contiguous temp buffer */ + int copy_count = framesize; + int i = start; + unsigned char *p = info->tmp_rbuf; + info->tmp_rbuf_count = framesize; + + info->icount.rxok++; + + while(copy_count) { + int partial_count = min(copy_count, DMABUFSIZE); + memcpy(p, info->rbufs[i].buf, partial_count); + p += partial_count; + copy_count -= partial_count; + if (++i == info->rbuf_count) + i = 0; + } + +#ifdef CONFIG_HDLC + if (info->netcount) + hdlcdev_rx(info,info->tmp_rbuf, framesize); + else +#endif + ldisc_receive_buf(tty, info->tmp_rbuf, info->flag_buf, framesize); + } + } + free_rbufs(info, start, end); + rc = 1; + +cleanup: + return rc; +} + +/* + * pass receive buffer (RAW synchronous mode) to tty layer + * return 1 if buffer available, otherwise 0 + */ +static int rx_get_buf(struct slgt_info *info) +{ + unsigned int i = info->rbuf_current; + + if (!desc_complete(info->rbufs[i])) + return 0; + DBGDATA(info, info->rbufs[i].buf, desc_count(info->rbufs[i]), "rx"); + DBGINFO(("rx_get_buf size=%d\n", desc_count(info->rbufs[i]))); + ldisc_receive_buf(info->tty, info->rbufs[i].buf, + info->flag_buf, desc_count(info->rbufs[i])); + free_rbufs(info, i, i); + return 1; +} + +static void reset_tbufs(struct slgt_info *info) +{ + unsigned int i; + info->tbuf_current = 0; + for (i=0 ; i < info->tbuf_count ; i++) { + info->tbufs[i].status = 0; + info->tbufs[i].count = 0; + } +} + +/* + * return number of free transmit DMA buffers + */ +static unsigned int free_tbuf_count(struct slgt_info *info) +{ + unsigned int count = 0; + unsigned int i = info->tbuf_current; + + do + { + if (desc_count(info->tbufs[i])) + break; /* buffer in use */ + ++count; + if (++i == info->tbuf_count) + i=0; + } while (i != info->tbuf_current); + + /* last buffer with zero count may be in use, assume it is */ + if (count) + --count; + + return count; +} + +/* + * load transmit DMA buffer(s) with data + */ +static void 
tx_load(struct slgt_info *info, const char *buf, unsigned int size) +{ + unsigned short count; + unsigned int i; + struct slgt_desc *d; + + if (size == 0) + return; + + DBGDATA(info, buf, size, "tx"); + + info->tbuf_start = i = info->tbuf_current; + + while (size) { + d = &info->tbufs[i]; + if (++i == info->tbuf_count) + i = 0; + + count = (unsigned short)((size > DMABUFSIZE) ? DMABUFSIZE : size); + memcpy(d->buf, buf, count); + + size -= count; + buf += count; + + if (!size && info->params.mode != MGSL_MODE_RAW) + set_desc_eof(*d, 1); /* HDLC: set EOF of last desc */ + else + set_desc_eof(*d, 0); + + set_desc_count(*d, count); + } + + info->tbuf_current = i; +} + +static int register_test(struct slgt_info *info) +{ + static unsigned short patterns[] = + {0x0000, 0xffff, 0xaaaa, 0x5555, 0x6969, 0x9696}; + static unsigned int count = sizeof(patterns)/sizeof(patterns[0]); + unsigned int i; + int rc = 0; + + for (i=0 ; i < count ; i++) { + wr_reg16(info, TIR, patterns[i]); + wr_reg16(info, BDR, patterns[(i+1)%count]); + if ((rd_reg16(info, TIR) != patterns[i]) || + (rd_reg16(info, BDR) != patterns[(i+1)%count])) { + rc = -ENODEV; + break; + } + } + + info->init_error = rc ? 
0 : DiagStatus_AddressFailure; + return rc; +} + +static int irq_test(struct slgt_info *info) +{ + unsigned long timeout; + unsigned long flags; + struct tty_struct *oldtty = info->tty; + u32 speed = info->params.data_rate; + + info->params.data_rate = 921600; + info->tty = NULL; + + spin_lock_irqsave(&info->lock, flags); + async_mode(info); + slgt_irq_on(info, IRQ_TXIDLE); + + /* enable transmitter */ + wr_reg16(info, TCR, + (unsigned short)(rd_reg16(info, TCR) | BIT1)); + + /* write one byte and wait for tx idle */ + wr_reg16(info, TDR, 0); + + /* assume failure */ + info->init_error = DiagStatus_IrqFailure; + info->irq_occurred = FALSE; + + spin_unlock_irqrestore(&info->lock, flags); + + timeout=100; + while(timeout-- && !info->irq_occurred) + msleep_interruptible(10); + + spin_lock_irqsave(&info->lock,flags); + reset_port(info); + spin_unlock_irqrestore(&info->lock,flags); + + info->params.data_rate = speed; + info->tty = oldtty; + + info->init_error = info->irq_occurred ? 0 : DiagStatus_IrqFailure; + return info->irq_occurred ? 
0 : -ENODEV; +} + +static int loopback_test_rx(struct slgt_info *info) +{ + unsigned char *src, *dest; + int count; + + if (desc_complete(info->rbufs[0])) { + count = desc_count(info->rbufs[0]); + src = info->rbufs[0].buf; + dest = info->tmp_rbuf; + + for( ; count ; count-=2, src+=2) { + /* src=data byte (src+1)=status byte */ + if (!(*(src+1) & (BIT9 + BIT8))) { + *dest = *src; + dest++; + info->tmp_rbuf_count++; + } + } + DBGDATA(info, info->tmp_rbuf, info->tmp_rbuf_count, "rx"); + return 1; + } + return 0; +} + +static int loopback_test(struct slgt_info *info) +{ +#define TESTFRAMESIZE 20 + + unsigned long timeout; + u16 count = TESTFRAMESIZE; + unsigned char buf[TESTFRAMESIZE]; + int rc = -ENODEV; + unsigned long flags; + + struct tty_struct *oldtty = info->tty; + MGSL_PARAMS params; + + memcpy(&params, &info->params, sizeof(params)); + + info->params.mode = MGSL_MODE_ASYNC; + info->params.data_rate = 921600; + info->params.loopback = 1; + info->tty = NULL; + + /* build and send transmit frame */ + for (count = 0; count < TESTFRAMESIZE; ++count) + buf[count] = (unsigned char)count; + + info->tmp_rbuf_count = 0; + memset(info->tmp_rbuf, 0, TESTFRAMESIZE); + + /* program hardware for HDLC and enabled receiver */ + spin_lock_irqsave(&info->lock,flags); + async_mode(info); + rx_start(info); + info->tx_count = count; + tx_load(info, buf, count); + tx_start(info); + spin_unlock_irqrestore(&info->lock, flags); + + /* wait for receive complete */ + for (timeout = 100; timeout; --timeout) { + msleep_interruptible(10); + if (loopback_test_rx(info)) { + rc = 0; + break; + } + } + + /* verify received frame length and contents */ + if (!rc && (info->tmp_rbuf_count != count || + memcmp(buf, info->tmp_rbuf, count))) { + rc = -ENODEV; + } + + spin_lock_irqsave(&info->lock,flags); + reset_adapter(info); + spin_unlock_irqrestore(&info->lock,flags); + + memcpy(&info->params, &params, sizeof(info->params)); + info->tty = oldtty; + + info->init_error = rc ? 
DiagStatus_DmaFailure : 0; + return rc; +} + +static int adapter_test(struct slgt_info *info) +{ + DBGINFO(("testing %s\n", info->device_name)); + if ((info->init_error = register_test(info)) < 0) { + printk("register test failure %s addr=%08X\n", + info->device_name, info->phys_reg_addr); + } else if ((info->init_error = irq_test(info)) < 0) { + printk("IRQ test failure %s IRQ=%d\n", + info->device_name, info->irq_level); + } else if ((info->init_error = loopback_test(info)) < 0) { + printk("loopback test failure %s\n", info->device_name); + } + return info->init_error; +} + +/* + * transmit timeout handler + */ +static void tx_timeout(unsigned long context) +{ + struct slgt_info *info = (struct slgt_info*)context; + unsigned long flags; + + DBGINFO(("%s tx_timeout\n", info->device_name)); + if(info->tx_active && info->params.mode == MGSL_MODE_HDLC) { + info->icount.txtimeout++; + } + spin_lock_irqsave(&info->lock,flags); + info->tx_active = 0; + info->tx_count = 0; + spin_unlock_irqrestore(&info->lock,flags); + +#ifdef CONFIG_HDLC + if (info->netcount) + hdlcdev_tx_done(info); + else +#endif + bh_transmit(info); +} + +/* + * receive buffer polling timer + */ +static void rx_timeout(unsigned long context) +{ + struct slgt_info *info = (struct slgt_info*)context; + unsigned long flags; + + DBGINFO(("%s rx_timeout\n", info->device_name)); + spin_lock_irqsave(&info->lock, flags); + info->pending_bh |= BH_RECEIVE; + spin_unlock_irqrestore(&info->lock, flags); + bh_handler(info); +} + diff --git a/include/linux/synclink.h b/include/linux/synclink.h index 763bd290f28d..1b7cd8d1a71b 100644 --- a/include/linux/synclink.h +++ b/include/linux/synclink.h @@ -1,7 +1,7 @@ /* * SyncLink Multiprotocol Serial Adapter Driver * - * $Id: synclink.h,v 3.6 2002/02/20 21:58:20 paulkf Exp $ + * $Id: synclink.h,v 3.10 2005/11/08 19:50:54 paulkf Exp $ * * Copyright (C) 1998-2000 by Microgate Corporation * @@ -128,10 +128,14 @@ #define MGSL_BUS_TYPE_EISA 2 #define MGSL_BUS_TYPE_PCI 5 
+#define MGSL_INTERFACE_MASK 0xf #define MGSL_INTERFACE_DISABLE 0 #define MGSL_INTERFACE_RS232 1 #define MGSL_INTERFACE_V35 2 #define MGSL_INTERFACE_RS422 3 +#define MGSL_INTERFACE_RTS_EN 0x10 +#define MGSL_INTERFACE_LL 0x20 +#define MGSL_INTERFACE_RL 0x40 typedef struct _MGSL_PARAMS { @@ -163,6 +167,9 @@ typedef struct _MGSL_PARAMS #define SYNCLINK_DEVICE_ID 0x0010 #define MGSCC_DEVICE_ID 0x0020 #define SYNCLINK_SCA_DEVICE_ID 0x0030 +#define SYNCLINK_GT_DEVICE_ID 0x0070 +#define SYNCLINK_GT4_DEVICE_ID 0x0080 +#define SYNCLINK_AC_DEVICE_ID 0x0090 #define MGSL_MAX_SERIAL_NUMBER 30 /* -- cgit v1.2.3-71-gd317 From 9ded96f24c3a5fcbef954e88c443385a1af37eb9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 8 Jan 2006 01:02:07 -0800 Subject: [PATCH] IRQ type flags Some ARM platforms have the ability to program the interrupt controller to detect various interrupt edges and/or levels. For some platforms, this is critical to setup correctly, particularly those which the setting is dependent on the device. Currently, ARM drivers do (eg) the following: err = request_irq(irq, ...); set_irq_type(irq, IRQT_RISING); However, if the interrupt has previously been programmed to be level sensitive (for whatever reason) then this will cause an interrupt storm. Hence, if we combine set_irq_type() with request_irq(), we can then safely set the type prior to unmasking the interrupt. The unfortunate problem is that in order to support this, these flags need to be visible outside of the ARM architecture - drivers such as smc91x need these flags and they're cross-architecture. Finally, the SA_TRIGGER_* flag passed to request_irq() should reflect the property that the device would like. The IRQ controller code should do its best to select the most appropriate supported mode. 
Signed-off-by: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/irq.c | 14 ++++++++++++-- arch/arm/mach-omap1/serial.c | 3 +-- arch/arm/mach-pxa/corgi.c | 7 +++---- arch/arm/mach-pxa/poodle.c | 7 +++---- arch/arm/mach-pxa/spitz.c | 7 +++---- arch/arm/mach-s3c2410/usb-simtec.c | 6 +++--- drivers/i2c/chips/tps65010.c | 11 ++++++----- drivers/input/keyboard/corgikbd.c | 6 ++---- drivers/input/keyboard/spitzkbd.c | 27 ++++++++++++++------------- drivers/mfd/ucb1x00-core.c | 5 ++--- drivers/net/smc91x.c | 5 +---- drivers/net/smc91x.h | 18 +++++++++--------- include/asm-arm/irq.h | 12 ++++++++---- include/linux/signal.h | 13 +++++++++++++ 14 files changed, 80 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 869c466e6258..b5645c4462cf 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -684,8 +684,12 @@ int setup_irq(unsigned int irq, struct irqaction *new) spin_lock_irqsave(&irq_controller_lock, flags); p = &desc->action; if ((old = *p) != NULL) { - /* Can't share interrupts unless both agree to */ - if (!(old->flags & new->flags & SA_SHIRQ)) { + /* + * Can't share interrupts unless both agree to and are + * the same type. 
+ */ + if (!(old->flags & new->flags & SA_SHIRQ) || + (~old->flags & new->flags) & SA_TRIGGER_MASK) { spin_unlock_irqrestore(&irq_controller_lock, flags); return -EBUSY; } @@ -705,6 +709,12 @@ int setup_irq(unsigned int irq, struct irqaction *new) desc->running = 0; desc->pending = 0; desc->disable_depth = 1; + + if (new->flags & SA_TRIGGER_MASK) { + unsigned int type = new->flags & SA_TRIGGER_MASK; + desc->chip->set_type(irq, type); + } + if (!desc->noautoenable) { desc->disable_depth = 0; desc->chip->unmask(irq); diff --git a/arch/arm/mach-omap1/serial.c b/arch/arm/mach-omap1/serial.c index fcfb81d13cfe..7a68f098a025 100644 --- a/arch/arm/mach-omap1/serial.c +++ b/arch/arm/mach-omap1/serial.c @@ -252,9 +252,8 @@ static void __init omap_serial_set_port_wakeup(int gpio_nr) return; } omap_set_gpio_direction(gpio_nr, 1); - set_irq_type(OMAP_GPIO_IRQ(gpio_nr), IRQT_RISING); ret = request_irq(OMAP_GPIO_IRQ(gpio_nr), &omap_serial_wake_interrupt, - 0, "serial wakeup", NULL); + SA_TRIGGER_RISING, "serial wakeup", NULL); if (ret) { omap_free_gpio(gpio_nr); printk(KERN_ERR "No interrupt for UART wake GPIO: %i\n", diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index 100fb31b5156..5a7b873f29b3 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -213,15 +213,14 @@ static int corgi_mci_init(struct device *dev, irqreturn_t (*corgi_detect_int)(in corgi_mci_platform_data.detect_delay = msecs_to_jiffies(250); - err = request_irq(CORGI_IRQ_GPIO_nSD_DETECT, corgi_detect_int, SA_INTERRUPT, - "MMC card detect", data); + err = request_irq(CORGI_IRQ_GPIO_nSD_DETECT, corgi_detect_int, + SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, + "MMC card detect", data); if (err) { printk(KERN_ERR "corgi_mci_init: MMC/SD: can't request MMC card detect IRQ\n"); return -1; } - set_irq_type(CORGI_IRQ_GPIO_nSD_DETECT, IRQT_BOTHEDGE); - return 0; } diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index eef3de26ad37..663c95005985 100644 
--- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -146,15 +146,14 @@ static int poodle_mci_init(struct device *dev, irqreturn_t (*poodle_detect_int)( poodle_mci_platform_data.detect_delay = msecs_to_jiffies(250); - err = request_irq(POODLE_IRQ_GPIO_nSD_DETECT, poodle_detect_int, SA_INTERRUPT, - "MMC card detect", data); + err = request_irq(POODLE_IRQ_GPIO_nSD_DETECT, poodle_detect_int, + SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, + "MMC card detect", data); if (err) { printk(KERN_ERR "poodle_mci_init: MMC/SD: can't request MMC card detect IRQ\n"); return -1; } - set_irq_type(POODLE_IRQ_GPIO_nSD_DETECT, IRQT_BOTHEDGE); - return 0; } diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index f2007db0cda5..a9eacc06555f 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -296,15 +296,14 @@ static int spitz_mci_init(struct device *dev, irqreturn_t (*spitz_detect_int)(in spitz_mci_platform_data.detect_delay = msecs_to_jiffies(250); - err = request_irq(SPITZ_IRQ_GPIO_nSD_DETECT, spitz_detect_int, SA_INTERRUPT, - "MMC card detect", data); + err = request_irq(SPITZ_IRQ_GPIO_nSD_DETECT, spitz_detect_int, + SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, + "MMC card detect", data); if (err) { printk(KERN_ERR "spitz_mci_init: MMC/SD: can't request MMC card detect IRQ\n"); return -1; } - set_irq_type(SPITZ_IRQ_GPIO_nSD_DETECT, IRQT_BOTHEDGE); - return 0; } diff --git a/arch/arm/mach-s3c2410/usb-simtec.c b/arch/arm/mach-s3c2410/usb-simtec.c index 5098b50158a3..495f8c6ffcb6 100644 --- a/arch/arm/mach-s3c2410/usb-simtec.c +++ b/arch/arm/mach-s3c2410/usb-simtec.c @@ -84,13 +84,13 @@ static void usb_simtec_enableoc(struct s3c2410_hcd_info *info, int on) int ret; if (on) { - ret = request_irq(IRQ_USBOC, usb_simtec_ocirq, SA_INTERRUPT, + ret = request_irq(IRQ_USBOC, usb_simtec_ocirq, + SA_INTERRUPT | SA_TRIGGER_RISING | + SA_TRIGGER_FALLING, "USB Over-current", info); if (ret != 0) { printk(KERN_ERR "failed to 
request usb oc irq\n"); } - - set_irq_type(IRQ_USBOC, IRQT_BOTHEDGE); } else { free_irq(IRQ_USBOC, info); } diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c index e70b3db69edd..1af3dfbb8086 100644 --- a/drivers/i2c/chips/tps65010.c +++ b/drivers/i2c/chips/tps65010.c @@ -494,6 +494,7 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind) { struct tps65010 *tps; int status; + unsigned long irqflags; if (the_tps) { dev_dbg(&bus->dev, "only one %s for now\n", DRIVER_NAME); @@ -520,13 +521,14 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind) } #ifdef CONFIG_ARM + irqflags = SA_SAMPLE_RANDOM | SA_TRIGGER_LOW; if (machine_is_omap_h2()) { tps->model = TPS65010; omap_cfg_reg(W4_GPIO58); tps->irq = OMAP_GPIO_IRQ(58); omap_request_gpio(58); omap_set_gpio_direction(58, 1); - set_irq_type(tps->irq, IRQT_FALLING); + irqflags |= SA_TRIGGER_FALLING; } if (machine_is_omap_osk()) { tps->model = TPS65010; @@ -534,7 +536,7 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind) tps->irq = OMAP_GPIO_IRQ(OMAP_MPUIO(1)); omap_request_gpio(OMAP_MPUIO(1)); omap_set_gpio_direction(OMAP_MPUIO(1), 1); - set_irq_type(tps->irq, IRQT_FALLING); + irqflags |= SA_TRIGGER_FALLING; } if (machine_is_omap_h3()) { tps->model = TPS65013; @@ -542,13 +544,12 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind) // FIXME set up this board's IRQ ... 
} #else -#define set_irq_type(num,trigger) do{}while(0) + irqflags = SA_SAMPLE_RANDOM; #endif if (tps->irq > 0) { - set_irq_type(tps->irq, IRQT_LOW); status = request_irq(tps->irq, tps65010_irq, - SA_SAMPLE_RANDOM, DRIVER_NAME, tps); + irqflags, DRIVER_NAME, tps); if (status < 0) { dev_dbg(&tps->client.dev, "can't get IRQ %d, err %d\n", tps->irq, status); diff --git a/drivers/input/keyboard/corgikbd.c b/drivers/input/keyboard/corgikbd.c index 64672d491222..e301ee4ca264 100644 --- a/drivers/input/keyboard/corgikbd.c +++ b/drivers/input/keyboard/corgikbd.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -343,10 +342,9 @@ static int __init corgikbd_probe(struct platform_device *pdev) for (i = 0; i < CORGI_KEY_SENSE_NUM; i++) { pxa_gpio_mode(CORGI_GPIO_KEY_SENSE(i) | GPIO_IN); if (request_irq(CORGI_IRQ_GPIO_KEY_SENSE(i), corgikbd_interrupt, - SA_INTERRUPT, "corgikbd", corgikbd)) + SA_INTERRUPT | SA_TRIGGER_RISING, + "corgikbd", corgikbd)) printk(KERN_WARNING "corgikbd: Can't get IRQ: %d!\n", i); - else - set_irq_type(CORGI_IRQ_GPIO_KEY_SENSE(i),IRQT_RISING); } /* Set Strobe lines as outputs - set high */ diff --git a/drivers/input/keyboard/spitzkbd.c b/drivers/input/keyboard/spitzkbd.c index 6a15fe3bc527..83999d583122 100644 --- a/drivers/input/keyboard/spitzkbd.c +++ b/drivers/input/keyboard/spitzkbd.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -407,10 +406,9 @@ static int __init spitzkbd_probe(struct platform_device *dev) for (i = 0; i < SPITZ_KEY_SENSE_NUM; i++) { pxa_gpio_mode(spitz_senses[i] | GPIO_IN); if (request_irq(IRQ_GPIO(spitz_senses[i]), spitzkbd_interrupt, - SA_INTERRUPT, "Spitzkbd Sense", spitzkbd)) + SA_INTERRUPT|SA_TRIGGER_RISING, + "Spitzkbd Sense", spitzkbd)) printk(KERN_WARNING "spitzkbd: Can't get Sense IRQ: %d!\n", i); - else - set_irq_type(IRQ_GPIO(spitz_senses[i]),IRQT_RISING); } /* Set Strobe lines as outputs - set high */ @@ -422,15 +420,18 @@ static int __init spitzkbd_probe(struct 
platform_device *dev) pxa_gpio_mode(SPITZ_GPIO_SWA | GPIO_IN); pxa_gpio_mode(SPITZ_GPIO_SWB | GPIO_IN); - request_irq(SPITZ_IRQ_GPIO_SYNC, spitzkbd_interrupt, SA_INTERRUPT, "Spitzkbd Sync", spitzkbd); - request_irq(SPITZ_IRQ_GPIO_ON_KEY, spitzkbd_interrupt, SA_INTERRUPT, "Spitzkbd PwrOn", spitzkbd); - request_irq(SPITZ_IRQ_GPIO_SWA, spitzkbd_hinge_isr, SA_INTERRUPT, "Spitzkbd SWA", spitzkbd); - request_irq(SPITZ_IRQ_GPIO_SWB, spitzkbd_hinge_isr, SA_INTERRUPT, "Spitzkbd SWB", spitzkbd); - - set_irq_type(SPITZ_IRQ_GPIO_SYNC, IRQT_BOTHEDGE); - set_irq_type(SPITZ_IRQ_GPIO_ON_KEY, IRQT_BOTHEDGE); - set_irq_type(SPITZ_IRQ_GPIO_SWA, IRQT_BOTHEDGE); - set_irq_type(SPITZ_IRQ_GPIO_SWB, IRQT_BOTHEDGE); + request_irq(SPITZ_IRQ_GPIO_SYNC, spitzkbd_interrupt, + SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, + "Spitzkbd Sync", spitzkbd); + request_irq(SPITZ_IRQ_GPIO_ON_KEY, spitzkbd_interrupt, + SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, + "Spitzkbd PwrOn", spitzkbd); + request_irq(SPITZ_IRQ_GPIO_SWA, spitzkbd_hinge_isr, + SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, + "Spitzkbd SWA", spitzkbd); + request_irq(SPITZ_IRQ_GPIO_SWB, spitzkbd_hinge_isr, + SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING, + "Spitzkbd SWB", spitzkbd); printk(KERN_INFO "input: Spitz Keyboard Registered\n"); diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index e335d54c4659..b42e0fbab59b 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -27,7 +27,6 @@ #include #include -#include #include "ucb1x00.h" @@ -507,14 +506,14 @@ static int ucb1x00_probe(struct mcp *mcp) goto err_free; } - ret = request_irq(ucb->irq, ucb1x00_irq, 0, "UCB1x00", ucb); + ret = request_irq(ucb->irq, ucb1x00_irq, SA_TRIGGER_RISING, + "UCB1x00", ucb); if (ret) { printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n", ucb->irq, ret); goto err_free; } - set_irq_type(ucb->irq, IRQT_RISING); mcp_set_drvdata(mcp, ucb); ret = class_device_register(&ucb->cdev); 
diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c index 28bf2e69eb5e..7ec08127c9d6 100644 --- a/drivers/net/smc91x.c +++ b/drivers/net/smc91x.c @@ -88,7 +88,6 @@ static const char version[] = #include #include -#include #include "smc91x.h" @@ -2007,12 +2006,10 @@ static int __init smc_probe(struct net_device *dev, void __iomem *ioaddr) } /* Grab the IRQ */ - retval = request_irq(dev->irq, &smc_interrupt, 0, dev->name, dev); + retval = request_irq(dev->irq, &smc_interrupt, SMC_IRQ_FLAGS, dev->name, dev); if (retval) goto err_out; - set_irq_type(dev->irq, SMC_IRQ_TRIGGER_TYPE); - #ifdef SMC_USE_PXA_DMA { int dma = pxa_request_dma(dev->name, DMA_PRIO_LOW, diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h index 5c2824be4ee6..e0efd1964e72 100644 --- a/drivers/net/smc91x.h +++ b/drivers/net/smc91x.h @@ -90,7 +90,7 @@ __l--; \ } \ } while (0) -#define set_irq_type(irq, type) +#define SMC_IRQ_FLAGS (0) #elif defined(CONFIG_SA1100_PLEB) /* We can only do 16-bit reads and writes in the static memory space. */ @@ -109,7 +109,7 @@ #define SMC_outw(v, a, r) writew(v, (a) + (r)) #define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) -#define set_irq_type(irq, type) do {} while (0) +#define SMC_IRQ_FLAGS (0) #elif defined(CONFIG_SA1100_ASSABET) @@ -185,11 +185,11 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg) #include #include -#define SMC_IRQ_TRIGGER_TYPE (( \ +#define SMC_IRQ_FLAGS (( \ machine_is_omap_h2() \ || machine_is_omap_h3() \ || (machine_is_omap_innovator() && !cpu_is_omap1510()) \ - ) ? IRQT_FALLING : IRQT_RISING) + ) ? 
SA_TRIGGER_FALLING : SA_TRIGGER_RISING) #elif defined(CONFIG_SH_SH4202_MICRODEV) @@ -209,7 +209,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg) #define SMC_insw(a, r, p, l) insw((a) + (r) - 0xa0000000, p, l) #define SMC_outsw(a, r, p, l) outsw((a) + (r) - 0xa0000000, p, l) -#define set_irq_type(irq, type) do {} while(0) +#define SMC_IRQ_FLAGS (0) #elif defined(CONFIG_ISA) @@ -237,7 +237,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg) #define SMC_insw(a, r, p, l) insw(((u32)a) + (r), p, l) #define SMC_outsw(a, r, p, l) outsw(((u32)a) + (r), p, l) -#define set_irq_type(irq, type) do {} while(0) +#define SMC_IRQ_FLAGS (0) #define RPC_LSA_DEFAULT RPC_LED_TX_RX #define RPC_LSB_DEFAULT RPC_LED_100_10 @@ -319,7 +319,7 @@ static inline void SMC_outsw (unsigned long a, int r, unsigned char* p, int l) au_writew(*_p++ , _a); \ } while(0) -#define set_irq_type(irq, type) do {} while (0) +#define SMC_IRQ_FLAGS (0) #else @@ -342,8 +342,8 @@ static inline void SMC_outsw (unsigned long a, int r, unsigned char* p, int l) #endif -#ifndef SMC_IRQ_TRIGGER_TYPE -#define SMC_IRQ_TRIGGER_TYPE IRQT_RISING +#ifndef SMC_IRQ_FLAGS +#define SMC_IRQ_FLAGS SA_TRIGGER_RISING #endif #ifdef SMC_USE_PXA_DMA diff --git a/include/asm-arm/irq.h b/include/asm-arm/irq.h index 59975ee43cf1..7772432d3fd7 100644 --- a/include/asm-arm/irq.h +++ b/include/asm-arm/irq.h @@ -25,10 +25,14 @@ extern void disable_irq_nosync(unsigned int); extern void disable_irq(unsigned int); extern void enable_irq(unsigned int); -#define __IRQT_FALEDGE (1 << 0) -#define __IRQT_RISEDGE (1 << 1) -#define __IRQT_LOWLVL (1 << 2) -#define __IRQT_HIGHLVL (1 << 3) +/* + * These correspond with the SA_TRIGGER_* defines, and therefore the + * IRQRESOURCE_IRQ_* defines. 
+ */ +#define __IRQT_RISEDGE (1 << 0) +#define __IRQT_FALEDGE (1 << 1) +#define __IRQT_HIGHLVL (1 << 2) +#define __IRQT_LOWLVL (1 << 3) #define IRQT_NOEDGE (0) #define IRQT_RISING (__IRQT_RISEDGE) diff --git a/include/linux/signal.h b/include/linux/signal.h index 5dd5f02c5c5f..ea9eff16c4b7 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -18,6 +18,19 @@ #define SA_PROBE SA_ONESHOT #define SA_SAMPLE_RANDOM SA_RESTART #define SA_SHIRQ 0x04000000 +/* + * As above, these correspond to the IORESOURCE_IRQ_* defines in + * linux/ioport.h to select the interrupt line behaviour. When + * requesting an interrupt without specifying a SA_TRIGGER, the + * setting should be assumed to be "as already configured", which + * may be as per machine or firmware initialisation. + */ +#define SA_TRIGGER_LOW 0x00000008 +#define SA_TRIGGER_HIGH 0x00000004 +#define SA_TRIGGER_FALLING 0x00000002 +#define SA_TRIGGER_RISING 0x00000001 +#define SA_TRIGGER_MASK (SA_TRIGGER_HIGH|SA_TRIGGER_LOW|\ + SA_TRIGGER_RISING|SA_TRIGGER_FALLING) /* * Real Time signals may be queued. -- cgit v1.2.3-71-gd317 From e5174baaea7585760f02eef23b225847d209a8db Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Sun, 8 Jan 2006 01:02:11 -0800 Subject: [PATCH] fat: support ->direct_IO() This patch adds support for ->direct_IO(), mostly for reads. The users of this seem to want it for streaming reads. So, the current direct I/O has a limitation: it can only overwrite. (For write operations, we mainly need to handle holes, etc.)
Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/cache.c | 14 +++++++-- fs/fat/dir.c | 6 ++-- fs/fat/inode.c | 82 ++++++++++++++++++++++++++++++++++++++++++------ include/linux/msdos_fs.h | 3 +- 4 files changed, 89 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 77c24fcf712a..1acc941245fb 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -295,7 +295,8 @@ static int fat_bmap_cluster(struct inode *inode, int cluster) return dclus; } -int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys) +int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, + unsigned long *mapped_blocks) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); @@ -303,9 +304,12 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys) int cluster, offset; *phys = 0; + *mapped_blocks = 0; if ((sbi->fat_bits != 32) && (inode->i_ino == MSDOS_ROOT_INO)) { - if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) + if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) { *phys = sector + sbi->dir_start; + *mapped_blocks = 1; + } return 0; } last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1)) @@ -318,7 +322,11 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys) cluster = fat_bmap_cluster(inode, cluster); if (cluster < 0) return cluster; - else if (cluster) + else if (cluster) { *phys = fat_clus_to_blknr(sbi, cluster) + offset; + *mapped_blocks = sbi->sec_per_clus - offset; + if (*mapped_blocks > last_block - sector) + *mapped_blocks = last_block - sector; + } return 0; } diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 4ce77475aed3..eef1b81aa294 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -68,8 +68,8 @@ static int fat__get_entry(struct inode *dir, loff_t *pos, { struct super_block *sb = dir->i_sb; sector_t phys, iblock; - int offset; - int err; + unsigned long mapped_blocks; + int err, 
offset; next: if (*bh) @@ -77,7 +77,7 @@ next: *bh = NULL; iblock = *pos >> sb->s_blocksize_bits; - err = fat_bmap(dir, iblock, &phys); + err = fat_bmap(dir, iblock, &phys, &mapped_blocks); if (err || !phys) return -1; /* beyond EOF or error */ diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 932c8d6d1f54..e7f4aa7fc686 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #ifndef CONFIG_FAT_DEFAULT_IOCHARSET @@ -49,43 +50,77 @@ static int fat_add_cluster(struct inode *inode) return err; } -static int fat_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) +static int __fat_get_blocks(struct inode *inode, sector_t iblock, + unsigned long *max_blocks, + struct buffer_head *bh_result, int create) { struct super_block *sb = inode->i_sb; + struct msdos_sb_info *sbi = MSDOS_SB(sb); sector_t phys; - int err; + unsigned long mapped_blocks; + int err, offset; - err = fat_bmap(inode, iblock, &phys); + err = fat_bmap(inode, iblock, &phys, &mapped_blocks); if (err) return err; if (phys) { map_bh(bh_result, sb, phys); + *max_blocks = min(mapped_blocks, *max_blocks); return 0; } if (!create) return 0; + if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) { fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)", MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private); return -EIO; } - if (!((unsigned long)iblock & (MSDOS_SB(sb)->sec_per_clus - 1))) { + + offset = (unsigned long)iblock & (sbi->sec_per_clus - 1); + if (!offset) { + /* TODO: multiple cluster allocation would be desirable. 
*/ err = fat_add_cluster(inode); if (err) return err; } - MSDOS_I(inode)->mmu_private += sb->s_blocksize; - err = fat_bmap(inode, iblock, &phys); + /* available blocks on this cluster */ + mapped_blocks = sbi->sec_per_clus - offset; + + *max_blocks = min(mapped_blocks, *max_blocks); + MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits; + + err = fat_bmap(inode, iblock, &phys, &mapped_blocks); if (err) return err; - if (!phys) - BUG(); + BUG_ON(!phys); + BUG_ON(*max_blocks != mapped_blocks); set_buffer_new(bh_result); map_bh(bh_result, sb, phys); return 0; } +static int fat_get_blocks(struct inode *inode, sector_t iblock, + unsigned long max_blocks, + struct buffer_head *bh_result, int create) +{ + struct super_block *sb = inode->i_sb; + int err; + + err = __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create); + if (err) + return err; + bh_result->b_size = max_blocks << sb->s_blocksize_bits; + return 0; +} + +static int fat_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + unsigned long max_blocks = 1; + return __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create); +} + static int fat_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, fat_get_block, wbc); @@ -128,6 +163,34 @@ static int fat_commit_write(struct file *file, struct page *page, return err; } +static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + + if (rw == WRITE) { + /* + * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(), + * so we need to update the ->mmu_private to block boundary. + * + * But we must fill the remaining area or hole by nul for + * updating ->mmu_private. 
+ */ + loff_t size = offset + iov_length(iov, nr_segs); + if (MSDOS_I(inode)->mmu_private < size) + return -EINVAL; + } + + /* + * FAT need to use the DIO_LOCKING for avoiding the race + * condition of fat_get_block() and ->truncate(). + */ + return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + offset, nr_segs, fat_get_blocks, NULL); +} + static sector_t _fat_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping, block, fat_get_block); @@ -141,6 +204,7 @@ static struct address_space_operations fat_aops = { .sync_page = block_sync_page, .prepare_write = fat_prepare_write, .commit_write = fat_commit_write, + .direct_IO = fat_direct_IO, .bmap = _fat_bmap }; diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 941da5c016a0..e933e2a355ad 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -329,7 +329,8 @@ static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) extern void fat_cache_inval_inode(struct inode *inode); extern int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus); -extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys); +extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, + unsigned long *mapped_blocks); /* fat/dir.c */ extern struct file_operations fat_dir_operations; -- cgit v1.2.3-71-gd317 From 268fc16e343b4f8e249468747db2e658da46a814 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Sun, 8 Jan 2006 01:02:12 -0800 Subject: [PATCH] export/change sync_page_range/_nolock() This exports/changes the sync_page_range/_nolock(). The fatfs needs sync_page_range/_nolock() for expanding truncate, and changes "size_t count" to "loff_t count". 
Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 4 +++- mm/filemap.c | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b096159086e8..beaef5c7a0ea 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -103,7 +103,9 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping); int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); int sync_page_range(struct inode *inode, struct address_space *mapping, - loff_t pos, size_t count); + loff_t pos, loff_t count); +int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, + loff_t pos, loff_t count); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl diff --git a/mm/filemap.c b/mm/filemap.c index 4ef24a397684..8fdf36508023 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -280,7 +280,7 @@ static int wait_on_page_writeback_range(struct address_space *mapping, * it is otherwise livelockable. */ int sync_page_range(struct inode *inode, struct address_space *mapping, - loff_t pos, size_t count) + loff_t pos, loff_t count) { pgoff_t start = pos >> PAGE_CACHE_SHIFT; pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; @@ -305,9 +305,8 @@ EXPORT_SYMBOL(sync_page_range); * as it forces O_SYNC writers to different parts of the same file * to be serialised right until io completion. 
*/ -static int sync_page_range_nolock(struct inode *inode, - struct address_space *mapping, - loff_t pos, size_t count) +int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, + loff_t pos, loff_t count) { pgoff_t start = pos >> PAGE_CACHE_SHIFT; pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; @@ -322,6 +321,7 @@ static int sync_page_range_nolock(struct inode *inode, ret = wait_on_page_writeback_range(mapping, start, end); return ret; } +EXPORT_SYMBOL(sync_page_range_nolock); /** * filemap_fdatawait - walk the list of under-writeback pages of the given -- cgit v1.2.3-71-gd317 From 05eb0b51fb46430050d5873458612f53e0234f2e Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Sun, 8 Jan 2006 01:02:13 -0800 Subject: [PATCH] fat: support a truncate() for expanding size (generic_cont_expand) This patch changes generic_cont_expand(), in order to share the code with fatfs. - Use vmtruncate() if ->prepare_write() returns a error. Even if ->prepare_write() returns an error, it may already have added some blocks. So, this truncates blocks outside of ->i_size by vmtruncate(). - Add generic_cont_expand_simple(). The generic_cont_expand_simple() assumes that ->prepare_write() can handle the block boundary. With this, we don't need to care the extra byte. And for expanding a file size by truncate(), fatfs uses the added generic_cont_expand_simple(). Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 60 ++++++++++++++++++++++++++++++++++----------- fs/fat/file.c | 31 ++++++++++++++++++++--- include/linux/buffer_head.h | 3 ++- 3 files changed, 76 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 5287be18633b..55023231e460 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2160,11 +2160,12 @@ int block_read_full_page(struct page *page, get_block_t *get_block) * truncates. Uses prepare/commit_write to allow the filesystem to * deal with the hole. 
*/ -int generic_cont_expand(struct inode *inode, loff_t size) +static int __generic_cont_expand(struct inode *inode, loff_t size, + pgoff_t index, unsigned int offset) { struct address_space *mapping = inode->i_mapping; struct page *page; - unsigned long index, offset, limit; + unsigned long limit; int err; err = -EFBIG; @@ -2176,24 +2177,24 @@ int generic_cont_expand(struct inode *inode, loff_t size) if (size > inode->i_sb->s_maxbytes) goto out; - offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ - - /* ugh. in prepare/commit_write, if from==to==start of block, we - ** skip the prepare. make sure we never send an offset for the start - ** of a block - */ - if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { - offset++; - } - index = size >> PAGE_CACHE_SHIFT; err = -ENOMEM; page = grab_cache_page(mapping, index); if (!page) goto out; err = mapping->a_ops->prepare_write(NULL, page, offset, offset); - if (!err) { - err = mapping->a_ops->commit_write(NULL, page, offset, offset); + if (err) { + /* + * ->prepare_write() may have instantiated a few blocks + * outside i_size. Trim these off again. + */ + unlock_page(page); + page_cache_release(page); + vmtruncate(inode, inode->i_size); + goto out; } + + err = mapping->a_ops->commit_write(NULL, page, offset, offset); + unlock_page(page); page_cache_release(page); if (err > 0) @@ -2202,6 +2203,36 @@ out: return err; } +int generic_cont_expand(struct inode *inode, loff_t size) +{ + pgoff_t index; + unsigned int offset; + + offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */ + + /* ugh. in prepare/commit_write, if from==to==start of block, we + ** skip the prepare. make sure we never send an offset for the start + ** of a block + */ + if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { + /* caller must handle this extra byte. 
*/ + offset++; + } + index = size >> PAGE_CACHE_SHIFT; + + return __generic_cont_expand(inode, size, index, offset); +} + +int generic_cont_expand_simple(struct inode *inode, loff_t size) +{ + loff_t pos = size - 1; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1; + + /* prepare/commit_write can handle even if from==to==start of block. */ + return __generic_cont_expand(inode, size, index, offset); +} + /* * For moronic filesystems that do not allow holes in file. * We may have to extend the file. @@ -3145,6 +3176,7 @@ EXPORT_SYMBOL(fsync_bdev); EXPORT_SYMBOL(generic_block_bmap); EXPORT_SYMBOL(generic_commit_write); EXPORT_SYMBOL(generic_cont_expand); +EXPORT_SYMBOL(generic_cont_expand_simple); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL(invalidate_bdev); EXPORT_SYMBOL(ll_rw_block); diff --git a/fs/fat/file.c b/fs/fat/file.c index 15229fe569c3..9b07c328a6fc 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -11,6 +11,7 @@ #include #include #include +#include int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) @@ -124,6 +125,24 @@ struct file_operations fat_file_operations = { .sendfile = generic_file_sendfile, }; +static int fat_cont_expand(struct inode *inode, loff_t size) +{ + struct address_space *mapping = inode->i_mapping; + loff_t start = inode->i_size, count = size - inode->i_size; + int err; + + err = generic_cont_expand_simple(inode, size); + if (err) + goto out; + + inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + if (IS_SYNC(inode)) + err = sync_page_range_nolock(inode, mapping, start, count); +out: + return err; +} + int fat_notify_change(struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); @@ -132,11 +151,17 @@ int fat_notify_change(struct dentry *dentry, struct iattr *attr) lock_kernel(); - /* FAT cannot truncate to a longer file */ + /* + * Expand the file. 
Since inode_setattr() updates ->i_size + * before calling the ->truncate(), but FAT needs to fill the + * hole before it. + */ if (attr->ia_valid & ATTR_SIZE) { if (attr->ia_size > inode->i_size) { - error = -EPERM; - goto out; + error = fat_cont_expand(inode, attr->ia_size); + if (error || attr->ia_valid == ATTR_SIZE) + goto out; + attr->ia_valid &= ~ATTR_SIZE; } } diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 1db061bb6b08..9f159baf153f 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -197,7 +197,8 @@ int block_read_full_page(struct page*, get_block_t*); int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*, loff_t *); -int generic_cont_expand(struct inode *inode, loff_t size) ; +int generic_cont_expand(struct inode *inode, loff_t size); +int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); int block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); -- cgit v1.2.3-71-gd317 From 2a10e0b28b196051ae71829e5b989cba00513289 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 8 Jan 2006 01:02:15 -0800 Subject: [PATCH] move rtc_interrupt() prototype to rtc.h This patch moves the rtc_interrupt() prototype to rtc.h and removes the prototypes from C files. It also renames static rtc_interrupt() functions in arch/arm/mach-integrator/time.c and arch/sh64/kernel/time.c to avoid compile problems. 
Signed-off-by: Adrian Bunk Signed-off-by: Paul Gortmaker Acked-by: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-integrator/time.c | 5 +++-- arch/i386/kernel/time_hpet.c | 2 -- arch/sh64/kernel/time.c | 7 ++++--- arch/x86_64/kernel/time.c | 2 -- include/linux/rtc.h | 3 +++ 5 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-integrator/time.c b/arch/arm/mach-integrator/time.c index 9f46aaef8968..3c22c16b38bf 100644 --- a/arch/arm/mach-integrator/time.c +++ b/arch/arm/mach-integrator/time.c @@ -96,7 +96,8 @@ static struct rtc_ops rtc_ops = { .set_alarm = rtc_set_alarm, }; -static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t arm_rtc_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { writel(0, rtc_base + RTC_EOI); return IRQ_HANDLED; @@ -124,7 +125,7 @@ static int rtc_probe(struct amba_device *dev, void *id) xtime.tv_sec = __raw_readl(rtc_base + RTC_DR); - ret = request_irq(dev->irq[0], rtc_interrupt, SA_INTERRUPT, + ret = request_irq(dev->irq[0], arm_rtc_interrupt, SA_INTERRUPT, "rtc-pl030", dev); if (ret) goto map_out; diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 9caeaa315cd7..a529f0cdce17 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -259,8 +259,6 @@ __setup("hpet=", hpet_setup); #include #include -extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); - #define DEFAULT_RTC_INT_FREQ 64 #define RTC_NUM_INTS 1 diff --git a/arch/sh64/kernel/time.c b/arch/sh64/kernel/time.c index 870fe5327e09..1195af37ee5a 100644 --- a/arch/sh64/kernel/time.c +++ b/arch/sh64/kernel/time.c @@ -417,7 +417,7 @@ static __init unsigned int get_cpu_hz(void) /* ** Regardless the toolchain, force the compiler to use the ** arbitrary register r3 as a clock tick counter. 
- ** NOTE: r3 must be in accordance with rtc_interrupt() + ** NOTE: r3 must be in accordance with sh64_rtc_interrupt() */ register unsigned long long __rtc_irq_flag __asm__ ("r3"); @@ -482,7 +482,8 @@ static __init unsigned int get_cpu_hz(void) #endif } -static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t sh64_rtc_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { ctrl_outb(0, RCR1); /* Disable Carry Interrupts */ regs->regs[3] = 1; /* Using r3 */ @@ -491,7 +492,7 @@ static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) } static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL}; -static struct irqaction irq1 = { rtc_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "rtc", NULL, NULL}; +static struct irqaction irq1 = { sh64_rtc_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "rtc", NULL, NULL}; void __init time_init(void) { diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 74102796e5c0..43c9fa0f8d5f 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -1075,8 +1075,6 @@ device_initcall(time_init_device); */ #include -extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); - #define DEFAULT_RTC_INT_FREQ 64 #define RTC_NUM_INTS 1 diff --git a/include/linux/rtc.h b/include/linux/rtc.h index e1aaf1fac8e0..0b2ba67ff13c 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -11,6 +11,8 @@ #ifndef _LINUX_RTC_H_ #define _LINUX_RTC_H_ +#include + /* * The struct used to pass data via the following ioctl. 
Similar to the * struct tm in , but it needs to be here so that the kernel @@ -102,6 +104,7 @@ int rtc_register(rtc_task_t *task); int rtc_unregister(rtc_task_t *task); int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg); void rtc_get_rtc_time(struct rtc_time *rtc_tm); +irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); #endif /* __KERNEL__ */ -- cgit v1.2.3-71-gd317 From 095975da26dba21698582e91e96be10f7417333f Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sun, 8 Jan 2006 01:02:19 -0800 Subject: [PATCH] rcu file: use atomic primitives Use atomic_inc_not_zero for rcu files instead of special case rcuref. Signed-off-by: Nick Piggin Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/RCU/rcuref.txt | 87 ++++++++--------- fs/aio.c | 3 +- fs/file_table.c | 8 +- include/linux/fs.h | 3 +- include/linux/radix-tree.h | 1 + include/linux/rcuref.h | 220 ------------------------------------------- kernel/rcupdate.c | 14 --- kernel/rcutorture.c | 1 - security/selinux/hooks.c | 2 +- 9 files changed, 48 insertions(+), 291 deletions(-) delete mode 100644 include/linux/rcuref.h (limited to 'include/linux') diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt index a23fee66064d..3f60db41b2f0 100644 --- a/Documentation/RCU/rcuref.txt +++ b/Documentation/RCU/rcuref.txt @@ -1,74 +1,67 @@ -Refcounter framework for elements of lists/arrays protected by -RCU. +Refcounter design for elements of lists/arrays protected by RCU. Refcounting on elements of lists which are protected by traditional reader/writer spinlocks or semaphores are straight forward as in: -1. 2. -add() search_and_reference() -{ { - alloc_object read_lock(&list_lock); - ... search_for_element - atomic_set(&el->rc, 1); atomic_inc(&el->rc); - write_lock(&list_lock); ... - add_element read_unlock(&list_lock); - ... ... - write_unlock(&list_lock); } +1. 2. 
+add() search_and_reference() +{ { + alloc_object read_lock(&list_lock); + ... search_for_element + atomic_set(&el->rc, 1); atomic_inc(&el->rc); + write_lock(&list_lock); ... + add_element read_unlock(&list_lock); + ... ... + write_unlock(&list_lock); } } 3. 4. release_referenced() delete() { { - ... write_lock(&list_lock); - atomic_dec(&el->rc, relfunc) ... - ... delete_element -} write_unlock(&list_lock); - ... - if (atomic_dec_and_test(&el->rc)) - kfree(el); - ... + ... write_lock(&list_lock); + atomic_dec(&el->rc, relfunc) ... + ... delete_element +} write_unlock(&list_lock); + ... + if (atomic_dec_and_test(&el->rc)) + kfree(el); + ... } If this list/array is made lock free using rcu as in changing the write_lock in add() and delete() to spin_lock and changing read_lock -in search_and_reference to rcu_read_lock(), the rcuref_get in +in search_and_reference to rcu_read_lock(), the atomic_get in search_and_reference could potentially hold reference to an element which -has already been deleted from the list/array. rcuref_lf_get_rcu takes +has already been deleted from the list/array. atomic_inc_not_zero takes care of this scenario. search_and_reference should look as; 1. 2. add() search_and_reference() { { - alloc_object rcu_read_lock(); - ... search_for_element - atomic_set(&el->rc, 1); if (rcuref_inc_lf(&el->rc)) { - write_lock(&list_lock); rcu_read_unlock(); - return FAIL; - add_element } - ... ... - write_unlock(&list_lock); rcu_read_unlock(); + alloc_object rcu_read_lock(); + ... search_for_element + atomic_set(&el->rc, 1); if (atomic_inc_not_zero(&el->rc)) { + write_lock(&list_lock); rcu_read_unlock(); + return FAIL; + add_element } + ... ... + write_unlock(&list_lock); rcu_read_unlock(); } } 3. 4. release_referenced() delete() { { - ... write_lock(&list_lock); - rcuref_dec(&el->rc, relfunc) ... - ... delete_element -} write_unlock(&list_lock); - ... - if (rcuref_dec_and_test(&el->rc)) - call_rcu(&el->head, el_free); - ... + ... 
write_lock(&list_lock); + atomic_dec(&el->rc, relfunc) ... + ... delete_element +} write_unlock(&list_lock); + ... + if (atomic_dec_and_test(&el->rc)) + call_rcu(&el->head, el_free); + ... } Sometimes, reference to the element need to be obtained in the -update (write) stream. In such cases, rcuref_inc_lf might be an overkill -since the spinlock serialising list updates are held. rcuref_inc +update (write) stream. In such cases, atomic_inc_not_zero might be an +overkill since the spinlock serialising list updates are held. atomic_inc is to be used in such cases. -For arches which do not have cmpxchg rcuref_inc_lf -api uses a hashed spinlock implementation and the same hashed spinlock -is acquired in all rcuref_xxx primitives to preserve atomicity. -Note: Use rcuref_inc api only if you need to use rcuref_inc_lf on the -refcounter atleast at one place. Mixing rcuref_inc and atomic_xxx api -might lead to races. rcuref_inc_lf() must be used in lockfree -RCU critical sections only. + diff --git a/fs/aio.c b/fs/aio.c index 5a28b69ad223..aec2b1916d1b 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include @@ -514,7 +513,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) /* Must be done under the lock to serialise against cancellation. * Call this aio_fput as it duplicates fput via the fput_work. 
*/ - if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) { + if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) { get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); diff --git a/fs/file_table.c b/fs/file_table.c index c3a5e2fd663b..6142250104a6 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -117,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp); void fastcall fput(struct file *file) { - if (rcuref_dec_and_test(&file->f_count)) + if (atomic_dec_and_test(&file->f_count)) __fput(file); } @@ -166,7 +166,7 @@ struct file fastcall *fget(unsigned int fd) rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (!rcuref_inc_lf(&file->f_count)) { + if (!atomic_inc_not_zero(&file->f_count)) { /* File object ref couldn't be taken */ rcu_read_unlock(); return NULL; @@ -198,7 +198,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed) rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (rcuref_inc_lf(&file->f_count)) + if (atomic_inc_not_zero(&file->f_count)) *fput_needed = 1; else /* Didn't get the reference, someone's freed */ @@ -213,7 +213,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed) void put_filp(struct file *file) { - if (rcuref_dec_and_test(&file->f_count)) { + if (atomic_dec_and_test(&file->f_count)) { security_file_free(file); file_kill(file); file_free(file); diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c9c48d65630..ef29500b5df8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -9,7 +9,6 @@ #include #include #include -#include /* * It's silly to have NR_OPEN bigger than NR_FILE, but you can change @@ -653,7 +652,7 @@ extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); #define file_list_unlock() spin_unlock(&files_lock); -#define get_file(x) rcuref_inc(&(x)->f_count) +#define get_file(x) atomic_inc(&(x)->f_count) #define file_count(x) atomic_read(&(x)->f_count) #define MAX_NON_LFS ((1UL<<31) - 1) diff --git 
a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 36e5d269612f..c57ff2fcb30a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -19,6 +19,7 @@ #ifndef _LINUX_RADIX_TREE_H #define _LINUX_RADIX_TREE_H +#include #include #include diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h deleted file mode 100644 index e1adbba14b67..000000000000 --- a/include/linux/rcuref.h +++ /dev/null @@ -1,220 +0,0 @@ -/* - * rcuref.h - * - * Reference counting for elements of lists/arrays protected by - * RCU. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2005 - * - * Author: Dipankar Sarma - * Ravikiran Thirumalai - * - * See Documentation/RCU/rcuref.txt for detailed user guide. - * - */ - -#ifndef _RCUREF_H_ -#define _RCUREF_H_ - -#ifdef __KERNEL__ - -#include -#include -#include -#include - -/* - * These APIs work on traditional atomic_t counters used in the - * kernel for reference counting. Under special circumstances - * where a lock-free get() operation races with a put() operation - * these APIs can be used. See Documentation/RCU/rcuref.txt. - */ - -#ifdef __HAVE_ARCH_CMPXCHG - -/** - * rcuref_inc - increment refcount for object. - * @rcuref: reference counter in the object in question. 
- * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference - * in a lock-free reader-side critical section. - */ -static inline void rcuref_inc(atomic_t *rcuref) -{ - atomic_inc(rcuref); -} - -/** - * rcuref_dec - decrement refcount for object. - * @rcuref: reference counter in the object in question. - * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference - * in a lock-free reader-side critical section. - */ -static inline void rcuref_dec(atomic_t *rcuref) -{ - atomic_dec(rcuref); -} - -/** - * rcuref_dec_and_test - decrement refcount for object and test - * @rcuref: reference counter in the object. - * @release: pointer to the function that will clean up the object - * when the last reference to the object is released. - * This pointer is required. - * - * Decrement the refcount, and if 0, return 1. Else return 0. - * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference - * in a lock-free reader-side critical section. - */ -static inline int rcuref_dec_and_test(atomic_t *rcuref) -{ - return atomic_dec_and_test(rcuref); -} - -/* - * cmpxchg is needed on UP too, if deletions to the list/array can happen - * in interrupt context. - */ - -/** - * rcuref_inc_lf - Take reference to an object in a read-side - * critical section protected by RCU. - * @rcuref: reference counter in the object in question. - * - * Try and increment the refcount by 1. The increment might fail if - * the reference counter has been through a 1 to 0 transition and - * is no longer part of the lock-free list. - * Returns non-zero on successful increment and zero otherwise. 
- */ -static inline int rcuref_inc_lf(atomic_t *rcuref) -{ - int c, old; - c = atomic_read(rcuref); - while (c && (old = cmpxchg(&rcuref->counter, c, c + 1)) != c) - c = old; - return c; -} - -#else /* !__HAVE_ARCH_CMPXCHG */ - -extern spinlock_t __rcuref_hash[]; - -/* - * Use a hash table of locks to protect the reference count - * since cmpxchg is not available in this arch. - */ -#ifdef CONFIG_SMP -#define RCUREF_HASH_SIZE 4 -#define RCUREF_HASH(k) \ - (&__rcuref_hash[(((unsigned long)k)>>8) & (RCUREF_HASH_SIZE-1)]) -#else -#define RCUREF_HASH_SIZE 1 -#define RCUREF_HASH(k) &__rcuref_hash[0] -#endif /* CONFIG_SMP */ - -/** - * rcuref_inc - increment refcount for object. - * @rcuref: reference counter in the object in question. - * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference in a lock-free - * reader-side critical section. - */ -static inline void rcuref_inc(atomic_t *rcuref) -{ - unsigned long flags; - spin_lock_irqsave(RCUREF_HASH(rcuref), flags); - rcuref->counter += 1; - spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); -} - -/** - * rcuref_dec - decrement refcount for object. - * @rcuref: reference counter in the object in question. - * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference in a lock-free - * reader-side critical section. - */ -static inline void rcuref_dec(atomic_t *rcuref) -{ - unsigned long flags; - spin_lock_irqsave(RCUREF_HASH(rcuref), flags); - rcuref->counter -= 1; - spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); -} - -/** - * rcuref_dec_and_test - decrement refcount for object and test - * @rcuref: reference counter in the object. - * @release: pointer to the function that will clean up the object - * when the last reference to the object is released. - * This pointer is required. - * - * Decrement the refcount, and if 0, return 1. Else return 0. 
- * - * This should be used only for objects where we use RCU and - * use the rcuref_inc_lf() api to acquire a reference in a lock-free - * reader-side critical section. - */ -static inline int rcuref_dec_and_test(atomic_t *rcuref) -{ - unsigned long flags; - spin_lock_irqsave(RCUREF_HASH(rcuref), flags); - rcuref->counter--; - if (!rcuref->counter) { - spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); - return 1; - } else { - spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); - return 0; - } -} - -/** - * rcuref_inc_lf - Take reference to an object of a lock-free collection - * by traversing a lock-free list/array. - * @rcuref: reference counter in the object in question. - * - * Try and increment the refcount by 1. The increment might fail if - * the reference counter has been through a 1 to 0 transition and - * object is no longer part of the lock-free list. - * Returns non-zero on successful increment and zero otherwise. - */ -static inline int rcuref_inc_lf(atomic_t *rcuref) -{ - int ret; - unsigned long flags; - spin_lock_irqsave(RCUREF_HASH(rcuref), flags); - if (rcuref->counter) - ret = rcuref->counter++; - else - ret = 0; - spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); - return ret; -} - - -#endif /* !__HAVE_ARCH_CMPXCHG */ - -#endif /* __KERNEL__ */ -#endif /* _RCUREF_H_ */ diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 0a669bd2f6d1..30b0bba03859 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -46,7 +46,6 @@ #include #include #include -#include #include /* Definition for rcupdate control block. */ @@ -74,19 +73,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; static int maxbatch = 10000; -#ifndef __HAVE_ARCH_CMPXCHG -/* - * We use an array of spinlocks for the rcurefs -- similar to ones in sparc - * 32 bit atomic_t implementations, and a hash function similar to that - * for our refcounting needs. 
- * Can't help multiprocessors which donot have cmpxchg :( - */ - -spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = { - [0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED -}; -#endif - /** * call_rcu - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 36efe088ad81..75174c81529a 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 3d496eae1b47..6647204e4636 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1663,7 +1663,7 @@ static inline void flush_unauthorized_files(struct files_struct * files) continue; } if (devnull) { - rcuref_inc(&devnull->f_count); + get_file(devnull); } else { devnull = dentry_open(dget(selinux_null), mntget(selinuxfs_mount), O_RDWR); if (!devnull) { -- cgit v1.2.3-71-gd317 From b3f3d6141f8636f627bf19fd44eaf59a52637ac8 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sun, 8 Jan 2006 01:02:20 -0800 Subject: [PATCH] ELF: symbol table type additions Needed for the Novell kernel debugger and perhaps some per-cpu data on x86_64 in the future. 
Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/elf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/elf.h b/include/linux/elf.h index ff955dbf510d..d3bfacb24496 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -151,6 +151,8 @@ typedef __s64 Elf64_Sxword; #define STT_FUNC 2 #define STT_SECTION 3 #define STT_FILE 4 +#define STT_COMMON 5 +#define STT_TLS 6 #define ELF_ST_BIND(x) ((x) >> 4) #define ELF_ST_TYPE(x) (((unsigned int) x) & 0xf) -- cgit v1.2.3-71-gd317 From 907f2c77d1653ce235e8e1fd6ce5c46005814e78 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 8 Jan 2006 01:02:24 -0800 Subject: [PATCH] relayfs: export relayfs_create_file() with fileops param This patch adds a mandatory fileops param to relayfs_create_file() and exports that function so that clients can use it to create files defined by their own set of file operations, in relayfs. The purpose is to allow relayfs applications to create their own set of 'control' files alongside their relay files in relayfs rather than having to create them in /proc or debugfs for instance. relayfs_create_file() is also used by relay_open_buf() to create the relay files for a channel. In this case, a pointer to relayfs_file_operations is passed in, along with a pointer to the buffer associated with the file. 
Signed-off-by: Tom Zanussi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/relayfs/inode.c | 41 ++++++++++++++++++++++++++--------------- fs/relayfs/relay.c | 3 ++- fs/relayfs/relay.h | 4 ---- include/linux/relayfs_fs.h | 7 ++++++- 4 files changed, 34 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c index 379e07cd2b34..a5e6d4f2efb9 100644 --- a/fs/relayfs/inode.c +++ b/fs/relayfs/inode.c @@ -33,7 +33,9 @@ static struct backing_dev_info relayfs_backing_dev_info = { .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, }; -static struct inode *relayfs_get_inode(struct super_block *sb, int mode, +static struct inode *relayfs_get_inode(struct super_block *sb, + int mode, + struct file_operations *fops, void *data) { struct inode *inode; @@ -51,8 +53,8 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode, inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { case S_IFREG: - inode->i_fop = &relayfs_file_operations; - RELAYFS_I(inode)->buf = data; + inode->i_fop = fops; + RELAYFS_I(inode)->data = data; break; case S_IFDIR: inode->i_op = &simple_dir_inode_operations; @@ -73,6 +75,7 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode, * @name: the name of the file to create * @parent: parent directory * @mode: mode + * @fops: file operations to use for the file * @data: user-associated data for this file * * Returns the new dentry, NULL on failure @@ -82,6 +85,7 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode, static struct dentry *relayfs_create_entry(const char *name, struct dentry *parent, int mode, + struct file_operations *fops, void *data) { struct dentry *d; @@ -117,7 +121,7 @@ static struct dentry *relayfs_create_entry(const char *name, goto release_mount; } - inode = relayfs_get_inode(parent->d_inode->i_sb, mode, data); + inode = relayfs_get_inode(parent->d_inode->i_sb, 
mode, fops, data); if (!inode) { d = NULL; goto release_mount; @@ -145,20 +149,26 @@ exit: * @name: the name of the file to create * @parent: parent directory * @mode: mode, if not specied the default perms are used + * @fops: file operations to use for the file * @data: user-associated data for this file * * Returns file dentry if successful, NULL otherwise. * * The file will be created user r on behalf of current user. */ -struct dentry *relayfs_create_file(const char *name, struct dentry *parent, - int mode, void *data) +struct dentry *relayfs_create_file(const char *name, + struct dentry *parent, + int mode, + struct file_operations *fops, + void *data) { + BUG_ON(!fops); + if (!mode) mode = S_IRUSR; mode = (mode & S_IALLUGO) | S_IFREG; - return relayfs_create_entry(name, parent, mode, data); + return relayfs_create_entry(name, parent, mode, fops, data); } /** @@ -173,7 +183,7 @@ struct dentry *relayfs_create_file(const char *name, struct dentry *parent, struct dentry *relayfs_create_dir(const char *name, struct dentry *parent) { int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; - return relayfs_create_entry(name, parent, mode, NULL); + return relayfs_create_entry(name, parent, mode, NULL, NULL); } /** @@ -234,7 +244,7 @@ int relayfs_remove_dir(struct dentry *dentry) */ static int relayfs_open(struct inode *inode, struct file *filp) { - struct rchan_buf *buf = RELAYFS_I(inode)->buf; + struct rchan_buf *buf = RELAYFS_I(inode)->data; kref_get(&buf->kref); return 0; @@ -250,7 +260,7 @@ static int relayfs_open(struct inode *inode, struct file *filp) static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma) { struct inode *inode = filp->f_dentry->d_inode; - return relay_mmap_buf(RELAYFS_I(inode)->buf, vma); + return relay_mmap_buf(RELAYFS_I(inode)->data, vma); } /** @@ -264,7 +274,7 @@ static unsigned int relayfs_poll(struct file *filp, poll_table *wait) { unsigned int mask = 0; struct inode *inode = filp->f_dentry->d_inode; - struct rchan_buf *buf = 
RELAYFS_I(inode)->buf; + struct rchan_buf *buf = RELAYFS_I(inode)->data; if (buf->finalized) return POLLERR; @@ -288,7 +298,7 @@ static unsigned int relayfs_poll(struct file *filp, poll_table *wait) */ static int relayfs_release(struct inode *inode, struct file *filp) { - struct rchan_buf *buf = RELAYFS_I(inode)->buf; + struct rchan_buf *buf = RELAYFS_I(inode)->data; kref_put(&buf->kref, relay_remove_buf); return 0; @@ -450,7 +460,7 @@ static ssize_t relayfs_read(struct file *filp, loff_t *ppos) { struct inode *inode = filp->f_dentry->d_inode; - struct rchan_buf *buf = RELAYFS_I(inode)->buf; + struct rchan_buf *buf = RELAYFS_I(inode)->data; size_t read_start, avail; ssize_t ret = 0; void *from; @@ -485,7 +495,7 @@ static struct inode *relayfs_alloc_inode(struct super_block *sb) struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL); if (!p) return NULL; - p->buf = NULL; + p->data = NULL; return &p->vfs_inode; } @@ -531,7 +541,7 @@ static int relayfs_fill_super(struct super_block * sb, void * data, int silent) sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = RELAYFS_MAGIC; sb->s_op = &relayfs_ops; - inode = relayfs_get_inode(sb, mode, NULL); + inode = relayfs_get_inode(sb, mode, NULL, NULL); if (!inode) return -ENOMEM; @@ -589,6 +599,7 @@ module_exit(exit_relayfs_fs) EXPORT_SYMBOL_GPL(relayfs_file_operations); EXPORT_SYMBOL_GPL(relayfs_create_dir); EXPORT_SYMBOL_GPL(relayfs_remove_dir); +EXPORT_SYMBOL_GPL(relayfs_create_file); MODULE_AUTHOR("Tom Zanussi and Karim Yaghmour "); MODULE_DESCRIPTION("Relay Filesystem"); diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c index 7fbda177ad8f..a9cd5585c45c 100644 --- a/fs/relayfs/relay.c +++ b/fs/relayfs/relay.c @@ -176,7 +176,8 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan, return NULL; /* Create file in fs */ - dentry = relayfs_create_file(filename, parent, S_IRUSR, buf); + dentry = relayfs_create_file(filename, parent, S_IRUSR, + &relayfs_file_operations, buf); if 
(!dentry) { relay_destroy_buf(buf); return NULL; diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h index c325bb243549..0993d3e5753b 100644 --- a/fs/relayfs/relay.h +++ b/fs/relayfs/relay.h @@ -1,10 +1,6 @@ #ifndef _RELAY_H #define _RELAY_H -struct dentry *relayfs_create_file(const char *name, - struct dentry *parent, - int mode, - void *data); extern int relayfs_remove(struct dentry *dentry); extern int relay_buf_empty(struct rchan_buf *buf); extern void relay_destroy_channel(struct kref *kref); diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h index fb7e80737325..a122df2d9880 100644 --- a/include/linux/relayfs_fs.h +++ b/include/linux/relayfs_fs.h @@ -70,7 +70,7 @@ struct rchan struct relayfs_inode_info { struct inode vfs_inode; - struct rchan_buf *buf; + void *data; }; static inline struct relayfs_inode_info *RELAYFS_I(struct inode *inode) @@ -148,6 +148,11 @@ extern size_t relay_switch_subbuf(struct rchan_buf *buf, extern struct dentry *relayfs_create_dir(const char *name, struct dentry *parent); extern int relayfs_remove_dir(struct dentry *dentry); +extern struct dentry *relayfs_create_file(const char *name, + struct dentry *parent, + int mode, + struct file_operations *fops, + void *data); /** * relay_write - write data into the channel -- cgit v1.2.3-71-gd317 From 7431733791feb0b19453d8047b0723c744667040 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 8 Jan 2006 01:02:25 -0800 Subject: [PATCH] relayfs: add relayfs_remove_file() This patch adds and exports relayfs_remove_file(), for API symmetry (with relayfs_create_file()). 
Signed-off-by: Tom Zanussi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/relayfs/inode.c | 12 ++++++++++++ include/linux/relayfs_fs.h | 1 + 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c index a5e6d4f2efb9..b2f50655736b 100644 --- a/fs/relayfs/inode.c +++ b/fs/relayfs/inode.c @@ -224,6 +224,17 @@ int relayfs_remove(struct dentry *dentry) return error; } +/** + * relayfs_remove_file - remove a file from relay filesystem + * @dentry: directory dentry + * + * Returns 0 if successful, negative otherwise. + */ +int relayfs_remove_file(struct dentry *dentry) +{ + return relayfs_remove(dentry); +} + /** * relayfs_remove_dir - remove a directory in the relay filesystem * @dentry: directory dentry @@ -600,6 +611,7 @@ EXPORT_SYMBOL_GPL(relayfs_file_operations); EXPORT_SYMBOL_GPL(relayfs_create_dir); EXPORT_SYMBOL_GPL(relayfs_remove_dir); EXPORT_SYMBOL_GPL(relayfs_create_file); +EXPORT_SYMBOL_GPL(relayfs_remove_file); MODULE_AUTHOR("Tom Zanussi and Karim Yaghmour "); MODULE_DESCRIPTION("Relay Filesystem"); diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h index a122df2d9880..921540b1cdf8 100644 --- a/include/linux/relayfs_fs.h +++ b/include/linux/relayfs_fs.h @@ -153,6 +153,7 @@ extern struct dentry *relayfs_create_file(const char *name, int mode, struct file_operations *fops, void *data); +extern int relayfs_remove_file(struct dentry *dentry); /** * relay_write - write data into the channel -- cgit v1.2.3-71-gd317 From aaea25d7a68a7f72e167dc1698b66a15edc71883 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 8 Jan 2006 01:02:26 -0800 Subject: [PATCH] relayfs: remove unused alloc/destroy_inode() Since we're no longer using relayfs_inode_info, remove relayfs_alloc_inode() and relayfs_destroy_inode() along with the relayfs inode cache. 
Signed-off-by: Tom Zanussi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/relayfs/inode.c | 46 +--------------------------------------------- include/linux/relayfs_fs.h | 14 -------------- 2 files changed, 1 insertion(+), 59 deletions(-) (limited to 'include/linux') diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c index 7f6d2c8e91c2..b4c3e0466e98 100644 --- a/fs/relayfs/inode.c +++ b/fs/relayfs/inode.c @@ -26,7 +26,6 @@ static struct vfsmount * relayfs_mount; static int relayfs_mount_count; -static kmem_cache_t * relayfs_inode_cachep; static struct backing_dev_info relayfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ @@ -499,34 +498,6 @@ out: return ret; } -/** - * relayfs alloc_inode() implementation - */ -static struct inode *relayfs_alloc_inode(struct super_block *sb) -{ - struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL); - if (!p) - return NULL; - p->data = NULL; - - return &p->vfs_inode; -} - -/** - * relayfs destroy_inode() implementation - */ -static void relayfs_destroy_inode(struct inode *inode) -{ - kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode)); -} - -static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags) -{ - struct relayfs_inode_info *i = p; - if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&i->vfs_inode); -} - struct file_operations relayfs_file_operations = { .open = relayfs_open, .poll = relayfs_poll, @@ -539,8 +510,6 @@ struct file_operations relayfs_file_operations = { static struct super_operations relayfs_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, - .alloc_inode = relayfs_alloc_inode, - .destroy_inode = relayfs_destroy_inode, }; static int relayfs_fill_super(struct super_block * sb, void * data, int silent) @@ -584,25 +553,12 @@ static struct file_system_type relayfs_fs_type = { static int __init init_relayfs_fs(void) { - int err; - - relayfs_inode_cachep = 
kmem_cache_create("relayfs_inode_cache", - sizeof(struct relayfs_inode_info), 0, - 0, init_once, NULL); - if (!relayfs_inode_cachep) - return -ENOMEM; - - err = register_filesystem(&relayfs_fs_type); - if (err) - kmem_cache_destroy(relayfs_inode_cachep); - - return err; + return register_filesystem(&relayfs_fs_type); } static void __exit exit_relayfs_fs(void) { unregister_filesystem(&relayfs_fs_type); - kmem_cache_destroy(relayfs_inode_cachep); } module_init(init_relayfs_fs) diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h index 921540b1cdf8..8200ecbe6e0f 100644 --- a/include/linux/relayfs_fs.h +++ b/include/linux/relayfs_fs.h @@ -64,20 +64,6 @@ struct rchan struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */ }; -/* - * Relayfs inode - */ -struct relayfs_inode_info -{ - struct inode vfs_inode; - void *data; -}; - -static inline struct relayfs_inode_info *RELAYFS_I(struct inode *inode) -{ - return container_of(inode, struct relayfs_inode_info, vfs_inode); -} - /* * Relay channel client callbacks */ -- cgit v1.2.3-71-gd317 From 08c541a7ade230883c48225f4ea406a0117e7c2f Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 8 Jan 2006 01:02:28 -0800 Subject: [PATCH] relayfs: add support for relay files in other filesystems This patch adds a couple of callback functions that allow a client to hook into relay_open()/close() and supply the files that will be used to represent the channel buffers; the default implementation if no callbacks are defined is to create the files in relayfs. This is to support the creation and use of relay files in other filesystems such as debugfs, as implied by the fact that relayfs_file_operations are exported. 
Signed-off-by: Tom Zanussi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/relayfs/buffers.c | 2 +- fs/relayfs/relay.c | 30 ++++++++++++++++++++++++++++-- include/linux/relayfs_fs.h | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c index 667b529944c5..10187812771e 100644 --- a/fs/relayfs/buffers.c +++ b/fs/relayfs/buffers.c @@ -185,6 +185,6 @@ void relay_destroy_buf(struct rchan_buf *buf) void relay_remove_buf(struct kref *kref) { struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); - relayfs_remove(buf->dentry); + buf->chan->cb->remove_buf_file(buf->dentry); relay_destroy_buf(buf); } diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c index a9cd5585c45c..b9bb56903272 100644 --- a/fs/relayfs/relay.c +++ b/fs/relayfs/relay.c @@ -80,11 +80,33 @@ static void buf_unmapped_default_callback(struct rchan_buf *buf, { } +/* + * create_buf_file_create() default callback. Creates file to represent buf. + */ +static struct dentry *create_buf_file_default_callback(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf) +{ + return relayfs_create_file(filename, parent, mode, + &relayfs_file_operations, buf); +} + +/* + * remove_buf_file() default callback. Removes file representing relay buffer. 
+ */ +static int remove_buf_file_default_callback(struct dentry *dentry) +{ + return relayfs_remove(dentry); +} + /* relay channel default callbacks */ static struct rchan_callbacks default_channel_callbacks = { .subbuf_start = subbuf_start_default_callback, .buf_mapped = buf_mapped_default_callback, .buf_unmapped = buf_unmapped_default_callback, + .create_buf_file = create_buf_file_default_callback, + .remove_buf_file = remove_buf_file_default_callback, }; /** @@ -176,8 +198,8 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan, return NULL; /* Create file in fs */ - dentry = relayfs_create_file(filename, parent, S_IRUSR, - &relayfs_file_operations, buf); + dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR, + buf); if (!dentry) { relay_destroy_buf(buf); return NULL; @@ -220,6 +242,10 @@ static inline void setup_callbacks(struct rchan *chan, cb->buf_mapped = buf_mapped_default_callback; if (!cb->buf_unmapped) cb->buf_unmapped = buf_unmapped_default_callback; + if (!cb->create_buf_file) + cb->create_buf_file = create_buf_file_default_callback; + if (!cb->remove_buf_file) + cb->remove_buf_file = remove_buf_file_default_callback; chan->cb = cb; } diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h index 8200ecbe6e0f..8c2177105857 100644 --- a/include/linux/relayfs_fs.h +++ b/include/linux/relayfs_fs.h @@ -110,6 +110,40 @@ struct rchan_callbacks */ void (*buf_unmapped)(struct rchan_buf *buf, struct file *filp); + /* + * create_buf_file - create file to represent a relayfs channel buffer + * @filename: the name of the file to create + * @parent: the parent of the file to create + * @mode: the mode of the file to create + * @buf: the channel buffer + * + * Called during relay_open(), once for each per-cpu buffer, + * to allow the client to create a file to be used to + * represent the corresponding channel buffer. If the file is + * created outside of relayfs, the parent must also exist in + * that filesystem. 
+ * + * The callback should return the dentry of the file created + * to represent the relay buffer. + * + * See Documentation/filesystems/relayfs.txt for more info. + */ + struct dentry *(*create_buf_file)(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf); + + /* + * remove_buf_file - remove file representing a relayfs channel buffer + * @dentry: the dentry of the file to remove + * + * Called during relay_close(), once for each per-cpu buffer, + * to allow the client to remove a file used to represent a + * channel buffer. + * + * The callback should return 0 if successful, negative if not. + */ + int (*remove_buf_file)(struct dentry *dentry); }; /* -- cgit v1.2.3-71-gd317 From e6c08367b8fc6dce6dfd1106f53f6ef28215b313 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 8 Jan 2006 01:02:29 -0800 Subject: [PATCH] relayfs: add support for global relay buffers This patch adds the optional is_global outparam to the create_buf_file() callback. This can be used by clients to create a single global relayfs buffer instead of the default per-cpu buffers. This was suggested as being useful for certain debugging applications where it's more convenient to be able to get all the data from a single channel without having to go to the bother of dealing with per-cpu files. 
Signed-off-by: Tom Zanussi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/relayfs/relay.c | 35 +++++++++++++++++++++++++---------- include/linux/relayfs_fs.h | 8 +++++++- 2 files changed, 32 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c index b9bb56903272..2935a6ab8ffa 100644 --- a/fs/relayfs/relay.c +++ b/fs/relayfs/relay.c @@ -86,7 +86,8 @@ static void buf_unmapped_default_callback(struct rchan_buf *buf, static struct dentry *create_buf_file_default_callback(const char *filename, struct dentry *parent, int mode, - struct rchan_buf *buf) + struct rchan_buf *buf, + int *is_global) { return relayfs_create_file(filename, parent, mode, &relayfs_file_operations, buf); @@ -170,14 +171,16 @@ static inline void __relay_reset(struct rchan_buf *buf, unsigned int init) void relay_reset(struct rchan *chan) { unsigned int i; + struct rchan_buf *prev = NULL; if (!chan) return; for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i]) - continue; + if (!chan->buf[i] || chan->buf[i] == prev) + break; __relay_reset(chan->buf[i], 0); + prev = chan->buf[i]; } } @@ -188,18 +191,22 @@ void relay_reset(struct rchan *chan) */ static struct rchan_buf *relay_open_buf(struct rchan *chan, const char *filename, - struct dentry *parent) + struct dentry *parent, + int *is_global) { struct rchan_buf *buf; struct dentry *dentry; + if (*is_global) + return chan->buf[0]; + buf = relay_create_buf(chan); if (!buf) return NULL; /* Create file in fs */ dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR, - buf); + buf, is_global); if (!dentry) { relay_destroy_buf(buf); return NULL; @@ -273,6 +280,7 @@ struct rchan *relay_open(const char *base_filename, unsigned int i; struct rchan *chan; char *tmpname; + int is_global = 0; if (!base_filename) return NULL; @@ -297,7 +305,8 @@ struct rchan *relay_open(const char *base_filename, for_each_online_cpu(i) { sprintf(tmpname, "%s%d", base_filename, i); - chan->buf[i] = 
relay_open_buf(chan, tmpname, parent); + chan->buf[i] = relay_open_buf(chan, tmpname, parent, + &is_global); chan->buf[i]->cpu = i; if (!chan->buf[i]) goto free_bufs; @@ -311,6 +320,8 @@ free_bufs: if (!chan->buf[i]) break; relay_close_buf(chan->buf[i]); + if (is_global) + break; } kfree(tmpname); @@ -420,14 +431,16 @@ void relay_destroy_channel(struct kref *kref) void relay_close(struct rchan *chan) { unsigned int i; + struct rchan_buf *prev = NULL; if (!chan) return; for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i]) - continue; + if (!chan->buf[i] || chan->buf[i] == prev) + break; relay_close_buf(chan->buf[i]); + prev = chan->buf[i]; } if (chan->last_toobig) @@ -447,14 +460,16 @@ void relay_close(struct rchan *chan) void relay_flush(struct rchan *chan) { unsigned int i; + struct rchan_buf *prev = NULL; if (!chan) return; for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i]) - continue; + if (!chan->buf[i] || chan->buf[i] == prev) + break; relay_switch_subbuf(chan->buf[i], 0); + prev = chan->buf[i]; } } diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h index 8c2177105857..30f45511b40d 100644 --- a/include/linux/relayfs_fs.h +++ b/include/linux/relayfs_fs.h @@ -116,6 +116,7 @@ struct rchan_callbacks * @parent: the parent of the file to create * @mode: the mode of the file to create * @buf: the channel buffer + * @is_global: outparam - set non-zero if the buffer should be global * * Called during relay_open(), once for each per-cpu buffer, * to allow the client to create a file to be used to @@ -126,12 +127,17 @@ struct rchan_callbacks * The callback should return the dentry of the file created * to represent the relay buffer. * + * Setting the is_global outparam to a non-zero value will + * cause relay_open() to create a single global buffer rather + * than the default set of per-cpu buffers. + * * See Documentation/filesystems/relayfs.txt for more info. 
*/ struct dentry *(*create_buf_file)(const char *filename, struct dentry *parent, int mode, - struct rchan_buf *buf); + struct rchan_buf *buf, + int *is_global); /* * remove_buf_file - remove file representing a relayfs channel buffer -- cgit v1.2.3-71-gd317 From 761da5c88aca34586e5b7295bd8b9be2438906f2 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Sun, 8 Jan 2006 01:02:31 -0800 Subject: [PATCH] relayfs: cleanup, change relayfs_file_* to relay_file_* This patch renames relayfs_file_operations to relay_file_operations, and the file operations themselves from relayfs_XXX to relay_file_XXX, to make it more clear that they refer to relay files. Signed-off-by: Tom Zanussi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/relayfs/inode.c | 89 ++++++++++++++++++++++++---------------------- fs/relayfs/relay.c | 2 +- include/linux/relayfs_fs.h | 5 ++- 3 files changed, 50 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c index b4c3e0466e98..7b7f2cb5f0e1 100644 --- a/fs/relayfs/inode.c +++ b/fs/relayfs/inode.c @@ -247,13 +247,13 @@ int relayfs_remove_dir(struct dentry *dentry) } /** - * relayfs_open - open file op for relayfs files + * relay_file_open - open file op for relay files * @inode: the inode * @filp: the file * * Increments the channel buffer refcount. */ -static int relayfs_open(struct inode *inode, struct file *filp) +static int relay_file_open(struct inode *inode, struct file *filp) { struct rchan_buf *buf = inode->u.generic_ip; kref_get(&buf->kref); @@ -263,26 +263,26 @@ static int relayfs_open(struct inode *inode, struct file *filp) } /** - * relayfs_mmap - mmap file op for relayfs files + * relay_file_mmap - mmap file op for relay files * @filp: the file * @vma: the vma describing what to map * * Calls upon relay_mmap_buf to map the file into user space. 
*/ -static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma) +static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma) { struct rchan_buf *buf = filp->private_data; return relay_mmap_buf(buf, vma); } /** - * relayfs_poll - poll file op for relayfs files + * relay_file_poll - poll file op for relay files * @filp: the file * @wait: poll table * * Poll implemention. */ -static unsigned int relayfs_poll(struct file *filp, poll_table *wait) +static unsigned int relay_file_poll(struct file *filp, poll_table *wait) { unsigned int mask = 0; struct rchan_buf *buf = filp->private_data; @@ -300,14 +300,14 @@ static unsigned int relayfs_poll(struct file *filp, poll_table *wait) } /** - * relayfs_release - release file op for relayfs files + * relay_file_release - release file op for relay files * @inode: the inode * @filp: the file * * Decrements the channel refcount, as the filesystem is * no longer using it. */ -static int relayfs_release(struct inode *inode, struct file *filp) +static int relay_file_release(struct inode *inode, struct file *filp) { struct rchan_buf *buf = filp->private_data; kref_put(&buf->kref, relay_remove_buf); @@ -316,11 +316,11 @@ static int relayfs_release(struct inode *inode, struct file *filp) } /** - * relayfs_read_consume - update the consumed count for the buffer + * relay_file_read_consume - update the consumed count for the buffer */ -static void relayfs_read_consume(struct rchan_buf *buf, - size_t read_pos, - size_t bytes_consumed) +static void relay_file_read_consume(struct rchan_buf *buf, + size_t read_pos, + size_t bytes_consumed) { size_t subbuf_size = buf->chan->subbuf_size; size_t n_subbufs = buf->chan->n_subbufs; @@ -343,9 +343,9 @@ static void relayfs_read_consume(struct rchan_buf *buf, } /** - * relayfs_read_avail - boolean, are there unconsumed bytes available? + * relay_file_read_avail - boolean, are there unconsumed bytes available? 
*/ -static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos) +static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos) { size_t bytes_produced, bytes_consumed, write_offset; size_t subbuf_size = buf->chan->subbuf_size; @@ -376,16 +376,16 @@ static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos) if (bytes_produced == bytes_consumed) return 0; - relayfs_read_consume(buf, read_pos, 0); + relay_file_read_consume(buf, read_pos, 0); return 1; } /** - * relayfs_read_subbuf_avail - return bytes available in sub-buffer + * relay_file_read_subbuf_avail - return bytes available in sub-buffer */ -static size_t relayfs_read_subbuf_avail(size_t read_pos, - struct rchan_buf *buf) +static size_t relay_file_read_subbuf_avail(size_t read_pos, + struct rchan_buf *buf) { size_t padding, avail = 0; size_t read_subbuf, read_offset, write_subbuf, write_offset; @@ -407,14 +407,14 @@ static size_t relayfs_read_subbuf_avail(size_t read_pos, } /** - * relayfs_read_start_pos - find the first available byte to read + * relay_file_read_start_pos - find the first available byte to read * * If the read_pos is in the middle of padding, return the * position of the first actually available byte, otherwise * return the original value. 
*/ -static size_t relayfs_read_start_pos(size_t read_pos, - struct rchan_buf *buf) +static size_t relay_file_read_start_pos(size_t read_pos, + struct rchan_buf *buf) { size_t read_subbuf, padding, padding_start, padding_end; size_t subbuf_size = buf->chan->subbuf_size; @@ -433,11 +433,11 @@ static size_t relayfs_read_start_pos(size_t read_pos, } /** - * relayfs_read_end_pos - return the new read position + * relay_file_read_end_pos - return the new read position */ -static size_t relayfs_read_end_pos(struct rchan_buf *buf, - size_t read_pos, - size_t count) +static size_t relay_file_read_end_pos(struct rchan_buf *buf, + size_t read_pos, + size_t count) { size_t read_subbuf, padding, end_pos; size_t subbuf_size = buf->chan->subbuf_size; @@ -456,7 +456,7 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf, } /** - * relayfs_read - read file op for relayfs files + * relay_file_read - read file op for relay files * @filp: the file * @buffer: the userspace buffer * @count: number of bytes to read @@ -465,10 +465,10 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf, * Reads count bytes or the number of bytes available in the * current sub-buffer being read, whichever is smaller. 
*/ -static ssize_t relayfs_read(struct file *filp, - char __user *buffer, - size_t count, - loff_t *ppos) +static ssize_t relay_file_read(struct file *filp, + char __user *buffer, + size_t count, + loff_t *ppos) { struct rchan_buf *buf = filp->private_data; struct inode *inode = filp->f_dentry->d_inode; @@ -477,11 +477,11 @@ static ssize_t relayfs_read(struct file *filp, void *from; down(&inode->i_sem); - if(!relayfs_read_avail(buf, *ppos)) + if(!relay_file_read_avail(buf, *ppos)) goto out; - read_start = relayfs_read_start_pos(*ppos, buf); - avail = relayfs_read_subbuf_avail(read_start, buf); + read_start = relay_file_read_start_pos(*ppos, buf); + avail = relay_file_read_subbuf_avail(read_start, buf); if (!avail) goto out; @@ -491,20 +491,20 @@ static ssize_t relayfs_read(struct file *filp, ret = -EFAULT; goto out; } - relayfs_read_consume(buf, read_start, count); - *ppos = relayfs_read_end_pos(buf, read_start, count); + relay_file_read_consume(buf, read_start, count); + *ppos = relay_file_read_end_pos(buf, read_start, count); out: up(&inode->i_sem); return ret; } -struct file_operations relayfs_file_operations = { - .open = relayfs_open, - .poll = relayfs_poll, - .mmap = relayfs_mmap, - .read = relayfs_read, +struct file_operations relay_file_operations = { + .open = relay_file_open, + .poll = relay_file_poll, + .mmap = relay_file_mmap, + .read = relay_file_read, .llseek = no_llseek, - .release = relayfs_release, + .release = relay_file_release, }; static struct super_operations relayfs_ops = { @@ -558,13 +558,18 @@ static int __init init_relayfs_fs(void) static void __exit exit_relayfs_fs(void) { + + + + + unregister_filesystem(&relayfs_fs_type); } module_init(init_relayfs_fs) module_exit(exit_relayfs_fs) -EXPORT_SYMBOL_GPL(relayfs_file_operations); +EXPORT_SYMBOL_GPL(relay_file_operations); EXPORT_SYMBOL_GPL(relayfs_create_dir); EXPORT_SYMBOL_GPL(relayfs_remove_dir); EXPORT_SYMBOL_GPL(relayfs_create_file); diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c 
index 2935a6ab8ffa..abf3ceaace49 100644 --- a/fs/relayfs/relay.c +++ b/fs/relayfs/relay.c @@ -90,7 +90,7 @@ static struct dentry *create_buf_file_default_callback(const char *filename, int *is_global) { return relayfs_create_file(filename, parent, mode, - &relayfs_file_operations, buf); + &relay_file_operations, buf); } /* diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h index 30f45511b40d..7342e66247fb 100644 --- a/include/linux/relayfs_fs.h +++ b/include/linux/relayfs_fs.h @@ -279,10 +279,9 @@ static inline void subbuf_start_reserve(struct rchan_buf *buf, } /* - * exported relayfs file operations, fs/relayfs/inode.c + * exported relay file operations, fs/relayfs/inode.c */ - -extern struct file_operations relayfs_file_operations; +extern struct file_operations relay_file_operations; #endif /* _LINUX_RELAYFS_FS_H */ -- cgit v1.2.3-71-gd317 From 6b9c7ed84837753a436415097063232422e29a35 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 8 Jan 2006 01:02:33 -0800 Subject: [PATCH] use ptrace_get_task_struct in various places The ptrace_get_task_struct() helper that I added as part of the ptrace consolidation is useful in a variety of places that currently open-code it. Switch them to the common helpers. Add a ptrace_traceme() helper that needs to be explicitly called, and simplify the ptrace_get_task_struct() interface. We don't need the request argument now, and we return the task_struct directly, using ERR_PTR() for error returns. It's a bit more code in the callers, but we have two sane routines that do one thing well now. 
Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/ptrace.c | 24 +++---------- arch/ia64/ia32/sys_ia32.c | 16 +++------ arch/ia64/kernel/ptrace.c | 9 +---- arch/m32r/kernel/ptrace.c | 22 +++--------- arch/mips/kernel/ptrace32.c | 26 ++++---------- arch/powerpc/kernel/ptrace32.c | 28 ++++----------- arch/s390/kernel/ptrace.c | 29 ++++------------ arch/sparc/kernel/ptrace.c | 35 ++++--------------- arch/sparc64/kernel/ptrace.c | 34 +++---------------- arch/x86_64/ia32/ptrace32.c | 44 ++++++------------------ include/linux/ptrace.h | 2 ++ kernel/ptrace.c | 77 +++++++++++++++++++++++++----------------- 12 files changed, 105 insertions(+), 241 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index bbd37536d14e..9969d212e94d 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -265,30 +265,16 @@ do_sys_ptrace(long request, long pid, long addr, long data, lock_kernel(); DBG(DBG_MEM, ("request=%ld pid=%ld addr=0x%lx data=0x%lx\n", request, pid, addr, data)); - ret = -EPERM; if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out_notsk; - ret = security_ptrace(current->parent, current); - if (ret) - goto out_notsk; - /* set the ptrace bit in the process ptrace flags. 
*/ - current->ptrace |= PT_PTRACED; - ret = 0; + ret = ptrace_traceme(); goto out_notsk; } - if (pid == 1) /* you may not mess with init */ - goto out_notsk; - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); goto out_notsk; + } if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index dc282710421a..9f8e8d558873 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1761,21 +1761,15 @@ sys32_ptrace (int request, pid_t pid, unsigned int addr, unsigned int data) lock_kernel(); if (request == PTRACE_TRACEME) { - ret = sys_ptrace(request, pid, addr, data); + ret = ptrace_traceme(); goto out; } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); goto out; - ret = -EPERM; - if (pid == 1) /* no messing around with init! */ - goto out_tsk; + } if (request == PTRACE_ATTACH) { ret = sys_ptrace(request, pid, addr, data); diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 4b19d0410632..8d88eeea02d1 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1422,14 +1422,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data) lock_kernel(); ret = -EPERM; if (request == PTRACE_TRACEME) { - /* are we already being traced? 
*/ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - current->ptrace |= PT_PTRACED; - ret = 0; + ret = ptrace_traceme(); goto out; } diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index 078d2a0e71c2..9b75caaf5cec 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -762,28 +762,16 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) int ret; lock_kernel(); - ret = -EPERM; if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; + ret = ptrace_traceme(); goto out; } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); goto out; + } if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 9a9b04972132..7e55457a491f 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -57,30 +57,16 @@ asmlinkage int sys32_ptrace(int request, int pid, int addr, int data) (unsigned long) data); #endif lock_kernel(); - ret = -EPERM; if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - if ((ret = security_ptrace(current->parent, current))) - goto out; - /* set the ptrace bit in the process flags. 
*/ - current->ptrace |= PT_PTRACED; - ret = 0; + ret = ptrace_traceme(); goto out; } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); + goto out; + } if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 61762640b877..826ee3d056de 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -45,33 +45,19 @@ long compat_sys_ptrace(int request, int pid, unsigned long addr, unsigned long data) { struct task_struct *child; - int ret = -EPERM; + int ret; lock_kernel(); if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; + ret = ptrace_traceme(); goto out; } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); + goto out; + } if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 8ecda6d66de4..cc02232aa96e 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -712,35 +712,18 @@ sys_ptrace(long request, long pid, long addr, long data) int ret; lock_kernel(); - if (request == PTRACE_TRACEME) { - /* are we already being traced? 
*/ - ret = -EPERM; - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - goto out; + ret = ptrace_traceme(); + goto out; } - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out; - - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); goto out; + } ret = do_ptrace(child, request, addr, data); - put_task_struct(child); out: unlock_kernel(); diff --git a/arch/sparc/kernel/ptrace.c b/arch/sparc/kernel/ptrace.c index 475c4c13462c..fc470c0e9dc6 100644 --- a/arch/sparc/kernel/ptrace.c +++ b/arch/sparc/kernel/ptrace.c @@ -286,40 +286,17 @@ asmlinkage void do_ptrace(struct pt_regs *regs) s, (int) request, (int) pid, addr, data, addr2); } #endif - if (request == PTRACE_TRACEME) { - int my_ret; - - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) { - pt_error_return(regs, EPERM); - goto out; - } - my_ret = security_ptrace(current->parent, current); - if (my_ret) { - pt_error_return(regs, -my_ret); - goto out; - } - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; + if (request == PTRACE_TRACEME) { + ret = ptrace_traceme(); pt_succ_return(regs, 0); goto out; } -#ifndef ALLOW_INIT_TRACING - if (pid == 1) { - /* Can't dork with init. 
*/ - pt_error_return(regs, EPERM); - goto out; - } -#endif - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) { - pt_error_return(regs, ESRCH); + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); + pt_error_return(regs, -ret); goto out; } diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c index 774ecbb8a031..84d3df2264cb 100644 --- a/arch/sparc64/kernel/ptrace.c +++ b/arch/sparc64/kernel/ptrace.c @@ -198,39 +198,15 @@ asmlinkage void do_ptrace(struct pt_regs *regs) } #endif if (request == PTRACE_TRACEME) { - int ret; - - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) { - pt_error_return(regs, EPERM); - goto out; - } - ret = security_ptrace(current->parent, current); - if (ret) { - pt_error_return(regs, -ret); - goto out; - } - - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; + ret = ptrace_traceme(); pt_succ_return(regs, 0); goto out; } -#ifndef ALLOW_INIT_TRACING - if (pid == 1) { - /* Can't dork with init. 
*/ - pt_error_return(regs, EPERM); - goto out; - } -#endif - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) { - pt_error_return(regs, ESRCH); + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); + pt_error_return(regs, -ret); goto out; } diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c index 2a925e2af390..5f4cdfa56901 100644 --- a/arch/x86_64/ia32/ptrace32.c +++ b/arch/x86_64/ia32/ptrace32.c @@ -196,36 +196,6 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val) #undef R32 -static struct task_struct *find_target(int request, int pid, int *err) -{ - struct task_struct *child; - - *err = -EPERM; - if (pid == 1) - return NULL; - - *err = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (child) { - *err = -EPERM; - if (child->pid == 1) - goto out; - *err = ptrace_check_attach(child, request == PTRACE_KILL); - if (*err < 0) - goto out; - return child; - } - out: - if (child) - put_task_struct(child); - return NULL; - -} - asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) { struct task_struct *child; @@ -254,9 +224,16 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) break; } - child = find_target(request, pid, &ret); - if (!child) - return ret; + if (request == PTRACE_TRACEME) + return ptrace_traceme(); + + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) + return PTR_ERR(child); + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (ret < 0) + goto out; childregs = (struct pt_regs *)(child->thread.rsp0 - sizeof(struct pt_regs)); @@ -373,6 +350,7 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) break; } + out: put_task_struct(child); return ret; } diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h 
index b2b3dba1298d..864791996b5f 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -80,6 +80,8 @@ extern long arch_ptrace(struct task_struct *child, long request, long addr, long data); +extern struct task_struct *ptrace_get_task_struct(pid_t pid); +extern int ptrace_traceme(void); extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); extern int ptrace_attach(struct task_struct *tsk); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 656476eedb1b..cceaf09ac413 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -408,54 +408,62 @@ int ptrace_request(struct task_struct *child, long request, return ret; } -#ifndef __ARCH_SYS_PTRACE -static int ptrace_get_task_struct(long request, long pid, - struct task_struct **childp) +/** + * ptrace_traceme -- helper for PTRACE_TRACEME + * + * Performs checks and sets PT_PTRACED. + * Should be used by all ptrace implementations for PTRACE_TRACEME. + */ +int ptrace_traceme(void) { - struct task_struct *child; int ret; /* - * Callers use child == NULL as an indication to exit early even - * when the return value is 0, so make sure it is non-NULL here. + * Are we already being traced? + */ + if (current->ptrace & PT_PTRACED) + return -EPERM; + ret = security_ptrace(current->parent, current); + if (ret) + return -EPERM; + /* + * Set the ptrace bit in the process ptrace flags. */ - *childp = NULL; + current->ptrace |= PT_PTRACED; + return 0; +} - if (request == PTRACE_TRACEME) { - /* - * Are we already being traced? - */ - if (current->ptrace & PT_PTRACED) - return -EPERM; - ret = security_ptrace(current->parent, current); - if (ret) - return -EPERM; - /* - * Set the ptrace bit in the process ptrace flags. 
- */ - current->ptrace |= PT_PTRACED; - return 0; - } +/** + * ptrace_get_task_struct -- grab a task struct reference for ptrace + * @pid: process id to grab a task_struct reference of + * + * This function is a helper for ptrace implementations. It checks + * permissions and then grabs a task struct for use of the actual + * ptrace implementation. + * + * Returns the task_struct for @pid or an ERR_PTR() on failure. + */ +struct task_struct *ptrace_get_task_struct(pid_t pid) +{ + struct task_struct *child; /* - * You may not mess with init + * Tracing init is not allowed. */ if (pid == 1) - return -EPERM; + return ERR_PTR(-EPERM); - ret = -ESRCH; read_lock(&tasklist_lock); child = find_task_by_pid(pid); if (child) get_task_struct(child); read_unlock(&tasklist_lock); if (!child) - return -ESRCH; - - *childp = child; - return 0; + return ERR_PTR(-ESRCH); + return child; } +#ifndef __ARCH_SYS_PTRACE asmlinkage long sys_ptrace(long request, long pid, long addr, long data) { struct task_struct *child; @@ -465,9 +473,16 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) * This lock_kernel fixes a subtle race with suid exec */ lock_kernel(); - ret = ptrace_get_task_struct(request, pid, &child); - if (!child) + if (request == PTRACE_TRACEME) { + ret = ptrace_traceme(); goto out; + } + + child = ptrace_get_task_struct(pid); + if (IS_ERR(child)) { + ret = PTR_ERR(child); + goto out; + } if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); -- cgit v1.2.3-71-gd317 From 788540141f4549637e89aadca6e25cf25eb53383 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 8 Jan 2006 01:02:37 -0800 Subject: [PATCH] Permit multiple inclusion of linux/pagevec.h Make it possible to include linux/pagevec.h multiple times without incurring errors due to duplicate definitions. 
Signed-off-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagevec.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index def32c5715be..8eb7fa76c1d0 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -5,6 +5,9 @@ * pages. A pagevec is a multipage container which is used for that. */ +#ifndef _LINUX_PAGEVEC_H +#define _LINUX_PAGEVEC_H + /* 14 pointers + two long's align the pagevec structure to a power of two */ #define PAGEVEC_SIZE 14 @@ -83,3 +86,5 @@ static inline void pagevec_lru_add(struct pagevec *pvec) if (pagevec_count(pvec)) __pagevec_lru_add(pvec); } + +#endif /* _LINUX_PAGEVEC_H */ -- cgit v1.2.3-71-gd317 From 4a30131e7dbb17e5fec6958bfac9da9aff1fa29b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 8 Jan 2006 01:02:39 -0800 Subject: [PATCH] Fix some problems with truncate and mtime semantics. SUS requires that when truncating a file to the size that it currently is: truncate and ftruncate should NOT modify ctime or mtime O_TRUNC SHOULD modify ctime and mtime. Currently mtime and ctime are always modified on most local filesystems (side effect of ->truncate) or never modified (on NFS). With this patch: ATTR_CTIME|ATTR_MTIME are sent with ATTR_SIZE precisely when an update of these times is required whether size changes or not (via a new argument to do_truncate). This allows NFS to do the right thing for O_TRUNC. inode_setattr no longer forces ATTR_MTIME|ATTR_CTIME when the ATTR_SIZE sets the size to its current value. This allows local filesystems to do the right thing for f?truncate. Also, the logic in inode_setattr is changed a bit so there are two return points. One returns the error from vmtruncate if it failed, the other returns 0 (there can be no other failure). Finally, if vmtruncate succeeds, and ATTR_SIZE is the only change requested, we now fall-through and mark_inode_dirty. 
If a filesystem did not have a ->truncate function, then vmtruncate will have changed i_size, without marking the inode as 'dirty', and I think this is wrong. Signed-off-by: Neil Brown Cc: Christoph Hellwig Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/attr.c | 24 ++++++++---------------- fs/exec.c | 2 +- fs/namei.c | 2 +- fs/open.c | 9 +++++---- include/linux/fs.h | 3 ++- 5 files changed, 17 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/attr.c b/fs/attr.c index 67bcd9b14ea5..b34732506f1d 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -67,20 +67,12 @@ EXPORT_SYMBOL(inode_change_ok); int inode_setattr(struct inode * inode, struct iattr * attr) { unsigned int ia_valid = attr->ia_valid; - int error = 0; - - if (ia_valid & ATTR_SIZE) { - if (attr->ia_size != i_size_read(inode)) { - error = vmtruncate(inode, attr->ia_size); - if (error || (ia_valid == ATTR_SIZE)) - goto out; - } else { - /* - * We skipped the truncate but must still update - * timestamps - */ - ia_valid |= ATTR_MTIME|ATTR_CTIME; - } + + if (ia_valid & ATTR_SIZE && + attr->ia_size != i_size_read(inode)) { + int error = vmtruncate(inode, attr->ia_size); + if (error) + return error; } if (ia_valid & ATTR_UID) @@ -104,8 +96,8 @@ int inode_setattr(struct inode * inode, struct iattr * attr) inode->i_mode = mode; } mark_inode_dirty(inode); -out: - return error; + + return 0; } EXPORT_SYMBOL(inode_setattr); diff --git a/fs/exec.c b/fs/exec.c index e9650cd22a3b..2075b674d85e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1505,7 +1505,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) goto close_fail; if (!file->f_op->write) goto close_fail; - if (do_truncate(file->f_dentry, 0, file) != 0) + if (do_truncate(file->f_dentry, 0, 0, file) != 0) goto close_fail; retval = binfmt->core_dump(signr, regs, file); diff --git a/fs/namei.c b/fs/namei.c index 6dbbd42d8b95..300eae088d5f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1491,7 +1491,7 @@ 
int may_open(struct nameidata *nd, int acc_mode, int flag) if (!error) { DQUOT_INIT(inode); - error = do_truncate(dentry, 0, NULL); + error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL); } put_write_access(inode); if (error) diff --git a/fs/open.c b/fs/open.c index f53a5b9ffb7d..94968cb3afca 100644 --- a/fs/open.c +++ b/fs/open.c @@ -194,7 +194,8 @@ out: return error; } -int do_truncate(struct dentry *dentry, loff_t length, struct file *filp) +int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, + struct file *filp) { int err; struct iattr newattrs; @@ -204,7 +205,7 @@ int do_truncate(struct dentry *dentry, loff_t length, struct file *filp) return -EINVAL; newattrs.ia_size = length; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; + newattrs.ia_valid = ATTR_SIZE | time_attrs; if (filp) { newattrs.ia_file = filp; newattrs.ia_valid |= ATTR_FILE; @@ -266,7 +267,7 @@ static inline long do_sys_truncate(const char __user * path, loff_t length) error = locks_verify_truncate(inode, NULL, length); if (!error) { DQUOT_INIT(inode); - error = do_truncate(nd.dentry, length, NULL); + error = do_truncate(nd.dentry, length, 0, NULL); } put_write_access(inode); @@ -318,7 +319,7 @@ static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small) error = locks_verify_truncate(inode, file, length); if (!error) - error = do_truncate(dentry, length, file); + error = do_truncate(dentry, length, 0, file); out_putf: fput(file); out: diff --git a/include/linux/fs.h b/include/linux/fs.h index ef29500b5df8..74c01aabd4ab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1344,7 +1344,8 @@ static inline int break_lease(struct inode *inode, unsigned int mode) /* fs/open.c */ -extern int do_truncate(struct dentry *, loff_t start, struct file *filp); +extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, + struct file *filp); extern long do_sys_open(const char __user *filename, int flags, int mode); extern struct file 
*filp_open(const char *, int, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); -- cgit v1.2.3-71-gd317 From 017679c4d45783158dba1dd6f79e712c22bb3d9a Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 8 Jan 2006 01:02:43 -0800 Subject: [PATCH] keys: Permit key expiry time to be set Add a new keyctl function that allows the expiry time to be set on a key or removed from a key, provided the caller has attribute modification access. Signed-off-by: David Howells Cc: Trond Myklebust Cc: Alexander Zangerl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/keys.txt | 15 ++++++++++++++- include/linux/keyctl.h | 1 + security/keys/compat.c | 3 +++ security/keys/internal.h | 1 + security/keys/keyctl.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 63 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/keys.txt b/Documentation/keys.txt index 6304db59bfe4..c17c4ca74302 100644 --- a/Documentation/keys.txt +++ b/Documentation/keys.txt @@ -498,7 +498,7 @@ The keyctl syscall functions are: keyring is full, error ENFILE will result. The link procedure checks the nesting of the keyrings, returning ELOOP if - it appears to deep or EDEADLK if the link would introduce a cycle. + it appears too deep or EDEADLK if the link would introduce a cycle. (*) Unlink a key or keyring from another keyring: @@ -628,6 +628,19 @@ The keyctl syscall functions are: there is one, otherwise the user default session keyring. + (*) Set the timeout on a key. + + long keyctl(KEYCTL_SET_TIMEOUT, key_serial_t key, unsigned timeout); + + This sets or clears the timeout on a key. The timeout can be 0 to clear + the timeout or a number of seconds to set the expiry time that far into + the future. + + The process must have attribute modification access on a key to set its + timeout. Timeouts may not be set with this function on negative, revoked + or expired keys. 
+ + =============== KERNEL SERVICES =============== diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h index 8d7c59a29e09..ec8f3d622a8d 100644 --- a/include/linux/keyctl.h +++ b/include/linux/keyctl.h @@ -46,5 +46,6 @@ #define KEYCTL_INSTANTIATE 12 /* instantiate a partially constructed key */ #define KEYCTL_NEGATE 13 /* negate a partially constructed key */ #define KEYCTL_SET_REQKEY_KEYRING 14 /* set default request-key keyring */ +#define KEYCTL_SET_TIMEOUT 15 /* set key timeout */ #endif /* _LINUX_KEYCTL_H */ diff --git a/security/keys/compat.c b/security/keys/compat.c index 3303673c636e..e8e7ef4a290c 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -74,6 +74,9 @@ asmlinkage long compat_sys_keyctl(u32 option, case KEYCTL_SET_REQKEY_KEYRING: return keyctl_set_reqkey_keyring(arg2); + case KEYCTL_SET_TIMEOUT: + return keyctl_set_timeout(arg2, arg3); + default: return -EOPNOTSUPP; } diff --git a/security/keys/internal.h b/security/keys/internal.h index 39cba97c5eb9..51f37c0bdb32 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -136,6 +136,7 @@ extern long keyctl_instantiate_key(key_serial_t, const void __user *, size_t, key_serial_t); extern long keyctl_negate_key(key_serial_t, unsigned, key_serial_t); extern long keyctl_set_reqkey_keyring(int); +extern long keyctl_set_timeout(key_serial_t, unsigned); /* diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index b7a468fabdf9..299f0ae11cf0 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -965,6 +965,46 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) } /* end keyctl_set_reqkey_keyring() */ +/*****************************************************************************/ +/* + * set or clear the timeout for a key + */ +long keyctl_set_timeout(key_serial_t id, unsigned timeout) +{ + struct timespec now; + struct key *key; + key_ref_t key_ref; + time_t expiry; + long ret; + + key_ref = lookup_user_key(NULL, id, 1, 1, KEY_SETATTR); + if 
(IS_ERR(key_ref)) { + ret = PTR_ERR(key_ref); + goto error; + } + + key = key_ref_to_ptr(key_ref); + + /* make the changes with the locks held to prevent races */ + down_write(&key->sem); + + expiry = 0; + if (timeout > 0) { + now = current_kernel_time(); + expiry = now.tv_sec + timeout; + } + + key->expiry = expiry; + + up_write(&key->sem); + key_put(key); + + ret = 0; +error: + return ret; + +} /* end keyctl_set_timeout() */ + /*****************************************************************************/ /* * the key control system call @@ -1038,6 +1078,10 @@ asmlinkage long sys_keyctl(int option, unsigned long arg2, unsigned long arg3, case KEYCTL_SET_REQKEY_KEYRING: return keyctl_set_reqkey_keyring(arg2); + case KEYCTL_SET_TIMEOUT: + return keyctl_set_timeout((key_serial_t) arg2, + (unsigned) arg3); + default: return -EOPNOTSUPP; } -- cgit v1.2.3-71-gd317 From b5f545c880a2a47947ba2118b2509644ab7a2969 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 8 Jan 2006 01:02:47 -0800 Subject: [PATCH] keys: Permit running process to instantiate keys Make it possible for a running process (such as gssapid) to be able to instantiate a key, as was requested by Trond Myklebust for NFS4. The patch makes the following changes: (1) A new, optional key type method has been added. This permits a key type to intercept requests at the point /sbin/request-key is about to be spawned and do something else with them - passing them over the rpc_pipefs files or netlink sockets for instance. The uninstantiated key, the authorisation key and the intended operation name are passed to the method. (2) The callout_info is no longer passed as an argument to /sbin/request-key to prevent unauthorised viewing of this data using ps or by looking in /proc/pid/cmdline. This means that the old /sbin/request-key program will not work with the patched kernel as it will expect to see an extra argument that is no longer there. A revised keyutils package will be made available tomorrow. 
(3) The callout_info is now attached to the authorisation key. Reading this key will retrieve the information. (4) A new field has been added to the task_struct. This holds the authorisation key currently active for a thread. Searches now look here for the caller's set of keys rather than looking for an auth key in the lowest level of the session keyring. This permits a thread to be servicing multiple requests at once and to switch between them. Note that this is per-thread, not per-process, and so is usable in multithreaded programs. The setting of this field is inherited across fork and exec. (5) A new keyctl function (KEYCTL_ASSUME_AUTHORITY) has been added that permits a thread to assume the authority to deal with an uninstantiated key. Assumption is only permitted if the authorisation key associated with the uninstantiated key is somewhere in the thread's keyrings. This function can also clear the assumption. (6) A new magic key specifier has been added to refer to the currently assumed authorisation key (KEY_SPEC_REQKEY_AUTH_KEY). (7) Instantiation will only proceed if the appropriate authorisation key is assumed first. The assumed authorisation key is discarded if instantiation is successful. (8) key_validate() is moved from the file of request_key functions to the file of permissions functions. (9) The documentation is updated. From: Build fix. 
Signed-off-by: David Howells Cc: Trond Myklebust Cc: Alexander Zangerl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/keys-request-key.txt | 22 +++-- Documentation/keys.txt | 24 +++++ include/linux/key.h | 12 +++ include/linux/keyctl.h | 2 + include/linux/sched.h | 1 + security/keys/compat.c | 3 + security/keys/internal.h | 4 +- security/keys/keyctl.c | 107 ++++++++++++++++----- security/keys/keyring.c | 45 --------- security/keys/permission.c | 32 +++++++ security/keys/process_keys.c | 71 +++++++------- security/keys/request_key.c | 108 ++++++++++----------- security/keys/request_key_auth.c | 192 ++++++++++++++++++++++--------------- 13 files changed, 378 insertions(+), 245 deletions(-) (limited to 'include/linux') diff --git a/Documentation/keys-request-key.txt b/Documentation/keys-request-key.txt index 5f2b9c5edbb5..22488d791168 100644 --- a/Documentation/keys-request-key.txt +++ b/Documentation/keys-request-key.txt @@ -56,10 +56,12 @@ A request proceeds in the following manner: (4) request_key() then forks and executes /sbin/request-key with a new session keyring that contains a link to auth key V. - (5) /sbin/request-key execs an appropriate program to perform the actual + (5) /sbin/request-key assumes the authority associated with key U. + + (6) /sbin/request-key execs an appropriate program to perform the actual instantiation. - (6) The program may want to access another key from A's context (say a + (7) The program may want to access another key from A's context (say a Kerberos TGT key). It just requests the appropriate key, and the keyring search notes that the session keyring has auth key V in its bottom level. @@ -67,19 +69,19 @@ A request proceeds in the following manner: UID, GID, groups and security info of process A as if it was process A, and come up with key W. 
- (7) The program then does what it must to get the data with which to + (8) The program then does what it must to get the data with which to instantiate key U, using key W as a reference (perhaps it contacts a Kerberos server using the TGT) and then instantiates key U. - (8) Upon instantiating key U, auth key V is automatically revoked so that it + (9) Upon instantiating key U, auth key V is automatically revoked so that it may not be used again. - (9) The program then exits 0 and request_key() deletes key V and returns key +(10) The program then exits 0 and request_key() deletes key V and returns key U to the caller. -This also extends further. If key W (step 5 above) didn't exist, key W would be -created uninstantiated, another auth key (X) would be created [as per step 3] -and another copy of /sbin/request-key spawned [as per step 4]; but the context +This also extends further. If key W (step 7 above) didn't exist, key W would be +created uninstantiated, another auth key (X) would be created (as per step 3) +and another copy of /sbin/request-key spawned (as per step 4); but the context specified by auth key X will still be process A, as it was in auth key V. This is because process A's keyrings can't simply be attached to @@ -138,8 +140,8 @@ until one succeeds: (3) The process's session keyring is searched. - (4) If the process has a request_key() authorisation key in its session - keyring then: + (4) If the process has assumed the authority associated with a request_key() + authorisation key then: (a) If extant, the calling process's thread keyring is searched. 
diff --git a/Documentation/keys.txt b/Documentation/keys.txt index eeda00f82d2c..aaa01b0e3ee9 100644 --- a/Documentation/keys.txt +++ b/Documentation/keys.txt @@ -308,6 +308,8 @@ process making the call: KEY_SPEC_USER_KEYRING -4 UID-specific keyring KEY_SPEC_USER_SESSION_KEYRING -5 UID-session keyring KEY_SPEC_GROUP_KEYRING -6 GID-specific keyring + KEY_SPEC_REQKEY_AUTH_KEY -7 assumed request_key() + authorisation key The main syscalls are: @@ -645,6 +647,28 @@ The keyctl syscall functions are: or expired keys. + (*) Assume the authority granted to instantiate a key + + long keyctl(KEYCTL_ASSUME_AUTHORITY, key_serial_t key); + + This assumes or divests the authority required to instantiate the + specified key. Authority can only be assumed if the thread has the + authorisation key associated with the specified key in its keyrings + somewhere. + + Once authority is assumed, searches for keys will also search the + requester's keyrings using the requester's security label, UID, GID and + groups. + + If the requested authority is unavailable, error EPERM will be returned, + likewise if the authority has been revoked because the target key is + already instantiated. + + If the specified key is 0, then any assumed authority will be divested. + + The assumed authoritative key is inherited across fork and exec. 
+ + =============== KERNEL SERVICES =============== diff --git a/include/linux/key.h b/include/linux/key.h index 4d189e51bc6c..cbf464ad9589 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -177,6 +177,8 @@ struct key { /* * kernel managed key type definition */ +typedef int (*request_key_actor_t)(struct key *key, struct key *authkey, const char *op); + struct key_type { /* name of the type */ const char *name; @@ -218,6 +220,16 @@ struct key_type { */ long (*read)(const struct key *key, char __user *buffer, size_t buflen); + /* handle request_key() for this type instead of invoking + * /sbin/request-key (optional) + * - key is the key to instantiate + * - authkey is the authority to assume when instantiating this key + * - op is the operation to be done, usually "create" + * - the call must not return until the instantiation process has run + * its course + */ + request_key_actor_t request_key; + /* internal fields */ struct list_head link; /* link in types list */ }; diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h index ec8f3d622a8d..3365945640c9 100644 --- a/include/linux/keyctl.h +++ b/include/linux/keyctl.h @@ -19,6 +19,7 @@ #define KEY_SPEC_USER_KEYRING -4 /* - key ID for UID-specific keyring */ #define KEY_SPEC_USER_SESSION_KEYRING -5 /* - key ID for UID-session keyring */ #define KEY_SPEC_GROUP_KEYRING -6 /* - key ID for GID-specific keyring */ +#define KEY_SPEC_REQKEY_AUTH_KEY -7 /* - key ID for assumed request_key auth key */ /* request-key default keyrings */ #define KEY_REQKEY_DEFL_NO_CHANGE -1 @@ -47,5 +48,6 @@ #define KEYCTL_NEGATE 13 /* negate a partially constructed key */ #define KEYCTL_SET_REQKEY_KEYRING 14 /* set default request-key keyring */ #define KEYCTL_SET_TIMEOUT 15 /* set key timeout */ +#define KEYCTL_ASSUME_AUTHORITY 16 /* assume request_key() authorisation */ #endif /* _LINUX_KEYCTL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 20bd70749104..78eb92ae4d94 100644 --- 
a/include/linux/sched.h +++ b/include/linux/sched.h @@ -771,6 +771,7 @@ struct task_struct { unsigned keep_capabilities:1; struct user_struct *user; #ifdef CONFIG_KEYS + struct key *request_key_auth; /* assumed request_key authority */ struct key *thread_keyring; /* keyring private to this thread */ unsigned char jit_keyring; /* default keyring to attach requested keys to */ #endif diff --git a/security/keys/compat.c b/security/keys/compat.c index e8e7ef4a290c..bcdb28533733 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -77,6 +77,9 @@ asmlinkage long compat_sys_keyctl(u32 option, case KEYCTL_SET_TIMEOUT: return keyctl_set_timeout(arg2, arg3); + case KEYCTL_ASSUME_AUTHORITY: + return keyctl_assume_authority(arg2); + default: return -EOPNOTSUPP; } diff --git a/security/keys/internal.h b/security/keys/internal.h index 51f37c0bdb32..e066e6057955 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -107,12 +107,13 @@ extern struct key *request_key_and_link(struct key_type *type, struct request_key_auth { struct key *target_key; struct task_struct *context; + const char *callout_info; pid_t pid; }; extern struct key_type key_type_request_key_auth; extern struct key *request_key_auth_new(struct key *target, - struct key **_rkakey); + const char *callout_info); extern struct key *key_get_instantiation_authkey(key_serial_t target_id); @@ -137,6 +138,7 @@ extern long keyctl_instantiate_key(key_serial_t, const void __user *, extern long keyctl_negate_key(key_serial_t, unsigned, key_serial_t); extern long keyctl_set_reqkey_keyring(int); extern long keyctl_set_timeout(key_serial_t, unsigned); +extern long keyctl_assume_authority(key_serial_t); /* diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 299f0ae11cf0..3d2ebae029c1 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -834,6 +834,17 @@ long keyctl_instantiate_key(key_serial_t id, if (plen > 32767) goto error; + /* the appropriate instantiation 
authorisation key must have been + * assumed before calling this */ + ret = -EPERM; + instkey = current->request_key_auth; + if (!instkey) + goto error; + + rka = instkey->payload.data; + if (rka->target_key->serial != id) + goto error; + /* pull the payload in if one was supplied */ payload = NULL; @@ -848,15 +859,6 @@ long keyctl_instantiate_key(key_serial_t id, goto error2; } - /* find the instantiation authorisation key */ - instkey = key_get_instantiation_authkey(id); - if (IS_ERR(instkey)) { - ret = PTR_ERR(instkey); - goto error2; - } - - rka = instkey->payload.data; - /* find the destination keyring amongst those belonging to the * requesting task */ keyring_ref = NULL; @@ -865,7 +867,7 @@ long keyctl_instantiate_key(key_serial_t id, KEY_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); - goto error3; + goto error2; } } @@ -874,11 +876,17 @@ long keyctl_instantiate_key(key_serial_t id, key_ref_to_ptr(keyring_ref), instkey); key_ref_put(keyring_ref); - error3: - key_put(instkey); - error2: + + /* discard the assumed authority if it's just been disabled by + * instantiation of the key */ + if (ret == 0) { + key_put(current->request_key_auth); + current->request_key_auth = NULL; + } + +error2: kfree(payload); - error: +error: return ret; } /* end keyctl_instantiate_key() */ @@ -895,14 +903,16 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) key_ref_t keyring_ref; long ret; - /* find the instantiation authorisation key */ - instkey = key_get_instantiation_authkey(id); - if (IS_ERR(instkey)) { - ret = PTR_ERR(instkey); + /* the appropriate instantiation authorisation key must have been + * assumed before calling this */ + ret = -EPERM; + instkey = current->request_key_auth; + if (!instkey) goto error; - } rka = instkey->payload.data; + if (rka->target_key->serial != id) + goto error; /* find the destination keyring if present (which must also be * writable) */ @@ -911,7 +921,7 @@ long keyctl_negate_key(key_serial_t 
id, unsigned timeout, key_serial_t ringid) keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); - goto error2; + goto error; } } @@ -920,9 +930,15 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) key_ref_to_ptr(keyring_ref), instkey); key_ref_put(keyring_ref); - error2: - key_put(instkey); - error: + + /* discard the assumed authority if it's just been disabled by + * instantiation of the key */ + if (ret == 0) { + key_put(current->request_key_auth); + current->request_key_auth = NULL; + } + +error: return ret; } /* end keyctl_negate_key() */ @@ -1005,6 +1021,48 @@ error: } /* end keyctl_set_timeout() */ +/*****************************************************************************/ +/* + * assume the authority to instantiate the specified key + */ +long keyctl_assume_authority(key_serial_t id) +{ + struct key *authkey; + long ret; + + /* special key IDs aren't permitted */ + ret = -EINVAL; + if (id < 0) + goto error; + + /* we divest ourselves of authority if given an ID of 0 */ + if (id == 0) { + key_put(current->request_key_auth); + current->request_key_auth = NULL; + ret = 0; + goto error; + } + + /* attempt to assume the authority temporarily granted to us whilst we + * instantiate the specified key + * - the authorisation key must be in the current task's keyrings + * somewhere + */ + authkey = key_get_instantiation_authkey(id); + if (IS_ERR(authkey)) { + ret = PTR_ERR(authkey); + goto error; + } + + key_put(current->request_key_auth); + current->request_key_auth = authkey; + ret = authkey->serial; + +error: + return ret; + +} /* end keyctl_assume_authority() */ + /*****************************************************************************/ /* * the key control system call @@ -1082,6 +1140,9 @@ asmlinkage long sys_keyctl(int option, unsigned long arg2, unsigned long arg3, return keyctl_set_timeout((key_serial_t) arg2, (unsigned) arg3); + case 
KEYCTL_ASSUME_AUTHORITY: + return keyctl_assume_authority((key_serial_t) arg2); + default: return -EOPNOTSUPP; } diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 09d92d52ef75..d65a180f888d 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -479,51 +479,6 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref, } /* end __keyring_search_one() */ -/*****************************************************************************/ -/* - * search for an instantiation authorisation key matching a target key - * - the RCU read lock must be held by the caller - * - a target_id of zero specifies any valid token - */ -struct key *keyring_search_instkey(struct key *keyring, - key_serial_t target_id) -{ - struct request_key_auth *rka; - struct keyring_list *klist; - struct key *instkey; - int loop; - - klist = rcu_dereference(keyring->payload.subscriptions); - if (klist) { - for (loop = 0; loop < klist->nkeys; loop++) { - instkey = klist->keys[loop]; - - if (instkey->type != &key_type_request_key_auth) - continue; - - rka = instkey->payload.data; - if (target_id && rka->target_key->serial != target_id) - continue; - - /* the auth key is revoked during instantiation */ - if (!test_bit(KEY_FLAG_REVOKED, &instkey->flags)) - goto found; - - instkey = ERR_PTR(-EKEYREVOKED); - goto error; - } - } - - instkey = ERR_PTR(-EACCES); - goto error; - -found: - atomic_inc(&instkey->usage); -error: - return instkey; - -} /* end keyring_search_instkey() */ - /*****************************************************************************/ /* * find a keyring with the specified name diff --git a/security/keys/permission.c b/security/keys/permission.c index e7f579c0eaf5..3b41f9b52537 100644 --- a/security/keys/permission.c +++ b/security/keys/permission.c @@ -73,3 +73,35 @@ use_these_perms: } /* end key_task_permission() */ EXPORT_SYMBOL(key_task_permission); + +/*****************************************************************************/ +/* + * validate a 
key + */ +int key_validate(struct key *key) +{ + struct timespec now; + int ret = 0; + + if (key) { + /* check it's still accessible */ + ret = -EKEYREVOKED; + if (test_bit(KEY_FLAG_REVOKED, &key->flags) || + test_bit(KEY_FLAG_DEAD, &key->flags)) + goto error; + + /* check it hasn't expired */ + ret = 0; + if (key->expiry) { + now = current_kernel_time(); + if (now.tv_sec >= key->expiry) + ret = -EKEYEXPIRED; + } + } + + error: + return ret; + +} /* end key_validate() */ + +EXPORT_SYMBOL(key_validate); diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 566b1cc0118a..74cb79eb917e 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -270,9 +270,14 @@ int copy_thread_group_keys(struct task_struct *tsk) int copy_keys(unsigned long clone_flags, struct task_struct *tsk) { key_check(tsk->thread_keyring); + key_check(tsk->request_key_auth); /* no thread keyring yet */ tsk->thread_keyring = NULL; + + /* copy the request_key() authorisation for this thread */ + key_get(tsk->request_key_auth); + return 0; } /* end copy_keys() */ @@ -290,11 +295,12 @@ void exit_thread_group_keys(struct signal_struct *tg) /*****************************************************************************/ /* - * dispose of keys upon thread exit + * dispose of per-thread keys upon thread exit */ void exit_keys(struct task_struct *tsk) { key_put(tsk->thread_keyring); + key_put(tsk->request_key_auth); } /* end exit_keys() */ @@ -382,7 +388,7 @@ key_ref_t search_process_keyrings(struct key_type *type, struct task_struct *context) { struct request_key_auth *rka; - key_ref_t key_ref, ret, err, instkey_ref; + key_ref_t key_ref, ret, err; /* we want to return -EAGAIN or -ENOKEY if any of the keyrings were * searchable, but we failed to find a key or we found a negative key; @@ -461,30 +467,12 @@ key_ref_t search_process_keyrings(struct key_type *type, err = key_ref; break; } - - /* if this process has a session keyring and that has an - * instantiation 
authorisation key in the bottom level, then we - * also search the keyrings of the process mentioned there */ - if (context != current) - goto no_key; - - rcu_read_lock(); - instkey_ref = __keyring_search_one( - make_key_ref(rcu_dereference( - context->signal->session_keyring), - 1), - &key_type_request_key_auth, NULL, 0); - rcu_read_unlock(); - - if (IS_ERR(instkey_ref)) - goto no_key; - - rka = key_ref_to_ptr(instkey_ref)->payload.data; - - key_ref = search_process_keyrings(type, description, match, - rka->context); - key_ref_put(instkey_ref); - + } + /* or search the user-session keyring */ + else { + key_ref = keyring_search_aux( + make_key_ref(context->user->session_keyring, 1), + context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -500,11 +488,21 @@ key_ref_t search_process_keyrings(struct key_type *type, break; } } - /* or search the user-session keyring */ - else { - key_ref = keyring_search_aux( - make_key_ref(context->user->session_keyring, 1), - context, type, description, match); + + /* if this process has an instantiation authorisation key, then we also + * search the keyrings of the process mentioned there + * - we don't permit access to request_key auth keys via this method + */ + if (context->request_key_auth && + context == current && + type != &key_type_request_key_auth && + key_validate(context->request_key_auth) == 0 + ) { + rka = context->request_key_auth->payload.data; + + key_ref = search_process_keyrings(type, description, match, + rka->context); + if (!IS_ERR(key_ref)) goto found; @@ -521,8 +519,6 @@ key_ref_t search_process_keyrings(struct key_type *type, } } - -no_key: /* no key - decide on the error we're going to go for */ key_ref = ret ? 
ret : err; @@ -628,6 +624,15 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, key = ERR_PTR(-EINVAL); goto error; + case KEY_SPEC_REQKEY_AUTH_KEY: + key = context->request_key_auth; + if (!key) + goto error; + + atomic_inc(&key->usage); + key_ref = make_key_ref(key, 1); + break; + default: key_ref = ERR_PTR(-EINVAL); if (id < 1) diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 5cc4bba70db6..f030a0ccbb93 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -29,28 +29,36 @@ DECLARE_WAIT_QUEUE_HEAD(request_key_conswq); /*****************************************************************************/ /* * request userspace finish the construction of a key - * - execute "/sbin/request-key <op> <key> <uid> <gid> <keyring> <keyring> <keyring> <info>" + * - execute "/sbin/request-key <op> <key> <uid> <gid> <keyring> <keyring> <keyring>" */ -static int call_request_key(struct key *key, - const char *op, - const char *callout_info) +static int call_sbin_request_key(struct key *key, + struct key *authkey, + const char *op) { struct task_struct *tsk = current; key_serial_t prkey, sskey; - struct key *session_keyring, *rkakey; - char *argv[10], *envp[3], uid_str[12], gid_str[12]; + struct key *keyring; + char *argv[9], *envp[3], uid_str[12], gid_str[12]; char key_str[12], keyring_str[3][12]; + char desc[20]; int ret, i; - kenter("{%d},%s,%s", key->serial, op, callout_info); + kenter("{%d},{%d},%s", key->serial, authkey->serial, op); - /* generate a new session keyring with an auth key in it */ - session_keyring = request_key_auth_new(key, &rkakey); - if (IS_ERR(session_keyring)) { - ret = PTR_ERR(session_keyring); - goto error; + /* allocate a new session keyring */ + sprintf(desc, "_req.%u", key->serial); + + keyring = keyring_alloc(desc, current->fsuid, current->fsgid, 1, NULL); + if (IS_ERR(keyring)) { + ret = PTR_ERR(keyring); + goto error_alloc; } + /* attach the auth key to the session keyring */ + ret = __key_link(keyring, authkey); + if (ret < 0) + goto error_link; + /* record the UID and GID */ 
sprintf(uid_str, "%d", current->fsuid); sprintf(gid_str, "%d", current->fsgid); @@ -95,22 +103,19 @@ static int call_request_key(struct key *key, argv[i++] = keyring_str[0]; argv[i++] = keyring_str[1]; argv[i++] = keyring_str[2]; - argv[i++] = (char *) callout_info; argv[i] = NULL; /* do it */ - ret = call_usermodehelper_keys(argv[0], argv, envp, session_keyring, 1); + ret = call_usermodehelper_keys(argv[0], argv, envp, keyring, 1); - /* dispose of the special keys */ - key_revoke(rkakey); - key_put(rkakey); - key_put(session_keyring); +error_link: + key_put(keyring); - error: +error_alloc: kleave(" = %d", ret); return ret; -} /* end call_request_key() */ +} /* end call_sbin_request_key() */ /*****************************************************************************/ /* @@ -122,9 +127,10 @@ static struct key *__request_key_construction(struct key_type *type, const char *description, const char *callout_info) { + request_key_actor_t actor; struct key_construction cons; struct timespec now; - struct key *key; + struct key *key, *authkey; int ret, negated; kenter("%s,%s,%s", type->name, description, callout_info); @@ -143,8 +149,19 @@ static struct key *__request_key_construction(struct key_type *type, /* we drop the construction sem here on behalf of the caller */ up_write(&key_construction_sem); + /* allocate an authorisation key */ + authkey = request_key_auth_new(key, callout_info); + if (IS_ERR(authkey)) { + ret = PTR_ERR(authkey); + authkey = NULL; + goto alloc_authkey_failed; + } + /* make the call */ - ret = call_request_key(key, "create", callout_info); + actor = call_sbin_request_key; + if (type->request_key) + actor = type->request_key; + ret = actor(key, authkey, "create"); if (ret < 0) goto request_failed; @@ -153,22 +170,29 @@ static struct key *__request_key_construction(struct key_type *type, if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) goto request_failed; + key_revoke(authkey); + key_put(authkey); + down_write(&key_construction_sem); 
list_del(&cons.link); up_write(&key_construction_sem); /* also give an error if the key was negatively instantiated */ - check_not_negative: +check_not_negative: if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { key_put(key); key = ERR_PTR(-ENOKEY); } - out: +out: kleave(" = %p", key); return key; - request_failed: +request_failed: + key_revoke(authkey); + key_put(authkey); + +alloc_authkey_failed: /* it wasn't instantiated * - remove from construction queue * - mark the key as dead @@ -217,7 +241,7 @@ static struct key *__request_key_construction(struct key_type *type, key = ERR_PTR(ret); goto out; - alloc_failed: +alloc_failed: up_write(&key_construction_sem); goto out; @@ -464,35 +488,3 @@ struct key *request_key(struct key_type *type, } /* end request_key() */ EXPORT_SYMBOL(request_key); - -/*****************************************************************************/ -/* - * validate a key - */ -int key_validate(struct key *key) -{ - struct timespec now; - int ret = 0; - - if (key) { - /* check it's still accessible */ - ret = -EKEYREVOKED; - if (test_bit(KEY_FLAG_REVOKED, &key->flags) || - test_bit(KEY_FLAG_DEAD, &key->flags)) - goto error; - - /* check it hasn't expired */ - ret = 0; - if (key->expiry) { - now = current_kernel_time(); - if (now.tv_sec >= key->expiry) - ret = -EKEYEXPIRED; - } - } - - error: - return ret; - -} /* end key_validate() */ - -EXPORT_SYMBOL(key_validate); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index a8e4069d48cb..cce6ba6b0323 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -15,11 +15,13 @@ #include #include #include +#include #include "internal.h" static int request_key_auth_instantiate(struct key *, const void *, size_t); static void request_key_auth_describe(const struct key *, struct seq_file *); static void request_key_auth_destroy(struct key *); +static long request_key_auth_read(const struct key *, char __user *, size_t); /* * the request-key 
authorisation key type definition @@ -30,51 +32,25 @@ struct key_type key_type_request_key_auth = { .instantiate = request_key_auth_instantiate, .describe = request_key_auth_describe, .destroy = request_key_auth_destroy, + .read = request_key_auth_read, }; /*****************************************************************************/ /* - * instantiate a request-key authorisation record + * instantiate a request-key authorisation key */ static int request_key_auth_instantiate(struct key *key, const void *data, size_t datalen) { - struct request_key_auth *rka, *irka; - struct key *instkey; - int ret; - - ret = -ENOMEM; - rka = kmalloc(sizeof(*rka), GFP_KERNEL); - if (rka) { - /* see if the calling process is already servicing the key - * request of another process */ - instkey = key_get_instantiation_authkey(0); - if (!IS_ERR(instkey)) { - /* it is - use that instantiation context here too */ - irka = instkey->payload.data; - rka->context = irka->context; - rka->pid = irka->pid; - key_put(instkey); - } - else { - /* it isn't - use this process as the context */ - rka->context = current; - rka->pid = current->pid; - } - - rka->target_key = key_get((struct key *) data); - key->payload.data = rka; - ret = 0; - } - - return ret; + key->payload.data = (struct request_key_auth *) data; + return 0; } /* end request_key_auth_instantiate() */ /*****************************************************************************/ /* - * + * reading a request-key authorisation key retrieves the callout information */ static void request_key_auth_describe(const struct key *key, struct seq_file *m) @@ -83,10 +59,38 @@ static void request_key_auth_describe(const struct key *key, seq_puts(m, "key:"); seq_puts(m, key->description); - seq_printf(m, " pid:%d", rka->pid); + seq_printf(m, " pid:%d ci:%zu", rka->pid, strlen(rka->callout_info)); } /* end request_key_auth_describe() */ +/*****************************************************************************/ +/* + * read the callout_info 
data + * - the key's semaphore is read-locked + */ +static long request_key_auth_read(const struct key *key, + char __user *buffer, size_t buflen) +{ + struct request_key_auth *rka = key->payload.data; + size_t datalen; + long ret; + + datalen = strlen(rka->callout_info); + ret = datalen; + + /* we can return the data as is */ + if (buffer && buflen > 0) { + if (buflen > datalen) + buflen = datalen; + + if (copy_to_user(buffer, rka->callout_info, buflen) != 0) + ret = -EFAULT; + } + + return ret; + +} /* end request_key_auth_read() */ + /*****************************************************************************/ /* * destroy an instantiation authorisation token key @@ -104,54 +108,87 @@ static void request_key_auth_destroy(struct key *key) /*****************************************************************************/ /* - * create a session keyring to be for the invokation of /sbin/request-key and - * stick an authorisation token in it + * create an authorisation token for /sbin/request-key or whoever to gain + * access to the caller's security data */ -struct key *request_key_auth_new(struct key *target, struct key **_rkakey) +struct key *request_key_auth_new(struct key *target, const char *callout_info) { - struct key *keyring, *rkakey = NULL; + struct request_key_auth *rka, *irka; + struct key *authkey = NULL; char desc[20]; int ret; kenter("%d,", target->serial); - /* allocate a new session keyring */ - sprintf(desc, "_req.%u", target->serial); + /* allocate a auth record */ + rka = kmalloc(sizeof(*rka), GFP_KERNEL); + if (!rka) { + kleave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); + } - keyring = keyring_alloc(desc, current->fsuid, current->fsgid, 1, NULL); - if (IS_ERR(keyring)) { - kleave("= %ld", PTR_ERR(keyring)); - return keyring; + /* see if the calling process is already servicing the key request of + * another process */ + if (current->request_key_auth) { + /* it is - use that instantiation context here too */ + irka = 
current->request_key_auth->payload.data; + rka->context = irka->context; + rka->pid = irka->pid; } + else { + /* it isn't - use this process as the context */ + rka->context = current; + rka->pid = current->pid; + } + + rka->target_key = key_get(target); + rka->callout_info = callout_info; /* allocate the auth key */ sprintf(desc, "%x", target->serial); - rkakey = key_alloc(&key_type_request_key_auth, desc, - current->fsuid, current->fsgid, - KEY_POS_VIEW | KEY_USR_VIEW, 1); - if (IS_ERR(rkakey)) { - key_put(keyring); - kleave("= %ld", PTR_ERR(rkakey)); - return rkakey; + authkey = key_alloc(&key_type_request_key_auth, desc, + current->fsuid, current->fsgid, + KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | + KEY_USR_VIEW, 1); + if (IS_ERR(authkey)) { + ret = PTR_ERR(authkey); + goto error_alloc; } /* construct and attach to the keyring */ - ret = key_instantiate_and_link(rkakey, target, 0, keyring, NULL); - if (ret < 0) { - key_revoke(rkakey); - key_put(rkakey); - key_put(keyring); - kleave("= %d", ret); - return ERR_PTR(ret); - } + ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL); + if (ret < 0) + goto error_inst; - *_rkakey = rkakey; - kleave(" = {%d} ({%d})", keyring->serial, rkakey->serial); - return keyring; + kleave(" = {%d})", authkey->serial); + return authkey; + +error_inst: + key_revoke(authkey); + key_put(authkey); +error_alloc: + key_put(rka->target_key); + kfree(rka); + kleave("= %d", ret); + return ERR_PTR(ret); } /* end request_key_auth_new() */ +/*****************************************************************************/ +/* + * see if an authorisation key is associated with a particular key + */ +static int key_get_instantiation_authkey_match(const struct key *key, + const void *_id) +{ + struct request_key_auth *rka = key->payload.data; + key_serial_t id = (key_serial_t)(unsigned long) _id; + + return rka->target_key->serial == id; + +} /* end key_get_instantiation_authkey_match() */ + 
/*****************************************************************************/ /* * get the authorisation key for instantiation of a specific key if attached to @@ -162,22 +199,27 @@ struct key *request_key_auth_new(struct key *target, struct key **_rkakey) */ struct key *key_get_instantiation_authkey(key_serial_t target_id) { - struct task_struct *tsk = current; - struct key *instkey; - - /* we must have our own personal session keyring */ - if (!tsk->signal->session_keyring) - return ERR_PTR(-EACCES); - - /* and it must contain a suitable request authorisation key - * - lock RCU against session keyring changing - */ - rcu_read_lock(); + struct key *authkey; + key_ref_t authkey_ref; + + authkey_ref = search_process_keyrings( + &key_type_request_key_auth, + (void *) (unsigned long) target_id, + key_get_instantiation_authkey_match, + current); + + if (IS_ERR(authkey_ref)) { + authkey = ERR_PTR(PTR_ERR(authkey_ref)); + goto error; + } - instkey = keyring_search_instkey( - rcu_dereference(tsk->signal->session_keyring), target_id); + authkey = key_ref_to_ptr(authkey_ref); + if (test_bit(KEY_FLAG_REVOKED, &authkey->flags)) { + key_put(authkey); + authkey = ERR_PTR(-EKEYREVOKED); + } - rcu_read_unlock(); - return instkey; +error: + return authkey; } /* end key_get_instantiation_authkey() */ -- cgit v1.2.3-71-gd317 From 71fabd5e4835309b4feca6209122ce56c595c461 Mon Sep 17 00:00:00 2001 From: George Anzinger Date: Sun, 8 Jan 2006 01:02:48 -0800 Subject: [PATCH] sigaction should clear all signals on SIG_IGN, not just < 32 While rooting around in the signal code trying to understand how to fix the SIG_IGN ploy (set sig handler to SIG_IGN and flood system with high speed repeating timers) I came across what, I think, is a problem in sigaction() in that when processing a SIG_IGN request it flushes signals from 1 to SIGRTMIN and leaves the rest. Attempt to fix this.
Signed-off-by: George Anzinger Cc: Roland McGrath Cc: Linus Torvalds Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 17 +++++++++++++++++ kernel/signal.c | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index ea9eff16c4b7..b7d093520bb6 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -94,6 +94,23 @@ static inline int sigfindinword(unsigned long word) #endif /* __HAVE_ARCH_SIG_BITOPS */ +static inline int sigisemptyset(sigset_t *set) +{ + extern void _NSIG_WORDS_is_unsupported_size(void); + switch (_NSIG_WORDS) { + case 4: + return (set->sig[3] | set->sig[2] | + set->sig[1] | set->sig[0]) == 0; + case 2: + return (set->sig[1] | set->sig[0]) == 0; + case 1: + return set->sig[0] == 0; + default: + _NSIG_WORDS_is_unsupported_size(); + return 0; + } +} + #define sigmask(sig) (1UL << ((sig) - 1)) #ifndef __HAVE_ARCH_SIG_SETOPS diff --git a/kernel/signal.c b/kernel/signal.c index 9b6fda5e87f1..e20724af9b36 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -620,6 +620,33 @@ void signal_wake_up(struct task_struct *t, int resume) kick_process(t); } +/* + * Remove signals in mask from the pending set and queue. + * Returns 1 if any signals were found. + * + * All callers must be holding the siglock. + * + * This version takes a sigset mask and looks at all signals, + * not just those in the first mask word. 
+ */ +static int rm_from_queue_full(sigset_t *mask, struct sigpending *s) +{ + struct sigqueue *q, *n; + sigset_t m; + + sigandsets(&m, mask, &s->signal); + if (sigisemptyset(&m)) + return 0; + + signandsets(&s->signal, &s->signal, mask); + list_for_each_entry_safe(q, n, &s->list, list) { + if (sigismember(mask, q->info.si_signo)) { + list_del_init(&q->list); + __sigqueue_free(q); + } + } + return 1; +} /* * Remove signals in mask from the pending set and queue. * Returns 1 if any signals were found. @@ -2408,6 +2435,7 @@ int do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) { struct k_sigaction *k; + sigset_t mask; if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig))) return -EINVAL; @@ -2455,9 +2483,11 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) *k = *act; sigdelsetmask(&k->sa.sa_mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); - rm_from_queue(sigmask(sig), &t->signal->shared_pending); + sigemptyset(&mask); + sigaddset(&mask, sig); + rm_from_queue_full(&mask, &t->signal->shared_pending); do { - rm_from_queue(sigmask(sig), &t->pending); + rm_from_queue_full(&mask, &t->pending); recalc_sigpending_tsk(t); t = next_thread(t); } while (t != current); -- cgit v1.2.3-71-gd317 From a885c8c4316e1c1d2d2c8755da3f3d14f852528d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 8 Jan 2006 01:02:50 -0800 Subject: [PATCH] Add block_device_operations.getgeo block device method HDIO_GETGEO is implemented in most block drivers, and all of them have to duplicate the code to copy the structure to userspace, as well as getting the start sector. This patch moves that to common code [1] and adds a ->getgeo method to fill out the raw kernel hd_geometry structure. For many drivers this means ->ioctl can go away now. [1] the s390 block drivers are odd in this respect. 
xpram sets ->start to 4 always which seems more than odd, and the dasd driver shifts the start offset around, probably because of its non-standard sector size. Signed-off-by: Christoph Hellwig Cc: Jens Axboe Cc: Cc: Jeff Dike Cc: Paolo Giarrusso Cc: Bartlomiej Zolnierkiewicz Cc: Neil Brown Cc: Markus Lidel Cc: Russell King Cc: David Woodhouse Cc: Martin Schwidefsky Cc: James Bottomley Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/drivers/ubd_kern.c | 21 +++++++++-------- block/ioctl.c | 22 ++++++++++++++++++ drivers/acorn/block/mfmhd.c | 36 ++++++----------------------- drivers/block/DAC960.c | 35 ++++++++++++----------------- drivers/block/acsi.c | 26 +++++++++++---------- drivers/block/amiflop.c | 23 +++++++++---------- drivers/block/aoe/aoeblk.c | 26 ++++++--------------- drivers/block/cciss.c | 31 ++++++++++++------------- drivers/block/cpqarray.c | 36 +++++++++++++++-------------- drivers/block/floppy.c | 35 +++++++++++++++-------------- drivers/block/paride/pd.c | 34 +++++++++++++++------------- drivers/block/paride/pf.c | 50 +++++++++++++++++++++-------------------- drivers/block/ps2esdi.c | 25 +++++++-------------- drivers/block/sx8.c | 35 +++++++---------------------- drivers/block/umem.c | 41 +++++++++++++-------------------- drivers/block/viodasd.c | 44 ++++++++---------------------------- drivers/block/xd.c | 25 +++++++++++---------- drivers/ide/ide-disk.c | 12 ++++++++++ drivers/ide/ide-floppy.c | 12 ++++++++++ drivers/ide/ide.c | 13 ----------- drivers/ide/legacy/hd.c | 24 +++++++------------- drivers/md/md.c | 30 +++++++++---------------- drivers/message/i2o/i2o_block.c | 18 +++++++-------- drivers/mmc/mmc_block.c | 25 +++++---------------- drivers/mtd/mtd_blkdevs.c | 25 ++++++++-------------- drivers/s390/block/dasd.c | 23 +++++++++++++++++++ drivers/s390/block/dasd_ioctl.c | 28 ----------------------- drivers/s390/block/xpram.c | 18 ++++++--------- drivers/scsi/sd.c | 21 +++++------------
include/linux/fs.h | 2 ++ 30 files changed, 340 insertions(+), 456 deletions(-) (limited to 'include/linux') diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 73f9652b2ee9..3a93c6f772fa 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -117,6 +117,7 @@ static int ubd_open(struct inode * inode, struct file * filp); static int ubd_release(struct inode * inode, struct file * file); static int ubd_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg); +static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo); #define MAX_DEV (8) @@ -125,6 +126,7 @@ static struct block_device_operations ubd_blops = { .open = ubd_open, .release = ubd_release, .ioctl = ubd_ioctl, + .getgeo = ubd_getgeo, }; /* Protected by the queue_lock */ @@ -1058,6 +1060,16 @@ static void do_ubd_request(request_queue_t *q) } } +static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct ubd *dev = bdev->bd_disk->private_data; + + geo->heads = 128; + geo->sectors = 32; + geo->cylinders = dev->size / (128 * 32 * 512); + return 0; +} + static int ubd_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) { @@ -1070,16 +1082,7 @@ static int ubd_ioctl(struct inode * inode, struct file * file, }; switch (cmd) { - struct hd_geometry g; struct cdrom_volctrl volume; - case HDIO_GETGEO: - if(!loc) return(-EINVAL); - g.heads = 128; - g.sectors = 32; - g.cylinders = dev->size / (128 * 32 * 512); - g.start = get_start_sect(inode->i_bdev); - return(copy_to_user(loc, &g, sizeof(g)) ? 
-EFAULT : 0); - case HDIO_GET_IDENTITY: ubd_id.cyls = dev->size / (128 * 32 * 512); if(copy_to_user((char __user *) arg, (char *) &ubd_id, diff --git a/block/ioctl.c b/block/ioctl.c index 6e278474f9a8..82030e1dfd63 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -1,6 +1,7 @@ #include /* for capable() */ #include #include +#include #include #include #include @@ -245,6 +246,27 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, set_device_ro(bdev, n); unlock_kernel(); return 0; + case HDIO_GETGEO: { + struct hd_geometry geo; + + if (!arg) + return -EINVAL; + if (!disk->fops->getgeo) + return -ENOTTY; + + /* + * We need to set the startsect first, the driver may + * want to override it. + */ + geo.start = get_start_sect(bdev); + ret = disk->fops->getgeo(bdev, &geo); + if (ret) + return ret; + if (copy_to_user((struct hd_geometry __user *)arg, &geo, + sizeof(geo))) + return -EFAULT; + return 0; + } } lock_kernel(); diff --git a/drivers/acorn/block/mfmhd.c b/drivers/acorn/block/mfmhd.c index 4b65f74d66b1..ce074f6f3369 100644 --- a/drivers/acorn/block/mfmhd.c +++ b/drivers/acorn/block/mfmhd.c @@ -129,19 +129,6 @@ static DEFINE_SPINLOCK(mfm_lock); #define MAJOR_NR MFM_ACORN_MAJOR #define QUEUE (mfm_queue) #define CURRENT elv_next_request(mfm_queue) -/* - * This sort of stuff should be in a header file shared with ide.c, hd.c, xd.c etc - */ -#ifndef HDIO_GETGEO -#define HDIO_GETGEO 0x301 -struct hd_geometry { - unsigned char heads; - unsigned char sectors; - unsigned short cylinders; - unsigned long start; -}; -#endif - /* * Configuration section @@ -1153,22 +1140,13 @@ static int mfm_initdrives(void) * The 'front' end of the mfm driver follows... 
*/ -static int mfm_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long arg) +static int mfm_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct mfm_info *p = inode->i_bdev->bd_disk->private_data; - struct hd_geometry *geo = (struct hd_geometry *) arg; - if (cmd != HDIO_GETGEO) - return -EINVAL; - if (!arg) - return -EINVAL; - if (put_user (p->heads, &geo->heads)) - return -EFAULT; - if (put_user (p->sectors, &geo->sectors)) - return -EFAULT; - if (put_user (p->cylinders, &geo->cylinders)) - return -EFAULT; - if (put_user (get_start_sect(inode->i_bdev), &geo->start)) - return -EFAULT; + struct mfm_info *p = bdev->bd_disk->private_data; + + geo->heads = p->heads; + geo->sectors = p->sectors; + geo->cylinders = p->cylinders; return 0; } @@ -1219,7 +1197,7 @@ void xd_set_geometry(struct block_device *bdev, unsigned char secsptrack, static struct block_device_operations mfm_fops = { .owner = THIS_MODULE, - .ioctl = mfm_ioctl, + .getgeo = mfm_getgeo, }; /* diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 21097a39a057..179c68a3cef3 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -92,34 +92,28 @@ static int DAC960_open(struct inode *inode, struct file *file) return 0; } -static int DAC960_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +static int DAC960_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct gendisk *disk = inode->i_bdev->bd_disk; + struct gendisk *disk = bdev->bd_disk; DAC960_Controller_T *p = disk->queue->queuedata; int drive_nr = (long)disk->private_data; - struct hd_geometry g; - struct hd_geometry __user *loc = (struct hd_geometry __user *)arg; - - if (cmd != HDIO_GETGEO || !loc) - return -EINVAL; if (p->FirmwareType == DAC960_V1_Controller) { - g.heads = p->V1.GeometryTranslationHeads; - g.sectors = p->V1.GeometryTranslationSectors; - g.cylinders = p->V1.LogicalDriveInformation[drive_nr]. 
- LogicalDriveSize / (g.heads * g.sectors); + geo->heads = p->V1.GeometryTranslationHeads; + geo->sectors = p->V1.GeometryTranslationSectors; + geo->cylinders = p->V1.LogicalDriveInformation[drive_nr]. + LogicalDriveSize / (geo->heads * geo->sectors); } else { DAC960_V2_LogicalDeviceInfo_T *i = p->V2.LogicalDeviceInformation[drive_nr]; switch (i->DriveGeometry) { case DAC960_V2_Geometry_128_32: - g.heads = 128; - g.sectors = 32; + geo->heads = 128; + geo->sectors = 32; break; case DAC960_V2_Geometry_255_63: - g.heads = 255; - g.sectors = 63; + geo->heads = 255; + geo->sectors = 63; break; default: DAC960_Error("Illegal Logical Device Geometry %d\n", @@ -127,12 +121,11 @@ static int DAC960_ioctl(struct inode *inode, struct file *file, return -EINVAL; } - g.cylinders = i->ConfigurableDeviceSize / (g.heads * g.sectors); + geo->cylinders = i->ConfigurableDeviceSize / + (geo->heads * geo->sectors); } - g.start = get_start_sect(inode->i_bdev); - - return copy_to_user(loc, &g, sizeof g) ? -EFAULT : 0; + return 0; } static int DAC960_media_changed(struct gendisk *disk) @@ -157,7 +150,7 @@ static int DAC960_revalidate_disk(struct gendisk *disk) static struct block_device_operations DAC960_BlockDeviceOperations = { .owner = THIS_MODULE, .open = DAC960_open, - .ioctl = DAC960_ioctl, + .getgeo = DAC960_getgeo, .media_changed = DAC960_media_changed, .revalidate_disk = DAC960_revalidate_disk, }; diff --git a/drivers/block/acsi.c b/drivers/block/acsi.c index 5d2d649f7e8d..196c0ec9cd54 100644 --- a/drivers/block/acsi.c +++ b/drivers/block/acsi.c @@ -1079,6 +1079,19 @@ static void redo_acsi_request( void ) * ***********************************************************************/ +static int acsi_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct acsi_info_struct *aip = bdev->bd_disk->private_data; + + /* + * Just fake some geometry here, it's nonsense anyway + * To make it easy, use Adaptec's usual 64/32 mapping + */ + geo->heads = 64; + geo->sectors = 32; + 
geo->cylinders = aip->size >> 11; + return 0; +} static int acsi_ioctl( struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg ) @@ -1086,18 +1099,6 @@ static int acsi_ioctl( struct inode *inode, struct file *file, struct gendisk *disk = inode->i_bdev->bd_disk; struct acsi_info_struct *aip = disk->private_data; switch (cmd) { - case HDIO_GETGEO: - /* HDIO_GETGEO is supported more for getting the partition's - * start sector... */ - { struct hd_geometry *geo = (struct hd_geometry *)arg; - /* just fake some geometry here, it's nonsense anyway; to make it - * easy, use Adaptec's usual 64/32 mapping */ - put_user( 64, &geo->heads ); - put_user( 32, &geo->sectors ); - put_user( aip->size >> 11, &geo->cylinders ); - put_user(get_start_sect(inode->i_bdev), &geo->start); - return 0; - } case SCSI_IOCTL_GET_IDLUN: /* SCSI compatible GET_IDLUN call to get target's ID and LUN number */ put_user( aip->target | (aip->lun << 8), @@ -1592,6 +1593,7 @@ static struct block_device_operations acsi_fops = { .open = acsi_open, .release = acsi_release, .ioctl = acsi_ioctl, + .getgeo = acsi_getgeo, .media_changed = acsi_media_change, .revalidate_disk= acsi_revalidate, }; diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 0acbfff8ad28..cb2a545e57dc 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1424,6 +1424,16 @@ static void do_fd_request(request_queue_t * q) redo_fd_request(); } +static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + int drive = MINOR(bdev->bd_dev) & 3; + + geo->heads = unit[drive].type->heads; + geo->sectors = unit[drive].dtype->sects * unit[drive].type->sect_mult; + geo->cylinders = unit[drive].type->tracks; + return 0; +} + static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long param) { @@ -1431,18 +1441,6 @@ static int fd_ioctl(struct inode *inode, struct file *filp, static struct floppy_struct getprm; switch(cmd){ - case HDIO_GETGEO: - { - 
struct hd_geometry loc; - loc.heads = unit[drive].type->heads; - loc.sectors = unit[drive].dtype->sects * unit[drive].type->sect_mult; - loc.cylinders = unit[drive].type->tracks; - loc.start = 0; - if (copy_to_user((void *)param, (void *)&loc, - sizeof(struct hd_geometry))) - return -EFAULT; - break; - } case FDFMTBEG: get_fdc(drive); if (fd_ref[drive] > 1) { @@ -1652,6 +1650,7 @@ static struct block_device_operations floppy_fops = { .open = floppy_open, .release = floppy_release, .ioctl = fd_ioctl, + .getgeo = fd_getgeo, .media_changed = amiga_floppy_change, }; diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 0e97fcb9f3a1..c05ee8bffd97 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -169,38 +169,26 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio) return 0; } -/* This ioctl implementation expects userland to have the device node - * permissions set so that only priviledged users can open an aoe - * block device directly. - */ static int -aoeblk_ioctl(struct inode *inode, struct file *filp, uint cmd, ulong arg) +aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct aoedev *d; - - if (!arg) - return -EINVAL; + struct aoedev *d = bdev->bd_disk->private_data; - d = inode->i_bdev->bd_disk->private_data; if ((d->flags & DEVFL_UP) == 0) { printk(KERN_ERR "aoe: aoeblk_ioctl: disk not up\n"); return -ENODEV; } - if (cmd == HDIO_GETGEO) { - d->geo.start = get_start_sect(inode->i_bdev); - if (!copy_to_user((void __user *) arg, &d->geo, sizeof d->geo)) - return 0; - return -EFAULT; - } - printk(KERN_INFO "aoe: aoeblk_ioctl: unknown ioctl %d\n", cmd); - return -EINVAL; + geo->cylinders = d->geo.cylinders; + geo->heads = d->geo.heads; + geo->sectors = d->geo.sectors; + return 0; } static struct block_device_operations aoe_bdops = { .open = aoeblk_open, .release = aoeblk_release, - .ioctl = aoeblk_ioctl, + .getgeo = aoeblk_getgeo, .owner = THIS_MODULE, }; diff --git a/drivers/block/cciss.c 
b/drivers/block/cciss.c index d2815b7a9150..bdb9c2717d40 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -153,6 +153,7 @@ static int cciss_open(struct inode *inode, struct file *filep); static int cciss_release(struct inode *inode, struct file *filep); static int cciss_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, unsigned long arg); +static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo); static int revalidate_allvol(ctlr_info_t *host); static int cciss_revalidate(struct gendisk *disk); @@ -194,6 +195,7 @@ static struct block_device_operations cciss_fops = { .open = cciss_open, .release = cciss_release, .ioctl = cciss_ioctl, + .getgeo = cciss_getgeo, #ifdef CONFIG_COMPAT .compat_ioctl = cciss_compat_ioctl, #endif @@ -633,6 +635,20 @@ static int cciss_ioctl32_big_passthru(struct file *file, unsigned cmd, unsigned return err; } #endif + +static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + drive_info_struct *drv = get_drv(bdev->bd_disk); + + if (!drv->cylinders) + return -ENXIO; + + geo->heads = drv->heads; + geo->sectors = drv->sectors; + geo->cylinders = drv->cylinders; + return 0; +} + /* * ioctl */ @@ -651,21 +667,6 @@ static int cciss_ioctl(struct inode *inode, struct file *filep, #endif /* CCISS_DEBUG */ switch(cmd) { - case HDIO_GETGEO: - { - struct hd_geometry driver_geo; - if (drv->cylinders) { - driver_geo.heads = drv->heads; - driver_geo.sectors = drv->sectors; - driver_geo.cylinders = drv->cylinders; - } else - return -ENXIO; - driver_geo.start= get_start_sect(inode->i_bdev); - if (copy_to_user(argp, &driver_geo, sizeof(struct hd_geometry))) - return -EFAULT; - return(0); - } - case CCISS_GETPCIINFO: { cciss_pci_info_struct pciinfo; diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 9bddb6874873..9f0664dd3800 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -160,6 +160,7 @@ static int sendcmd( static int ida_open(struct inode 
*inode, struct file *filep); static int ida_release(struct inode *inode, struct file *filep); static int ida_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, unsigned long arg); +static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo); static int ida_ctlr_ioctl(ctlr_info_t *h, int dsk, ida_ioctl_t *io); static void do_ida_request(request_queue_t *q); @@ -199,6 +200,7 @@ static struct block_device_operations ida_fops = { .open = ida_open, .release = ida_release, .ioctl = ida_ioctl, + .getgeo = ida_getgeo, .revalidate_disk= ida_revalidate, }; @@ -1124,6 +1126,23 @@ static void ida_timer(unsigned long tdata) h->misc_tflags = 0; } +static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + drv_info_t *drv = get_drv(bdev->bd_disk); + + if (drv->cylinders) { + geo->heads = drv->heads; + geo->sectors = drv->sectors; + geo->cylinders = drv->cylinders; + } else { + geo->heads = 0xff; + geo->sectors = 0x3f; + geo->cylinders = drv->nr_blks / (0xff*0x3f); + } + + return 0; +} + /* * ida_ioctl does some miscellaneous stuff like reporting drive geometry, * setting readahead and submitting commands from userspace to the controller. 
@@ -1133,27 +1152,10 @@ static int ida_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, drv_info_t *drv = get_drv(inode->i_bdev->bd_disk); ctlr_info_t *host = get_host(inode->i_bdev->bd_disk); int error; - int diskinfo[4]; - struct hd_geometry __user *geo = (struct hd_geometry __user *)arg; ida_ioctl_t __user *io = (ida_ioctl_t __user *)arg; ida_ioctl_t *my_io; switch(cmd) { - case HDIO_GETGEO: - if (drv->cylinders) { - diskinfo[0] = drv->heads; - diskinfo[1] = drv->sectors; - diskinfo[2] = drv->cylinders; - } else { - diskinfo[0] = 0xff; - diskinfo[1] = 0x3f; - diskinfo[2] = drv->nr_blks / (0xff*0x3f); - } - put_user(diskinfo[0], &geo->heads); - put_user(diskinfo[1], &geo->sectors); - put_user(diskinfo[2], &geo->cylinders); - put_user(get_start_sect(inode->i_bdev), &geo->start); - return 0; case IDAGETDRVINFO: if (copy_to_user(&io->c.drv, drv, sizeof(drv_info_t))) return -EFAULT; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index a5b857c5c4b8..b86613b21cf1 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3445,6 +3445,23 @@ static int get_floppy_geometry(int drive, int type, struct floppy_struct **g) return 0; } +static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + int drive = (long)bdev->bd_disk->private_data; + int type = ITYPE(drive_state[drive].fd_device); + struct floppy_struct *g; + int ret; + + ret = get_floppy_geometry(drive, type, &g); + if (ret) + return ret; + + geo->heads = g->head; + geo->sectors = g->sect; + geo->cylinders = g->track; + return 0; +} + static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long param) { @@ -3474,23 +3491,6 @@ static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, cmd = FDEJECT; } - /* generic block device ioctls */ - switch (cmd) { - /* the following have been inspired by the corresponding - * code for other block devices. 
*/ - struct floppy_struct *g; - case HDIO_GETGEO: - { - struct hd_geometry loc; - ECALL(get_floppy_geometry(drive, type, &g)); - loc.heads = g->head; - loc.sectors = g->sect; - loc.cylinders = g->track; - loc.start = 0; - return _COPYOUT(loc); - } - } - /* convert the old style command into a new style command */ if ((cmd & 0xff00) == 0x0200) { ECALL(normalize_ioctl(&cmd, &size)); @@ -3938,6 +3938,7 @@ static struct block_device_operations floppy_fops = { .open = floppy_open, .release = floppy_release, .ioctl = fd_ioctl, + .getgeo = fd_getgeo, .media_changed = check_floppy_change, .revalidate_disk = floppy_revalidate, }; diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index fa49d62626ba..62d2464c12f2 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -747,32 +747,33 @@ static int pd_open(struct inode *inode, struct file *file) return 0; } +static int pd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct pd_unit *disk = bdev->bd_disk->private_data; + + if (disk->alt_geom) { + geo->heads = PD_LOG_HEADS; + geo->sectors = PD_LOG_SECTS; + geo->cylinders = disk->capacity / (geo->heads * geo->sectors); + } else { + geo->heads = disk->heads; + geo->sectors = disk->sectors; + geo->cylinders = disk->cylinders; + } + + return 0; +} + static int pd_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { struct pd_unit *disk = inode->i_bdev->bd_disk->private_data; - struct hd_geometry __user *geo = (struct hd_geometry __user *) arg; - struct hd_geometry g; switch (cmd) { case CDROMEJECT: if (disk->access == 1) pd_special_command(disk, pd_eject); return 0; - case HDIO_GETGEO: - if (disk->alt_geom) { - g.heads = PD_LOG_HEADS; - g.sectors = PD_LOG_SECTS; - g.cylinders = disk->capacity / (g.heads * g.sectors); - } else { - g.heads = disk->heads; - g.sectors = disk->sectors; - g.cylinders = disk->cylinders; - } - g.start = get_start_sect(inode->i_bdev); - if (copy_to_user(geo, &g, 
sizeof(struct hd_geometry))) - return -EFAULT; - return 0; default: return -EINVAL; } @@ -815,6 +816,7 @@ static struct block_device_operations pd_fops = { .open = pd_open, .release = pd_release, .ioctl = pd_ioctl, + .getgeo = pd_getgeo, .media_changed = pd_check_media, .revalidate_disk= pd_revalidate }; diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index e9746af29b9f..852b564e903a 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -205,6 +205,7 @@ static int pf_open(struct inode *inode, struct file *file); static void do_pf_request(request_queue_t * q); static int pf_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); +static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo); static int pf_release(struct inode *inode, struct file *file); @@ -266,6 +267,7 @@ static struct block_device_operations pf_fops = { .open = pf_open, .release = pf_release, .ioctl = pf_ioctl, + .getgeo = pf_getgeo, .media_changed = pf_check_media, }; @@ -313,34 +315,34 @@ static int pf_open(struct inode *inode, struct file *file) return 0; } -static int pf_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct pf_unit *pf = inode->i_bdev->bd_disk->private_data; - struct hd_geometry __user *geo = (struct hd_geometry __user *) arg; - struct hd_geometry g; - sector_t capacity; - - if (cmd == CDROMEJECT) { - if (pf->access == 1) { - pf_eject(pf); - return 0; - } - return -EBUSY; - } - if (cmd != HDIO_GETGEO) - return -EINVAL; - capacity = get_capacity(pf->disk); + struct pf_unit *pf = bdev->bd_disk->private_data; + sector_t capacity = get_capacity(pf->disk); + if (capacity < PF_FD_MAX) { - g.cylinders = sector_div(capacity, PF_FD_HDS * PF_FD_SPT); - g.heads = PF_FD_HDS; - g.sectors = PF_FD_SPT; + geo->cylinders = sector_div(capacity, PF_FD_HDS * PF_FD_SPT); + geo->heads = PF_FD_HDS; + 
geo->sectors = PF_FD_SPT; } else { - g.cylinders = sector_div(capacity, PF_HD_HDS * PF_HD_SPT); - g.heads = PF_HD_HDS; - g.sectors = PF_HD_SPT; + geo->cylinders = sector_div(capacity, PF_HD_HDS * PF_HD_SPT); + geo->heads = PF_HD_HDS; + geo->sectors = PF_HD_SPT; } - if (copy_to_user(geo, &g, sizeof(g))) - return -EFAULT; + + return 0; +} + +static int pf_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +{ + struct pf_unit *pf = inode->i_bdev->bd_disk->private_data; + + if (cmd != CDROMEJECT) + return -EINVAL; + + if (pf->access != 1) + return -EBUSY; + pf_eject(pf); return 0; } diff --git a/drivers/block/ps2esdi.c b/drivers/block/ps2esdi.c index 29d1518be72a..43415f69839f 100644 --- a/drivers/block/ps2esdi.c +++ b/drivers/block/ps2esdi.c @@ -81,8 +81,7 @@ static void (*current_int_handler) (u_int) = NULL; static void ps2esdi_normal_interrupt_handler(u_int); static void ps2esdi_initial_reset_int_handler(u_int); static void ps2esdi_geometry_int_handler(u_int); -static int ps2esdi_ioctl(struct inode *inode, struct file *file, - u_int cmd, u_long arg); +static int ps2esdi_getgeo(struct block_device *bdev, struct hd_geometry *geo); static int ps2esdi_read_status_words(int num_words, int max_words, u_short * buffer); @@ -132,7 +131,7 @@ static struct ps2esdi_i_struct ps2esdi_info[MAX_HD] = static struct block_device_operations ps2esdi_fops = { .owner = THIS_MODULE, - .ioctl = ps2esdi_ioctl, + .getgeo = ps2esdi_getgeo, }; static struct gendisk *ps2esdi_gendisk[2]; @@ -1058,21 +1057,13 @@ static void dump_cmd_complete_status(u_int int_ret_code) } -static int ps2esdi_ioctl(struct inode *inode, - struct file *file, u_int cmd, u_long arg) +static int ps2esdi_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct ps2esdi_i_struct *p = inode->i_bdev->bd_disk->private_data; - struct ps2esdi_geometry geom; - - if (cmd != HDIO_GETGEO) - return -EINVAL; - memset(&geom, 0, sizeof(geom)); - geom.heads = p->head; - geom.sectors = 
p->sect; - geom.cylinders = p->cyl; - geom.start = get_start_sect(inode->i_bdev); - if (copy_to_user((void __user *)arg, &geom, sizeof(geom))) - return -EFAULT; + struct ps2esdi_i_struct *p = bdev->bd_disk->private_data; + + geo->heads = p->head; + geo->sectors = p->sect; + geo->cylinders = p->cyl; return 0; } diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 9251f4131b53..c0cdc182a8b0 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -407,8 +407,7 @@ struct carm_array_info { static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent); static void carm_remove_one (struct pci_dev *pdev); -static int carm_bdev_ioctl(struct inode *ino, struct file *fil, - unsigned int cmd, unsigned long arg); +static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo); static struct pci_device_id carm_pci_tbl[] = { { PCI_VENDOR_ID_PROMISE, 0x8000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, }, @@ -426,7 +425,7 @@ static struct pci_driver carm_driver = { static struct block_device_operations carm_bd_ops = { .owner = THIS_MODULE, - .ioctl = carm_bdev_ioctl, + .getgeo = carm_bdev_getgeo, }; static unsigned int carm_host_id; @@ -434,32 +433,14 @@ static unsigned long carm_major_alloc; -static int carm_bdev_ioctl(struct inode *ino, struct file *fil, - unsigned int cmd, unsigned long arg) +static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - void __user *usermem = (void __user *) arg; - struct carm_port *port = ino->i_bdev->bd_disk->private_data; - struct hd_geometry geom; + struct carm_port *port = bdev->bd_disk->private_data; - switch (cmd) { - case HDIO_GETGEO: - if (!usermem) - return -EINVAL; - - geom.heads = (u8) port->dev_geom_head; - geom.sectors = (u8) port->dev_geom_sect; - geom.cylinders = port->dev_geom_cyl; - geom.start = get_start_sect(ino->i_bdev); - - if (copy_to_user(usermem, &geom, sizeof(geom))) - return -EFAULT; - return 0; - - default: - break; - } - - return -EOPNOTSUPP; + geo->heads = 
(u8) port->dev_geom_head; + geo->sectors = (u8) port->dev_geom_sect; + geo->cylinders = port->dev_geom_cyl; + return 0; } static const u32 msg_sizes[] = { 32, 64, 128, CARM_MSG_SIZE }; diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 0f48301342da..15299e7a1ade 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -809,34 +809,23 @@ static int mm_revalidate(struct gendisk *disk) set_capacity(disk, card->mm_size << 1); return 0; } -/* ------------------------------------------------------------------------------------ --- mm_ioctl ------------------------------------------------------------------------------------ -*/ -static int mm_ioctl(struct inode *i, struct file *f, unsigned int cmd, unsigned long arg) + +static int mm_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - if (cmd == HDIO_GETGEO) { - struct cardinfo *card = i->i_bdev->bd_disk->private_data; - int size = card->mm_size * (1024 / MM_HARDSECT); - struct hd_geometry geo; - /* - * get geometry: we have to fake one... trim the size to a - * multiple of 2048 (1M): tell we have 32 sectors, 64 heads, - * whatever cylinders. - */ - geo.heads = 64; - geo.sectors = 32; - geo.start = get_start_sect(i->i_bdev); - geo.cylinders = size / (geo.heads * geo.sectors); - - if (copy_to_user((void __user *) arg, &geo, sizeof(geo))) - return -EFAULT; - return 0; - } + struct cardinfo *card = bdev->bd_disk->private_data; + int size = card->mm_size * (1024 / MM_HARDSECT); - return -EINVAL; + /* + * get geometry: we have to fake one... trim the size to a + * multiple of 2048 (1M): tell we have 32 sectors, 64 heads, + * whatever cylinders. 
+ */ + geo->heads = 64; + geo->sectors = 32; + geo->cylinders = size / (geo->heads * geo->sectors); + return 0; } + /* ----------------------------------------------------------------------------------- -- mm_check_change @@ -855,7 +844,7 @@ static int mm_check_change(struct gendisk *disk) */ static struct block_device_operations mm_fops = { .owner = THIS_MODULE, - .ioctl = mm_ioctl, + .getgeo = mm_getgeo, .revalidate_disk= mm_revalidate, .media_changed = mm_check_change, }; diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index 063f0304a163..d1aaf31bd97e 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -247,43 +247,17 @@ static int viodasd_release(struct inode *ino, struct file *fil) /* External ioctl entry point. */ -static int viodasd_ioctl(struct inode *ino, struct file *fil, - unsigned int cmd, unsigned long arg) +static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - unsigned char sectors; - unsigned char heads; - unsigned short cylinders; - struct hd_geometry *geo; - struct gendisk *gendisk; - struct viodasd_device *d; + struct gendisk *disk = bdev->bd_disk; + struct viodasd_device *d = disk->private_data; - switch (cmd) { - case HDIO_GETGEO: - geo = (struct hd_geometry *)arg; - if (geo == NULL) - return -EINVAL; - if (!access_ok(VERIFY_WRITE, geo, sizeof(*geo))) - return -EFAULT; - gendisk = ino->i_bdev->bd_disk; - d = gendisk->private_data; - sectors = d->sectors; - if (sectors == 0) - sectors = 32; - heads = d->tracks; - if (heads == 0) - heads = 64; - cylinders = d->cylinders; - if (cylinders == 0) - cylinders = get_capacity(gendisk) / (sectors * heads); - if (__put_user(sectors, &geo->sectors) || - __put_user(heads, &geo->heads) || - __put_user(cylinders, &geo->cylinders) || - __put_user(get_start_sect(ino->i_bdev), &geo->start)) - return -EFAULT; - return 0; - } + geo->sectors = d->sectors ? d->sectors : 32; /* 32 if d->sectors is 0 */ + geo->heads = d->tracks ? d->tracks : 64; /* 64 if d->tracks is 0 */ + geo->cylinders = d->cylinders ? 
d->cylinders : + get_capacity(disk) / (geo->sectors * geo->heads); - return -EINVAL; + return 0; } /* @@ -293,7 +267,7 @@ static struct block_device_operations viodasd_fops = { .owner = THIS_MODULE, .open = viodasd_open, .release = viodasd_release, - .ioctl = viodasd_ioctl, + .getgeo = viodasd_getgeo, }; /* diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 68b6d7b154cf..97f5dab24b5a 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -128,9 +128,12 @@ static DEFINE_SPINLOCK(xd_lock); static struct gendisk *xd_gendisk[2]; +static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo); + static struct block_device_operations xd_fops = { .owner = THIS_MODULE, .ioctl = xd_ioctl, + .getgeo = xd_getgeo, }; static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int); static u_char xd_drives, xd_irq = 5, xd_dma = 3, xd_maxsectors; @@ -330,22 +333,20 @@ static void do_xd_request (request_queue_t * q) } } +static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + XD_INFO *p = bdev->bd_disk->private_data; + + geo->heads = p->heads; + geo->sectors = p->sectors; + geo->cylinders = p->cylinders; + return 0; +} + /* xd_ioctl: handle device ioctl's */ static int xd_ioctl (struct inode *inode,struct file *file,u_int cmd,u_long arg) { - XD_INFO *p = inode->i_bdev->bd_disk->private_data; - switch (cmd) { - case HDIO_GETGEO: - { - struct hd_geometry g; - struct hd_geometry __user *geom= (void __user *)arg; - g.heads = p->heads; - g.sectors = p->sectors; - g.cylinders = p->cylinders; - g.start = get_start_sect(inode->i_bdev); - return copy_to_user(geom, &g, sizeof(g)) ? 
-EFAULT : 0; - } case HDIO_SET_DMA: if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (xdc_busy) return -EBUSY; diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 4b441720b6ba..cab362ea0336 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -1130,6 +1130,17 @@ static int idedisk_release(struct inode *inode, struct file *filp) return 0; } +static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk); + ide_drive_t *drive = idkp->drive; + + geo->heads = drive->bios_head; + geo->sectors = drive->bios_sect; + geo->cylinders = (u16)drive->bios_cyl; /* truncate */ + return 0; +} + static int idedisk_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { @@ -1164,6 +1175,7 @@ static struct block_device_operations idedisk_ops = { .open = idedisk_open, .release = idedisk_release, .ioctl = idedisk_ioctl, + .getgeo = idedisk_getgeo, .media_changed = idedisk_media_changed, .revalidate_disk= idedisk_revalidate_disk }; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index fba3fffc2d66..5945f551aaaa 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -2031,6 +2031,17 @@ static int idefloppy_release(struct inode *inode, struct file *filp) return 0; } +static int idefloppy_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct ide_floppy_obj *floppy = ide_floppy_g(bdev->bd_disk); + ide_drive_t *drive = floppy->drive; + + geo->heads = drive->bios_head; + geo->sectors = drive->bios_sect; + geo->cylinders = (u16)drive->bios_cyl; /* truncate */ + return 0; +} + static int idefloppy_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { @@ -2120,6 +2131,7 @@ static struct block_device_operations idefloppy_ops = { .open = idefloppy_open, .release = idefloppy_release, .ioctl = idefloppy_ioctl, + .getgeo = idefloppy_getgeo, .media_changed = idefloppy_media_changed, 
.revalidate_disk= idefloppy_revalidate_disk }; diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 4b524f6b3ecd..b069b13b75a7 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -1278,19 +1278,6 @@ int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device up(&ide_setting_sem); switch (cmd) { - case HDIO_GETGEO: - { - struct hd_geometry geom; - if (!p || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL; - geom.heads = drive->bios_head; - geom.sectors = drive->bios_sect; - geom.cylinders = (u16)drive->bios_cyl; /* truncate */ - geom.start = get_start_sect(bdev); - if (copy_to_user(p, &geom, sizeof(struct hd_geometry))) - return -EFAULT; - return 0; - } - case HDIO_OBSOLETE_IDENTITY: case HDIO_GET_IDENTITY: if (bdev != bdev->bd_contains) diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c index 242029c9c0ca..6439dec66881 100644 --- a/drivers/ide/legacy/hd.c +++ b/drivers/ide/legacy/hd.c @@ -658,22 +658,14 @@ static void do_hd_request (request_queue_t * q) enable_irq(HD_IRQ); } -static int hd_ioctl(struct inode * inode, struct file * file, - unsigned int cmd, unsigned long arg) +static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct hd_i_struct *disk = inode->i_bdev->bd_disk->private_data; - struct hd_geometry __user *loc = (struct hd_geometry __user *) arg; - struct hd_geometry g; - - if (cmd != HDIO_GETGEO) - return -EINVAL; - if (!loc) - return -EINVAL; - g.heads = disk->head; - g.sectors = disk->sect; - g.cylinders = disk->cyl; - g.start = get_start_sect(inode->i_bdev); - return copy_to_user(loc, &g, sizeof g) ? 
-EFAULT : 0; + struct hd_i_struct *disk = bdev->bd_disk->private_data; + + geo->heads = disk->head; + geo->sectors = disk->sect; + geo->cylinders = disk->cyl; + return 0; } /* @@ -695,7 +687,7 @@ static irqreturn_t hd_interrupt(int irq, void *dev_id, struct pt_regs *regs) } static struct block_device_operations hd_fops = { - .ioctl = hd_ioctl, + .getgeo = hd_getgeo, }; /* diff --git a/drivers/md/md.c b/drivers/md/md.c index 1b76fb29fb70..e423a16ba3c9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3598,12 +3598,21 @@ static int set_disk_faulty(mddev_t *mddev, dev_t dev) return 0; } +static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + mddev_t *mddev = bdev->bd_disk->private_data; + + geo->heads = 2; + geo->sectors = 4; + geo->cylinders = get_capacity(mddev->gendisk) / 8; + return 0; +} + static int md_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { int err = 0; void __user *argp = (void __user *)arg; - struct hd_geometry __user *loc = argp; mddev_t *mddev = NULL; if (!capable(CAP_SYS_ADMIN)) @@ -3765,24 +3774,6 @@ static int md_ioctl(struct inode *inode, struct file *file, * 4 sectors (with a BIG number of cylinders...). This drives * dosfs just mad... 
;-) */ - case HDIO_GETGEO: - if (!loc) { - err = -EINVAL; - goto abort_unlock; - } - err = put_user (2, (char __user *) &loc->heads); - if (err) - goto abort_unlock; - err = put_user (4, (char __user *) &loc->sectors); - if (err) - goto abort_unlock; - err = put_user(get_capacity(mddev->gendisk)/8, - (short __user *) &loc->cylinders); - if (err) - goto abort_unlock; - err = put_user (get_start_sect(inode->i_bdev), - (long __user *) &loc->start); - goto done_unlock; } /* @@ -3911,6 +3902,7 @@ static struct block_device_operations md_fops = .open = md_open, .release = md_release, .ioctl = md_ioctl, + .getgeo = md_getgeo, .media_changed = md_media_changed, .revalidate_disk= md_revalidate, }; diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 5b1febed3133..b09fb6307153 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -662,6 +662,13 @@ static int i2o_block_release(struct inode *inode, struct file *file) return 0; } +static int i2o_block_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + i2o_block_biosparam(get_capacity(bdev->bd_disk), + &geo->cylinders, &geo->heads, &geo->sectors); + return 0; +} + /** * i2o_block_ioctl - Issue device specific ioctl calls. * @cmd: ioctl command @@ -676,7 +683,6 @@ static int i2o_block_ioctl(struct inode *inode, struct file *file, { struct gendisk *disk = inode->i_bdev->bd_disk; struct i2o_block_device *dev = disk->private_data; - void __user *argp = (void __user *)arg; /* Anyone capable of this syscall can do *real bad* things */ @@ -684,15 +690,6 @@ static int i2o_block_ioctl(struct inode *inode, struct file *file, return -EPERM; switch (cmd) { - case HDIO_GETGEO: - { - struct hd_geometry g; - i2o_block_biosparam(get_capacity(disk), - &g.cylinders, &g.heads, &g.sectors); - g.start = get_start_sect(inode->i_bdev); - return copy_to_user(argp, &g, sizeof(g)) ? 
-EFAULT : 0; - } - case BLKI2OGRSTRAT: return put_user(dev->rcache, (int __user *)arg); case BLKI2OGWSTRAT: @@ -962,6 +959,7 @@ static struct block_device_operations i2o_block_fops = { .open = i2o_block_open, .release = i2o_block_release, .ioctl = i2o_block_ioctl, + .getgeo = i2o_block_getgeo, .media_changed = i2o_block_media_changed }; diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c index 198561d21710..d5f28981596b 100644 --- a/drivers/mmc/mmc_block.c +++ b/drivers/mmc/mmc_block.c @@ -113,31 +113,18 @@ static int mmc_blk_release(struct inode *inode, struct file *filp) } static int -mmc_blk_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) +mmc_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct block_device *bdev = inode->i_bdev; - - if (cmd == HDIO_GETGEO) { - struct hd_geometry geo; - - memset(&geo, 0, sizeof(struct hd_geometry)); - - geo.cylinders = get_capacity(bdev->bd_disk) / (4 * 16); - geo.heads = 4; - geo.sectors = 16; - geo.start = get_start_sect(bdev); - - return copy_to_user((void __user *)arg, &geo, sizeof(geo)) - ? 
-EFAULT : 0; - } - - return -ENOTTY; + geo->cylinders = get_capacity(bdev->bd_disk) / (4 * 16); + geo->heads = 4; + geo->sectors = 16; + return 0; } static struct block_device_operations mmc_bdops = { .open = mmc_blk_open, .release = mmc_blk_release, - .ioctl = mmc_blk_ioctl, + .getgeo = mmc_blk_getgeo, .owner = THIS_MODULE, }; diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 339cb1218eaa..7f3ff500b68e 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -194,6 +194,14 @@ static int blktrans_release(struct inode *i, struct file *f) return ret; } +static int blktrans_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct mtd_blktrans_dev *dev = bdev->bd_disk->private_data; + + if (dev->tr->getgeo) + return dev->tr->getgeo(dev, geo); + return -ENOTTY; +} static int blktrans_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) @@ -207,22 +215,6 @@ static int blktrans_ioctl(struct inode *inode, struct file *file, return tr->flush(dev); /* The core code did the work, we had nothing to do. 
*/ return 0; - - case HDIO_GETGEO: - if (tr->getgeo) { - struct hd_geometry g; - int ret; - - memset(&g, 0, sizeof(g)); - ret = tr->getgeo(dev, &g); - if (ret) - return ret; - - g.start = get_start_sect(inode->i_bdev); - if (copy_to_user((void __user *)arg, &g, sizeof(g))) - return -EFAULT; - return 0; - } /* else */ default: return -ENOTTY; } @@ -233,6 +225,7 @@ struct block_device_operations mtd_blktrans_ops = { .open = blktrans_open, .release = blktrans_release, .ioctl = blktrans_ioctl, + .getgeo = blktrans_getgeo, }; int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index f779f674dfa0..2472fa1a1be1 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -1723,12 +1724,34 @@ dasd_release(struct inode *inp, struct file *filp) return 0; } +/* + * Return disk geometry. + */ +static int +dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct dasd_device *device; + + device = bdev->bd_disk->private_data; + if (!device) + return -ENODEV; + + if (!device->discipline || + !device->discipline->fill_geometry) + return -EINVAL; + + device->discipline->fill_geometry(device, geo); + geo->start = get_start_sect(bdev) >> device->s2b_shift; + return 0; +} + struct block_device_operations dasd_device_operations = { .owner = THIS_MODULE, .open = dasd_open, .release = dasd_release, .ioctl = dasd_ioctl, + .getgeo = dasd_getgeo, }; diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 044b75371990..8e4dcd58599e 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -485,33 +485,6 @@ dasd_ioctl_set_ro(struct block_device *bdev, int no, long args) return rc; } -/* - * Return disk geometry. 
- */ -static int -dasd_ioctl_getgeo(struct block_device *bdev, int no, long args) -{ - struct hd_geometry geo = { 0, }; - struct dasd_device *device; - - device = bdev->bd_disk->private_data; - if (device == NULL) - return -ENODEV; - - if (device == NULL || device->discipline == NULL || - device->discipline->fill_geometry == NULL) - return -EINVAL; - - geo = (struct hd_geometry) {}; - device->discipline->fill_geometry(device, &geo); - geo.start = get_start_sect(bdev) >> device->s2b_shift; - if (copy_to_user((struct hd_geometry __user *) args, &geo, - sizeof (struct hd_geometry))) - return -EFAULT; - - return 0; -} - /* * List of static ioctls. */ @@ -528,7 +501,6 @@ static struct { int no; dasd_ioctl_fn_t fn; } dasd_ioctls[] = { BIODASDPRRST, dasd_ioctl_reset_profile }, { BLKROSET, dasd_ioctl_set_ro }, { DASDAPIVER, dasd_ioctl_api_version }, - { HDIO_GETGEO, dasd_ioctl_getgeo }, { -1, NULL } }; diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index bf3a67c3cc5e..54ecd548c318 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -328,31 +328,27 @@ fail: return 0; } -static int xpram_ioctl (struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo) { - struct hd_geometry __user *geo; unsigned long size; - if (cmd != HDIO_GETGEO) - return -EINVAL; + /* * get geometry: we have to fake one... trim the size to a * multiple of 64 (32k): tell we have 16 sectors, 4 heads, * whatever cylinders. Tell also that data starts at sector. 4. 
*/ - geo = (struct hd_geometry __user *) arg; size = (xpram_pages * 8) & ~0x3f; - put_user(size >> 6, &geo->cylinders); - put_user(4, &geo->heads); - put_user(16, &geo->sectors); - put_user(4, &geo->start); + geo->cylinders = size >> 6; + geo->heads = 4; + geo->sectors = 16; + geo->start = 4; return 0; } static struct block_device_operations xpram_devops = { .owner = THIS_MODULE, - .ioctl = xpram_ioctl, + .getgeo = xpram_getgeo, }; /* diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 32d4d8d7b9f3..4c5127ed379c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -527,7 +527,7 @@ static int sd_release(struct inode *inode, struct file *filp) return 0; } -static int sd_hdio_getgeo(struct block_device *bdev, struct hd_geometry __user *loc) +static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo) { struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdp = sdkp->device; @@ -545,15 +545,9 @@ static int sd_hdio_getgeo(struct block_device *bdev, struct hd_geometry __user * else scsicam_bios_param(bdev, sdkp->capacity, diskinfo); - if (put_user(diskinfo[0], &loc->heads)) - return -EFAULT; - if (put_user(diskinfo[1], &loc->sectors)) - return -EFAULT; - if (put_user(diskinfo[2], &loc->cylinders)) - return -EFAULT; - if (put_user((unsigned)get_start_sect(bdev), - (unsigned long __user *)&loc->start)) - return -EFAULT; + geo->heads = diskinfo[0]; + geo->sectors = diskinfo[1]; + geo->cylinders = diskinfo[2]; return 0; } @@ -593,12 +587,6 @@ static int sd_ioctl(struct inode * inode, struct file * filp, if (!scsi_block_when_processing_errors(sdp) || !error) return error; - if (cmd == HDIO_GETGEO) { - if (!arg) - return -EINVAL; - return sd_hdio_getgeo(bdev, p); - } - /* * Send SCSI addressing ioctls directly to mid level, send other * ioctls to block level and then onto mid level if they can't be @@ -800,6 +788,7 @@ static struct block_device_operations sd_fops = { .open = sd_open, .release = sd_release, .ioctl = sd_ioctl, + .getgeo = 
sd_getgeo, #ifdef CONFIG_COMPAT .compat_ioctl = sd_compat_ioctl, #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 74c01aabd4ab..7f140480c6a8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -224,6 +224,7 @@ extern int dir_notify_enable; #include #include +struct hd_geometry; struct iovec; struct nameidata; struct kiocb; @@ -962,6 +963,7 @@ struct block_device_operations { int (*direct_access) (struct block_device *, sector_t, unsigned long *); int (*media_changed) (struct gendisk *); int (*revalidate_disk) (struct gendisk *); + int (*getgeo)(struct block_device *, struct hd_geometry *); struct module *owner; }; -- cgit v1.2.3-71-gd317 From bf066c7db775a04bd761f8ea206f5522d0cf40ff Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sun, 8 Jan 2006 01:03:19 -0800 Subject: [PATCH] shared mounts: cleanup Small cleanups in shared mounts code. Signed-off-by: Miklos Szeredi Cc: Ram Pai Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 2 +- fs/pnode.c | 2 +- include/linux/fs.h | 2 +- include/linux/mount.h | 3 ++- 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/namespace.c b/fs/namespace.c index e5aa1eeb5748..3e8fb61ad597 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -451,7 +451,7 @@ EXPORT_SYMBOL(may_umount); void release_mounts(struct list_head *head) { struct vfsmount *mnt; - while(!list_empty(head)) { + while (!list_empty(head)) { mnt = list_entry(head->next, struct vfsmount, mnt_hash); list_del_init(&mnt->mnt_hash); if (mnt->mnt_parent != mnt) { diff --git a/fs/pnode.c b/fs/pnode.c index aeeec8ba8dd2..f1871f773f64 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -103,7 +103,7 @@ static struct vfsmount *propagation_next(struct vfsmount *m, struct vfsmount *next; struct vfsmount *master = m->mnt_master; - if ( master == origin->mnt_master ) { + if (master == origin->mnt_master) { next = next_peer(m); return ((next == origin) ? 
NULL : next); } else if (m->mnt_slave.next != &master->mnt_slave_list) diff --git a/include/linux/fs.h b/include/linux/fs.h index 7f140480c6a8..a1e28f0895c0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -103,11 +103,11 @@ extern int dir_notify_enable; #define MS_MOVE 8192 #define MS_REC 16384 #define MS_VERBOSE 32768 +#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ #define MS_UNBINDABLE (1<<17) /* change to unbindable */ #define MS_PRIVATE (1<<18) /* change to private */ #define MS_SLAVE (1<<19) /* change to slave */ #define MS_SHARED (1<<20) /* change to shared */ -#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) diff --git a/include/linux/mount.h b/include/linux/mount.h index dd4e83eba933..b98a709f1794 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -22,7 +22,8 @@ #define MNT_NOEXEC 0x04 #define MNT_SHARED 0x10 /* if the vfsmount is a shared mount */ #define MNT_UNBINDABLE 0x20 /* if the vfsmount is a unbindable mount */ -#define MNT_PNODE_MASK 0x30 /* propogation flag mask */ + +#define MNT_PNODE_MASK (MNT_SHARED | MNT_UNBINDABLE) struct vfsmount { struct list_head mnt_hash; -- cgit v1.2.3-71-gd317 From 5160ee6fc891a9ca114be0e90fa6655647bb64b2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Jan 2006 01:03:32 -0800 Subject: [PATCH] shrink dentry struct A long time ago, the dentry struct was carefully tuned so that on 32-bit UP, sizeof(struct dentry) was exactly 128, i.e. a power of 2, and a multiple of memory cache lines. Then RCU was added and the dentry struct was enlarged by two pointers, with nice results for SMP, but not so good on UP, because it broke the above tuning (128 + 8 = 136 bytes). This patch reverts this unwanted side effect by using a union (d_u), where d_rcu and d_child are placed so that these two fields can share their memory needs. 
At the time d_free() is called (and d_rcu is really used), d_child is known to be empty and not touched by the dentry freeing. Lockless lookups only access d_name, d_parent, d_lock, d_op, d_flags (so the previous content of d_child is not needed if said dentry was unhashed but still accessed by a CPU because of RCU constraints) As dentry cache easily contains millions of entries, a size reduction is worth the extra complexity of the ugly C union. Signed-off-by: Eric Dumazet Cc: Dipankar Sarma Cc: Maneesh Soni Cc: Miklos Szeredi Cc: "Paul E. McKenney" Cc: Ian Kent Cc: Paul Jackson Cc: Al Viro Cc: Christoph Hellwig Cc: Trond Myklebust Cc: Neil Brown Cc: James Morris Cc: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/usb/core/inode.c | 6 +++--- fs/autofs4/autofs_i.h | 2 +- fs/autofs4/expire.c | 12 ++++++------ fs/autofs4/inode.c | 4 ++-- fs/autofs4/root.c | 3 ++- fs/coda/cache.c | 2 +- fs/dcache.c | 34 +++++++++++++++++----------------- fs/libfs.c | 12 ++++++------ fs/ncpfs/dir.c | 2 +- fs/ncpfs/ncplib_kernel.h | 4 ++-- fs/smbfs/cache.c | 4 ++-- include/linux/dcache.h | 9 +++++++-- kernel/cpuset.c | 4 ++-- net/sunrpc/rpc_pipe.c | 2 +- security/selinux/selinuxfs.c | 2 +- 15 files changed, 54 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index c44bbedec817..4ddc453023a2 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -186,7 +186,7 @@ static void update_bus(struct dentry *bus) down(&bus->d_inode->i_sem); - list_for_each_entry(dev, &bus->d_subdirs, d_child) + list_for_each_entry(dev, &bus->d_subdirs, d_u.d_child) if (dev->d_inode) update_dev(dev); @@ -203,7 +203,7 @@ static void update_sb(struct super_block *sb) down(&root->d_inode->i_sem); - list_for_each_entry(bus, &root->d_subdirs, d_child) { + list_for_each_entry(bus, &root->d_subdirs, d_u.d_child) { if (bus->d_inode) { switch (S_IFMT & bus->d_inode->i_mode) { case S_IFDIR: @@ 
-319,7 +319,7 @@ static int usbfs_empty (struct dentry *dentry) spin_lock(&dcache_lock); list_for_each(list, &dentry->d_subdirs) { - struct dentry *de = list_entry(list, struct dentry, d_child); + struct dentry *de = list_entry(list, struct dentry, d_u.d_child); if (usbfs_positive(de)) { spin_unlock(&dcache_lock); return 0; diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index fca83e28edcf..385bed09b0d8 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -209,7 +209,7 @@ static inline int simple_empty_nolock(struct dentry *dentry) struct dentry *child; int ret = 0; - list_for_each_entry(child, &dentry->d_subdirs, d_child) + list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) if (simple_positive(child)) goto out; ret = 1; diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index feb6ac427d05..dc39589df165 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -105,7 +105,7 @@ repeat: next = this_parent->d_subdirs.next; resume: while (next != &this_parent->d_subdirs) { - struct dentry *dentry = list_entry(next, struct dentry, d_child); + struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - give up */ if (!simple_positive(dentry)) { @@ -138,7 +138,7 @@ resume: } if (this_parent != top) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; goto resume; } @@ -163,7 +163,7 @@ repeat: next = this_parent->d_subdirs.next; resume: while (next != &this_parent->d_subdirs) { - struct dentry *dentry = list_entry(next, struct dentry, d_child); + struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - give up */ if (!simple_positive(dentry)) { @@ -199,7 +199,7 @@ cont: } if (this_parent != parent) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; goto resume; } @@ -238,7 +238,7 @@ static struct dentry *autofs4_expire(struct super_block *sb, /* 
On exit from the loop expire is set to a dgot dentry * to expire or it's NULL */ while ( next != &root->d_subdirs ) { - struct dentry *dentry = list_entry(next, struct dentry, d_child); + struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - give up */ if ( !simple_positive(dentry) ) { @@ -302,7 +302,7 @@ next: expired, (int)expired->d_name.len, expired->d_name.name); spin_lock(&dcache_lock); list_del(&expired->d_parent->d_subdirs); - list_add(&expired->d_parent->d_subdirs, &expired->d_child); + list_add(&expired->d_parent->d_subdirs, &expired->d_u.d_child); spin_unlock(&dcache_lock); return expired; } diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 818b37be5153..2d3082854a29 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -91,7 +91,7 @@ repeat: next = this_parent->d_subdirs.next; resume: while (next != &this_parent->d_subdirs) { - struct dentry *dentry = list_entry(next, struct dentry, d_child); + struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child); /* Negative dentry - don`t care */ if (!simple_positive(dentry)) { @@ -117,7 +117,7 @@ resume: if (this_parent != sbi->root) { struct dentry *dentry = this_parent; - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; spin_unlock(&dcache_lock); DPRINTK("parent dentry %p %.*s", diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 2a771ec66956..2241405ffc41 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -143,7 +143,8 @@ static int autofs4_dcache_readdir(struct file * filp, void * dirent, filldir_t f } while(1) { - struct dentry *de = list_entry(list, struct dentry, d_child); + struct dentry *de = list_entry(list, + struct dentry, d_u.d_child); if (!d_unhashed(de) && de->d_inode) { spin_unlock(&dcache_lock); diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 80072fd9b7fa..c607d923350a 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -93,7 +93,7 @@ static void 
coda_flag_children(struct dentry *parent, int flag) spin_lock(&dcache_lock); list_for_each(child, &parent->d_subdirs) { - de = list_entry(child, struct dentry, d_child); + de = list_entry(child, struct dentry, d_u.d_child); /* don't know what to do with negative dentries */ if ( ! de->d_inode ) continue; diff --git a/fs/dcache.c b/fs/dcache.c index 17e439138681..1536f15c4d4c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -71,7 +71,7 @@ struct dentry_stat_t dentry_stat = { static void d_callback(struct rcu_head *head) { - struct dentry * dentry = container_of(head, struct dentry, d_rcu); + struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu); if (dname_external(dentry)) kfree(dentry->d_name.name); @@ -86,7 +86,7 @@ static void d_free(struct dentry *dentry) { if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); - call_rcu(&dentry->d_rcu, d_callback); + call_rcu(&dentry->d_u.d_rcu, d_callback); } /* @@ -193,7 +193,7 @@ kill_it: { list_del(&dentry->d_lru); dentry_stat.nr_unused--; } - list_del(&dentry->d_child); + list_del(&dentry->d_u.d_child); dentry_stat.nr_dentry--; /* For d_free, below */ /*drops the locks, at that point nobody can reach this dentry */ dentry_iput(dentry); @@ -367,7 +367,7 @@ static inline void prune_one_dentry(struct dentry * dentry) struct dentry * parent; __d_drop(dentry); - list_del(&dentry->d_child); + list_del(&dentry->d_u.d_child); dentry_stat.nr_dentry--; /* For d_free, below */ dentry_iput(dentry); parent = dentry->d_parent; @@ -518,7 +518,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; /* Have we found a mount point ? */ if (d_mountpoint(dentry)) @@ -532,7 +532,7 @@ resume: * All done at this level ... ascend and resume the search. 
*/ if (this_parent != parent) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; goto resume; } @@ -569,7 +569,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; if (!list_empty(&dentry->d_lru)) { @@ -610,7 +610,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name, found); * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; this_parent = this_parent->d_parent; #ifdef DCACHE_DEBUG printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n", @@ -753,12 +753,12 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dentry->d_parent = dget(parent); dentry->d_sb = parent->d_sb; } else { - INIT_LIST_HEAD(&dentry->d_child); + INIT_LIST_HEAD(&dentry->d_u.d_child); } spin_lock(&dcache_lock); if (parent) - list_add(&dentry->d_child, &parent->d_subdirs); + list_add(&dentry->d_u.d_child, &parent->d_subdirs); dentry_stat.nr_dentry++; spin_unlock(&dcache_lock); @@ -1310,8 +1310,8 @@ already_unhashed: /* Unhash the target: dput() will then get rid of it */ __d_drop(target); - list_del(&dentry->d_child); - list_del(&target->d_child); + list_del(&dentry->d_u.d_child); + list_del(&target->d_u.d_child); /* Switch the names.. 
*/ switch_names(dentry, target); @@ -1322,15 +1322,15 @@ already_unhashed: if (IS_ROOT(dentry)) { dentry->d_parent = target->d_parent; target->d_parent = target; - INIT_LIST_HEAD(&target->d_child); + INIT_LIST_HEAD(&target->d_u.d_child); } else { do_switch(dentry->d_parent, target->d_parent); /* And add them back to the (new) parent lists */ - list_add(&target->d_child, &target->d_parent->d_subdirs); + list_add(&target->d_u.d_child, &target->d_parent->d_subdirs); } - list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); + list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); spin_unlock(&target->d_lock); spin_unlock(&dentry->d_lock); write_sequnlock(&rename_lock); @@ -1568,7 +1568,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; if (d_unhashed(dentry)||!dentry->d_inode) continue; @@ -1579,7 +1579,7 @@ resume: atomic_dec(&dentry->d_count); } if (this_parent != root) { - next = this_parent->d_child.next; + next = this_parent->d_u.d_child.next; atomic_dec(&this_parent->d_count); this_parent = this_parent->d_parent; goto resume; diff --git a/fs/libfs.c b/fs/libfs.c index 58101dff2c66..9c50523382e7 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -93,16 +93,16 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) loff_t n = file->f_pos - 2; spin_lock(&dcache_lock); - list_del(&cursor->d_child); + list_del(&cursor->d_u.d_child); p = file->f_dentry->d_subdirs.next; while (n && p != &file->f_dentry->d_subdirs) { struct dentry *next; - next = list_entry(p, struct dentry, d_child); + next = list_entry(p, struct dentry, d_u.d_child); if (!d_unhashed(next) && next->d_inode) n--; p = p->next; } - list_add_tail(&cursor->d_child, p); + list_add_tail(&cursor->d_u.d_child, p); spin_unlock(&dcache_lock); } } @@ -126,7 +126,7 @@ int dcache_readdir(struct file * 
filp, void * dirent, filldir_t filldir) { struct dentry *dentry = filp->f_dentry; struct dentry *cursor = filp->private_data; - struct list_head *p, *q = &cursor->d_child; + struct list_head *p, *q = &cursor->d_u.d_child; ino_t ino; int i = filp->f_pos; @@ -153,7 +153,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) } for (p=q->next; p != &dentry->d_subdirs; p=p->next) { struct dentry *next; - next = list_entry(p, struct dentry, d_child); + next = list_entry(p, struct dentry, d_u.d_child); if (d_unhashed(next) || !next->d_inode) continue; @@ -261,7 +261,7 @@ int simple_empty(struct dentry *dentry) int ret = 0; spin_lock(&dcache_lock); - list_for_each_entry(child, &dentry->d_subdirs, d_child) + list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) if (simple_positive(child)) goto out; ret = 1; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index a9f7a8ab1d59..cfd76f431dc0 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -365,7 +365,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dent = list_entry(next, struct dentry, d_child); + dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) dget_locked(dent); diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 9e4dc30c2435..799e5c2bec55 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -196,7 +196,7 @@ ncp_renew_dentries(struct dentry *parent) spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_child); + dentry = list_entry(next, struct dentry, d_u.d_child); if (dentry->d_fsdata == NULL) ncp_age_dentry(server, dentry); @@ -218,7 +218,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent) spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != 
&parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_child); + dentry = list_entry(next, struct dentry, d_u.d_child); dentry->d_fsdata = NULL; ncp_age_dentry(server, dentry); next = next->next; diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c index f3e6b81288ab..74b86d9725a6 100644 --- a/fs/smbfs/cache.c +++ b/fs/smbfs/cache.c @@ -66,7 +66,7 @@ smb_invalidate_dircache_entries(struct dentry *parent) spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_child); + dentry = list_entry(next, struct dentry, d_u.d_child); dentry->d_fsdata = NULL; smb_age_dentry(server, dentry); next = next->next; @@ -100,7 +100,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) spin_lock(&dcache_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dent = list_entry(next, struct dentry, d_child); + dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) dget_locked(dent); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 46a2ba617595..a3ed5e059d47 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -95,14 +95,19 @@ struct dentry { struct qstr d_name; struct list_head d_lru; /* LRU list */ - struct list_head d_child; /* child of parent list */ + /* + * d_child and d_rcu can share memory + */ + union { + struct list_head d_child; /* child of parent list */ + struct rcu_head d_rcu; + } d_u; struct list_head d_subdirs; /* our children */ struct list_head d_alias; /* inode alias list */ unsigned long d_time; /* used by d_revalidate */ struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ void *d_fsdata; /* fs-specific data */ - struct rcu_head d_rcu; struct dcookie_struct *d_cookie; /* cookie, if any */ int d_mounted; unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ diff --git a/kernel/cpuset.c 
b/kernel/cpuset.c index e04c2da9dadb..eab64e23bcae 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -331,7 +331,7 @@ static void cpuset_d_remove_dir(struct dentry *dentry) spin_lock(&dcache_lock); node = dentry->d_subdirs.next; while (node != &dentry->d_subdirs) { - struct dentry *d = list_entry(node, struct dentry, d_child); + struct dentry *d = list_entry(node, struct dentry, d_u.d_child); list_del_init(node); if (d->d_inode) { d = dget_locked(d); @@ -343,7 +343,7 @@ static void cpuset_d_remove_dir(struct dentry *dentry) } node = dentry->d_subdirs.next; } - list_del_init(&dentry->d_child); + list_del_init(&dentry->d_u.d_child); spin_unlock(&dcache_lock); remove_dir(dentry); } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 24cc23af9b95..e14c1cae7460 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -495,7 +495,7 @@ rpc_depopulate(struct dentry *parent) repeat: spin_lock(&dcache_lock); list_for_each_safe(pos, next, &parent->d_subdirs) { - dentry = list_entry(pos, struct dentry, d_child); + dentry = list_entry(pos, struct dentry, d_u.d_child); spin_lock(&dentry->d_lock); if (!d_unhashed(dentry)) { dget_locked(dentry); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index e59da6398d44..b5fa02d17b1e 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -889,7 +889,7 @@ static void sel_remove_bools(struct dentry *de) spin_lock(&dcache_lock); node = de->d_subdirs.next; while (node != &de->d_subdirs) { - struct dentry *d = list_entry(node, struct dentry, d_child); + struct dentry *d = list_entry(node, struct dentry, d_u.d_child); list_del_init(node); if (d->d_inode) { -- cgit v1.2.3-71-gd317 From e78c9a004aadebe22306c81d1a7f1d1278dc37f9 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sun, 8 Jan 2006 01:03:39 -0800 Subject: [PATCH] fs: remove s_old_blocksize from struct super_block This patch inlines the single user of struct super_block field s_old_blocksize and removes the field. 
Signed-off-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/super.c | 3 +-- include/linux/fs.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/super.c b/fs/super.c index 5a347a4f673a..0a30e51692cf 100644 --- a/fs/super.c +++ b/fs/super.c @@ -700,8 +700,7 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, s->s_flags = flags; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); - s->s_old_blocksize = block_size(bdev); - sb_set_blocksize(s, s->s_old_blocksize); + sb_set_blocksize(s, block_size(bdev)); error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); if (error) { up_write(&s->s_umount); diff --git a/include/linux/fs.h b/include/linux/fs.h index a1e28f0895c0..4c82219b0fae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -808,7 +808,6 @@ struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ unsigned long s_blocksize; - unsigned long s_old_blocksize; unsigned char s_blocksize_bits; unsigned char s_dirt; unsigned long long s_maxbytes; /* Max file size */ -- cgit v1.2.3-71-gd317 From f867bac65419a98c9682f4409e087582d29ec5f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Jan 2006 01:03:40 -0800 Subject: [PATCH] remove unused blkp field in percpu_data I found that the blkp field is not used anywhere in the kernel tree.
Since NR_CPUS is usually a power of two, and kmalloc() block sizes are powers of two as well, this extra field basically doubles the memory allocated in __alloc_percpu() to store the 'struct percpu_data' (for example, with NR_CPUS=8 on i386, kmalloc(4*8+4) returns a 64-byte block before this patch, whereas kmalloc(4*8) returns a 32-byte block after it). Signed-off-by: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 20317d88deba..cb9039a21f2a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -19,7 +19,6 @@ struct percpu_data { void *ptrs[NR_CPUS]; - void *blkp; }; /* -- cgit v1.2.3-71-gd317 From fd285bb54d8a3e99810090ae88cfe8ed77d1da25 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 8 Jan 2006 01:04:07 -0800 Subject: [PATCH] Abandon gcc-2.95.x There's one scsi driver which doesn't compile due to weird __VA_ARGS__ tricks and the rather useful scsi/sd.c is currently getting an ICE. None of the new SAS code compiles, due to extensive use of anonymous unions. The V4L guys are very good at exploiting the gcc-2.95.x macro expansion bug (_why_ does each driver need to implement its own debug macros?) and various people keep on sneaking in anonymous unions, which are rather nice. Plus anonymous unions are rather useful. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc2.h | 29 ----------------------------- include/linux/compiler.h | 2 -- init/main.c | 7 +------ 3 files changed, 1 insertion(+), 37 deletions(-) delete mode 100644 include/linux/compiler-gcc2.h (limited to 'include/linux') diff --git a/include/linux/compiler-gcc2.h b/include/linux/compiler-gcc2.h deleted file mode 100644 index ebed17660c5f..000000000000 --- a/include/linux/compiler-gcc2.h +++ /dev/null @@ -1,29 +0,0 @@ -/* Never include this file directly. Include instead.
*/ - -/* These definitions are for GCC v2.x. */ - -/* Somewhere in the middle of the GCC 2.96 development cycle, we implemented - a mechanism by which the user can annotate likely branch directions and - expect the blocks to be reordered appropriately. Define __builtin_expect - to nothing for earlier compilers. */ -#include - -#if __GNUC_MINOR__ < 96 -# define __builtin_expect(x, expected_value) (x) -#endif - -#define __attribute_used__ __attribute__((__unused__)) - -/* - * The attribute `pure' is not implemented in GCC versions earlier - * than 2.96. - */ -#if __GNUC_MINOR__ >= 96 -# define __attribute_pure__ __attribute__((pure)) -# define __attribute_const__ __attribute__((__const__)) -#endif - -/* GCC 2.95.x/2.96 recognize __va_copy, but not va_copy. Actually later GCC's - * define both va_copy and __va_copy, but the latter may go away, so limit this - * to this header */ -#define va_copy __va_copy diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d7378215b851..f23d3c6fc2c0 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -42,8 +42,6 @@ extern void __chk_io_ptr(void __iomem *); # include #elif __GNUC__ == 3 # include -#elif __GNUC__ == 2 -# include #else # error Sorry, your compiler is too old/not recognized. #endif diff --git a/init/main.c b/init/main.c index afe5eb84ad52..8342c2890b16 100644 --- a/init/main.c +++ b/init/main.c @@ -58,11 +58,6 @@ * This is one of the first .c files built. Error out early * if we have compiler trouble.. */ -#if __GNUC__ == 2 && __GNUC_MINOR__ == 96 -#ifdef CONFIG_FRAME_POINTER -#error This compiler cannot compile correctly with frame pointers enabled -#endif -#endif #ifdef CONFIG_X86_LOCAL_APIC #include @@ -74,7 +69,7 @@ * To avoid associated bogus bug reports, we flatly refuse to compile * with a gcc that is known to be too old from the very beginning. 
*/ -#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95) +#if (__GNUC__ < 3) || (__GNUC__ == 3 && __GNUC_MINOR__ < 2) #error Sorry, your GCC is too old. It builds incorrect kernels. #endif -- cgit v1.2.3-71-gd317 From a1365647022eb05a5993f270a78e9bef3bf554eb Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 8 Jan 2006 01:04:09 -0800 Subject: [PATCH] remove gcc-2 checks Remove various things which were checking for gcc-1.x and gcc-2.x compilers. From: Adrian Bunk Some documentation updates and removes some code paths for gcc < 3.2. Acked-by: Russell King Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/Changes | 31 +++++-------------------------- README | 7 ++----- arch/arm/kernel/asm-offsets.c | 9 ++------- arch/arm26/kernel/asm-offsets.c | 7 ------- arch/i386/Kconfig | 4 ---- arch/i386/Makefile | 5 +---- arch/i386/Makefile.cpu | 10 +++++----- arch/ia64/Makefile | 4 ---- arch/ia64/kernel/head.S | 2 +- arch/ia64/kernel/ia64_ksyms.c | 2 +- arch/ia64/oprofile/backtrace.c | 2 +- drivers/md/raid0.c | 6 ------ drivers/media/video/v4l2-common.c | 2 -- fs/ocfs2/cluster/masklog.h | 7 +++---- fs/xfs/xfs_log.h | 8 +------- include/asm-alpha/compiler.h | 2 -- include/asm-alpha/processor.h | 21 --------------------- include/asm-ia64/bug.h | 6 +----- include/asm-ia64/spinlock.h | 2 +- include/asm-sparc64/system.h | 4 ---- include/asm-um/rwsem.h | 4 ---- include/asm-v850/unistd.h | 18 ------------------ include/linux/byteorder/generic.h | 2 +- include/linux/byteorder/swab.h | 2 +- include/linux/byteorder/swabb.h | 2 +- include/linux/compiler-gcc.h | 9 +++++++++ include/linux/compiler-gcc3.h | 17 ----------------- include/linux/compiler-gcc4.h | 7 ------- include/linux/kernel.h | 2 -- include/linux/seccomp.h | 6 +----- include/linux/spinlock_types_up.h | 14 -------------- sound/isa/wavefront/wavefront_synth.c | 7 ------- 32 files changed, 37 insertions(+), 194 deletions(-) (limited to 'include/linux') diff --git 
a/Documentation/Changes b/Documentation/Changes index 86b86399d61d..fe5ae0f55020 100644 --- a/Documentation/Changes +++ b/Documentation/Changes @@ -31,8 +31,6 @@ al espa Eine deutsche Version dieser Datei finden Sie unter . -Last updated: October 29th, 2002 - Chris Ricker (kaboom@gatech.edu or chris.ricker@genetics.utah.edu). Current Minimal Requirements @@ -48,7 +46,7 @@ necessary on all systems; obviously, if you don't have any ISDN hardware, for example, you probably needn't concern yourself with isdn4k-utils. -o Gnu C 2.95.3 # gcc --version +o Gnu C 3.2 # gcc --version o Gnu make 3.79.1 # make --version o binutils 2.12 # ld -v o util-linux 2.10o # fdformat --version @@ -74,26 +72,7 @@ GCC --- The gcc version requirements may vary depending on the type of CPU in your -computer. The next paragraph applies to users of x86 CPUs, but not -necessarily to users of other CPUs. Users of other CPUs should obtain -information about their gcc version requirements from another source. - -The recommended compiler for the kernel is gcc 2.95.x (x >= 3), and it -should be used when you need absolute stability. You may use gcc 3.0.x -instead if you wish, although it may cause problems. Later versions of gcc -have not received much testing for Linux kernel compilation, and there are -almost certainly bugs (mainly, but not exclusively, in the kernel) that -will need to be fixed in order to use these compilers. In any case, using -pgcc instead of plain gcc is just asking for trouble. - -The Red Hat gcc 2.96 compiler subtree can also be used to build this tree. -You should ensure you use gcc-2.96-74 or later. gcc-2.96-54 will not build -the kernel correctly. - -In addition, please pay attention to compiler optimization. Anything -greater than -O2 may not be wise. Similarly, if you choose to use gcc-2.95.x -or derivatives, be sure not to use -fstrict-aliasing (which, depending on -your version of gcc 2.95.x, may necessitate using -fno-strict-aliasing). +computer. 
Make ---- @@ -322,9 +301,9 @@ Getting updated software Kernel compilation ****************** -gcc 2.95.3 ----------- -o +gcc +--- +o Make ---- diff --git a/README b/README index 61c4f7429233..cd5e2eb6213b 100644 --- a/README +++ b/README @@ -183,11 +183,8 @@ CONFIGURING the kernel: COMPILING the kernel: - - Make sure you have gcc 2.95.3 available. - gcc 2.91.66 (egcs-1.1.2), and gcc 2.7.2.3 are known to miscompile - some parts of the kernel, and are *no longer supported*. - Also remember to upgrade your binutils package (for as/ld/nm and company) - if necessary. For more information, refer to Documentation/Changes. + - Make sure you have at least gcc 3.2 available. + For more information, refer to Documentation/Changes. Please note that you can still run a.out user programs with this kernel. diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 04d3082a7b94..0abbce8c70bc 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -23,20 +23,15 @@ #error Sorry, your compiler targets APCS-26 but this kernel requires APCS-32 #endif /* - * GCC 2.95.1, 2.95.2: ignores register clobber list in asm(). * GCC 3.0, 3.1: general bad code generation. * GCC 3.2.0: incorrect function argument offset calculation. * GCC 3.2.x: miscompiles NEW_AUX_ENT in fs/binfmt_elf.c * (http://gcc.gnu.org/PR8896) and incorrect structure * initialisation in fs/jffs2/erase.c */ -#if __GNUC__ < 2 || \ - (__GNUC__ == 2 && __GNUC_MINOR__ < 95) || \ - (__GNUC__ == 2 && __GNUC_MINOR__ == 95 && __GNUC_PATCHLEVEL__ != 0 && \ - __GNUC_PATCHLEVEL__ < 3) || \ - (__GNUC__ == 3 && __GNUC_MINOR__ < 3) +#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) #error Your compiler is too buggy; it is known to miscompile kernels. 
-#error Known good compilers: 2.95.3, 2.95.4, 2.96, 3.3 +#error Known good compilers: 3.3 #endif /* Use marker if you need to separate the values later */ diff --git a/arch/arm26/kernel/asm-offsets.c b/arch/arm26/kernel/asm-offsets.c index 4ccacaef94df..ac682d5fd039 100644 --- a/arch/arm26/kernel/asm-offsets.c +++ b/arch/arm26/kernel/asm-offsets.c @@ -25,13 +25,6 @@ #if defined(__APCS_32__) && defined(CONFIG_CPU_26) #error Sorry, your compiler targets APCS-32 but this kernel requires APCS-26 #endif -#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95) -#error Sorry, your compiler is known to miscompile kernels. Only use gcc 2.95.3 and later. -#endif -#if __GNUC__ == 2 && __GNUC_MINOR__ == 95 -/* shame we can't detect the .1 or .2 releases */ -#warning GCC 2.95.2 and earlier miscompiles kernels. -#endif /* Use marker if you need to separate the values later */ diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 968fabd8723f..486449e9e710 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -630,10 +630,6 @@ config REGPARM and passes the first three arguments of a function call in registers. This will probably break binary only modules. - This feature is only enabled for gcc-3.0 and later - earlier compilers - generate incorrect output with certain kernel constructs when - -mregparm=3 is used. - config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS diff --git a/arch/i386/Makefile b/arch/i386/Makefile index d121ea18460f..b84119f9cc63 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -37,10 +37,7 @@ CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) # CPU-specific tuning. Anything which can be shared with UML should go here. 
include $(srctree)/arch/i386/Makefile.cpu -# -mregparm=3 works ok on gcc-3.0 and later -# -GCC_VERSION := $(call cc-version) -cflags-$(CONFIG_REGPARM) += $(shell if [ $(GCC_VERSION) -ge 0300 ] ; then echo "-mregparm=3"; fi ;) +cflags-$(CONFIG_REGPARM) += -mregparm=3 # Disable unit-at-a-time mode, it makes gcc use a lot more stack # due to the lack of sharing of stacklots. diff --git a/arch/i386/Makefile.cpu b/arch/i386/Makefile.cpu index 8e51456df23d..dcd936ef45db 100644 --- a/arch/i386/Makefile.cpu +++ b/arch/i386/Makefile.cpu @@ -1,7 +1,7 @@ # CPU tuning section - shared with UML. # Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML. -#-mtune exists since gcc 3.4, and some -mcpu flavors didn't exist in gcc 2.95. +#-mtune exists since gcc 3.4 HAS_MTUNE := $(call cc-option-yn, -mtune=i386) ifeq ($(HAS_MTUNE),y) tune = $(call cc-option,-mtune=$(1),) @@ -14,7 +14,7 @@ cflags-$(CONFIG_M386) += -march=i386 cflags-$(CONFIG_M486) += -march=i486 cflags-$(CONFIG_M586) += -march=i586 cflags-$(CONFIG_M586TSC) += -march=i586 -cflags-$(CONFIG_M586MMX) += $(call cc-option,-march=pentium-mmx,-march=i586) +cflags-$(CONFIG_M586MMX) += -march=pentium-mmx cflags-$(CONFIG_M686) += -march=i686 cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2) cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3) @@ -23,8 +23,8 @@ cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call tune,pentium4) cflags-$(CONFIG_MK6) += -march=k6 # Please note, that patches that add -march=athlon-xp and friends are pointless. # They make zero difference whatsosever to performance at this time. 
-cflags-$(CONFIG_MK7) += $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4) -cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)) +cflags-$(CONFIG_MK7) += -march=athlon +cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) @@ -37,5 +37,5 @@ cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) cflags-$(CONFIG_X86_ELAN) += -march=i486 # Geode GX1 support -cflags-$(CONFIG_MGEODEGX1) += $(call cc-option,-march=pentium-mmx,-march=i486) +cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 67932ad53082..57b047c27e46 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -37,10 +37,6 @@ $(error Sorry, you need a newer version of the assember, one that is built from ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz) endif -ifneq ($(shell if [ $(GCC_VERSION) -lt 0300 ] ; then echo "bad"; fi ;),) -$(error Sorry, your compiler is too old. GCC v2.96 is known to generate bad code.) -endif - ifeq ($(GCC_VERSION),0304) cflags-$(CONFIG_ITANIUM) += -mtune=merced cflags-$(CONFIG_MCKINLEY) += -mtune=mckinley diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index bfe65b2e8621..fbc7ea35dd57 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -1060,7 +1060,7 @@ SET_REG(b5); * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h. 
*/ -#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) +#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4) .prologue diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 5db9d3bcbbcb..e72de580ebbf 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -103,7 +103,7 @@ EXPORT_SYMBOL(unw_init_running); #ifdef ASM_SUPPORTED # ifdef CONFIG_SMP -# if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) +# if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) /* * This is not a normal routine and we don't want a function descriptor for it, so we use * a fake declaration here. diff --git a/arch/ia64/oprofile/backtrace.c b/arch/ia64/oprofile/backtrace.c index b7dabbfb0d61..adb01566bd57 100644 --- a/arch/ia64/oprofile/backtrace.c +++ b/arch/ia64/oprofile/backtrace.c @@ -32,7 +32,7 @@ typedef struct u64 *prev_pfs_loc; /* state for WAR for old spinlock ool code */ } ia64_backtrace_t; -#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) +#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) /* * Returns non-zero if the PC is in the spinlock contention out-of-line code * with non-standard calling sequence (on older compilers). 
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index abbca150202b..d03f99cf4b7d 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -306,9 +306,6 @@ static int raid0_run (mddev_t *mddev) printk("raid0 : conf->hash_spacing is %llu blocks.\n", (unsigned long long)conf->hash_spacing); { -#if __GNUC__ < 3 - volatile -#endif sector_t s = mddev->array_size; sector_t space = conf->hash_spacing; int round; @@ -439,9 +436,6 @@ static int raid0_make_request (request_queue_t *q, struct bio *bio) { -#if __GNUC__ < 3 - volatile -#endif sector_t x = block >> conf->preshift; sector_div(x, (u32)conf->hash_spacing); zone = conf->hash_table[x]; diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c index 597b8db35a13..62a7d636ef11 100644 --- a/drivers/media/video/v4l2-common.c +++ b/drivers/media/video/v4l2-common.c @@ -191,9 +191,7 @@ char *v4l2_type_names[] = { }; char *v4l2_ioctl_names[256] = { -#if __GNUC__ >= 3 [0 ... 255] = "UNKNOWN", -#endif [_IOC_NR(VIDIOC_QUERYCAP)] = "VIDIOC_QUERYCAP", [_IOC_NR(VIDIOC_RESERVED)] = "VIDIOC_RESERVED", [_IOC_NR(VIDIOC_ENUM_FMT)] = "VIDIOC_ENUM_FMT", diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index f5ef5ea61a05..e8c56a3d9c64 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -212,11 +212,10 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; mlog(ML_ENTRY, "ENTRY:\n"); \ } while (0) -/* We disable this for old compilers since they don't have support for - * __builtin_types_compatible_p. +/* + * We disable this for sparse. 
*/ -#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) && \ - !defined(__CHECKER__) +#if !defined(__CHECKER__) #define mlog_exit(st) do { \ if (__builtin_types_compatible_p(typeof(st), unsigned long)) \ mlog(ML_EXIT, "EXIT: %lu\n", (unsigned long) (st)); \ diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 158829ca56f6..f40d4391fcfc 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -30,13 +30,7 @@ * By comparing each compnent, we don't have to worry about extra * endian issues in treating two 32 bit numbers as one 64 bit number */ -static -#if defined(__GNUC__) && (__GNUC__ == 2) && ( (__GNUC_MINOR__ == 95) || (__GNUC_MINOR__ == 96)) -__attribute__((unused)) /* gcc 2.95, 2.96 miscompile this when inlined */ -#else -__inline__ -#endif -xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) +static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2) { if (CYCLE_LSN(lsn1) != CYCLE_LSN(lsn2)) return (CYCLE_LSN(lsn1)= 1 || __GNUC__ > 3 #undef __always_inline #define __always_inline inline __attribute__((always_inline)) -#endif #endif /* __ALPHA_COMPILER_H */ diff --git a/include/asm-alpha/processor.h b/include/asm-alpha/processor.h index 059780a7d3d7..bb1a7a3abb8b 100644 --- a/include/asm-alpha/processor.h +++ b/include/asm-alpha/processor.h @@ -77,7 +77,6 @@ unsigned long get_wchan(struct task_struct *p); #define spin_lock_prefetch(lock) do { } while (0) #endif -#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) extern inline void prefetch(const void *ptr) { __builtin_prefetch(ptr, 0, 3); @@ -95,24 +94,4 @@ extern inline void spin_lock_prefetch(const void *ptr) } #endif -#else -extern inline void prefetch(const void *ptr) -{ - __asm__ ("ldl $31,%0" : : "m"(*(char *)ptr)); -} - -extern inline void prefetchw(const void *ptr) -{ - __asm__ ("ldq $31,%0" : : "m"(*(char *)ptr)); -} - -#ifdef CONFIG_SMP -extern inline void spin_lock_prefetch(const void *ptr) -{ - __asm__ ("ldq $31,%0" : : "m"(*(char *)ptr)); -} -#endif - -#endif /* GCC 3.1 */ - 
#endif /* __ASM_ALPHA_PROCESSOR_H */ diff --git a/include/asm-ia64/bug.h b/include/asm-ia64/bug.h index 3aa0a0a5474b..823616b5020b 100644 --- a/include/asm-ia64/bug.h +++ b/include/asm-ia64/bug.h @@ -2,11 +2,7 @@ #define _ASM_IA64_BUG_H #ifdef CONFIG_BUG -#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) -# define ia64_abort() __builtin_trap() -#else -# define ia64_abort() (*(volatile int *) 0 = 0) -#endif +#define ia64_abort() __builtin_trap() #define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0) /* should this BUG be made generic? */ diff --git a/include/asm-ia64/spinlock.h b/include/asm-ia64/spinlock.h index 0c91a76c5ea3..9e83210dc312 100644 --- a/include/asm-ia64/spinlock.h +++ b/include/asm-ia64/spinlock.h @@ -34,7 +34,7 @@ __raw_spin_lock_flags (raw_spinlock_t *lock, unsigned long flags) { register volatile unsigned int *ptr asm ("r31") = &lock->lock; -#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) +#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) # ifdef CONFIG_ITANIUM /* don't use brl on Itanium... */ asm volatile ("{\n\t" diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index b5417529f6f1..309f1466b6fa 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h @@ -193,11 +193,7 @@ do { \ * not preserve it's value. Hairy, but it lets us remove 2 loads * and 2 stores in this critical code path. 
-DaveM */ -#if __GNUC__ >= 3 #define EXTRA_CLOBBER ,"%l1" -#else -#define EXTRA_CLOBBER -#endif #define switch_to(prev, next, last) \ do { if (test_thread_flag(TIF_PERFCTR)) { \ unsigned long __tmp; \ diff --git a/include/asm-um/rwsem.h b/include/asm-um/rwsem.h index 661c0e54702b..b5fc449dc86b 100644 --- a/include/asm-um/rwsem.h +++ b/include/asm-um/rwsem.h @@ -1,10 +1,6 @@ #ifndef __UM_RWSEM_H__ #define __UM_RWSEM_H__ -#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) -#define __builtin_expect(exp,c) (exp) -#endif - #include "asm/arch/rwsem.h" #endif diff --git a/include/asm-v850/unistd.h b/include/asm-v850/unistd.h index 5a86f8e976ec..82460a7bb233 100644 --- a/include/asm-v850/unistd.h +++ b/include/asm-v850/unistd.h @@ -241,9 +241,6 @@ /* User programs sometimes end up including this header file (indirectly, via uClibc header files), so I'm a bit nervous just including <linux/compiler.h>. */ -#if !defined(__builtin_expect) && __GNUC__ == 2 && __GNUC_MINOR__ < 96 -#define __builtin_expect(x, expected_value) (x) -#endif #define __syscall_return(type, res) \ do { \ @@ -346,20 +343,6 @@ type name (atype a, btype b, ctype c, dtype d, etype e) \ __syscall_return (type, __ret); \ } -#if __GNUC__ < 3 -/* In older versions of gcc, `asm' statements with more than 10 - input/output arguments produce a fatal error. To work around this - problem, we use two versions, one for gcc-3.x and one for earlier - versions of gcc (the `earlier gcc' version doesn't work with gcc-3.x - because gcc-3.x doesn't allow clobbers to also be input arguments).
*/ -#define __SYSCALL6_TRAP(syscall, ret, a, b, c, d, e, f) \ - __asm__ __volatile__ ("trap " SYSCALL_LONG_TRAP \ - : "=r" (ret), "=r" (syscall) \ - : "1" (syscall), \ - "r" (a), "r" (b), "r" (c), "r" (d), \ - "r" (e), "r" (f) \ - : SYSCALL_CLOBBERS, SYSCALL_ARG4, SYSCALL_ARG5); -#else /* __GNUC__ >= 3 */ #define __SYSCALL6_TRAP(syscall, ret, a, b, c, d, e, f) \ __asm__ __volatile__ ("trap " SYSCALL_LONG_TRAP \ : "=r" (ret), "=r" (syscall), \ @@ -368,7 +351,6 @@ type name (atype a, btype b, ctype c, dtype d, etype e) \ "r" (a), "r" (b), "r" (c), "r" (d), \ "2" (e), "3" (f) \ : SYSCALL_CLOBBERS); -#endif #define _syscall6(type, name, atype, a, btype, b, ctype, c, dtype, d, etype, e, ftype, f) \ type name (atype a, btype b, ctype c, dtype d, etype e, ftype f) \ diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index 04bd756efc67..e86e4a938373 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -156,7 +156,7 @@ extern __be32 htonl(__u32); extern __u16 ntohs(__be16); extern __be16 htons(__u16); -#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__) +#if defined(__GNUC__) && defined(__OPTIMIZE__) #define ___htonl(x) __cpu_to_be32(x) #define ___htons(x) __cpu_to_be16(x) diff --git a/include/linux/byteorder/swab.h b/include/linux/byteorder/swab.h index 2f1cb775125a..25f7f32883ec 100644 --- a/include/linux/byteorder/swab.h +++ b/include/linux/byteorder/swab.h @@ -110,7 +110,7 @@ /* * Allow constant folding */ -#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__) +#if defined(__GNUC__) && defined(__OPTIMIZE__) # define __swab16(x) \ (__builtin_constant_p((__u16)(x)) ? 
\ ___swab16((x)) : \ diff --git a/include/linux/byteorder/swabb.h b/include/linux/byteorder/swabb.h index d5f2a3205109..ae5e5f914bf4 100644 --- a/include/linux/byteorder/swabb.h +++ b/include/linux/byteorder/swabb.h @@ -77,7 +77,7 @@ /* * Allow constant folding */ -#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__) +#if defined(__GNUC__) && defined(__OPTIMIZE__) # define __swahw32(x) \ (__builtin_constant_p((__u32)(x)) ? \ ___swahw32((x)) : \ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 152734055403..2e05e1e6b0e6 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -15,3 +15,12 @@ ({ unsigned long __ptr; \ __asm__ ("" : "=g"(__ptr) : "0"(ptr)); \ (typeof(ptr)) (__ptr + (off)); }) + + +#define inline inline __attribute__((always_inline)) +#define __inline__ __inline__ __attribute__((always_inline)) +#define __inline __inline __attribute__((always_inline)) +#define __deprecated __attribute__((deprecated)) +#define noinline __attribute__((noinline)) +#define __attribute_pure__ __attribute__((pure)) +#define __attribute_const__ __attribute__((__const__)) diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h index a6fa615afab5..4209082ee934 100644 --- a/include/linux/compiler-gcc3.h +++ b/include/linux/compiler-gcc3.h @@ -3,29 +3,12 @@ /* These definitions are for GCC v3.x. 
*/ #include <linux/compiler-gcc.h> -#if __GNUC_MINOR__ >= 1 -# define inline inline __attribute__((always_inline)) -# define __inline__ __inline__ __attribute__((always_inline)) -# define __inline __inline __attribute__((always_inline)) -#endif - -#if __GNUC_MINOR__ > 0 -# define __deprecated __attribute__((deprecated)) -#endif - #if __GNUC_MINOR__ >= 3 # define __attribute_used__ __attribute__((__used__)) #else # define __attribute_used__ __attribute__((__unused__)) #endif -#define __attribute_pure__ __attribute__((pure)) -#define __attribute_const__ __attribute__((__const__)) - -#if __GNUC_MINOR__ >= 1 -#define noinline __attribute__((noinline)) -#endif - #if __GNUC_MINOR__ >= 4 #define __must_check __attribute__((warn_unused_result)) #endif diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 53686c037a06..e913e9beaf69 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -3,14 +3,7 @@ /* These definitions are for GCC v4.x. */ #include <linux/compiler-gcc.h> -#define inline inline __attribute__((always_inline)) -#define __inline__ __inline__ __attribute__((always_inline)) -#define __inline __inline __attribute__((always_inline)) -#define __deprecated __attribute__((deprecated)) #define __attribute_used__ __attribute__((__used__)) -#define __attribute_pure__ __attribute__((pure)) -#define __attribute_const__ __attribute__((__const__)) -#define noinline __attribute__((noinline)) #define __must_check __attribute__((warn_unused_result)) #define __compiler_offsetof(a,b) __builtin_offsetof(a,b) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index b1e407a4fbda..ca7ff8fdd090 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -316,8 +316,6 @@ extern int randomize_va_space; #endif /* Trap pasters of __FUNCTION__ at compile-time */ -#if __GNUC__ > 2 || __GNUC_MINOR__ >= 95 #define __FUNCTION__ (__func__) -#endif #endif diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index dc89116bb1ca..cd2773b29a64 100644 ---
a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -26,11 +26,7 @@ static inline int has_secure_computing(struct thread_info *ti) #else /* CONFIG_SECCOMP */ -#if (__GNUC__ > 2) - typedef struct { } seccomp_t; -#else - typedef struct { int gcc_is_buggy; } seccomp_t; -#endif +typedef struct { } seccomp_t; #define secure_computing(x) do { } while (0) /* static inline to preserve typechecking */ diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h index def2d173a8db..04135b0e198e 100644 --- a/include/linux/spinlock_types_up.h +++ b/include/linux/spinlock_types_up.h @@ -22,30 +22,16 @@ typedef struct { #else -/* - * All gcc 2.95 versions and early versions of 2.96 have a nasty bug - * with empty initializers. - */ -#if (__GNUC__ > 2) typedef struct { } raw_spinlock_t; #define __RAW_SPIN_LOCK_UNLOCKED { } -#else -typedef struct { int gcc_is_buggy; } raw_spinlock_t; -#define __RAW_SPIN_LOCK_UNLOCKED (raw_spinlock_t) { 0 } -#endif #endif -#if (__GNUC__ > 2) typedef struct { /* no debug version on UP */ } raw_rwlock_t; #define __RAW_RW_LOCK_UNLOCKED { } -#else -typedef struct { int gcc_is_buggy; } raw_rwlock_t; -#define __RAW_RW_LOCK_UNLOCKED (raw_rwlock_t) { 0 } -#endif #endif /* __LINUX_SPINLOCK_TYPES_UP_H */ diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c index 679d0ae97e4f..ed81eec6e732 100644 --- a/sound/isa/wavefront/wavefront_synth.c +++ b/sound/isa/wavefront/wavefront_synth.c @@ -115,18 +115,11 @@ MODULE_PARM_DESC(osrun_time, "how many seconds to wait for the ICS2115 OS"); #ifdef WF_DEBUG -#if defined(NEW_MACRO_VARARGS) || __GNUC__ >= 3 #define DPRINT(cond, ...) \ if ((dev->debug & (cond)) == (cond)) { \ snd_printk (__VA_ARGS__); \ } #else -#define DPRINT(cond, args...) \ - if ((dev->debug & (cond)) == (cond)) { \ - snd_printk (args); \ - } -#endif -#else #define DPRINT(cond, args...) 
#endif /* WF_DEBUG */ -- cgit v1.2.3-71-gd317 From 59d9136b9844d3a0376d93c945ab280decedb323 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Sun, 8 Jan 2006 01:04:34 -0800 Subject: [PATCH] aio: reorder kiocb structure elements to make sync iocb setup faster Reorder members of the kiocb structure to make sync kiocb setup faster. By setting the elements sequentially, the write combining buffers on the CPU are able to combine the writes into a single burst, which results in fewer cache cycles being consumed, freeing them up for other code. This results in a 10-20KB/s[*] increase on the bw_unix part of LMbench on my test system. * The improvement varies based on what other patches are in the system, as there are a number of bottlenecks, so this number is not absolutely accurate. Signed-off-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/aio.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index 49fd37629ee4..00c8efa95cc3 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -94,26 +94,27 @@ struct kiocb { ssize_t (*ki_retry)(struct kiocb *); void (*ki_dtor)(struct kiocb *); - struct list_head ki_list; /* the aio core uses this - * for cancellation */ - union { void __user *user; struct task_struct *tsk; } ki_obj; + __u64 ki_user_data; /* user's data for completion */ + wait_queue_t ki_wait; loff_t ki_pos; + + void *private; /* State that we remember to be able to restart/retry */ unsigned short ki_opcode; size_t ki_nbytes; /* copy of iocb->aio_nbytes */ char __user *ki_buf; /* remaining iocb->aio_buf */ size_t ki_left; /* remaining bytes */ - wait_queue_t ki_wait; long ki_retried; /* just for testing */ long ki_kicked; /* just for testing */ long ki_queued; /* just for testing */ - void *private; + struct list_head ki_list; /* the aio core uses this + * for cancellation */ }; #define is_sync_kiocb(iocb) 
((iocb)->ki_key == KIOCB_SYNC_KEY) @@ -126,6 +127,7 @@ struct kiocb { (x)->ki_filp = (filp); \ (x)->ki_ctx = NULL; \ (x)->ki_cancel = NULL; \ + (x)->ki_retry = NULL; \ (x)->ki_dtor = NULL; \ (x)->ki_obj.tsk = tsk; \ (x)->ki_user_data = 0; \ -- cgit v1.2.3-71-gd317 From 349aef0bc4c7f07d685c977e12d0e2d0b5d0e6db Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 8 Jan 2006 01:04:36 -0800 Subject: [PATCH] shrink struct page Reduce the size of the pageframe for NR_CPUS>4, CONFIG_PREEMPT back to the minimal size by unionising both ->private and ->mapping with the pagetable lock. It uses an anonymous struct and hence requires gcc-3.x. Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7ff54242c5d7..df80e63903b5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -223,24 +223,27 @@ struct page { * & limit reverse map searches. */ union { - unsigned long private; /* Mapping-private opaque data: - * usually used for buffer_heads - * if PagePrivate set; used for - * swp_entry_t if PageSwapCache - * When page is free, this indicates - * order in the buddy system. - */ + struct { + unsigned long private; /* Mapping-private opaque data: + * usually used for buffer_heads + * if PagePrivate set; used for + * swp_entry_t if PageSwapCache. + * When page is free, this + * indicates order in the buddy + * system. + */ + struct address_space *mapping; /* If low bit clear, points to + * inode address_space, or NULL. + * If page mapped as anonymous + * memory, low bit is set, and + * it points to anon_vma object: + * see PAGE_MAPPING_ANON below. + */ + }; #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS - spinlock_t ptl; + spinlock_t ptl; #endif - } u; - struct address_space *mapping; /* If low bit clear, points to - * inode address_space, or NULL. 
- * If page mapped as anonymous - * memory, low bit is set, and - * it points to anon_vma object: - * see PAGE_MAPPING_ANON below. - */ + }; pgoff_t index; /* Our offset within mapping. */ struct list_head lru; /* Pageout list, eg. active_list * protected by zone->lru_lock ! @@ -261,8 +264,8 @@ struct page { #endif /* WANT_PAGE_VIRTUAL */ }; -#define page_private(page) ((page)->u.private) -#define set_page_private(page, v) ((page)->u.private = (v)) +#define page_private(page) ((page)->private) +#define set_page_private(page, v) ((page)->private = (v)) /* * FIXME: take this include out, include page-flags.h in @@ -815,7 +818,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a * overflow into the next struct page (as it might with DEBUG_SPINLOCK). * When freeing, reset page->mapping so free_pages_check won't complain. */ -#define __pte_lockptr(page) &((page)->u.ptl) +#define __pte_lockptr(page) &((page)->ptl) #define pte_lock_init(_page) do { \ spin_lock_init(__pte_lockptr(_page)); \ } while (0) -- cgit v1.2.3-71-gd317 From a12dea7af93ae83bd868c0dc09367090ead7cc1e Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Sun, 8 Jan 2006 01:04:49 -0800 Subject: [PATCH] PTRACE_SYSEMU is only for i386 and clashes with other ptrace codes of other archs PTRACE_SYSEMU{,_SINGLESTEP} is actually arch specific, for now, and the current allocated number clashes with a ptrace code of frv, i.e. PTRACE_GETFDPIC. I should have submitted this much earlier, anyway we get no breakage for this. 
CC: Daniel Jacobowitz Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/ptrace.h | 3 +++ include/linux/ptrace.h | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h index 7e0f2945d17d..f324c53b6f9a 100644 --- a/include/asm-i386/ptrace.h +++ b/include/asm-i386/ptrace.h @@ -54,6 +54,9 @@ struct pt_regs { #define PTRACE_GET_THREAD_AREA 25 #define PTRACE_SET_THREAD_AREA 26 +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 + #ifdef __KERNEL__ #include diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 864791996b5f..9d5cd106b344 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -20,8 +20,6 @@ #define PTRACE_DETACH 0x11 #define PTRACE_SYSCALL 24 -#define PTRACE_SYSEMU 31 -#define PTRACE_SYSEMU_SINGLESTEP 32 /* 0x4200-0x4300 are reserved for architecture-independent additions. */ #define PTRACE_SETOPTIONS 0x4200 -- cgit v1.2.3-71-gd317 From 7e7f358c8f8f836c504faa293fda0c1c0733b63c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 8 Jan 2006 01:04:54 -0800 Subject: [PATCH] Split out screen_info from tty.h This makes it possible for boot code to use screen_info without dragging in all of tty.h. 
Signed-off-by: Brian Gerst Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/boot/compressed/misc.c | 2 +- arch/x86_64/boot/compressed/misc.c | 2 +- arch/x86_64/boot/compressed/miscsetup.h | 39 ----------------- include/linux/screen_info.h | 77 +++++++++++++++++++++++++++++++++ include/linux/tty.h | 72 +----------------------------- 5 files changed, 80 insertions(+), 112 deletions(-) delete mode 100644 arch/x86_64/boot/compressed/miscsetup.h create mode 100644 include/linux/screen_info.h (limited to 'include/linux') diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index 82a807f9f5e6..f19f3a7492a5 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c @@ -11,7 +11,7 @@ #include <linux/linkage.h> #include <linux/vmalloc.h> -#include <linux/tty.h> +#include <linux/screen_info.h> #include <asm/io.h> #include <asm/page.h> diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c index 0e10fd84c7cc..cf4b88c416dc 100644 --- a/arch/x86_64/boot/compressed/misc.c +++ b/arch/x86_64/boot/compressed/misc.c @@ -9,7 +9,7 @@ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb.
1996 */ -#include "miscsetup.h" +#include #include #include diff --git a/arch/x86_64/boot/compressed/miscsetup.h b/arch/x86_64/boot/compressed/miscsetup.h deleted file mode 100644 index bb1620531703..000000000000 --- a/arch/x86_64/boot/compressed/miscsetup.h +++ /dev/null @@ -1,39 +0,0 @@ -#define NULL 0 -//typedef unsigned int size_t; - - -struct screen_info { - unsigned char orig_x; /* 0x00 */ - unsigned char orig_y; /* 0x01 */ - unsigned short dontuse1; /* 0x02 -- EXT_MEM_K sits here */ - unsigned short orig_video_page; /* 0x04 */ - unsigned char orig_video_mode; /* 0x06 */ - unsigned char orig_video_cols; /* 0x07 */ - unsigned short unused2; /* 0x08 */ - unsigned short orig_video_ega_bx; /* 0x0a */ - unsigned short unused3; /* 0x0c */ - unsigned char orig_video_lines; /* 0x0e */ - unsigned char orig_video_isVGA; /* 0x0f */ - unsigned short orig_video_points; /* 0x10 */ - - /* VESA graphic mode -- linear frame buffer */ - unsigned short lfb_width; /* 0x12 */ - unsigned short lfb_height; /* 0x14 */ - unsigned short lfb_depth; /* 0x16 */ - unsigned long lfb_base; /* 0x18 */ - unsigned long lfb_size; /* 0x1c */ - unsigned short dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */ - unsigned short lfb_linelength; /* 0x24 */ - unsigned char red_size; /* 0x26 */ - unsigned char red_pos; /* 0x27 */ - unsigned char green_size; /* 0x28 */ - unsigned char green_pos; /* 0x29 */ - unsigned char blue_size; /* 0x2a */ - unsigned char blue_pos; /* 0x2b */ - unsigned char rsvd_size; /* 0x2c */ - unsigned char rsvd_pos; /* 0x2d */ - unsigned short vesapm_seg; /* 0x2e */ - unsigned short vesapm_off; /* 0x30 */ - unsigned short pages; /* 0x32 */ - /* 0x34 -- 0x3f reserved for future expansion */ -}; diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h new file mode 100644 index 000000000000..76850b75b3f6 --- /dev/null +++ b/include/linux/screen_info.h @@ -0,0 +1,77 @@ +#ifndef _SCREEN_INFO_H +#define _SCREEN_INFO_H + +#include + +/* + * These are set up 
by the setup-routine at boot-time: + */ + +struct screen_info { + u8 orig_x; /* 0x00 */ + u8 orig_y; /* 0x01 */ + u16 dontuse1; /* 0x02 -- EXT_MEM_K sits here */ + u16 orig_video_page; /* 0x04 */ + u8 orig_video_mode; /* 0x06 */ + u8 orig_video_cols; /* 0x07 */ + u16 unused2; /* 0x08 */ + u16 orig_video_ega_bx; /* 0x0a */ + u16 unused3; /* 0x0c */ + u8 orig_video_lines; /* 0x0e */ + u8 orig_video_isVGA; /* 0x0f */ + u16 orig_video_points; /* 0x10 */ + + /* VESA graphic mode -- linear frame buffer */ + u16 lfb_width; /* 0x12 */ + u16 lfb_height; /* 0x14 */ + u16 lfb_depth; /* 0x16 */ + u32 lfb_base; /* 0x18 */ + u32 lfb_size; /* 0x1c */ + u16 dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */ + u16 lfb_linelength; /* 0x24 */ + u8 red_size; /* 0x26 */ + u8 red_pos; /* 0x27 */ + u8 green_size; /* 0x28 */ + u8 green_pos; /* 0x29 */ + u8 blue_size; /* 0x2a */ + u8 blue_pos; /* 0x2b */ + u8 rsvd_size; /* 0x2c */ + u8 rsvd_pos; /* 0x2d */ + u16 vesapm_seg; /* 0x2e */ + u16 vesapm_off; /* 0x30 */ + u16 pages; /* 0x32 */ + u16 vesa_attributes; /* 0x34 */ + u32 capabilities; /* 0x36 */ + /* 0x3a -- 0x3f reserved for future expansion */ +}; + +extern struct screen_info screen_info; + +#define ORIG_X (screen_info.orig_x) +#define ORIG_Y (screen_info.orig_y) +#define ORIG_VIDEO_MODE (screen_info.orig_video_mode) +#define ORIG_VIDEO_COLS (screen_info.orig_video_cols) +#define ORIG_VIDEO_EGA_BX (screen_info.orig_video_ega_bx) +#define ORIG_VIDEO_LINES (screen_info.orig_video_lines) +#define ORIG_VIDEO_ISVGA (screen_info.orig_video_isVGA) +#define ORIG_VIDEO_POINTS (screen_info.orig_video_points) + +#define VIDEO_TYPE_MDA 0x10 /* Monochrome Text Display */ +#define VIDEO_TYPE_CGA 0x11 /* CGA Display */ +#define VIDEO_TYPE_EGAM 0x20 /* EGA/VGA in Monochrome Mode */ +#define VIDEO_TYPE_EGAC 0x21 /* EGA in Color Mode */ +#define VIDEO_TYPE_VGAC 0x22 /* VGA+ in Color Mode */ +#define VIDEO_TYPE_VLFB 0x23 /* VESA VGA in graphic mode */ + +#define VIDEO_TYPE_PICA_S3 0x30 /* 
ACER PICA-61 local S3 video */ +#define VIDEO_TYPE_MIPS_G364 0x31 /* MIPS Magnum 4000 G364 video */ +#define VIDEO_TYPE_SGI 0x33 /* Various SGI graphics hardware */ + +#define VIDEO_TYPE_TGAC 0x40 /* DEC TGA */ + +#define VIDEO_TYPE_SUN 0x50 /* Sun frame buffer. */ +#define VIDEO_TYPE_SUNPCI 0x51 /* Sun PCI based frame buffer. */ + +#define VIDEO_TYPE_PMAC 0x60 /* PowerMacintosh frame buffer. */ + +#endif /* _SCREEN_INFO_H */ diff --git a/include/linux/tty.h b/include/linux/tty.h index 1267f88ece6e..57449704a47b 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -36,77 +37,6 @@ #define NR_UNIX98_PTY_MAX (1 << MINORBITS) /* Absolute limit */ #define NR_LDISCS 16 -/* - * These are set up by the setup-routine at boot-time: - */ - -struct screen_info { - u8 orig_x; /* 0x00 */ - u8 orig_y; /* 0x01 */ - u16 dontuse1; /* 0x02 -- EXT_MEM_K sits here */ - u16 orig_video_page; /* 0x04 */ - u8 orig_video_mode; /* 0x06 */ - u8 orig_video_cols; /* 0x07 */ - u16 unused2; /* 0x08 */ - u16 orig_video_ega_bx; /* 0x0a */ - u16 unused3; /* 0x0c */ - u8 orig_video_lines; /* 0x0e */ - u8 orig_video_isVGA; /* 0x0f */ - u16 orig_video_points; /* 0x10 */ - - /* VESA graphic mode -- linear frame buffer */ - u16 lfb_width; /* 0x12 */ - u16 lfb_height; /* 0x14 */ - u16 lfb_depth; /* 0x16 */ - u32 lfb_base; /* 0x18 */ - u32 lfb_size; /* 0x1c */ - u16 dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */ - u16 lfb_linelength; /* 0x24 */ - u8 red_size; /* 0x26 */ - u8 red_pos; /* 0x27 */ - u8 green_size; /* 0x28 */ - u8 green_pos; /* 0x29 */ - u8 blue_size; /* 0x2a */ - u8 blue_pos; /* 0x2b */ - u8 rsvd_size; /* 0x2c */ - u8 rsvd_pos; /* 0x2d */ - u16 vesapm_seg; /* 0x2e */ - u16 vesapm_off; /* 0x30 */ - u16 pages; /* 0x32 */ - u16 vesa_attributes; /* 0x34 */ - u32 capabilities; /* 0x36 */ - /* 0x3a -- 0x3f reserved for future expansion */ -}; - -extern struct screen_info screen_info; - -#define ORIG_X 
(screen_info.orig_x) -#define ORIG_Y (screen_info.orig_y) -#define ORIG_VIDEO_MODE (screen_info.orig_video_mode) -#define ORIG_VIDEO_COLS (screen_info.orig_video_cols) -#define ORIG_VIDEO_EGA_BX (screen_info.orig_video_ega_bx) -#define ORIG_VIDEO_LINES (screen_info.orig_video_lines) -#define ORIG_VIDEO_ISVGA (screen_info.orig_video_isVGA) -#define ORIG_VIDEO_POINTS (screen_info.orig_video_points) - -#define VIDEO_TYPE_MDA 0x10 /* Monochrome Text Display */ -#define VIDEO_TYPE_CGA 0x11 /* CGA Display */ -#define VIDEO_TYPE_EGAM 0x20 /* EGA/VGA in Monochrome Mode */ -#define VIDEO_TYPE_EGAC 0x21 /* EGA in Color Mode */ -#define VIDEO_TYPE_VGAC 0x22 /* VGA+ in Color Mode */ -#define VIDEO_TYPE_VLFB 0x23 /* VESA VGA in graphic mode */ - -#define VIDEO_TYPE_PICA_S3 0x30 /* ACER PICA-61 local S3 video */ -#define VIDEO_TYPE_MIPS_G364 0x31 /* MIPS Magnum 4000 G364 video */ -#define VIDEO_TYPE_SGI 0x33 /* Various SGI graphics hardware */ - -#define VIDEO_TYPE_TGAC 0x40 /* DEC TGA */ - -#define VIDEO_TYPE_SUN 0x50 /* Sun frame buffer. */ -#define VIDEO_TYPE_SUNPCI 0x51 /* Sun PCI based frame buffer. */ - -#define VIDEO_TYPE_PMAC 0x60 /* PowerMacintosh frame buffer. */ - /* * This character is the same as _POSIX_VDISABLE: it cannot be used as * a c_cc[] character, but indicates that a particular special character -- cgit v1.2.3-71-gd317 From d8a33496671e4533aed090793436d58debea6f3a Mon Sep 17 00:00:00 2001 From: Marko Kohtala Date: Sun, 8 Jan 2006 01:05:06 -0800 Subject: [PATCH] parport: bring back an unused phase for ppdev ioctl Earlier fix removed unused phase, but that changed the values for other phases. Since these are exposed to userspace through ppdev, it is safer not to change them. Restore the unused phase value. 
Signed-off-by: Marko Kohtala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/parport.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/parport.h b/include/linux/parport.h index f7ff0b0c4031..f67f838a3a1f 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -236,12 +236,14 @@ struct pardevice { /* IEEE1284 information */ -/* IEEE1284 phases */ +/* IEEE1284 phases. These are exposed to userland through ppdev IOCTL + * PP[GS]ETPHASE, so do not change existing values. */ enum ieee1284_phase { IEEE1284_PH_FWD_DATA, IEEE1284_PH_FWD_IDLE, IEEE1284_PH_TERMINATE, IEEE1284_PH_NEGOTIATION, + IEEE1284_PH_HBUSY_DNA, IEEE1284_PH_REV_IDLE, IEEE1284_PH_HBUSY_DAVAIL, IEEE1284_PH_REV_DATA, -- cgit v1.2.3-71-gd317 From 6a878184c202395ea17212f111ab9ec4b5f6d6ee Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Sun, 8 Jan 2006 01:05:07 -0800 Subject: [PATCH] Eliminate __attribute__ ((packed)) warnings for gcc-4.1 Since version 4.1 the gcc is warning about ignored attributes. This patch is using the equivalent attribute on the struct instead of on each of the structure or union members. GCC Manual: "Specifying Attributes of Types packed This attribute, attached to struct or union type definition, specifies that each member of the structure or union is placed to minimize the memory required. When attached to an enum definition, it indicates that the smallest integral type should be used. Specifying this attribute for struct and union types is equivalent to specifying the packed attribute on each of the structure or union members." 
Signed-off-by: Jan Blunck Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/isdn/hisax/hisax.h | 18 +++--- drivers/isdn/hisax/hisax_fcpcipnp.h | 18 +++--- drivers/net/3c527.h | 50 +++++++------- drivers/net/irda/vlsi_ir.h | 4 +- drivers/net/wan/sdla.c | 6 +- include/linux/atalk.h | 18 +++--- include/linux/cycx_x25.h | 66 +++++++++---------- include/linux/if_frad.h | 12 ++-- include/linux/isdnif.h | 70 ++++++++++---------- include/linux/ncp.h | 126 ++++++++++++++++++------------------ include/linux/sdla.h | 64 +++++++++--------- include/linux/wavefront.h | 36 +++++------ include/net/dn_dev.h | 84 ++++++++++++------------ include/net/dn_nsp.h | 74 ++++++++++----------- include/sound/wavefront.h | 36 +++++------ 15 files changed, 341 insertions(+), 341 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/hisax/hisax.h b/drivers/isdn/hisax/hisax.h index 26c545fa223b..1b85ce166af8 100644 --- a/drivers/isdn/hisax/hisax.h +++ b/drivers/isdn/hisax/hisax.h @@ -396,17 +396,17 @@ struct isar_hw { struct hdlc_stat_reg { #ifdef __BIG_ENDIAN - u_char fill __attribute__((packed)); - u_char mode __attribute__((packed)); - u_char xml __attribute__((packed)); - u_char cmd __attribute__((packed)); + u_char fill; + u_char mode; + u_char xml; + u_char cmd; #else - u_char cmd __attribute__((packed)); - u_char xml __attribute__((packed)); - u_char mode __attribute__((packed)); - u_char fill __attribute__((packed)); + u_char cmd; + u_char xml; + u_char mode; + u_char fill; #endif -}; +} __attribute__((packed)); struct hdlc_hw { union { diff --git a/drivers/isdn/hisax/hisax_fcpcipnp.h b/drivers/isdn/hisax/hisax_fcpcipnp.h index bd8a22e4d6a2..21fbcedf3a94 100644 --- a/drivers/isdn/hisax/hisax_fcpcipnp.h +++ b/drivers/isdn/hisax/hisax_fcpcipnp.h @@ -12,17 +12,17 @@ enum { struct hdlc_stat_reg { #ifdef __BIG_ENDIAN - u_char fill __attribute__((packed)); - u_char mode __attribute__((packed)); - u_char xml __attribute__((packed)); - u_char cmd 
__attribute__((packed)); + u_char fill; + u_char mode; + u_char xml; + u_char cmd; #else - u_char cmd __attribute__((packed)); - u_char xml __attribute__((packed)); - u_char mode __attribute__((packed)); - u_char fill __attribute__((packed)); + u_char cmd; + u_char xml; + u_char mode; + u_char fill; #endif -}; +} __attribute__((packed)); struct fritz_bcs { struct hisax_b_if b_if; diff --git a/drivers/net/3c527.h b/drivers/net/3c527.h index c10f009ce9b6..53b5b071df08 100644 --- a/drivers/net/3c527.h +++ b/drivers/net/3c527.h @@ -32,43 +32,43 @@ struct mc32_mailbox { - u16 mbox __attribute((packed)); - u16 data[1] __attribute((packed)); -}; + u16 mbox; + u16 data[1]; +} __attribute((packed)); struct skb_header { - u8 status __attribute((packed)); - u8 control __attribute((packed)); - u16 next __attribute((packed)); /* Do not change! */ - u16 length __attribute((packed)); - u32 data __attribute((packed)); -}; + u8 status; + u8 control; + u16 next; /* Do not change! */ + u16 length; + u32 data; +} __attribute((packed)); struct mc32_stats { /* RX Errors */ - u32 rx_crc_errors __attribute((packed)); - u32 rx_alignment_errors __attribute((packed)); - u32 rx_overrun_errors __attribute((packed)); - u32 rx_tooshort_errors __attribute((packed)); - u32 rx_toolong_errors __attribute((packed)); - u32 rx_outofresource_errors __attribute((packed)); + u32 rx_crc_errors; + u32 rx_alignment_errors; + u32 rx_overrun_errors; + u32 rx_tooshort_errors; + u32 rx_toolong_errors; + u32 rx_outofresource_errors; - u32 rx_discarded __attribute((packed)); /* via card pattern match filter */ + u32 rx_discarded; /* via card pattern match filter */ /* TX Errors */ - u32 tx_max_collisions __attribute((packed)); - u32 tx_carrier_errors __attribute((packed)); - u32 tx_underrun_errors __attribute((packed)); - u32 tx_cts_errors __attribute((packed)); - u32 tx_timeout_errors __attribute((packed)) ; + u32 tx_max_collisions; + u32 tx_carrier_errors; + u32 tx_underrun_errors; + u32 tx_cts_errors; + u32 
tx_timeout_errors; /* various cruft */ - u32 dataA[6] __attribute((packed)); - u16 dataB[5] __attribute((packed)); - u32 dataC[14] __attribute((packed)); -}; + u32 dataA[6]; + u16 dataB[5]; + u32 dataC[14]; +} __attribute((packed)); #define STATUS_MASK 0x0F #define COMPLETED (1<<7) diff --git a/drivers/net/irda/vlsi_ir.h b/drivers/net/irda/vlsi_ir.h index 741aecc655df..a82a4ba8de4f 100644 --- a/drivers/net/irda/vlsi_ir.h +++ b/drivers/net/irda/vlsi_ir.h @@ -577,8 +577,8 @@ struct ring_descr_hw { struct { u8 addr_res[3]; volatile u8 status; /* descriptor status */ - } rd_s __attribute__((packed)); - } rd_u __attribute((packed)); + } __attribute__((packed)) rd_s; + } __attribute((packed)) rd_u; } __attribute__ ((packed)); #define rd_addr rd_u.addr diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c index 036adc4f8ba7..22e794071cf4 100644 --- a/drivers/net/wan/sdla.c +++ b/drivers/net/wan/sdla.c @@ -329,9 +329,9 @@ static int sdla_cpuspeed(struct net_device *dev, struct ifreq *ifr) struct _dlci_stat { - short dlci __attribute__((packed)); - char flags __attribute__((packed)); -}; + short dlci; + char flags; +} __attribute__((packed)); struct _frad_stat { diff --git a/include/linux/atalk.h b/include/linux/atalk.h index 911c09cb9bf9..6ba3aa8a81f4 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -155,15 +155,15 @@ struct elapaarp { #define AARP_REQUEST 1 #define AARP_REPLY 2 #define AARP_PROBE 3 - __u8 hw_src[ETH_ALEN] __attribute__ ((packed)); - __u8 pa_src_zero __attribute__ ((packed)); - __be16 pa_src_net __attribute__ ((packed)); - __u8 pa_src_node __attribute__ ((packed)); - __u8 hw_dst[ETH_ALEN] __attribute__ ((packed)); - __u8 pa_dst_zero __attribute__ ((packed)); - __be16 pa_dst_net __attribute__ ((packed)); - __u8 pa_dst_node __attribute__ ((packed)); -}; + __u8 hw_src[ETH_ALEN]; + __u8 pa_src_zero; + __be16 pa_src_net; + __u8 pa_src_node; + __u8 hw_dst[ETH_ALEN]; + __u8 pa_dst_zero; + __be16 pa_dst_net; + __u8 pa_dst_node; +} 
__attribute__ ((packed)); static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb) { diff --git a/include/linux/cycx_x25.h b/include/linux/cycx_x25.h index b10a7f3a8cac..f7a906583463 100644 --- a/include/linux/cycx_x25.h +++ b/include/linux/cycx_x25.h @@ -38,11 +38,11 @@ extern unsigned int cycx_debug; /* Data Structures */ /* X.25 Command Block. */ struct cycx_x25_cmd { - u16 command PACKED; - u16 link PACKED; /* values: 0 or 1 */ - u16 len PACKED; /* values: 0 thru 0x205 (517) */ - u32 buf PACKED; -}; + u16 command; + u16 link; /* values: 0 or 1 */ + u16 len; /* values: 0 thru 0x205 (517) */ + u32 buf; +} PACKED; /* Defines for the 'command' field. */ #define X25_CONNECT_REQUEST 0x4401 @@ -92,34 +92,34 @@ struct cycx_x25_cmd { * @flags - see dosx25.doc, in portuguese, for details */ struct cycx_x25_config { - u8 link PACKED; - u8 speed PACKED; - u8 clock PACKED; - u8 n2 PACKED; - u8 n2win PACKED; - u8 n3win PACKED; - u8 nvc PACKED; - u8 pktlen PACKED; - u8 locaddr PACKED; - u8 remaddr PACKED; - u16 t1 PACKED; - u16 t2 PACKED; - u8 t21 PACKED; - u8 npvc PACKED; - u8 t23 PACKED; - u8 flags PACKED; -}; + u8 link; + u8 speed; + u8 clock; + u8 n2; + u8 n2win; + u8 n3win; + u8 nvc; + u8 pktlen; + u8 locaddr; + u8 remaddr; + u16 t1; + u16 t2; + u8 t21; + u8 npvc; + u8 t23; + u8 flags; +} PACKED; struct cycx_x25_stats { - u16 rx_crc_errors PACKED; - u16 rx_over_errors PACKED; - u16 n2_tx_frames PACKED; - u16 n2_rx_frames PACKED; - u16 tx_timeouts PACKED; - u16 rx_timeouts PACKED; - u16 n3_tx_packets PACKED; - u16 n3_rx_packets PACKED; - u16 tx_aborts PACKED; - u16 rx_aborts PACKED; -}; + u16 rx_crc_errors; + u16 rx_over_errors; + u16 n2_tx_frames; + u16 n2_rx_frames; + u16 tx_timeouts; + u16 rx_timeouts; + u16 n3_tx_packets; + u16 n3_rx_packets; + u16 tx_aborts; + u16 rx_aborts; +} PACKED; #endif /* _CYCX_X25_H */ diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h index 511999c7eeda..395f0aad9cbf 100644 --- a/include/linux/if_frad.h +++ 
b/include/linux/if_frad.h @@ -131,17 +131,17 @@ struct frad_conf /* these are the fields of an RFC 1490 header */ struct frhdr { - unsigned char control __attribute__((packed)); + unsigned char control; /* for IP packets, this can be the NLPID */ - unsigned char pad __attribute__((packed)); + unsigned char pad; - unsigned char NLPID __attribute__((packed)); - unsigned char OUI[3] __attribute__((packed)); - unsigned short PID __attribute__((packed)); + unsigned char NLPID; + unsigned char OUI[3]; + unsigned short PID; #define IP_NLPID pad -}; +} __attribute__((packed)); /* see RFC 1490 for the definition of the following */ #define FRAD_I_UI 0x03 diff --git a/include/linux/isdnif.h b/include/linux/isdnif.h index 7a4eacd77cb2..04e10f9f14f8 100644 --- a/include/linux/isdnif.h +++ b/include/linux/isdnif.h @@ -282,43 +282,43 @@ typedef struct setup_parm { typedef struct T30_s { /* session parameters */ - __u8 resolution __attribute__ ((packed)); - __u8 rate __attribute__ ((packed)); - __u8 width __attribute__ ((packed)); - __u8 length __attribute__ ((packed)); - __u8 compression __attribute__ ((packed)); - __u8 ecm __attribute__ ((packed)); - __u8 binary __attribute__ ((packed)); - __u8 scantime __attribute__ ((packed)); - __u8 id[FAXIDLEN] __attribute__ ((packed)); + __u8 resolution; + __u8 rate; + __u8 width; + __u8 length; + __u8 compression; + __u8 ecm; + __u8 binary; + __u8 scantime; + __u8 id[FAXIDLEN]; /* additional parameters */ - __u8 phase __attribute__ ((packed)); - __u8 direction __attribute__ ((packed)); - __u8 code __attribute__ ((packed)); - __u8 badlin __attribute__ ((packed)); - __u8 badmul __attribute__ ((packed)); - __u8 bor __attribute__ ((packed)); - __u8 fet __attribute__ ((packed)); - __u8 pollid[FAXIDLEN] __attribute__ ((packed)); - __u8 cq __attribute__ ((packed)); - __u8 cr __attribute__ ((packed)); - __u8 ctcrty __attribute__ ((packed)); - __u8 minsp __attribute__ ((packed)); - __u8 phcto __attribute__ ((packed)); - __u8 rel __attribute__ 
((packed)); - __u8 nbc __attribute__ ((packed)); + __u8 phase; + __u8 direction; + __u8 code; + __u8 badlin; + __u8 badmul; + __u8 bor; + __u8 fet; + __u8 pollid[FAXIDLEN]; + __u8 cq; + __u8 cr; + __u8 ctcrty; + __u8 minsp; + __u8 phcto; + __u8 rel; + __u8 nbc; /* remote station parameters */ - __u8 r_resolution __attribute__ ((packed)); - __u8 r_rate __attribute__ ((packed)); - __u8 r_width __attribute__ ((packed)); - __u8 r_length __attribute__ ((packed)); - __u8 r_compression __attribute__ ((packed)); - __u8 r_ecm __attribute__ ((packed)); - __u8 r_binary __attribute__ ((packed)); - __u8 r_scantime __attribute__ ((packed)); - __u8 r_id[FAXIDLEN] __attribute__ ((packed)); - __u8 r_code __attribute__ ((packed)); -} T30_s; + __u8 r_resolution; + __u8 r_rate; + __u8 r_width; + __u8 r_length; + __u8 r_compression; + __u8 r_ecm; + __u8 r_binary; + __u8 r_scantime; + __u8 r_id[FAXIDLEN]; + __u8 r_code; +} __attribute__((packed)) T30_s; #define ISDN_TTY_FAX_CONN_IN 0 #define ISDN_TTY_FAX_CONN_OUT 1 diff --git a/include/linux/ncp.h b/include/linux/ncp.h index 99f77876b716..99f0adeeb3f3 100644 --- a/include/linux/ncp.h +++ b/include/linux/ncp.h @@ -20,29 +20,29 @@ #define NCP_DEALLOC_SLOT_REQUEST (0x5555) struct ncp_request_header { - __u16 type __attribute__((packed)); - __u8 sequence __attribute__((packed)); - __u8 conn_low __attribute__((packed)); - __u8 task __attribute__((packed)); - __u8 conn_high __attribute__((packed)); - __u8 function __attribute__((packed)); - __u8 data[0] __attribute__((packed)); -}; + __u16 type; + __u8 sequence; + __u8 conn_low; + __u8 task; + __u8 conn_high; + __u8 function; + __u8 data[0]; +} __attribute__((packed)); #define NCP_REPLY (0x3333) #define NCP_WATCHDOG (0x3E3E) #define NCP_POSITIVE_ACK (0x9999) struct ncp_reply_header { - __u16 type __attribute__((packed)); - __u8 sequence __attribute__((packed)); - __u8 conn_low __attribute__((packed)); - __u8 task __attribute__((packed)); - __u8 conn_high __attribute__((packed)); - __u8 
completion_code __attribute__((packed)); - __u8 connection_state __attribute__((packed)); - __u8 data[0] __attribute__((packed)); -}; + __u16 type; + __u8 sequence; + __u8 conn_low; + __u8 task; + __u8 conn_high; + __u8 completion_code; + __u8 connection_state; + __u8 data[0]; +} __attribute__((packed)); #define NCP_VOLNAME_LEN (16) #define NCP_NUMBER_OF_VOLUMES (256) @@ -128,37 +128,37 @@ struct nw_nfs_info { }; struct nw_info_struct { - __u32 spaceAlloc __attribute__((packed)); - __le32 attributes __attribute__((packed)); - __u16 flags __attribute__((packed)); - __le32 dataStreamSize __attribute__((packed)); - __le32 totalStreamSize __attribute__((packed)); - __u16 numberOfStreams __attribute__((packed)); - __le16 creationTime __attribute__((packed)); - __le16 creationDate __attribute__((packed)); - __u32 creatorID __attribute__((packed)); - __le16 modifyTime __attribute__((packed)); - __le16 modifyDate __attribute__((packed)); - __u32 modifierID __attribute__((packed)); - __le16 lastAccessDate __attribute__((packed)); - __u16 archiveTime __attribute__((packed)); - __u16 archiveDate __attribute__((packed)); - __u32 archiverID __attribute__((packed)); - __u16 inheritedRightsMask __attribute__((packed)); - __le32 dirEntNum __attribute__((packed)); - __le32 DosDirNum __attribute__((packed)); - __u32 volNumber __attribute__((packed)); - __u32 EADataSize __attribute__((packed)); - __u32 EAKeyCount __attribute__((packed)); - __u32 EAKeySize __attribute__((packed)); - __u32 NSCreator __attribute__((packed)); - __u8 nameLen __attribute__((packed)); - __u8 entryName[256] __attribute__((packed)); + __u32 spaceAlloc; + __le32 attributes; + __u16 flags; + __le32 dataStreamSize; + __le32 totalStreamSize; + __u16 numberOfStreams; + __le16 creationTime; + __le16 creationDate; + __u32 creatorID; + __le16 modifyTime; + __le16 modifyDate; + __u32 modifierID; + __le16 lastAccessDate; + __u16 archiveTime; + __u16 archiveDate; + __u32 archiverID; + __u16 inheritedRightsMask; + __le32 
dirEntNum; + __le32 DosDirNum; + __u32 volNumber; + __u32 EADataSize; + __u32 EAKeyCount; + __u32 EAKeySize; + __u32 NSCreator; + __u8 nameLen; + __u8 entryName[256]; /* libncp may depend on there being nothing after entryName */ #ifdef __KERNEL__ struct nw_nfs_info nfs; #endif -}; +} __attribute__((packed)); /* modify mask - use with MODIFY_DOS_INFO structure */ #define DM_ATTRIBUTES (cpu_to_le32(0x02)) @@ -176,26 +176,26 @@ struct nw_info_struct { #define DM_MAXIMUM_SPACE (cpu_to_le32(0x2000)) struct nw_modify_dos_info { - __le32 attributes __attribute__((packed)); - __le16 creationDate __attribute__((packed)); - __le16 creationTime __attribute__((packed)); - __u32 creatorID __attribute__((packed)); - __le16 modifyDate __attribute__((packed)); - __le16 modifyTime __attribute__((packed)); - __u32 modifierID __attribute__((packed)); - __u16 archiveDate __attribute__((packed)); - __u16 archiveTime __attribute__((packed)); - __u32 archiverID __attribute__((packed)); - __le16 lastAccessDate __attribute__((packed)); - __u16 inheritanceGrantMask __attribute__((packed)); - __u16 inheritanceRevokeMask __attribute__((packed)); - __u32 maximumSpace __attribute__((packed)); -}; + __le32 attributes; + __le16 creationDate; + __le16 creationTime; + __u32 creatorID; + __le16 modifyDate; + __le16 modifyTime; + __u32 modifierID; + __u16 archiveDate; + __u16 archiveTime; + __u32 archiverID; + __le16 lastAccessDate; + __u16 inheritanceGrantMask; + __u16 inheritanceRevokeMask; + __u32 maximumSpace; +} __attribute__((packed)); struct nw_search_sequence { - __u8 volNumber __attribute__((packed)); - __u32 dirBase __attribute__((packed)); - __u32 sequence __attribute__((packed)); -}; + __u8 volNumber; + __u32 dirBase; + __u32 sequence; +} __attribute__((packed)); #endif /* _LINUX_NCP_H */ diff --git a/include/linux/sdla.h b/include/linux/sdla.h index 3b6afb8caa42..564acd3a71c1 100644 --- a/include/linux/sdla.h +++ b/include/linux/sdla.h @@ -293,46 +293,46 @@ void sdla(void *cfg_info, 
char *dev, struct frad_conf *conf, int quiet); #define SDLA_S508_INTEN 0x10 struct sdla_cmd { - char opp_flag __attribute__((packed)); - char cmd __attribute__((packed)); - short length __attribute__((packed)); - char retval __attribute__((packed)); - short dlci __attribute__((packed)); - char flags __attribute__((packed)); - short rxlost_int __attribute__((packed)); - long rxlost_app __attribute__((packed)); - char reserve[2] __attribute__((packed)); - char data[SDLA_MAX_DATA] __attribute__((packed)); /* transfer data buffer */ -}; + char opp_flag; + char cmd; + short length; + char retval; + short dlci; + char flags; + short rxlost_int; + long rxlost_app; + char reserve[2]; + char data[SDLA_MAX_DATA]; /* transfer data buffer */ +} __attribute__((packed)); struct intr_info { - char flags __attribute__((packed)); - short txlen __attribute__((packed)); - char irq __attribute__((packed)); - char flags2 __attribute__((packed)); - short timeout __attribute__((packed)); -}; + char flags; + short txlen; + char irq; + char flags2; + short timeout; +} __attribute__((packed)); /* found in the 508's control window at RXBUF_INFO */ struct buf_info { - unsigned short rse_num __attribute__((packed)); - unsigned long rse_base __attribute__((packed)); - unsigned long rse_next __attribute__((packed)); - unsigned long buf_base __attribute__((packed)); - unsigned short reserved __attribute__((packed)); - unsigned long buf_top __attribute__((packed)); -}; + unsigned short rse_num; + unsigned long rse_base; + unsigned long rse_next; + unsigned long buf_base; + unsigned short reserved; + unsigned long buf_top; +} __attribute__((packed)); /* structure pointed to by rse_base in RXBUF_INFO struct */ struct buf_entry { - char opp_flag __attribute__((packed)); - short length __attribute__((packed)); - short dlci __attribute__((packed)); - char flags __attribute__((packed)); - short timestamp __attribute__((packed)); - short reserved[2] __attribute__((packed)); - long buf_addr 
__attribute__((packed)); -}; + char opp_flag; + short length; + short dlci; + char flags; + short timestamp; + short reserved[2]; + long buf_addr; +} __attribute__((packed)); #endif diff --git a/include/linux/wavefront.h b/include/linux/wavefront.h index 61bd0fd35240..51ab3c933acd 100644 --- a/include/linux/wavefront.h +++ b/include/linux/wavefront.h @@ -434,22 +434,22 @@ typedef struct wf_multisample { } wavefront_multisample; typedef struct wf_alias { - INT16 OriginalSample __attribute__ ((packed)); - - struct wf_sample_offset sampleStartOffset __attribute__ ((packed)); - struct wf_sample_offset loopStartOffset __attribute__ ((packed)); - struct wf_sample_offset sampleEndOffset __attribute__ ((packed)); - struct wf_sample_offset loopEndOffset __attribute__ ((packed)); - - INT16 FrequencyBias __attribute__ ((packed)); - - UCHAR8 SampleResolution:2 __attribute__ ((packed)); - UCHAR8 Unused1:1 __attribute__ ((packed)); - UCHAR8 Loop:1 __attribute__ ((packed)); - UCHAR8 Bidirectional:1 __attribute__ ((packed)); - UCHAR8 Unused2:1 __attribute__ ((packed)); - UCHAR8 Reverse:1 __attribute__ ((packed)); - UCHAR8 Unused3:1 __attribute__ ((packed)); + INT16 OriginalSample; + + struct wf_sample_offset sampleStartOffset; + struct wf_sample_offset loopStartOffset; + struct wf_sample_offset sampleEndOffset; + struct wf_sample_offset loopEndOffset; + + INT16 FrequencyBias; + + UCHAR8 SampleResolution:2; + UCHAR8 Unused1:1; + UCHAR8 Loop:1; + UCHAR8 Bidirectional:1; + UCHAR8 Unused2:1; + UCHAR8 Reverse:1; + UCHAR8 Unused3:1; /* This structure is meant to be padded only to 16 bits on their original. Of course, whoever wrote their documentation didn't @@ -460,8 +460,8 @@ typedef struct wf_alias { standard 16->32 bit issues. 
*/ - UCHAR8 sixteen_bit_padding __attribute__ ((packed)); -} wavefront_alias; + UCHAR8 sixteen_bit_padding; +} __attribute__((packed)) wavefront_alias; typedef struct wf_drum { UCHAR8 PatchNumber; diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h index 86e8e86e624a..5a86e78081bf 100644 --- a/include/net/dn_dev.h +++ b/include/net/dn_dev.h @@ -88,8 +88,8 @@ struct dn_dev { struct net_device *dev; struct dn_dev_parms parms; char use_long; - struct timer_list timer; - unsigned long t3; + struct timer_list timer; + unsigned long t3; struct neigh_parms *neigh_parms; unsigned char addr[ETH_ALEN]; struct neighbour *router; /* Default router on circuit */ @@ -99,57 +99,57 @@ struct dn_dev { struct dn_short_packet { - unsigned char msgflg __attribute__((packed)); - unsigned short dstnode __attribute__((packed)); - unsigned short srcnode __attribute__((packed)); - unsigned char forward __attribute__((packed)); -}; + unsigned char msgflg; + unsigned short dstnode; + unsigned short srcnode; + unsigned char forward; +} __attribute__((packed)); struct dn_long_packet { - unsigned char msgflg __attribute__((packed)); - unsigned char d_area __attribute__((packed)); - unsigned char d_subarea __attribute__((packed)); - unsigned char d_id[6] __attribute__((packed)); - unsigned char s_area __attribute__((packed)); - unsigned char s_subarea __attribute__((packed)); - unsigned char s_id[6] __attribute__((packed)); - unsigned char nl2 __attribute__((packed)); - unsigned char visit_ct __attribute__((packed)); - unsigned char s_class __attribute__((packed)); - unsigned char pt __attribute__((packed)); -}; + unsigned char msgflg; + unsigned char d_area; + unsigned char d_subarea; + unsigned char d_id[6]; + unsigned char s_area; + unsigned char s_subarea; + unsigned char s_id[6]; + unsigned char nl2; + unsigned char visit_ct; + unsigned char s_class; + unsigned char pt; +} __attribute__((packed)); /*------------------------- DRP - Routing messages ---------------------*/ struct 
endnode_hello_message { - unsigned char msgflg __attribute__((packed)); - unsigned char tiver[3] __attribute__((packed)); - unsigned char id[6] __attribute__((packed)); - unsigned char iinfo __attribute__((packed)); - unsigned short blksize __attribute__((packed)); - unsigned char area __attribute__((packed)); - unsigned char seed[8] __attribute__((packed)); - unsigned char neighbor[6] __attribute__((packed)); - unsigned short timer __attribute__((packed)); - unsigned char mpd __attribute__((packed)); - unsigned char datalen __attribute__((packed)); - unsigned char data[2] __attribute__((packed)); -}; + unsigned char msgflg; + unsigned char tiver[3]; + unsigned char id[6]; + unsigned char iinfo; + unsigned short blksize; + unsigned char area; + unsigned char seed[8]; + unsigned char neighbor[6]; + unsigned short timer; + unsigned char mpd; + unsigned char datalen; + unsigned char data[2]; +} __attribute__((packed)); struct rtnode_hello_message { - unsigned char msgflg __attribute__((packed)); - unsigned char tiver[3] __attribute__((packed)); - unsigned char id[6] __attribute__((packed)); - unsigned char iinfo __attribute__((packed)); - unsigned short blksize __attribute__((packed)); - unsigned char priority __attribute__((packed)); - unsigned char area __attribute__((packed)); - unsigned short timer __attribute__((packed)); - unsigned char mpd __attribute__((packed)); -}; + unsigned char msgflg; + unsigned char tiver[3]; + unsigned char id[6]; + unsigned char iinfo; + unsigned short blksize; + unsigned char priority; + unsigned char area; + unsigned short timer; + unsigned char mpd; +} __attribute__((packed)); extern void dn_dev_init(void); diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h index 1ba03be0af3a..e6182b86262b 100644 --- a/include/net/dn_nsp.h +++ b/include/net/dn_nsp.h @@ -72,78 +72,78 @@ extern struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int nobl struct nsp_data_seg_msg { - unsigned char msgflg __attribute__((packed)); - 
unsigned short dstaddr __attribute__((packed)); - unsigned short srcaddr __attribute__((packed)); -}; + unsigned char msgflg; + unsigned short dstaddr; + unsigned short srcaddr; +} __attribute__((packed)); struct nsp_data_opt_msg { - unsigned short acknum __attribute__((packed)); - unsigned short segnum __attribute__((packed)); - unsigned short lsflgs __attribute__((packed)); -}; + unsigned short acknum; + unsigned short segnum; + unsigned short lsflgs; +} __attribute__((packed)); struct nsp_data_opt_msg1 { - unsigned short acknum __attribute__((packed)); - unsigned short segnum __attribute__((packed)); -}; + unsigned short acknum; + unsigned short segnum; +} __attribute__((packed)); /* Acknowledgment Message (data/other data) */ struct nsp_data_ack_msg { - unsigned char msgflg __attribute__((packed)); - unsigned short dstaddr __attribute__((packed)); - unsigned short srcaddr __attribute__((packed)); - unsigned short acknum __attribute__((packed)); -}; + unsigned char msgflg; + unsigned short dstaddr; + unsigned short srcaddr; + unsigned short acknum; +} __attribute__((packed)); /* Connect Acknowledgment Message */ struct nsp_conn_ack_msg { - unsigned char msgflg __attribute__((packed)); - unsigned short dstaddr __attribute__((packed)); -}; + unsigned char msgflg; + unsigned short dstaddr; +} __attribute__((packed)); /* Connect Initiate/Retransmit Initiate/Connect Confirm */ struct nsp_conn_init_msg { - unsigned char msgflg __attribute__((packed)); + unsigned char msgflg; #define NSP_CI 0x18 /* Connect Initiate */ #define NSP_RCI 0x68 /* Retrans. Conn Init */ - unsigned short dstaddr __attribute__((packed)); - unsigned short srcaddr __attribute__((packed)); - unsigned char services __attribute__((packed)); + unsigned short dstaddr; + unsigned short srcaddr; + unsigned char services; #define NSP_FC_NONE 0x00 /* Flow Control None */ #define NSP_FC_SRC 0x04 /* Seg Req. Count */ #define NSP_FC_SCMC 0x08 /* Sess. 
Control Mess */ #define NSP_FC_MASK 0x0c /* FC type mask */ - unsigned char info __attribute__((packed)); - unsigned short segsize __attribute__((packed)); -}; + unsigned char info; + unsigned short segsize; +} __attribute__((packed)); /* Disconnect Initiate/Disconnect Confirm */ struct nsp_disconn_init_msg { - unsigned char msgflg __attribute__((packed)); - unsigned short dstaddr __attribute__((packed)); - unsigned short srcaddr __attribute__((packed)); - unsigned short reason __attribute__((packed)); -}; + unsigned char msgflg; + unsigned short dstaddr; + unsigned short srcaddr; + unsigned short reason; +} __attribute__((packed)); struct srcobj_fmt { - char format __attribute__((packed)); - unsigned char task __attribute__((packed)); - unsigned short grpcode __attribute__((packed)); - unsigned short usrcode __attribute__((packed)); - char dlen __attribute__((packed)); -}; + char format; + unsigned char task; + unsigned short grpcode; + unsigned short usrcode; + char dlen; +} __attribute__((packed)); /* * A collection of functions for manipulating the sequence diff --git a/include/sound/wavefront.h b/include/sound/wavefront.h index 9e572aed2435..15d82e594b56 100644 --- a/include/sound/wavefront.h +++ b/include/sound/wavefront.h @@ -454,22 +454,22 @@ typedef struct wf_multisample { } wavefront_multisample; typedef struct wf_alias { - s16 OriginalSample __attribute__ ((packed)); - - struct wf_sample_offset sampleStartOffset __attribute__ ((packed)); - struct wf_sample_offset loopStartOffset __attribute__ ((packed)); - struct wf_sample_offset sampleEndOffset __attribute__ ((packed)); - struct wf_sample_offset loopEndOffset __attribute__ ((packed)); - - s16 FrequencyBias __attribute__ ((packed)); - - u8 SampleResolution:2 __attribute__ ((packed)); - u8 Unused1:1 __attribute__ ((packed)); - u8 Loop:1 __attribute__ ((packed)); - u8 Bidirectional:1 __attribute__ ((packed)); - u8 Unused2:1 __attribute__ ((packed)); - u8 Reverse:1 __attribute__ ((packed)); - u8 Unused3:1 
__attribute__ ((packed)); + s16 OriginalSample; + + struct wf_sample_offset sampleStartOffset; + struct wf_sample_offset loopStartOffset; + struct wf_sample_offset sampleEndOffset; + struct wf_sample_offset loopEndOffset; + + s16 FrequencyBias; + + u8 SampleResolution:2; + u8 Unused1:1; + u8 Loop:1; + u8 Bidirectional:1; + u8 Unused2:1; + u8 Reverse:1; + u8 Unused3:1; /* This structure is meant to be padded only to 16 bits on their original. Of course, whoever wrote their documentation didn't @@ -480,8 +480,8 @@ typedef struct wf_alias { standard 16->32 bit issues. */ - u8 sixteen_bit_padding __attribute__ ((packed)); -} wavefront_alias; + u8 sixteen_bit_padding; +} __attribute__((packed)) wavefront_alias; typedef struct wf_drum { u8 PatchNumber; -- cgit v1.2.3-71-gd317 From ef9ceab28203690a42d7d3915ccf6e208f0762bc Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 8 Jan 2006 01:05:10 -0800 Subject: [PATCH] remove semicolons from save_flags() Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/interrupt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 41f150a3d2dd..e50a95fbeb11 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -79,7 +79,7 @@ static inline void __deprecated save_flags(unsigned long *x) { local_save_flags(*x); } -#define save_flags(x) save_flags(&x); +#define save_flags(x) save_flags(&x) static inline void __deprecated restore_flags(unsigned long x) { local_irq_restore(x); -- cgit v1.2.3-71-gd317 From 730745a5c45093982112ddc94cee6a9973455641 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sat, 7 Jan 2006 11:30:44 +1100 Subject: [PATCH] 1/5 powerpc: Rework PowerMac i2c part 1 This is the first part of a rework of the PowerMac i2c code. It completely reworks the "low_i2c" layer. 
It is now more flexible, supports KeyWest, SMU and PMU i2c busses, and provides functions to match device nodes to i2c busses and adapters. This patch also extends the SMU driver and fixes some bugs in it related to i2c support, and removes the clock spreading hacks from the pmac feature code rather than adapting them to the new API, since they'll be replaced by the platform function code completely in patch 3/5. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/powermac/feature.c | 127 ----- arch/powerpc/platforms/powermac/low_i2c.c | 853 ++++++++++++++++++++++++------ arch/powerpc/platforms/powermac/setup.c | 23 +- arch/powerpc/platforms/powermac/smp.c | 75 +-- drivers/i2c/busses/i2c-pmac-smu.c | 17 +- drivers/macintosh/smu.c | 58 +- drivers/macintosh/via-pmu.c | 264 +-------- include/asm-powerpc/pmac_feature.h | 4 - include/asm-powerpc/pmac_low_i2c.h | 85 ++- include/asm-powerpc/smu.h | 12 +- include/linux/pmu.h | 8 +- 11 files changed, 859 insertions(+), 667 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index d2915d64d45e..b271b11583ac 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -1677,124 +1677,6 @@ intrepid_shutdown(struct macio_chip *macio, int sleep_mode) } -void pmac_tweak_clock_spreading(int enable) -{ - struct macio_chip *macio = &macio_chips[0]; - - /* Hack for doing clock spreading on some machines PowerBooks and - * iBooks. This implements the "platform-do-clockspreading" OF - * property as decoded manually on various models. For safety, we also - * check the product ID in the device-tree in cases we'll whack the i2c - * chip to make reasonably sure we won't set wrong values in there - * - * Of course, ultimately, we have to implement a real parser for - * the platform-do-* stuff...
- */ - - if (macio->type == macio_intrepid) { - struct device_node *clock = - of_find_node_by_path("/uni-n@f8000000/hw-clock"); - if (clock && get_property(clock, "platform-do-clockspreading", - NULL)) { - printk(KERN_INFO "%sabling clock spreading on Intrepid" - " ASIC\n", enable ? "En" : "Dis"); - if (enable) - UN_OUT(UNI_N_CLOCK_SPREADING, 2); - else - UN_OUT(UNI_N_CLOCK_SPREADING, 0); - mdelay(40); - } - of_node_put(clock); - } - - while (machine_is_compatible("PowerBook5,2") || - machine_is_compatible("PowerBook5,3") || - machine_is_compatible("PowerBook6,2") || - machine_is_compatible("PowerBook6,3")) { - struct device_node *ui2c = of_find_node_by_type(NULL, "i2c"); - struct device_node *dt = of_find_node_by_name(NULL, "device-tree"); - u8 buffer[9]; - u32 *productID; - int i, rc, changed = 0; - - if (dt == NULL) - break; - productID = (u32 *)get_property(dt, "pid#", NULL); - if (productID == NULL) - break; - while(ui2c) { - struct device_node *p = of_get_parent(ui2c); - if (p && !strcmp(p->name, "uni-n")) - break; - ui2c = of_find_node_by_type(ui2c, "i2c"); - } - if (ui2c == NULL) - break; - DBG("Trying to bump clock speed for PID: %08x...\n", *productID); - rc = pmac_low_i2c_open(ui2c, 1); - if (rc != 0) - break; - pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_combined); - rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_read, 0x80, buffer, 9); - DBG("read result: %d,", rc); - if (rc != 0) { - pmac_low_i2c_close(ui2c); - break; - } - for (i=0; i<9; i++) - DBG(" %02x", buffer[i]); - DBG("\n"); - - switch(*productID) { - case 0x1182: /* AlBook 12" rev 2 */ - case 0x1183: /* iBook G4 12" */ - buffer[0] = (buffer[0] & 0x8f) | 0x70; - buffer[2] = (buffer[2] & 0x7f) | 0x00; - buffer[5] = (buffer[5] & 0x80) | 0x31; - buffer[6] = (buffer[6] & 0x40) | 0xb0; - buffer[7] = (buffer[7] & 0x00) | (enable ? 
0xc0 : 0xba); - buffer[8] = (buffer[8] & 0x00) | 0x30; - changed = 1; - break; - case 0x3142: /* AlBook 15" (ATI M10) */ - case 0x3143: /* AlBook 17" (ATI M10) */ - buffer[0] = (buffer[0] & 0xaf) | 0x50; - buffer[2] = (buffer[2] & 0x7f) | 0x00; - buffer[5] = (buffer[5] & 0x80) | 0x31; - buffer[6] = (buffer[6] & 0x40) | 0xb0; - buffer[7] = (buffer[7] & 0x00) | (enable ? 0xd0 : 0xc0); - buffer[8] = (buffer[8] & 0x00) | 0x30; - changed = 1; - break; - default: - DBG("i2c-hwclock: Machine model not handled\n"); - break; - } - if (!changed) { - pmac_low_i2c_close(ui2c); - break; - } - printk(KERN_INFO "%sabling clock spreading on i2c clock chip\n", - enable ? "En" : "Dis"); - - pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_stdsub); - rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_write, 0x80, buffer, 9); - DBG("write result: %d,", rc); - pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_combined); - rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_read, 0x80, buffer, 9); - DBG("read result: %d,", rc); - if (rc != 0) { - pmac_low_i2c_close(ui2c); - break; - } - for (i=0; i<9; i++) - DBG(" %02x", buffer[i]); - pmac_low_i2c_close(ui2c); - break; - } -} - - static int core99_sleep(void) { @@ -2980,12 +2862,6 @@ set_initial_features(void) MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N); } - /* Some machine models need the clock chip to be properly setup for - * clock spreading now. 
This should be a platform function but we - * don't do these at the moment - */ - pmac_tweak_clock_spreading(1); - #endif /* CONFIG_POWER4 */ /* On all machines, switch modem & serial ports off */ @@ -3013,9 +2889,6 @@ pmac_feature_init(void) return; } - /* Setup low-level i2c stuffs */ - pmac_init_low_i2c(); - /* Probe machine type */ if (probe_motherboard()) printk(KERN_WARNING "Unknown PowerMac !\n"); diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index 606e0ed13731..f31d6a678b9e 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -1,22 +1,34 @@ /* - * arch/ppc/platforms/pmac_low_i2c.c + * arch/powerpc/platforms/powermac/low_i2c.c * - * Copyright (C) 2003 Ben. Herrenschmidt (benh@kernel.crashing.org) + * Copyright (C) 2003-2005 Ben. Herrenschmidt (benh@kernel.crashing.org) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * This file contains some low-level i2c access routines that - * need to be used by various bits of the PowerMac platform code - * at times where the real asynchronous & interrupt driven driver - * cannot be used. The API borrows some semantics from the darwin - * driver in order to ease the implementation of the platform - * properties parser + * The linux i2c layer isn't completely suitable for our needs for various + * reasons ranging from too late initialisation to semantics not perfectly + * matching some requirements of the apple platform functions etc... + * + * This file thus provides a simple low level unified i2c interface for + * powermac that covers the various types of i2c busses used in Apple machines. 
+ * For now, keywest, PMU and SMU, though we could add Cuda, or other bit + * banging busses found on older chipstes in earlier machines if we ever need + * one of them. + * + * The drivers in this file are synchronous/blocking. In addition, the + * keywest one is fairly slow due to the use of msleep instead of interrupts + * as the interrupt is currently used by i2c-keywest. In the long run, we + * might want to get rid of those high-level interfaces to linux i2c layer + * either completely (converting all drivers) or replacing them all with a + * single stub driver on top of this one. Once done, the interrupt will be + * available for our use. */ #undef DEBUG +#undef DEBUG_LOW #include #include @@ -25,15 +37,16 @@ #include #include #include +#include +#include #include #include #include #include #include +#include #include -#define MAX_LOW_I2C_HOST 4 - #ifdef DEBUG #define DBG(x...) do {\ printk(KERN_DEBUG "low_i2c:" x); \ @@ -42,49 +55,54 @@ #define DBG(x...) #endif -struct low_i2c_host; - -typedef int (*low_i2c_func_t)(struct low_i2c_host *host, u8 addr, u8 sub, u8 *data, int len); - -struct low_i2c_host -{ - struct device_node *np; /* OF device node */ - struct semaphore mutex; /* Access mutex for use by i2c-keywest */ - low_i2c_func_t func; /* Access function */ - unsigned int is_open : 1; /* Poor man's access control */ - int mode; /* Current mode */ - int channel; /* Current channel */ - int num_channels; /* Number of channels */ - void __iomem *base; /* For keywest-i2c, base address */ - int bsteps; /* And register stepping */ - int speed; /* And speed */ -}; - -static struct low_i2c_host low_i2c_hosts[MAX_LOW_I2C_HOST]; +#ifdef DEBUG_LOW +#define DBG_LOW(x...) do {\ + printk(KERN_DEBUG "low_i2c:" x); \ + } while(0) +#else +#define DBG_LOW(x...) +#endif -/* No locking is necessary on allocation, we are running way before - * anything can race with us +/* + * A bus structure. Each bus in the system has such a structure associated. 
*/ -static struct low_i2c_host *find_low_i2c_host(struct device_node *np) +struct pmac_i2c_bus { - int i; + struct list_head link; + struct device_node *controller; + struct device_node *busnode; + int type; + int flags; + struct i2c_adapter *adapter; + void *hostdata; + int channel; /* some hosts have multiple */ + int mode; /* current mode */ + struct semaphore sem; + int opened; + int polled; /* open mode */ + + /* ops */ + int (*open)(struct pmac_i2c_bus *bus); + void (*close)(struct pmac_i2c_bus *bus); + int (*xfer)(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, + u32 subaddr, u8 *data, int len); +}; - for (i = 0; i < MAX_LOW_I2C_HOST; i++) - if (low_i2c_hosts[i].np == np) - return &low_i2c_hosts[i]; - return NULL; -} +static LIST_HEAD(pmac_i2c_busses); /* - * - * i2c-keywest implementation (UniNorth, U2, U3, Keylargo's) - * + * Keywest implementation */ -/* - * Keywest i2c definitions borrowed from drivers/i2c/i2c-keywest.h, - * should be moved somewhere in include/asm-ppc/ - */ +struct pmac_i2c_host_kw +{ + struct semaphore mutex; /* Access mutex for use by + * i2c-keywest */ + void __iomem *base; /* register base address */ + int bsteps; /* register stepping */ + int speed; /* speed */ +}; + /* Register indices */ typedef enum { reg_mode = 0, @@ -153,52 +171,56 @@ static const char *__kw_state_names[] = { "state_dead" }; -static inline u8 __kw_read_reg(struct low_i2c_host *host, reg_t reg) +static inline u8 __kw_read_reg(struct pmac_i2c_bus *bus, reg_t reg) { + struct pmac_i2c_host_kw *host = bus->hostdata; return readb(host->base + (((unsigned int)reg) << host->bsteps)); } -static inline void __kw_write_reg(struct low_i2c_host *host, reg_t reg, u8 val) +static inline void __kw_write_reg(struct pmac_i2c_bus *bus, reg_t reg, u8 val) { + struct pmac_i2c_host_kw *host = bus->hostdata; writeb(val, host->base + (((unsigned)reg) << host->bsteps)); - (void)__kw_read_reg(host, reg_subaddr); + (void)__kw_read_reg(bus, reg_subaddr); } -#define kw_write_reg(reg, 
val) __kw_write_reg(host, reg, val) -#define kw_read_reg(reg) __kw_read_reg(host, reg) +#define kw_write_reg(reg, val) __kw_write_reg(bus, reg, val) +#define kw_read_reg(reg) __kw_read_reg(bus, reg) - -/* Don't schedule, the g5 fan controller is too - * timing sensitive - */ -static u8 kw_wait_interrupt(struct low_i2c_host* host) +static u8 kw_i2c_wait_interrupt(struct pmac_i2c_bus* bus) { int i, j; u8 isr; - for (i = 0; i < 100000; i++) { + for (i = 0; i < 1000; i++) { isr = kw_read_reg(reg_isr) & KW_I2C_IRQ_MASK; if (isr != 0) return isr; /* This code is used with the timebase frozen, we cannot rely - * on udelay ! For now, just use a bogus loop + * on udelay nor schedule when in polled mode ! + * For now, just use a bogus loop.... */ - for (j = 1; j < 10000; j++) - mb(); + if (bus->polled) { + for (j = 1; j < 1000000; j++) + mb(); + } else + msleep(1); } return isr; } -static int kw_handle_interrupt(struct low_i2c_host *host, int state, int rw, int *rc, u8 **data, int *len, u8 isr) +static int kw_i2c_handle_interrupt(struct pmac_i2c_bus *bus, int state, int rw, + int *rc, u8 **data, int *len, u8 isr) { u8 ack; - DBG("kw_handle_interrupt(%s, isr: %x)\n", __kw_state_names[state], isr); + DBG_LOW("kw_handle_interrupt(%s, isr: %x)\n", + __kw_state_names[state], isr); if (isr == 0) { if (state != state_stop) { - DBG("KW: Timeout !\n"); + DBG_LOW("KW: Timeout !\n"); *rc = -EIO; goto stop; } @@ -220,15 +242,16 @@ static int kw_handle_interrupt(struct low_i2c_host *host, int state, int rw, int *rc = -EIO; goto stop; } - if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { + if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { *rc = -ENODEV; - DBG("KW: NAK on address\n"); + DBG_LOW("KW: NAK on address\n"); return state_stop; } else { if (rw) { state = state_read; if (*len > 1) - kw_write_reg(reg_control, KW_I2C_CTL_AAK); + kw_write_reg(reg_control, + KW_I2C_CTL_AAK); } else { state = state_write; kw_write_reg(reg_data, **data); @@ -250,7 +273,7 @@ static int kw_handle_interrupt(struct 
low_i2c_host *host, int state, int rw, int } else if (state == state_write) { ack = kw_read_reg(reg_status); if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { - DBG("KW: nack on data write\n"); + DBG_LOW("KW: nack on data write\n"); *rc = -EIO; goto stop; } else if (*len) { @@ -291,35 +314,57 @@ static int kw_handle_interrupt(struct low_i2c_host *host, int state, int rw, int return state_stop; } -static int keywest_low_i2c_func(struct low_i2c_host *host, u8 addr, u8 subaddr, u8 *data, int len) +static int kw_i2c_open(struct pmac_i2c_bus *bus) { + struct pmac_i2c_host_kw *host = bus->hostdata; + down(&host->mutex); + return 0; +} + +static void kw_i2c_close(struct pmac_i2c_bus *bus) +{ + struct pmac_i2c_host_kw *host = bus->hostdata; + up(&host->mutex); +} + +static int kw_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, + u32 subaddr, u8 *data, int len) +{ + struct pmac_i2c_host_kw *host = bus->hostdata; u8 mode_reg = host->speed; int state = state_addr; int rc = 0; /* Setup mode & subaddress if any */ - switch(host->mode) { - case pmac_low_i2c_mode_dumb: - printk(KERN_ERR "low_i2c: Dumb mode not supported !\n"); + switch(bus->mode) { + case pmac_i2c_mode_dumb: return -EINVAL; - case pmac_low_i2c_mode_std: + case pmac_i2c_mode_std: mode_reg |= KW_I2C_MODE_STANDARD; + if (subsize != 0) + return -EINVAL; break; - case pmac_low_i2c_mode_stdsub: + case pmac_i2c_mode_stdsub: mode_reg |= KW_I2C_MODE_STANDARDSUB; + if (subsize != 1) + return -EINVAL; break; - case pmac_low_i2c_mode_combined: + case pmac_i2c_mode_combined: mode_reg |= KW_I2C_MODE_COMBINED; + if (subsize != 1) + return -EINVAL; break; } /* Setup channel & clear pending irqs */ kw_write_reg(reg_isr, kw_read_reg(reg_isr)); - kw_write_reg(reg_mode, mode_reg | (host->channel << 4)); + kw_write_reg(reg_mode, mode_reg | (bus->channel << 4)); kw_write_reg(reg_status, 0); - /* Set up address and r/w bit */ - kw_write_reg(reg_addr, addr); + /* Set up address and r/w bit, strip possible stale bus number from + * 
address top bits + */ + kw_write_reg(reg_addr, addrdir & 0xff); /* Set up the sub address */ if ((mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_STANDARDSUB @@ -330,27 +375,27 @@ static int keywest_low_i2c_func(struct low_i2c_host *host, u8 addr, u8 subaddr, kw_write_reg(reg_ier, 0 /*KW_I2C_IRQ_MASK*/); kw_write_reg(reg_control, KW_I2C_CTL_XADDR); - /* State machine, to turn into an interrupt handler */ + /* State machine, to turn into an interrupt handler in the future */ while(state != state_idle) { - u8 isr = kw_wait_interrupt(host); - state = kw_handle_interrupt(host, state, addr & 1, &rc, &data, &len, isr); + u8 isr = kw_i2c_wait_interrupt(bus); + state = kw_i2c_handle_interrupt(bus, state, addrdir & 1, &rc, + &data, &len, isr); } return rc; } -static void keywest_low_i2c_add(struct device_node *np) +static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np) { - struct low_i2c_host *host = find_low_i2c_host(NULL); + struct pmac_i2c_host_kw *host; u32 *psteps, *prate, *addrp, steps; - struct device_node *parent; + host = kzalloc(sizeof(struct pmac_i2c_host_kw), GFP_KERNEL); if (host == NULL) { printk(KERN_ERR "low_i2c: Can't allocate host for %s\n", np->full_name); - return; + return NULL; } - memset(host, 0, sizeof(*host)); /* Apple is kind enough to provide a valid AAPL,address property * on all i2c keywest nodes so far ... we would have to fallback @@ -360,18 +405,14 @@ static void keywest_low_i2c_add(struct device_node *np) if (addrp == NULL) { printk(KERN_ERR "low_i2c: Can't find address for %s\n", np->full_name); - return; + kfree(host); + return NULL; } init_MUTEX(&host->mutex); - host->np = of_node_get(np); psteps = (u32 *)get_property(np, "AAPL,address-step", NULL); steps = psteps ? 
(*psteps) : 0x10; for (host->bsteps = 0; (steps & 0x01) == 0; host->bsteps++) steps >>= 1; - parent = of_get_parent(np); - host->num_channels = 1; - if (parent && parent->name[0] == 'u') - host->num_channels = 2; /* Select interface rate */ host->speed = KW_I2C_MODE_25KHZ; prate = (u32 *)get_property(np, "AAPL,i2c-rate", NULL); @@ -387,148 +428,620 @@ static void keywest_low_i2c_add(struct device_node *np) break; } - printk(KERN_INFO "low_i2c: Bus %s found at 0x%08x, %d channels," - " speed = %d KHz\n", - np->full_name, *addrp, host->num_channels, prate ? *prate : 25); - - host->mode = pmac_low_i2c_mode_std; + printk(KERN_INFO "KeyWest i2c @0x%08x %s\n", *addrp, np->full_name); host->base = ioremap((*addrp), 0x1000); - host->func = keywest_low_i2c_func; + + return host; } + +static void __init kw_i2c_add(struct pmac_i2c_host_kw *host, + struct device_node *controller, + struct device_node *busnode, + int channel) +{ + struct pmac_i2c_bus *bus; + + bus = kzalloc(sizeof(struct pmac_i2c_bus), GFP_KERNEL); + if (bus == NULL) + return; + + bus->controller = of_node_get(controller); + bus->busnode = of_node_get(busnode); + bus->type = pmac_i2c_bus_keywest; + bus->hostdata = host; + bus->channel = channel; + bus->mode = pmac_i2c_mode_std; + bus->open = kw_i2c_open; + bus->close = kw_i2c_close; + bus->xfer = kw_i2c_xfer; + init_MUTEX(&bus->sem); + if (controller == busnode) + bus->flags = pmac_i2c_multibus; + list_add(&bus->link, &pmac_i2c_busses); + + printk(KERN_INFO " channel %d bus %s\n", channel, + (controller == busnode) ? 
"" : busnode->full_name); +} + +static void __init kw_i2c_probe(void) +{ + struct device_node *np, *child, *parent; + + /* Probe keywest-i2c busses */ + for (np = NULL; + (np = of_find_compatible_node(np, "i2c","keywest-i2c")) != NULL;){ + struct pmac_i2c_host_kw *host; + int multibus, chans, i; + + /* Found one, init a host structure */ + host = kw_i2c_host_init(np); + if (host == NULL) + continue; + + /* Now check if we have a multibus setup (old style) or if we + * have proper bus nodes. Note that the "new" way (proper bus + * nodes) might cause us to not create some busses that are + * kept hidden in the device-tree. In the future, we might + * want to work around that by creating busses without a node + * but not for now + */ + child = of_get_next_child(np, NULL); + multibus = !child || strcmp(child->name, "i2c-bus"); + of_node_put(child); + + /* For a multibus setup, we get the bus count based on the + * parent type + */ + if (multibus) { + parent = of_get_parent(np); + if (parent == NULL) + continue; + chans = parent->name[0] == 'u' ? 
2 : 1; + for (i = 0; i < chans; i++) + kw_i2c_add(host, np, np, i); + } else { + for (child = NULL; + (child = of_get_next_child(np, child)) != NULL;) { + u32 *reg = + (u32 *)get_property(child, "reg", NULL); + if (reg == NULL) + continue; + kw_i2c_add(host, np, child, *reg); + } + } + } +} + + /* * * PMU implementation * */ - #ifdef CONFIG_ADB_PMU -static int pmu_low_i2c_func(struct low_i2c_host *host, u8 addr, u8 sub, u8 *data, int len) +/* + * i2c command block to the PMU + */ +struct pmu_i2c_hdr { + u8 bus; + u8 mode; + u8 bus2; + u8 address; + u8 sub_addr; + u8 comb_addr; + u8 count; + u8 data[]; +}; + +static void pmu_i2c_complete(struct adb_request *req) { - // TODO - return -ENODEV; + complete(req->arg); } -static void pmu_low_i2c_add(struct device_node *np) +static int pmu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, + u32 subaddr, u8 *data, int len) { - struct low_i2c_host *host = find_low_i2c_host(NULL); + struct adb_request *req = bus->hostdata; + struct pmu_i2c_hdr *hdr = (struct pmu_i2c_hdr *)&req->data[1]; + struct completion comp; + int read = addrdir & 1; + int retry; + int rc = 0; - if (host == NULL) { - printk(KERN_ERR "low_i2c: Can't allocate host for %s\n", - np->full_name); - return; + /* For now, limit ourselves to 16 bytes transfers */ + if (len > 16) + return -EINVAL; + + init_completion(&comp); + + for (retry = 0; retry < 16; retry++) { + memset(req, 0, sizeof(struct adb_request)); + hdr->bus = bus->channel; + hdr->count = len; + + switch(bus->mode) { + case pmac_i2c_mode_std: + if (subsize != 0) + return -EINVAL; + hdr->address = addrdir; + hdr->mode = PMU_I2C_MODE_SIMPLE; + break; + case pmac_i2c_mode_stdsub: + case pmac_i2c_mode_combined: + if (subsize != 1) + return -EINVAL; + hdr->address = addrdir & 0xfe; + hdr->comb_addr = addrdir; + hdr->sub_addr = subaddr; + if (bus->mode == pmac_i2c_mode_stdsub) + hdr->mode = PMU_I2C_MODE_STDSUB; + else + hdr->mode = PMU_I2C_MODE_COMBINED; + break; + default: + return -EINVAL; + } 
+ + INIT_COMPLETION(comp); + req->data[0] = PMU_I2C_CMD; + req->reply[0] = 0xff; + req->nbytes = sizeof(struct pmu_i2c_hdr) + 1; + req->done = pmu_i2c_complete; + req->arg = &comp; + if (!read) { + memcpy(hdr->data, data, len); + req->nbytes += len; + } + rc = pmu_queue_request(req); + if (rc) + return rc; + wait_for_completion(&comp); + if (req->reply[0] == PMU_I2C_STATUS_OK) + break; + msleep(15); } - memset(host, 0, sizeof(*host)); + if (req->reply[0] != PMU_I2C_STATUS_OK) + return -EIO; - init_MUTEX(&host->mutex); - host->np = of_node_get(np); - host->num_channels = 3; - host->mode = pmac_low_i2c_mode_std; - host->func = pmu_low_i2c_func; + for (retry = 0; retry < 16; retry++) { + memset(req, 0, sizeof(struct adb_request)); + + /* I know that looks like a lot, slow as hell, but darwin + * does it so let's be on the safe side for now + */ + msleep(15); + + hdr->bus = PMU_I2C_BUS_STATUS; + + INIT_COMPLETION(comp); + req->data[0] = PMU_I2C_CMD; + req->reply[0] = 0xff; + req->nbytes = 2; + req->done = pmu_i2c_complete; + req->arg = &comp; + rc = pmu_queue_request(req); + if (rc) + return rc; + wait_for_completion(&comp); + + if (req->reply[0] == PMU_I2C_STATUS_OK && !read) + return 0; + if (req->reply[0] == PMU_I2C_STATUS_DATAREAD && read) { + int rlen = req->reply_len - 1; + + if (rlen != len) { + printk(KERN_WARNING "low_i2c: PMU returned %d" + " bytes, expected %d !\n", rlen, len); + return -EIO; + } + memcpy(data, &req->reply[1], len); + return 0; + } + } + return -EIO; +} + +static void __init pmu_i2c_probe(void) +{ + struct pmac_i2c_bus *bus; + struct device_node *busnode; + int channel, sz; + + if (!pmu_present()) + return; + + /* There might or might not be a "pmu-i2c" node, we use that + * or via-pmu itself, whatever we find.
I haven't seen a machine + * with separate bus nodes, so we assume a multibus setup + */ + busnode = of_find_node_by_name(NULL, "pmu-i2c"); + if (busnode == NULL) + busnode = of_find_node_by_name(NULL, "via-pmu"); + if (busnode == NULL) + return; + + printk(KERN_INFO "PMU i2c %s\n", busnode->full_name); + + /* + * We add bus 1 and 2 only for now, bus 0 is "special" + */ + for (channel = 1; channel <= 2; channel++) { + sz = sizeof(struct pmac_i2c_bus) + sizeof(struct adb_request); + bus = kzalloc(sz, GFP_KERNEL); + if (bus == NULL) + return; + + bus->controller = busnode; + bus->busnode = busnode; + bus->type = pmac_i2c_bus_pmu; + bus->channel = channel; + bus->mode = pmac_i2c_mode_std; + bus->hostdata = bus + 1; + bus->xfer = pmu_i2c_xfer; + init_MUTEX(&bus->sem); + bus->flags = pmac_i2c_multibus; + list_add(&bus->link, &pmac_i2c_busses); + + printk(KERN_INFO " channel %d bus \n", channel); + } } #endif /* CONFIG_ADB_PMU */ -void __init pmac_init_low_i2c(void) + +/* + * + * SMU implementation + * + */ + +#ifdef CONFIG_PMAC_SMU + +static void smu_i2c_complete(struct smu_i2c_cmd *cmd, void *misc) { - struct device_node *np; + complete(misc); +} - /* Probe keywest-i2c busses */ - np = of_find_compatible_node(NULL, "i2c", "keywest-i2c"); - while(np) { - keywest_low_i2c_add(np); - np = of_find_compatible_node(np, "i2c", "keywest-i2c"); +static int smu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, + u32 subaddr, u8 *data, int len) +{ + struct smu_i2c_cmd *cmd = bus->hostdata; + struct completion comp; + int read = addrdir & 1; + int rc = 0; + + memset(cmd, 0, sizeof(struct smu_i2c_cmd)); + cmd->info.bus = bus->channel; + cmd->info.devaddr = addrdir; + cmd->info.datalen = len; + + switch(bus->mode) { + case pmac_i2c_mode_std: + if (subsize != 0) + return -EINVAL; + cmd->info.type = SMU_I2C_TRANSFER_SIMPLE; + break; + case pmac_i2c_mode_stdsub: + case pmac_i2c_mode_combined: + if (subsize > 3 || subsize < 1) + return -EINVAL; + cmd->info.sublen = subsize; + 
/* that's big-endian only but heh ! */ + memcpy(&cmd->info.subaddr, ((char *)&subaddr) + (4 - subsize), + subsize); + if (bus->mode == pmac_i2c_mode_stdsub) + cmd->info.type = SMU_I2C_TRANSFER_STDSUB; + else + cmd->info.type = SMU_I2C_TRANSFER_COMBINED; + break; + default: + return -EINVAL; } + if (!read) + memcpy(cmd->info.data, data, len); + + init_completion(&comp); + cmd->done = smu_i2c_complete; + cmd->misc = &comp; + rc = smu_queue_i2c(cmd); + if (rc < 0) + return rc; + wait_for_completion(&comp); + rc = cmd->status; + + if (read) + memcpy(data, cmd->info.data, len); + return rc < 0 ? rc : 0; +} -#ifdef CONFIG_ADB_PMU - /* Probe PMU busses */ - np = of_find_node_by_name(NULL, "via-pmu"); - if (np) - pmu_low_i2c_add(np); -#endif /* CONFIG_ADB_PMU */ +static void __init smu_i2c_probe(void) +{ + struct device_node *controller, *busnode; + struct pmac_i2c_bus *bus; + u32 *reg; + int sz; + + if (!smu_present()) + return; + + controller = of_find_node_by_name(NULL, "smu_i2c_control"); + if (controller == NULL) + controller = of_find_node_by_name(NULL, "smu"); + if (controller == NULL) + return; + + printk(KERN_INFO "SMU i2c %s\n", controller->full_name); + + /* Look for childs, note that they might not be of the right + * type as older device trees mix i2c busses and other thigns + * at the same level + */ + for (busnode = NULL; + (busnode = of_get_next_child(controller, busnode)) != NULL;) { + if (strcmp(busnode->type, "i2c") && + strcmp(busnode->type, "i2c-bus")) + continue; + reg = (u32 *)get_property(busnode, "reg", NULL); + if (reg == NULL) + continue; + + sz = sizeof(struct pmac_i2c_bus) + sizeof(struct smu_i2c_cmd); + bus = kzalloc(sz, GFP_KERNEL); + if (bus == NULL) + return; + + bus->controller = controller; + bus->busnode = of_node_get(busnode); + bus->type = pmac_i2c_bus_smu; + bus->channel = *reg; + bus->mode = pmac_i2c_mode_std; + bus->hostdata = bus + 1; + bus->xfer = smu_i2c_xfer; + init_MUTEX(&bus->sem); + bus->flags = 0; + list_add(&bus->link,
&pmac_i2c_busses); + + printk(KERN_INFO " channel %x bus %s\n", + bus->channel, busnode->full_name); + } +} + +#endif /* CONFIG_PMAC_SMU */ + +/* + * + * Core code + * + */ + + +struct pmac_i2c_bus *pmac_i2c_find_bus(struct device_node *node) +{ + struct device_node *p = of_node_get(node); + struct device_node *prev = NULL; + struct pmac_i2c_bus *bus; + + while(p) { + list_for_each_entry(bus, &pmac_i2c_busses, link) { + if (p == bus->busnode) { + if (prev && bus->flags & pmac_i2c_multibus) { + u32 *reg; + reg = (u32 *)get_property(prev, "reg", + NULL); + if (!reg) + continue; + if (((*reg) >> 8) != bus->channel) + continue; + } + of_node_put(p); + of_node_put(prev); + return bus; + } + } + of_node_put(prev); + prev = p; + p = of_get_parent(p); + } + return NULL; +} +EXPORT_SYMBOL_GPL(pmac_i2c_find_bus); + +u8 pmac_i2c_get_dev_addr(struct device_node *device) +{ + u32 *reg = (u32 *)get_property(device, "reg", NULL); + + if (reg == NULL) + return 0; + + return (*reg) & 0xff; +} +EXPORT_SYMBOL_GPL(pmac_i2c_get_dev_addr); + +struct device_node *pmac_i2c_get_controller(struct pmac_i2c_bus *bus) +{ + return bus->controller; +} +EXPORT_SYMBOL_GPL(pmac_i2c_get_controller); + +struct device_node *pmac_i2c_get_bus_node(struct pmac_i2c_bus *bus) +{ + return bus->busnode; +} +EXPORT_SYMBOL_GPL(pmac_i2c_get_bus_node); + +int pmac_i2c_get_type(struct pmac_i2c_bus *bus) +{ + return bus->type; +} +EXPORT_SYMBOL_GPL(pmac_i2c_get_type); + +int pmac_i2c_get_flags(struct pmac_i2c_bus *bus) +{ + return bus->flags; +} +EXPORT_SYMBOL_GPL(pmac_i2c_get_flags); - /* TODO: Add CUDA support as well */ +void pmac_i2c_attach_adapter(struct pmac_i2c_bus *bus, + struct i2c_adapter *adapter) +{ + WARN_ON(bus->adapter != NULL); + bus->adapter = adapter; } +EXPORT_SYMBOL_GPL(pmac_i2c_attach_adapter); + +void pmac_i2c_detach_adapter(struct pmac_i2c_bus *bus, + struct i2c_adapter *adapter) +{ + WARN_ON(bus->adapter != adapter); + bus->adapter = NULL; +} +EXPORT_SYMBOL_GPL(pmac_i2c_detach_adapter); + 
+struct i2c_adapter *pmac_i2c_get_adapter(struct pmac_i2c_bus *bus) +{ + return bus->adapter; +} +EXPORT_SYMBOL_GPL(pmac_i2c_get_adapter); + +extern int pmac_i2c_match_adapter(struct device_node *dev, + struct i2c_adapter *adapter) +{ + struct pmac_i2c_bus *bus = pmac_i2c_find_bus(dev); + + if (bus == NULL) + return 0; + return (bus->adapter == adapter); +} +EXPORT_SYMBOL_GPL(pmac_i2c_match_adapter); int pmac_low_i2c_lock(struct device_node *np) { - struct low_i2c_host *host = find_low_i2c_host(np); + struct pmac_i2c_bus *bus, *found = NULL; - if (!host) + list_for_each_entry(bus, &pmac_i2c_busses, link) { + if (np == bus->controller) { + found = bus; + break; + } + } + if (!found) return -ENODEV; - down(&host->mutex); - return 0; + return pmac_i2c_open(bus, 0); } -EXPORT_SYMBOL(pmac_low_i2c_lock); +EXPORT_SYMBOL_GPL(pmac_low_i2c_lock); int pmac_low_i2c_unlock(struct device_node *np) { - struct low_i2c_host *host = find_low_i2c_host(np); + struct pmac_i2c_bus *bus, *found = NULL; - if (!host) + list_for_each_entry(bus, &pmac_i2c_busses, link) { + if (np == bus->controller) { + found = bus; + break; + } + } + if (!found) return -ENODEV; - up(&host->mutex); + pmac_i2c_close(bus); return 0; } -EXPORT_SYMBOL(pmac_low_i2c_unlock); +EXPORT_SYMBOL_GPL(pmac_low_i2c_unlock); -int pmac_low_i2c_open(struct device_node *np, int channel) +int pmac_i2c_open(struct pmac_i2c_bus *bus, int polled) { - struct low_i2c_host *host = find_low_i2c_host(np); + int rc; + + down(&bus->sem); + bus->polled = polled; + bus->opened = 1; + bus->mode = pmac_i2c_mode_std; + if (bus->open && (rc = bus->open(bus)) != 0) { + bus->opened = 0; + up(&bus->sem); + return rc; + } + return 0; +} +EXPORT_SYMBOL_GPL(pmac_i2c_open); - if (!host) - return -ENODEV; +void pmac_i2c_close(struct pmac_i2c_bus *bus) +{ + WARN_ON(!bus->opened); + if (bus->close) + bus->close(bus); + bus->opened = 0; + up(&bus->sem); +} +EXPORT_SYMBOL_GPL(pmac_i2c_close); - if (channel >= host->num_channels) - return -EINVAL; +int 
pmac_i2c_setmode(struct pmac_i2c_bus *bus, int mode) +{ + WARN_ON(!bus->opened); - down(&host->mutex); - host->is_open = 1; - host->channel = channel; + /* Report me if you see the error below as there might be a new + * "combined4" mode that I need to implement for the SMU bus + */ + if (mode < pmac_i2c_mode_dumb || mode > pmac_i2c_mode_combined) { + printk(KERN_ERR "low_i2c: Invalid mode %d requested on" + " bus %s !\n", mode, bus->busnode->full_name); + return -EINVAL; + } + bus->mode = mode; return 0; } -EXPORT_SYMBOL(pmac_low_i2c_open); +EXPORT_SYMBOL_GPL(pmac_i2c_setmode); -int pmac_low_i2c_close(struct device_node *np) +int pmac_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, + u32 subaddr, u8 *data, int len) { - struct low_i2c_host *host = find_low_i2c_host(np); + int rc; - if (!host) - return -ENODEV; + WARN_ON(!bus->opened); - host->is_open = 0; - up(&host->mutex); + DBG("xfer() chan=%d, addrdir=0x%x, mode=%d, subsize=%d, subaddr=0x%x," + " %d bytes, bus %s\n", bus->channel, addrdir, bus->mode, subsize, + subaddr, len, bus->busnode->full_name); - return 0; + rc = bus->xfer(bus, addrdir, subsize, subaddr, data, len); + +#ifdef DEBUG + if (rc) + DBG("xfer error %d\n", rc); +#endif + return rc; } -EXPORT_SYMBOL(pmac_low_i2c_close); +EXPORT_SYMBOL_GPL(pmac_i2c_xfer); -int pmac_low_i2c_setmode(struct device_node *np, int mode) +/* + * Initialize us: probe all i2c busses on the machine and instantiate + * busses. 
+ */ +/* This is non-static as it might be called early by smp code */ +int __init pmac_i2c_init(void) { - struct low_i2c_host *host = find_low_i2c_host(np); + static int i2c_inited; - if (!host) - return -ENODEV; - WARN_ON(!host->is_open); - host->mode = mode; + if (i2c_inited) + return 0; + i2c_inited = 1; - return 0; -} -EXPORT_SYMBOL(pmac_low_i2c_setmode); + /* Probe keywest-i2c busses */ + kw_i2c_probe(); -int pmac_low_i2c_xfer(struct device_node *np, u8 addrdir, u8 subaddr, u8 *data, int len) -{ - struct low_i2c_host *host = find_low_i2c_host(np); +#ifdef CONFIG_ADB_PMU + pmu_i2c_probe(); +#endif - if (!host) - return -ENODEV; - WARN_ON(!host->is_open); +#ifdef CONFIG_PMAC_SMU + smu_i2c_probe(); +#endif - return host->func(host, addrdir, subaddr, data, len); + return 0; } -EXPORT_SYMBOL(pmac_low_i2c_xfer); +arch_initcall(pmac_i2c_init); diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index e5a5bdbdda7a..dc5cdc1484e8 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -652,27 +652,22 @@ static int __init pmac_declare_of_platform_devices(void) { struct device_node *np, *npp; - np = find_devices("uni-n"); - if (np) { - for (np = np->child; np != NULL; np = np->sibling) - if (strncmp(np->name, "i2c", 3) == 0) { - of_platform_device_create(np, "uni-n-i2c", - NULL); - break; - } - } - np = find_devices("valkyrie"); + np = of_find_node_by_name(NULL, "valkyrie"); if (np) of_platform_device_create(np, "valkyrie", NULL); - np = find_devices("platinum"); + np = of_find_node_by_name(NULL, "platinum"); if (np) of_platform_device_create(np, "platinum", NULL); - - npp = of_find_node_by_name(NULL, "u3"); + npp = of_find_node_by_name(NULL, "uni-n"); + if (npp == NULL) + npp = of_find_node_by_name(NULL, "u3"); + if (npp == NULL) + npp = of_find_node_by_name(NULL, "u4"); if (npp) { for (np = NULL; (np = of_get_next_child(npp, np)) != NULL;) { if (strncmp(np->name, "i2c", 3) == 0) { 
- of_platform_device_create(np, "u3-i2c", NULL); + of_platform_device_create(np, "uni-n-i2c", + NULL); of_node_put(np); break; } diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index df01bb8feb16..ab72ba86be1e 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -482,7 +482,7 @@ static void __devinit smp_core99_take_timebase(void) /* * G5s enable/disable the timebase via an i2c-connected clock chip. */ -static struct device_node *pmac_tb_clock_chip_host; +static struct pmac_i2c_bus *pmac_tb_clock_chip_host; static u8 pmac_tb_pulsar_addr; static void smp_core99_cypress_tb_freeze(int freeze) @@ -493,20 +493,20 @@ static void smp_core99_cypress_tb_freeze(int freeze) /* Strangely, the device-tree says address is 0xd2, but darwin * accesses 0xd0 ... */ - pmac_low_i2c_setmode(pmac_tb_clock_chip_host, - pmac_low_i2c_mode_combined); - rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host, - 0xd0 | pmac_low_i2c_read, - 0x81, &data, 1); + pmac_i2c_setmode(pmac_tb_clock_chip_host, + pmac_i2c_mode_combined); + rc = pmac_i2c_xfer(pmac_tb_clock_chip_host, + 0xd0 | pmac_i2c_read, + 1, 0x81, &data, 1); if (rc != 0) goto bail; data = (data & 0xf3) | (freeze ? 
0x00 : 0x0c); - pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_stdsub); - rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host, - 0xd0 | pmac_low_i2c_write, - 0x81, &data, 1); + pmac_i2c_setmode(pmac_tb_clock_chip_host, pmac_i2c_mode_stdsub); + rc = pmac_i2c_xfer(pmac_tb_clock_chip_host, + 0xd0 | pmac_i2c_write, + 1, 0x81, &data, 1); bail: if (rc != 0) { @@ -522,20 +522,20 @@ static void smp_core99_pulsar_tb_freeze(int freeze) u8 data; int rc; - pmac_low_i2c_setmode(pmac_tb_clock_chip_host, - pmac_low_i2c_mode_combined); - rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host, - pmac_tb_pulsar_addr | pmac_low_i2c_read, - 0x2e, &data, 1); + pmac_i2c_setmode(pmac_tb_clock_chip_host, + pmac_i2c_mode_combined); + rc = pmac_i2c_xfer(pmac_tb_clock_chip_host, + pmac_tb_pulsar_addr | pmac_i2c_read, + 1, 0x2e, &data, 1); if (rc != 0) goto bail; data = (data & 0x88) | (freeze ? 0x11 : 0x22); - pmac_low_i2c_setmode(pmac_tb_clock_chip_host, pmac_low_i2c_mode_stdsub); - rc = pmac_low_i2c_xfer(pmac_tb_clock_chip_host, - pmac_tb_pulsar_addr | pmac_low_i2c_write, - 0x2e, &data, 1); + pmac_i2c_setmode(pmac_tb_clock_chip_host, pmac_i2c_mode_stdsub); + rc = pmac_i2c_xfer(pmac_tb_clock_chip_host, + pmac_tb_pulsar_addr | pmac_i2c_write, + 1, 0x2e, &data, 1); bail: if (rc != 0) { printk(KERN_ERR "Pulsar Timebase %s rc: %d\n", @@ -560,13 +560,15 @@ static void __init smp_core99_setup_i2c_hwsync(int ncpus) if (!ok) continue; + pmac_tb_clock_chip_host = pmac_i2c_find_bus(cc); + if (pmac_tb_clock_chip_host == NULL) + continue; reg = (u32 *)get_property(cc, "reg", NULL); if (reg == NULL) continue; - switch (*reg) { case 0xd2: - if (device_is_compatible(cc, "pulsar-legacy-slewing")) { + if (device_is_compatible(cc,"pulsar-legacy-slewing")) { pmac_tb_freeze = smp_core99_pulsar_tb_freeze; pmac_tb_pulsar_addr = 0xd2; name = "Pulsar"; @@ -585,30 +587,19 @@ static void __init smp_core99_setup_i2c_hwsync(int ncpus) break; } if (pmac_tb_freeze != NULL) { - struct device_node *p = 
of_get_parent(cc); - of_node_put(cc); - while(p && strcmp(p->type, "i2c")) { - cc = of_get_parent(p); - of_node_put(p); - p = cc; - } - if (p == NULL) - goto no_i2c_sync; /* Open i2c bus for synchronous access */ - if (pmac_low_i2c_open(p, 0)) { - printk(KERN_ERR "Failed top open i2c bus %s for clock" - " sync, fallback to software sync !\n", - p->full_name); - of_node_put(p); + if (pmac_i2c_open(pmac_tb_clock_chip_host, 1)) { + printk(KERN_ERR "Failed top open i2c bus for clock" + " sync, fallback to software sync !\n"); goto no_i2c_sync; } - pmac_tb_clock_chip_host = p; printk(KERN_INFO "Processor timebase sync using %s i2c clock\n", name); return; } no_i2c_sync: pmac_tb_freeze = NULL; + pmac_tb_clock_chip_host = NULL; } #endif /* CONFIG_PPC64 */ @@ -752,8 +743,18 @@ static int __init smp_core99_probe(void) if (ncpus <= 1) return 1; + /* We need to perform some early initialisations before we can start + * setting up SMP as we are running before initcalls + */ + pmac_i2c_init(); + + /* Setup various bits like timebase sync method, ability to nap, ... 
*/ smp_core99_setup(ncpus); + + /* Install IPIs */ mpic_request_ipis(); + + /* Collect l2cr and l3cr values from CPU 0 */ core99_init_caches(0); return ncpus; @@ -817,7 +818,7 @@ static void __devinit smp_core99_setup_cpu(int cpu_nr) /* Close i2c bus if it was used for tb sync */ if (pmac_tb_clock_chip_host) { - pmac_low_i2c_close(pmac_tb_clock_chip_host); + pmac_i2c_close(pmac_tb_clock_chip_host); pmac_tb_clock_chip_host = NULL; } diff --git a/drivers/i2c/busses/i2c-pmac-smu.c b/drivers/i2c/busses/i2c-pmac-smu.c index bfefe7f7a53d..7d925be3fd4b 100644 --- a/drivers/i2c/busses/i2c-pmac-smu.c +++ b/drivers/i2c/busses/i2c-pmac-smu.c @@ -103,8 +103,8 @@ static s32 smu_smbus_xfer( struct i2c_adapter* adap, cmd.info.subaddr[1] = 0; cmd.info.subaddr[2] = 0; if (!read) { - cmd.info.data[0] = data->byte & 0xff; - cmd.info.data[1] = (data->byte >> 8) & 0xff; + cmd.info.data[0] = data->word & 0xff; + cmd.info.data[1] = (data->word >> 8) & 0xff; } break; /* Note that these are broken vs. the expected smbus API where @@ -116,7 +116,7 @@ static s32 smu_smbus_xfer( struct i2c_adapter* adap, case I2C_SMBUS_BLOCK_DATA: cmd.info.type = SMU_I2C_TRANSFER_STDSUB; cmd.info.datalen = data->block[0] + 1; - if (cmd.info.datalen > 6) + if (cmd.info.datalen > (SMU_I2C_WRITE_MAX + 1)) return -EINVAL; if (!read) memcpy(cmd.info.data, data->block, cmd.info.datalen); @@ -273,7 +273,13 @@ static int dispose_iface(struct device *dev) static int create_iface_of_platform(struct of_device* dev, const struct of_device_id *match) { - return create_iface(dev->node, &dev->dev); + struct device_node *node = dev->node; + + if (device_is_compatible(node, "smu-i2c") || + (node->parent != NULL && + device_is_compatible(node->parent, "smu-i2c-control"))) + return create_iface(node, &dev->dev); + return -ENODEV; } @@ -288,6 +294,9 @@ static struct of_device_id i2c_smu_match[] = { .compatible = "smu-i2c", }, + { + .compatible = "i2c-bus", + }, {}, }; static struct of_platform_driver i2c_smu_of_platform_driver = 
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 96226116a646..9ecd76849e35 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -94,6 +94,8 @@ struct smu_device { static struct smu_device *smu; static DECLARE_MUTEX(smu_part_access); +static void smu_i2c_retry(unsigned long data); + /* * SMU driver low level stuff */ @@ -469,7 +471,6 @@ int __init smu_init (void) smu->of_node = np; smu->db_irq = NO_IRQ; smu->msg_irq = NO_IRQ; - init_timer(&smu->i2c_timer); /* smu_cmdbuf_abs is in the low 2G of RAM, can be converted to a * 32 bits value safely @@ -544,6 +545,10 @@ static int smu_late_init(void) if (!smu) return 0; + init_timer(&smu->i2c_timer); + smu->i2c_timer.function = smu_i2c_retry; + smu->i2c_timer.data = (unsigned long)smu; + /* * Try to request the interrupts */ @@ -570,28 +575,41 @@ static int smu_late_init(void) return 0; } -arch_initcall(smu_late_init); +/* This has to be before arch_initcall as the low i2c stuff relies on the + * above having been done before we reach arch_initcalls + */ +core_initcall(smu_late_init); /* * sysfs visibility */ +static void smu_create_i2c(struct device_node *np) +{ + char name[32]; + u32 *reg = (u32 *)get_property(np, "reg", NULL); + + if (reg != NULL) { + sprintf(name, "smu-i2c-%02x", *reg); + of_platform_device_create(np, name, &smu->of_dev->dev); + } +} + static void smu_expose_childs(void *unused) { - struct device_node *np; + struct device_node *np, *gp; for (np = NULL; (np = of_get_next_child(smu->of_node, np)) != NULL;) { - if (device_is_compatible(np, "smu-i2c")) { - char name[32]; - u32 *reg = (u32 *)get_property(np, "reg", NULL); - - if (reg == NULL) - continue; - sprintf(name, "smu-i2c-%02x", *reg); - of_platform_device_create(np, name, &smu->of_dev->dev); - } + if (device_is_compatible(np, "smu-i2c-control")) { + gp = NULL; + while ((gp = of_get_next_child(np, gp)) != NULL) + if (device_is_compatible(gp, "i2c-bus")) + smu_create_i2c(gp); + } else if (device_is_compatible(np, 
"smu-i2c")) + smu_create_i2c(np); if (device_is_compatible(np, "smu-sensors")) - of_platform_device_create(np, "smu-sensors", &smu->of_dev->dev); + of_platform_device_create(np, "smu-sensors", + &smu->of_dev->dev); } } @@ -712,13 +730,13 @@ static void smu_i2c_complete_command(struct smu_i2c_cmd *cmd, int fail) static void smu_i2c_retry(unsigned long data) { - struct smu_i2c_cmd *cmd = (struct smu_i2c_cmd *)data; + struct smu_i2c_cmd *cmd = smu->cmd_i2c_cur; DPRINTK("SMU: i2c failure, requeuing...\n"); /* requeue command simply by resetting reply_len */ cmd->pdata[0] = 0xff; - cmd->scmd.reply_len = 0x10; + cmd->scmd.reply_len = sizeof(cmd->pdata); smu_queue_cmd(&cmd->scmd); } @@ -747,10 +765,8 @@ static void smu_i2c_low_completion(struct smu_cmd *scmd, void *misc) */ if (fail && --cmd->retries > 0) { DPRINTK("SMU: i2c failure, starting timer...\n"); - smu->i2c_timer.function = smu_i2c_retry; - smu->i2c_timer.data = (unsigned long)cmd; - smu->i2c_timer.expires = jiffies + msecs_to_jiffies(5); - add_timer(&smu->i2c_timer); + BUG_ON(cmd != smu->cmd_i2c_cur); + mod_timer(&smu->i2c_timer, jiffies + msecs_to_jiffies(5)); return; } @@ -764,7 +780,7 @@ static void smu_i2c_low_completion(struct smu_cmd *scmd, void *misc) /* Ok, initial command complete, now poll status */ scmd->reply_buf = cmd->pdata; - scmd->reply_len = 0x10; + scmd->reply_len = sizeof(cmd->pdata); scmd->data_buf = cmd->pdata; scmd->data_len = 1; cmd->pdata[0] = 0; @@ -786,7 +802,7 @@ int smu_queue_i2c(struct smu_i2c_cmd *cmd) cmd->scmd.done = smu_i2c_low_completion; cmd->scmd.misc = cmd; cmd->scmd.reply_buf = cmd->pdata; - cmd->scmd.reply_len = 0x10; + cmd->scmd.reply_len = sizeof(cmd->pdata); cmd->scmd.data_buf = (u8 *)(char *)&cmd->info; cmd->scmd.status = 1; cmd->stage = 0; diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 3c0552016b91..aa481a88ccab 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -197,7 +197,6 @@ static int 
pmu_adb_reset_bus(void); #endif /* CONFIG_ADB */ static int init_pmu(void); -static int pmu_queue_request(struct adb_request *req); static void pmu_start(void); static irqreturn_t via_pmu_interrupt(int irq, void *arg, struct pt_regs *regs); static irqreturn_t gpio1_interrupt(int irq, void *arg, struct pt_regs *regs); @@ -1802,258 +1801,6 @@ pmu_present(void) return via != 0; } -struct pmu_i2c_hdr { - u8 bus; - u8 mode; - u8 bus2; - u8 address; - u8 sub_addr; - u8 comb_addr; - u8 count; -}; - -int -pmu_i2c_combined_read(int bus, int addr, int subaddr, u8* data, int len) -{ - struct adb_request req; - struct pmu_i2c_hdr *hdr = (struct pmu_i2c_hdr *)&req.data[1]; - int retry; - int rc; - - for (retry=0; retry<16; retry++) { - memset(&req, 0, sizeof(req)); - - hdr->bus = bus; - hdr->address = addr & 0xfe; - hdr->mode = PMU_I2C_MODE_COMBINED; - hdr->bus2 = 0; - hdr->sub_addr = subaddr; - hdr->comb_addr = addr | 1; - hdr->count = len; - - req.nbytes = sizeof(struct pmu_i2c_hdr) + 1; - req.reply_expected = 0; - req.reply_len = 0; - req.data[0] = PMU_I2C_CMD; - req.reply[0] = 0xff; - rc = pmu_queue_request(&req); - if (rc) - return rc; - while(!req.complete) - pmu_poll(); - if (req.reply[0] == PMU_I2C_STATUS_OK) - break; - mdelay(15); - } - if (req.reply[0] != PMU_I2C_STATUS_OK) - return -1; - - for (retry=0; retry<16; retry++) { - memset(&req, 0, sizeof(req)); - - mdelay(15); - - hdr->bus = PMU_I2C_BUS_STATUS; - req.reply[0] = 0xff; - - req.nbytes = 2; - req.reply_expected = 0; - req.reply_len = 0; - req.data[0] = PMU_I2C_CMD; - rc = pmu_queue_request(&req); - if (rc) - return rc; - while(!req.complete) - pmu_poll(); - if (req.reply[0] == PMU_I2C_STATUS_DATAREAD) { - memcpy(data, &req.reply[1], req.reply_len - 1); - return req.reply_len - 1; - } - } - return -1; -} - -int -pmu_i2c_stdsub_write(int bus, int addr, int subaddr, u8* data, int len) -{ - struct adb_request req; - struct pmu_i2c_hdr *hdr = (struct pmu_i2c_hdr *)&req.data[1]; - int retry; - int rc; - - for 
(retry=0; retry<16; retry++) { - memset(&req, 0, sizeof(req)); - - hdr->bus = bus; - hdr->address = addr & 0xfe; - hdr->mode = PMU_I2C_MODE_STDSUB; - hdr->bus2 = 0; - hdr->sub_addr = subaddr; - hdr->comb_addr = addr & 0xfe; - hdr->count = len; - - req.data[0] = PMU_I2C_CMD; - memcpy(&req.data[sizeof(struct pmu_i2c_hdr) + 1], data, len); - req.nbytes = sizeof(struct pmu_i2c_hdr) + len + 1; - req.reply_expected = 0; - req.reply_len = 0; - req.reply[0] = 0xff; - rc = pmu_queue_request(&req); - if (rc) - return rc; - while(!req.complete) - pmu_poll(); - if (req.reply[0] == PMU_I2C_STATUS_OK) - break; - mdelay(15); - } - if (req.reply[0] != PMU_I2C_STATUS_OK) - return -1; - - for (retry=0; retry<16; retry++) { - memset(&req, 0, sizeof(req)); - - mdelay(15); - - hdr->bus = PMU_I2C_BUS_STATUS; - req.reply[0] = 0xff; - - req.nbytes = 2; - req.reply_expected = 0; - req.reply_len = 0; - req.data[0] = PMU_I2C_CMD; - rc = pmu_queue_request(&req); - if (rc) - return rc; - while(!req.complete) - pmu_poll(); - if (req.reply[0] == PMU_I2C_STATUS_OK) - return len; - } - return -1; -} - -int -pmu_i2c_simple_read(int bus, int addr, u8* data, int len) -{ - struct adb_request req; - struct pmu_i2c_hdr *hdr = (struct pmu_i2c_hdr *)&req.data[1]; - int retry; - int rc; - - for (retry=0; retry<16; retry++) { - memset(&req, 0, sizeof(req)); - - hdr->bus = bus; - hdr->address = addr | 1; - hdr->mode = PMU_I2C_MODE_SIMPLE; - hdr->bus2 = 0; - hdr->sub_addr = 0; - hdr->comb_addr = 0; - hdr->count = len; - - req.data[0] = PMU_I2C_CMD; - req.nbytes = sizeof(struct pmu_i2c_hdr) + 1; - req.reply_expected = 0; - req.reply_len = 0; - req.reply[0] = 0xff; - rc = pmu_queue_request(&req); - if (rc) - return rc; - while(!req.complete) - pmu_poll(); - if (req.reply[0] == PMU_I2C_STATUS_OK) - break; - mdelay(15); - } - if (req.reply[0] != PMU_I2C_STATUS_OK) - return -1; - - for (retry=0; retry<16; retry++) { - memset(&req, 0, sizeof(req)); - - mdelay(15); - - hdr->bus = PMU_I2C_BUS_STATUS; - req.reply[0] = 
0xff; - - req.nbytes = 2; - req.reply_expected = 0; - req.reply_len = 0; - req.data[0] = PMU_I2C_CMD; - rc = pmu_queue_request(&req); - if (rc) - return rc; - while(!req.complete) - pmu_poll(); - if (req.reply[0] == PMU_I2C_STATUS_DATAREAD) { - memcpy(data, &req.reply[1], req.reply_len - 1); - return req.reply_len - 1; - } - } - return -1; -} - -int -pmu_i2c_simple_write(int bus, int addr, u8* data, int len) -{ - struct adb_request req; - struct pmu_i2c_hdr *hdr = (struct pmu_i2c_hdr *)&req.data[1]; - int retry; - int rc; - - for (retry=0; retry<16; retry++) { - memset(&req, 0, sizeof(req)); - - hdr->bus = bus; - hdr->address = addr & 0xfe; - hdr->mode = PMU_I2C_MODE_SIMPLE; - hdr->bus2 = 0; - hdr->sub_addr = 0; - hdr->comb_addr = 0; - hdr->count = len; - - req.data[0] = PMU_I2C_CMD; - memcpy(&req.data[sizeof(struct pmu_i2c_hdr) + 1], data, len); - req.nbytes = sizeof(struct pmu_i2c_hdr) + len + 1; - req.reply_expected = 0; - req.reply_len = 0; - req.reply[0] = 0xff; - rc = pmu_queue_request(&req); - if (rc) - return rc; - while(!req.complete) - pmu_poll(); - if (req.reply[0] == PMU_I2C_STATUS_OK) - break; - mdelay(15); - } - if (req.reply[0] != PMU_I2C_STATUS_OK) - return -1; - - for (retry=0; retry<16; retry++) { - memset(&req, 0, sizeof(req)); - - mdelay(15); - - hdr->bus = PMU_I2C_BUS_STATUS; - req.reply[0] = 0xff; - - req.nbytes = 2; - req.reply_expected = 0; - req.reply_len = 0; - req.data[0] = PMU_I2C_CMD; - rc = pmu_queue_request(&req); - if (rc) - return rc; - while(!req.complete) - pmu_poll(); - if (req.reply[0] == PMU_I2C_STATUS_OK) - return len; - } - return -1; -} - #ifdef CONFIG_PM static LIST_HEAD(sleep_notifiers); @@ -2358,9 +2105,6 @@ pmac_suspend_devices(void) return -EBUSY; } - /* Disable clock spreading on some machines */ - pmac_tweak_clock_spreading(0); - /* Stop preemption */ preempt_disable(); @@ -2431,9 +2175,6 @@ pmac_wakeup_devices(void) mdelay(10); preempt_enable(); - /* Re-enable clock spreading on some machines */ - 
pmac_tweak_clock_spreading(1); - /* Resume devices */ device_resume(); @@ -3150,16 +2891,13 @@ static int __init init_pmu_sysfs(void) subsys_initcall(init_pmu_sysfs); EXPORT_SYMBOL(pmu_request); +EXPORT_SYMBOL(pmu_queue_request); EXPORT_SYMBOL(pmu_poll); EXPORT_SYMBOL(pmu_poll_adb); EXPORT_SYMBOL(pmu_wait_complete); EXPORT_SYMBOL(pmu_suspend); EXPORT_SYMBOL(pmu_resume); EXPORT_SYMBOL(pmu_unlock); -EXPORT_SYMBOL(pmu_i2c_combined_read); -EXPORT_SYMBOL(pmu_i2c_stdsub_write); -EXPORT_SYMBOL(pmu_i2c_simple_read); -EXPORT_SYMBOL(pmu_i2c_simple_write); #if defined(CONFIG_PM) && defined(CONFIG_PPC32) EXPORT_SYMBOL(pmu_enable_irled); EXPORT_SYMBOL(pmu_battery_count); diff --git a/include/asm-powerpc/pmac_feature.h b/include/asm-powerpc/pmac_feature.h index f6997ed5179e..e654ad0e5b42 100644 --- a/include/asm-powerpc/pmac_feature.h +++ b/include/asm-powerpc/pmac_feature.h @@ -318,10 +318,6 @@ extern void pmac_register_agp_pm(struct pci_dev *bridge, extern void pmac_suspend_agp_for_card(struct pci_dev *dev); extern void pmac_resume_agp_for_card(struct pci_dev *dev); -/* Used by the via-pmu driver for suspend/resume - */ -extern void pmac_tweak_clock_spreading(int enable); - /* * The part below is for use by macio_asic.c only, do not rely * on the data structures or constants below in a normal driver diff --git a/include/asm-powerpc/pmac_low_i2c.h b/include/asm-powerpc/pmac_low_i2c.h index 3fb8d51540dd..adf4fa956572 100644 --- a/include/asm-powerpc/pmac_low_i2c.h +++ b/include/asm-powerpc/pmac_low_i2c.h @@ -15,30 +15,87 @@ /* i2c mode (based on the platform functions format) */ enum { - pmac_low_i2c_mode_dumb = 1, - pmac_low_i2c_mode_std = 2, - pmac_low_i2c_mode_stdsub = 3, - pmac_low_i2c_mode_combined = 4, + pmac_i2c_mode_dumb = 1, + pmac_i2c_mode_std = 2, + pmac_i2c_mode_stdsub = 3, + pmac_i2c_mode_combined = 4, }; /* RW bit in address */ enum { - pmac_low_i2c_read = 0x01, - pmac_low_i2c_write = 0x00 + pmac_i2c_read = 0x01, + pmac_i2c_write = 0x00 }; +/* i2c bus type */ +enum 
{ + pmac_i2c_bus_keywest = 0, + pmac_i2c_bus_pmu = 1, + pmac_i2c_bus_smu = 2, +}; + +/* i2c bus features */ +enum { + /* can_largesub : supports >1 byte subaddresses (SMU only) */ + pmac_i2c_can_largesub = 0x00000001u, + + /* multibus : device node holds multiple busses, bus number is + * encoded in bits 0xff00 of "reg" of a given device + */ + pmac_i2c_multibus = 0x00000002u, +}; + +/* i2c busses in the system */ +struct pmac_i2c_bus; +struct i2c_adapter; + /* Init, called early during boot */ -extern void pmac_init_low_i2c(void); +extern int pmac_i2c_init(void); + +/* Lookup an i2c bus for a device-node. The node can be either the bus + * node itself or a device below it. In the case of a multibus, the bus + * node itself is the controller node, else, it's a child of the controller + * node + */ +extern struct pmac_i2c_bus *pmac_i2c_find_bus(struct device_node *node); + +/* Get the address for an i2c device. This strips the bus number if + * necessary. The 7 bits address is returned 1 bit right shifted so that the + * direction can be directly ored in + */ +extern u8 pmac_i2c_get_dev_addr(struct device_node *device); + +/* Get infos about a bus */ +extern struct device_node *pmac_i2c_get_controller(struct pmac_i2c_bus *bus); +extern struct device_node *pmac_i2c_get_bus_node(struct pmac_i2c_bus *bus); +extern int pmac_i2c_get_type(struct pmac_i2c_bus *bus); +extern int pmac_i2c_get_flags(struct pmac_i2c_bus *bus); + +/* i2c layer adapter attach/detach */ +extern void pmac_i2c_attach_adapter(struct pmac_i2c_bus *bus, + struct i2c_adapter *adapter); +extern void pmac_i2c_detach_adapter(struct pmac_i2c_bus *bus, + struct i2c_adapter *adapter); +extern struct i2c_adapter *pmac_i2c_get_adapter(struct pmac_i2c_bus *bus); + +/* Match a device or bus with an i2c adapter structure, to be used by drivers + * to match device-tree nodes with i2c adapters during adapter discovery + * callbacks + */ +extern int pmac_i2c_match_adapter(struct device_node *dev, + struct 
i2c_adapter *adapter); + -/* Locking functions exposed to i2c-keywest */ -int pmac_low_i2c_lock(struct device_node *np); -int pmac_low_i2c_unlock(struct device_node *np); +/* (legacy) Locking functions exposed to i2c-keywest */ +extern int pmac_low_i2c_lock(struct device_node *np); +extern int pmac_low_i2c_unlock(struct device_node *np); /* Access functions for platform code */ -int pmac_low_i2c_open(struct device_node *np, int channel); -int pmac_low_i2c_close(struct device_node *np); -int pmac_low_i2c_setmode(struct device_node *np, int mode); -int pmac_low_i2c_xfer(struct device_node *np, u8 addrdir, u8 subaddr, u8 *data, int len); +extern int pmac_i2c_open(struct pmac_i2c_bus *bus, int polled); +extern void pmac_i2c_close(struct pmac_i2c_bus *bus); +extern int pmac_i2c_setmode(struct pmac_i2c_bus *bus, int mode); +extern int pmac_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, + u32 subaddr, u8 *data, int len); #endif /* __KERNEL__ */ diff --git a/include/asm-powerpc/smu.h b/include/asm-powerpc/smu.h index 7fae3ce9a8c1..134c2b5be0f2 100644 --- a/include/asm-powerpc/smu.h +++ b/include/asm-powerpc/smu.h @@ -358,6 +358,9 @@ extern unsigned long smu_cmdbuf_abs; * Kenrel asynchronous i2c interface */ +#define SMU_I2C_READ_MAX 0x1d +#define SMU_I2C_WRITE_MAX 0x15 + /* SMU i2c header, exactly matches i2c header on wire */ struct smu_i2c_param { @@ -368,12 +371,9 @@ struct smu_i2c_param u8 subaddr[3]; /* subaddress */ u8 caddr; /* combined address, filled by SMU driver */ u8 datalen; /* length of transfer */ - u8 data[7]; /* data */ + u8 data[SMU_I2C_READ_MAX]; /* data */ }; -#define SMU_I2C_READ_MAX 0x0d -#define SMU_I2C_WRITE_MAX 0x05 - struct smu_i2c_cmd { /* public */ @@ -387,7 +387,7 @@ struct smu_i2c_cmd int read; int stage; int retries; - u8 pdata[0x10]; + u8 pdata[32]; struct list_head link; }; @@ -519,7 +519,7 @@ struct smu_sdbp_cpupiddata { * if not found. 
The data format is described below */ extern struct smu_sdbp_header *smu_get_sdb_partition(int id, - unsigned int *size); + unsigned int *size); #endif /* __KERNEL__ */ diff --git a/include/linux/pmu.h b/include/linux/pmu.h index 373bd3b9b330..217d3daf7336 100644 --- a/include/linux/pmu.h +++ b/include/linux/pmu.h @@ -140,7 +140,7 @@ extern int find_via_pmu(void); extern int pmu_request(struct adb_request *req, void (*done)(struct adb_request *), int nbytes, ...); - +extern int pmu_queue_request(struct adb_request *req); extern void pmu_poll(void); extern void pmu_poll_adb(void); /* For use by xmon */ extern void pmu_wait_complete(struct adb_request *req); @@ -160,12 +160,6 @@ extern void pmu_unlock(void); extern int pmu_present(void); extern int pmu_get_model(void); -extern int pmu_i2c_combined_read(int bus, int addr, int subaddr, u8* data, int len); -extern int pmu_i2c_stdsub_write(int bus, int addr, int subaddr, u8* data, int len); -extern int pmu_i2c_simple_read(int bus, int addr, u8* data, int len); -extern int pmu_i2c_simple_write(int bus, int addr, u8* data, int len); - - #ifdef CONFIG_PM /* * Stuff for putting the powerbook to sleep and waking it again. -- cgit v1.2.3-71-gd317 From 769db45b73896a88d6b40e3e648dfc50a155ec93 Mon Sep 17 00:00:00 2001 From: Coywolf Qi Hunt Date: Wed, 28 Dec 2005 10:55:49 +0100 Subject: make elv_try_merge() static, kill the dead declaration of elv_try_last_merge(). 
Signed-off-by: Coywolf Qi Hunt Signed-off-by: Jens Axboe --- block/elevator.c | 3 +-- include/linux/elevator.h | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/block/elevator.c b/block/elevator.c index 39dcccc82ada..99a4d7b2f8ad 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -64,7 +64,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) } EXPORT_SYMBOL(elv_rq_merge_ok); -inline int elv_try_merge(struct request *__rq, struct bio *bio) +static inline int elv_try_merge(struct request *__rq, struct bio *bio) { int ret = ELEVATOR_NO_MERGE; @@ -80,7 +80,6 @@ inline int elv_try_merge(struct request *__rq, struct bio *bio) return ret; } -EXPORT_SYMBOL(elv_try_merge); static struct elevator_type *elevator_find(const char *name) { diff --git a/include/linux/elevator.h b/include/linux/elevator.h index fb80fa44c4dd..4a6f50e31c73 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -114,8 +114,6 @@ extern ssize_t elv_iosched_store(request_queue_t *, const char *, size_t); extern int elevator_init(request_queue_t *, char *); extern void elevator_exit(elevator_t *); extern int elv_rq_merge_ok(struct request *, struct bio *); -extern int elv_try_merge(struct request *, struct bio *); -extern int elv_try_last_merge(request_queue_t *, struct bio *); /* * Return values from elevator merger -- cgit v1.2.3-71-gd317 From 356cebea1123804e4aa85b43ab39bbd0ac8e667c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 Jan 2006 15:30:20 +0100 Subject: [BLOCK] Kill blk_attempt_remerge() It's a broken interface, it's done way too late. And apparently it triggers slab problems in recent kernels as well (most likely after the generic dispatch code was merged). So kill it, ide-cd is the only user of it. 
Signed-off-by: Jens Axboe --- block/ll_rw_blk.c | 24 ------------------------ drivers/ide/ide-cd.c | 10 ---------- include/linux/blkdev.h | 1 - 3 files changed, 35 deletions(-) (limited to 'include/linux') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index c182c9f4b2c4..c44d6fe9f6ce 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -2734,30 +2734,6 @@ static inline int attempt_front_merge(request_queue_t *q, struct request *rq) return 0; } -/** - * blk_attempt_remerge - attempt to remerge active head with next request - * @q: The &request_queue_t belonging to the device - * @rq: The head request (usually) - * - * Description: - * For head-active devices, the queue can easily be unplugged so quickly - * that proper merging is not done on the front request. This may hurt - * performance greatly for some devices. The block layer cannot safely - * do merging on that first request for these queues, but the driver can - * call this function and make it happen any way. Only the driver knows - * when it is safe to do so. - **/ -void blk_attempt_remerge(request_queue_t *q, struct request *rq) -{ - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - attempt_back_merge(q, rq); - spin_unlock_irqrestore(q->queue_lock, flags); -} - -EXPORT_SYMBOL(blk_attempt_remerge); - static void init_request_from_bio(struct request *req, struct bio *bio) { req->flags |= REQ_CMD; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index d31117eb95aa..e4d55ad32d2f 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1332,8 +1332,6 @@ static ide_startstop_t cdrom_start_read (ide_drive_t *drive, unsigned int block) if (cdrom_read_from_buffer(drive)) return ide_stopped; - blk_attempt_remerge(drive->queue, rq); - /* Clear the local sector buffer. 
*/ info->nsectors_buffered = 0; @@ -1874,14 +1872,6 @@ static ide_startstop_t cdrom_start_write(ide_drive_t *drive, struct request *rq) return ide_stopped; } - /* - * for dvd-ram and such media, it's a really big deal to get - * big writes all the time. so scour the queue and attempt to - * remerge requests, often the plugging will not have had time - * to do this properly - */ - blk_attempt_remerge(drive->queue, rq); - info->nsectors_buffered = 0; /* use dma, if possible. we don't need to check more, since we diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fb0985377421..96b233991685 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -592,7 +592,6 @@ extern void generic_make_request(struct bio *bio); extern void blk_put_request(struct request *); extern void __blk_put_request(request_queue_t *, struct request *); extern void blk_end_sync_rq(struct request *rq, int error); -extern void blk_attempt_remerge(request_queue_t *, struct request *); extern struct request *blk_get_request(request_queue_t *, int, gfp_t); extern void blk_insert_request(request_queue_t *, struct request *, int, void *); extern void blk_requeue_request(request_queue_t *, struct request *); -- cgit v1.2.3-71-gd317 From ff856bad67cb65cb4dc4ef88b808804fc4265782 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 Jan 2006 16:02:34 +0100 Subject: [BLOCK] ll_rw_blk: Enable out-of-order request completions through softirq Request completion can be a quite heavy process, since it needs to iterate through the entire request and complete the bio's it holds. This patch adds blk_complete_request() which moves this processing into a dedicated block softirq. 
Signed-off-by: Jens Axboe --- block/ll_rw_blk.c | 106 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/blkdev.h | 21 +++++++-- include/linux/interrupt.h | 1 + 3 files changed, 124 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 91d3b4828c49..8e136450abc2 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include /* * for max sense size @@ -62,13 +64,15 @@ static wait_queue_head_t congestion_wqh[2] = { /* * Controlling structure to kblockd */ -static struct workqueue_struct *kblockd_workqueue; +static struct workqueue_struct *kblockd_workqueue; unsigned long blk_max_low_pfn, blk_max_pfn; EXPORT_SYMBOL(blk_max_low_pfn); EXPORT_SYMBOL(blk_max_pfn); +static DEFINE_PER_CPU(struct list_head, blk_cpu_done); + /* Amount of time in which a process may batch requests */ #define BLK_BATCH_TIME (HZ/50UL) @@ -207,6 +211,13 @@ void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn) EXPORT_SYMBOL(blk_queue_merge_bvec); +void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn) +{ + q->softirq_done_fn = fn; +} + +EXPORT_SYMBOL(blk_queue_softirq_done); + /** * blk_queue_make_request - define an alternate make_request function for a device * @q: the request queue for the device to be affected @@ -270,6 +281,7 @@ EXPORT_SYMBOL(blk_queue_make_request); static inline void rq_init(request_queue_t *q, struct request *rq) { INIT_LIST_HEAD(&rq->queuelist); + INIT_LIST_HEAD(&rq->donelist); rq->errors = 0; rq->rq_status = RQ_ACTIVE; @@ -286,6 +298,7 @@ static inline void rq_init(request_queue_t *q, struct request *rq) rq->sense = NULL; rq->end_io = NULL; rq->end_io_data = NULL; + rq->completion_data = NULL; } /** @@ -3286,6 +3299,87 @@ int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) EXPORT_SYMBOL(end_that_request_chunk); +/* + * splice the completion data to a local structure and hand off to + * 
process_completion_queue() to complete the requests + */ +static void blk_done_softirq(struct softirq_action *h) +{ + struct list_head *cpu_list; + LIST_HEAD(local_list); + + local_irq_disable(); + cpu_list = &__get_cpu_var(blk_cpu_done); + list_splice_init(cpu_list, &local_list); + local_irq_enable(); + + while (!list_empty(&local_list)) { + struct request *rq = list_entry(local_list.next, struct request, donelist); + + list_del_init(&rq->donelist); + rq->q->softirq_done_fn(rq); + } +} + +#ifdef CONFIG_HOTPLUG_CPU + +static int blk_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) +{ + /* + * If a CPU goes away, splice its entries to the current CPU + * and trigger a run of the softirq + */ + if (action == CPU_DEAD) { + int cpu = (unsigned long) hcpu; + + local_irq_disable(); + list_splice_init(&per_cpu(blk_cpu_done, cpu), + &__get_cpu_var(blk_cpu_done)); + raise_softirq_irqoff(BLOCK_SOFTIRQ); + local_irq_enable(); + } + + return NOTIFY_OK; +} + + +static struct notifier_block __devinitdata blk_cpu_notifier = { + .notifier_call = blk_cpu_notify, +}; + +#endif /* CONFIG_HOTPLUG_CPU */ + +/** + * blk_complete_request - end I/O on a request + * @req: the request being processed + * + * Description: + * Ends all I/O on a request. It does not handle partial completions, + * unless the driver actually implements this in its completion callback + * through requeueing. The actual completion happens out-of-order, + * through a softirq handler. The user must have registered a completion + * callback through blk_queue_softirq_done(). 
+ **/ + +void blk_complete_request(struct request *req) +{ + struct list_head *cpu_list; + unsigned long flags; + + BUG_ON(!req->q->softirq_done_fn); + + local_irq_save(flags); + + cpu_list = &__get_cpu_var(blk_cpu_done); + list_add_tail(&req->donelist, cpu_list); + raise_softirq_irqoff(BLOCK_SOFTIRQ); + + local_irq_restore(flags); +} + +EXPORT_SYMBOL(blk_complete_request); + /* * queue lock must be held */ @@ -3364,6 +3458,8 @@ EXPORT_SYMBOL(kblockd_flush); int __init blk_dev_init(void) { + int i; + kblockd_workqueue = create_workqueue("kblockd"); if (!kblockd_workqueue) panic("Failed to create kblockd\n"); @@ -3377,6 +3473,14 @@ int __init blk_dev_init(void) iocontext_cachep = kmem_cache_create("blkdev_ioc", sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL); + for (i = 0; i < NR_CPUS; i++) + INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); + + open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); +#ifdef CONFIG_HOTPLUG_CPU + register_cpu_notifier(&blk_cpu_notifier); +#endif + blk_max_low_pfn = max_low_pfn; blk_max_pfn = max_pfn; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fb0985377421..804cc4ec9533 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -118,9 +118,9 @@ struct request_list { * try to put the fields that are referenced together in the same cacheline */ struct request { - struct list_head queuelist; /* looking for ->queue? you must _not_ - * access it directly, use - * blkdev_dequeue_request! */ + struct list_head queuelist; + struct list_head donelist; + unsigned long flags; /* see REQ_ bits below */ /* Maintain bio traversal state for part by part I/O submission. 
@@ -141,6 +141,7 @@ struct request { struct bio *biotail; void *elevator_private; + void *completion_data; unsigned short ioprio; @@ -291,6 +292,7 @@ typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *); typedef void (activity_fn) (void *data, int rw); typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *); typedef void (prepare_flush_fn) (request_queue_t *, struct request *); +typedef void (softirq_done_fn)(struct request *); enum blk_queue_state { Queue_down, @@ -332,6 +334,7 @@ struct request_queue activity_fn *activity_fn; issue_flush_fn *issue_flush_fn; prepare_flush_fn *prepare_flush_fn; + softirq_done_fn *softirq_done_fn; /* * Dispatch queue sorting @@ -646,6 +649,17 @@ extern int end_that_request_first(struct request *, int, int); extern int end_that_request_chunk(struct request *, int, int); extern void end_that_request_last(struct request *, int); extern void end_request(struct request *req, int uptodate); +extern void blk_complete_request(struct request *); + +static inline int rq_all_done(struct request *rq, unsigned int nr_bytes) +{ + if (blk_fs_request(rq)) + return (nr_bytes >= (rq->hard_nr_sectors << 9)); + else if (blk_pc_request(rq)) + return nr_bytes >= rq->data_len; + + return 0; +} /* * end_that_request_first/chunk() takes an uptodate argument. 
we account @@ -694,6 +708,7 @@ extern void blk_queue_segment_boundary(request_queue_t *, unsigned long); extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn); extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *); extern void blk_queue_dma_alignment(request_queue_t *, int); +extern void blk_queue_softirq_done(request_queue_t *, softirq_done_fn *); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *); extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index e50a95fbeb11..f02204706984 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -112,6 +112,7 @@ enum TIMER_SOFTIRQ, NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, + BLOCK_SOFTIRQ, SCSI_SOFTIRQ, TASKLET_SOFTIRQ }; -- cgit v1.2.3-71-gd317 From 1aea6434eebd25e532d2e5ddabf2733af4e1ff0b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 Jan 2006 16:03:03 +0100 Subject: [SCSI] Kill the SCSI softirq handling This patch moves the SCSI softirq handling to the block layer version. There should be no functional changes. Signed-off-by: Jens Axboe --- drivers/scsi/scsi.c | 109 ++++------------------------------------------ drivers/scsi/scsi_lib.c | 36 +++++++++++++++ drivers/scsi/scsi_priv.h | 1 + include/linux/interrupt.h | 1 - 4 files changed, 45 insertions(+), 102 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 180676d7115a..ee5f4dfdab14 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -69,7 +69,6 @@ #include "scsi_logging.h" static void scsi_done(struct scsi_cmnd *cmd); -static int scsi_retry_command(struct scsi_cmnd *cmd); /* * Definitions and constants. 
@@ -752,7 +751,7 @@ static void scsi_done(struct scsi_cmnd *cmd) * isn't running --- used by scsi_times_out */ void __scsi_done(struct scsi_cmnd *cmd) { - unsigned long flags; + struct request *rq = cmd->request; /* * Set the serial numbers back to zero @@ -763,71 +762,14 @@ void __scsi_done(struct scsi_cmnd *cmd) if (cmd->result) atomic_inc(&cmd->device->ioerr_cnt); + BUG_ON(!rq); + /* - * Next, enqueue the command into the done queue. - * It is a per-CPU queue, so we just disable local interrupts - * and need no spinlock. + * The uptodate/nbytes values don't matter, as we allow partial + * completes and thus will check this in the softirq callback */ - local_irq_save(flags); - list_add_tail(&cmd->eh_entry, &__get_cpu_var(scsi_done_q)); - raise_softirq_irqoff(SCSI_SOFTIRQ); - local_irq_restore(flags); -} - -/** - * scsi_softirq - Perform post-interrupt processing of finished SCSI commands. - * - * This is the consumer of the done queue. - * - * This is called with all interrupts enabled. This should reduce - * interrupt latency, stack depth, and reentrancy of the low-level - * drivers. - */ -static void scsi_softirq(struct softirq_action *h) -{ - int disposition; - LIST_HEAD(local_q); - - local_irq_disable(); - list_splice_init(&__get_cpu_var(scsi_done_q), &local_q); - local_irq_enable(); - - while (!list_empty(&local_q)) { - struct scsi_cmnd *cmd = list_entry(local_q.next, - struct scsi_cmnd, eh_entry); - /* The longest time any command should be outstanding is the - * per command timeout multiplied by the number of retries. 
- * - * For a typical command, this is 2.5 minutes */ - unsigned long wait_for - = cmd->allowed * cmd->timeout_per_command; - list_del_init(&cmd->eh_entry); - - disposition = scsi_decide_disposition(cmd); - if (disposition != SUCCESS && - time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) { - sdev_printk(KERN_ERR, cmd->device, - "timing out command, waited %lus\n", - wait_for/HZ); - disposition = SUCCESS; - } - - scsi_log_completion(cmd, disposition); - switch (disposition) { - case SUCCESS: - scsi_finish_command(cmd); - break; - case NEEDS_RETRY: - scsi_retry_command(cmd); - break; - case ADD_TO_MLQUEUE: - scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); - break; - default: - if (!scsi_eh_scmd_add(cmd, 0)) - scsi_finish_command(cmd); - } - } + rq->completion_data = cmd; + blk_complete_request(rq); } /* @@ -840,7 +782,7 @@ static void scsi_softirq(struct softirq_action *h) * level drivers should not become re-entrant as a result of * this. */ -static int scsi_retry_command(struct scsi_cmnd *cmd) +int scsi_retry_command(struct scsi_cmnd *cmd) { /* * Restore the SCSI command state. @@ -1273,38 +1215,6 @@ int scsi_device_cancel(struct scsi_device *sdev, int recovery) } EXPORT_SYMBOL(scsi_device_cancel); -#ifdef CONFIG_HOTPLUG_CPU -static int scsi_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - int cpu = (unsigned long)hcpu; - - switch(action) { - case CPU_DEAD: - /* Drain scsi_done_q. 
*/ - local_irq_disable(); - list_splice_init(&per_cpu(scsi_done_q, cpu), - &__get_cpu_var(scsi_done_q)); - raise_softirq_irqoff(SCSI_SOFTIRQ); - local_irq_enable(); - break; - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __devinitdata scsi_cpu_nb = { - .notifier_call = scsi_cpu_notify, -}; - -#define register_scsi_cpu() register_cpu_notifier(&scsi_cpu_nb) -#define unregister_scsi_cpu() unregister_cpu_notifier(&scsi_cpu_nb) -#else -#define register_scsi_cpu() -#define unregister_scsi_cpu() -#endif /* CONFIG_HOTPLUG_CPU */ - MODULE_DESCRIPTION("SCSI core"); MODULE_LICENSE("GPL"); @@ -1338,8 +1248,6 @@ static int __init init_scsi(void) INIT_LIST_HEAD(&per_cpu(scsi_done_q, i)); devfs_mk_dir("scsi"); - open_softirq(SCSI_SOFTIRQ, scsi_softirq, NULL); - register_scsi_cpu(); printk(KERN_NOTICE "SCSI subsystem initialized\n"); return 0; @@ -1367,7 +1275,6 @@ static void __exit exit_scsi(void) devfs_remove("scsi"); scsi_exit_procfs(); scsi_exit_queue(); - unregister_scsi_cpu(); } subsys_initcall(init_scsi); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ba93d6e66d48..00c9bf383e23 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1493,6 +1493,41 @@ static void scsi_kill_request(struct request *req, request_queue_t *q) __scsi_done(cmd); } +static void scsi_softirq_done(struct request *rq) +{ + struct scsi_cmnd *cmd = rq->completion_data; + unsigned long wait_for = cmd->allowed * cmd->timeout_per_command; + int disposition; + + INIT_LIST_HEAD(&cmd->eh_entry); + + disposition = scsi_decide_disposition(cmd); + if (disposition != SUCCESS && + time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) { + sdev_printk(KERN_ERR, cmd->device, + "timing out command, waited %lus\n", + wait_for/HZ); + disposition = SUCCESS; + } + + scsi_log_completion(cmd, disposition); + + switch (disposition) { + case SUCCESS: + scsi_finish_command(cmd); + break; + case NEEDS_RETRY: + scsi_retry_command(cmd); + break; + case 
ADD_TO_MLQUEUE: + scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); + break; + default: + if (!scsi_eh_scmd_add(cmd, 0)) + scsi_finish_command(cmd); + } +} + /* * Function: scsi_request_fn() * @@ -1667,6 +1702,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); blk_queue_segment_boundary(q, shost->dma_boundary); blk_queue_issue_flush_fn(q, scsi_issue_flush_fn); + blk_queue_softirq_done(q, scsi_softirq_done); if (!shost->use_clustering) clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index f04e7e11f57a..14a6198cb8d2 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -44,6 +44,7 @@ extern void scsi_init_cmd_from_req(struct scsi_cmnd *cmd, struct scsi_request *sreq); extern void __scsi_release_request(struct scsi_request *sreq); extern void __scsi_done(struct scsi_cmnd *cmd); +extern int scsi_retry_command(struct scsi_cmnd *cmd); #ifdef CONFIG_SCSI_LOGGING void scsi_log_send(struct scsi_cmnd *cmd); void scsi_log_completion(struct scsi_cmnd *cmd, int disposition); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f02204706984..2c08fdc2bdf7 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -113,7 +113,6 @@ enum NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, BLOCK_SOFTIRQ, - SCSI_SOFTIRQ, TASKLET_SOFTIRQ }; -- cgit v1.2.3-71-gd317 From 8672d57138b34447719cd7749f3d21070e1175a1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 Jan 2006 16:03:35 +0100 Subject: [IDE] Use the block layer deferred softirq request completion This patch makes IDE use the new blk_complete_request() interface. There's still room for improvement, as __ide_end_request() really could drop the lock after getting HWGROUP->rq (why does it need to hold it in the first place? If ->rq access isn't serialized, we are screwed anyways). 
Signed-off-by: Jens Axboe --- drivers/ide/ide-io.c | 42 +++++++++++++++++++++++++++++++++++------- drivers/ide/ide-probe.c | 2 ++ include/linux/ide.h | 1 + 3 files changed, 38 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index b5dc6df8e67d..dea2d4dcc698 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -55,9 +55,22 @@ #include #include +void ide_softirq_done(struct request *rq) +{ + request_queue_t *q = rq->q; + + add_disk_randomness(rq->rq_disk); + end_that_request_chunk(rq, rq->errors, rq->data_len); + + spin_lock_irq(q->queue_lock); + end_that_request_last(rq, rq->errors); + spin_unlock_irq(q->queue_lock); +} + int __ide_end_request(ide_drive_t *drive, struct request *rq, int uptodate, int nr_sectors) { + unsigned int nbytes; int ret = 1; BUG_ON(!(rq->flags & REQ_STARTED)); @@ -81,17 +94,28 @@ int __ide_end_request(ide_drive_t *drive, struct request *rq, int uptodate, HWGROUP(drive)->hwif->ide_dma_on(drive); } - if (!end_that_request_first(rq, uptodate, nr_sectors)) { - add_disk_randomness(rq->rq_disk); - - if (blk_rq_tagged(rq)) - blk_queue_end_tag(drive->queue, rq); - + /* + * For partial completions (or non fs/pc requests), use the regular + * direct completion path. 
+ */ + nbytes = nr_sectors << 9; + if (rq_all_done(rq, nbytes)) { + rq->errors = uptodate; + rq->data_len = nbytes; blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; - end_that_request_last(rq, uptodate); + blk_complete_request(rq); ret = 0; + } else { + if (!end_that_request_first(rq, uptodate, nr_sectors)) { + add_disk_randomness(rq->rq_disk); + blkdev_dequeue_request(rq); + HWGROUP(drive)->rq = NULL; + end_that_request_last(rq, uptodate); + ret = 0; + } } + return ret; } EXPORT_SYMBOL(__ide_end_request); @@ -113,6 +137,10 @@ int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors) unsigned long flags; int ret = 1; + /* + * room for locking improvements here, the calls below don't + * need the queue lock held at all + */ spin_lock_irqsave(&ide_lock, flags); rq = HWGROUP(drive)->rq; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 02167a5b751d..1ddaa71a8f45 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1011,6 +1011,8 @@ static int ide_init_queue(ide_drive_t *drive) blk_queue_max_hw_segments(q, max_sg_entries); blk_queue_max_phys_segments(q, max_sg_entries); + blk_queue_softirq_done(q, ide_softirq_done); + /* assign drive queue */ drive->queue = q; diff --git a/include/linux/ide.h b/include/linux/ide.h index 4dd6694963c0..ef8d0cbb832f 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1001,6 +1001,7 @@ extern int noautodma; extern int ide_end_request (ide_drive_t *drive, int uptodate, int nrsecs); extern int __ide_end_request (ide_drive_t *drive, struct request *rq, int uptodate, int nrsecs); +extern void ide_softirq_done(struct request *rq); /* * This is used on exit from the driver to designate the next irq handler -- cgit v1.2.3-71-gd317 From 0d0fbf8152fb3bb4393be11e8df7f70e1fbbd738 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 9 Jan 2006 15:24:57 -0200 Subject: V4L (926_2): Moves compat32 functions from fs to v4l subsystem This moves the 32 bit ioctl compatibility handlers for 
Video4Linux into a new file and adds explicit calls to them to each v4l device driver. Unfortunately, there does not seem to be any code handling the v4l2 ioctls, so quite often the code goes through two separate conversions, first from 32 bit v4l to 64 bit v4l, and from there to 64 bit v4l2. My patch does not change that, so there is still much room for improvement. Also, some drivers have additional ioctl numbers, for which the conversion should be handled internally to that driver. Signed-off-by: Arnd Bergmann Signed-off-by: Mauro Carvalho Chehab --- drivers/media/radio/miropcm20-radio.c | 1 + drivers/media/radio/radio-aimslab.c | 1 + drivers/media/radio/radio-aztech.c | 1 + drivers/media/radio/radio-cadet.c | 1 + drivers/media/radio/radio-gemtek-pci.c | 1 + drivers/media/radio/radio-gemtek.c | 1 + drivers/media/radio/radio-maestro.c | 1 + drivers/media/radio/radio-maxiradio.c | 1 + drivers/media/radio/radio-rtrack2.c | 1 + drivers/media/radio/radio-sf16fmi.c | 1 + drivers/media/radio/radio-sf16fmr2.c | 1 + drivers/media/radio/radio-terratec.c | 1 + drivers/media/radio/radio-trust.c | 1 + drivers/media/radio/radio-typhoon.c | 1 + drivers/media/radio/radio-zoltrix.c | 1 + drivers/media/video/Makefile | 3 +- drivers/media/video/arv.c | 1 + drivers/media/video/bttv-driver.c | 1 + drivers/media/video/bw-qcam.c | 1 + drivers/media/video/c-qcam.c | 1 + drivers/media/video/compat_ioctl32.c | 318 ++++++++++++++++++++++++++++ drivers/media/video/cpia.c | 1 + drivers/media/video/cx88/cx88-video.c | 2 + drivers/media/video/meye.c | 1 + drivers/media/video/pms.c | 1 + drivers/media/video/saa5249.c | 1 + drivers/media/video/saa7134/saa7134-video.c | 2 + drivers/media/video/stradis.c | 1 + drivers/media/video/w9966.c | 1 + drivers/media/video/zoran_driver.c | 1 + drivers/media/video/zr36120.c | 1 + drivers/usb/media/dsbr100.c | 1 + drivers/usb/media/ov511.c | 1 + drivers/usb/media/pwc/pwc-if.c | 1 + drivers/usb/media/se401.c | 1 + drivers/usb/media/stv680.c | 1 + 
drivers/usb/media/usbvideo.c | 1 + drivers/usb/media/vicam.c | 1 + drivers/usb/media/w9968cf.c | 1 + fs/compat_ioctl.c | 246 --------------------- include/linux/compat_ioctl.h | 26 --- include/linux/videodev2.h | 3 + 42 files changed, 362 insertions(+), 273 deletions(-) create mode 100644 drivers/media/video/compat_ioctl32.c (limited to 'include/linux') diff --git a/drivers/media/radio/miropcm20-radio.c b/drivers/media/radio/miropcm20-radio.c index c2ebe8754a95..dc292da2605f 100644 --- a/drivers/media/radio/miropcm20-radio.c +++ b/drivers/media/radio/miropcm20-radio.c @@ -220,6 +220,7 @@ static struct file_operations pcm20_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = pcm20_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/radio/radio-aimslab.c b/drivers/media/radio/radio-aimslab.c index 877c770558e9..914deab4e044 100644 --- a/drivers/media/radio/radio-aimslab.c +++ b/drivers/media/radio/radio-aimslab.c @@ -299,6 +299,7 @@ static struct file_operations rtrack_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = rt_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/radio/radio-aztech.c b/drivers/media/radio/radio-aztech.c index 5319a9c9a979..523be820f9c6 100644 --- a/drivers/media/radio/radio-aztech.c +++ b/drivers/media/radio/radio-aztech.c @@ -256,6 +256,7 @@ static struct file_operations aztech_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = az_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/radio/radio-cadet.c b/drivers/media/radio/radio-cadet.c index 9b0406318f2d..f1b5ac81e9d2 100644 --- a/drivers/media/radio/radio-cadet.c +++ b/drivers/media/radio/radio-cadet.c @@ -490,6 +490,7 @@ static struct file_operations cadet_fops = { .release = cadet_release, .read = cadet_read, .ioctl = cadet_ioctl, + .compat_ioctl = 
v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/radio/radio-gemtek-pci.c b/drivers/media/radio/radio-gemtek-pci.c index 630cc786d0a4..42c8fce04aa2 100644 --- a/drivers/media/radio/radio-gemtek-pci.c +++ b/drivers/media/radio/radio-gemtek-pci.c @@ -301,6 +301,7 @@ static struct file_operations gemtek_pci_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = gemtek_pci_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/radio/radio-gemtek.c b/drivers/media/radio/radio-gemtek.c index 6418f03b9ce4..47173be97b9f 100644 --- a/drivers/media/radio/radio-gemtek.c +++ b/drivers/media/radio/radio-gemtek.c @@ -233,6 +233,7 @@ static struct file_operations gemtek_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = gemtek_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/radio/radio-maestro.c b/drivers/media/radio/radio-maestro.c index e5e2021a7312..c30effdf711f 100644 --- a/drivers/media/radio/radio-maestro.c +++ b/drivers/media/radio/radio-maestro.c @@ -72,6 +72,7 @@ static struct file_operations maestro_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = radio_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/radio/radio-maxiradio.c b/drivers/media/radio/radio-maxiradio.c index 02d39a50d5ed..30869308332a 100644 --- a/drivers/media/radio/radio-maxiradio.c +++ b/drivers/media/radio/radio-maxiradio.c @@ -80,6 +80,7 @@ static struct file_operations maxiradio_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = radio_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; static struct video_device maxiradio_radio = diff --git a/drivers/media/radio/radio-rtrack2.c b/drivers/media/radio/radio-rtrack2.c index b2256d675b44..28a47c9e7a81 100644 --- a/drivers/media/radio/radio-rtrack2.c +++ 
b/drivers/media/radio/radio-rtrack2.c @@ -199,6 +199,7 @@ static struct file_operations rtrack2_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = rt_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/radio/radio-sf16fmi.c b/drivers/media/radio/radio-sf16fmi.c index 6f03ce4dd7b0..0229f792a059 100644 --- a/drivers/media/radio/radio-sf16fmi.c +++ b/drivers/media/radio/radio-sf16fmi.c @@ -225,6 +225,7 @@ static struct file_operations fmi_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = fmi_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/radio/radio-sf16fmr2.c b/drivers/media/radio/radio-sf16fmr2.c index 71971e9bb342..26632cead09a 100644 --- a/drivers/media/radio/radio-sf16fmr2.c +++ b/drivers/media/radio/radio-sf16fmr2.c @@ -356,6 +356,7 @@ static struct file_operations fmr2_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = fmr2_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/radio/radio-terratec.c b/drivers/media/radio/radio-terratec.c index b03573c6840e..fcfde2e4f195 100644 --- a/drivers/media/radio/radio-terratec.c +++ b/drivers/media/radio/radio-terratec.c @@ -276,6 +276,7 @@ static struct file_operations terratec_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = tt_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/radio/radio-trust.c b/drivers/media/radio/radio-trust.c index b300bedf7c74..5a099a50d4d0 100644 --- a/drivers/media/radio/radio-trust.c +++ b/drivers/media/radio/radio-trust.c @@ -255,6 +255,7 @@ static struct file_operations trust_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = tr_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/radio/radio-typhoon.c 
b/drivers/media/radio/radio-typhoon.c index f304f3c14763..8ac9a8ef9094 100644 --- a/drivers/media/radio/radio-typhoon.c +++ b/drivers/media/radio/radio-typhoon.c @@ -261,6 +261,7 @@ static struct file_operations typhoon_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = typhoon_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/radio/radio-zoltrix.c b/drivers/media/radio/radio-zoltrix.c index 4c6d6fb49034..d590e80c922e 100644 --- a/drivers/media/radio/radio-zoltrix.c +++ b/drivers/media/radio/radio-zoltrix.c @@ -313,6 +313,7 @@ static struct file_operations zoltrix_fops = .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = zol_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile index 82060f9909d8..618a08ab940a 100644 --- a/drivers/media/video/Makefile +++ b/drivers/media/video/Makefile @@ -8,7 +8,8 @@ zoran-objs := zr36120.o zr36120_i2c.o zr36120_mem.o zr36067-objs := zoran_procfs.o zoran_device.o \ zoran_driver.o zoran_card.o tuner-objs := tuner-core.o tuner-simple.o mt20xx.o tda8290.o tea5767.o -obj-$(CONFIG_VIDEO_DEV) += videodev.o v4l2-common.o v4l1-compat.o + +obj-$(CONFIG_VIDEO_DEV) += videodev.o v4l2-common.o v4l1-compat.o compat_ioctl32.o obj-$(CONFIG_VIDEO_BT848) += bttv.o msp3400.o tvaudio.o \ tda7432.o tda9875.o ir-kbd-i2c.o ir-kbd-gpio.o diff --git a/drivers/media/video/arv.c b/drivers/media/video/arv.c index 881cdcb1875d..7d5a068353f2 100644 --- a/drivers/media/video/arv.c +++ b/drivers/media/video/arv.c @@ -749,6 +749,7 @@ static struct file_operations ar_fops = { .release = video_exclusive_release, .read = ar_read, .ioctl = ar_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/video/bttv-driver.c b/drivers/media/video/bttv-driver.c index 1ddf9ba613ef..03f925724ce9 100644 --- a/drivers/media/video/bttv-driver.c +++ 
b/drivers/media/video/bttv-driver.c @@ -3120,6 +3120,7 @@ static struct file_operations bttv_fops = .open = bttv_open, .release = bttv_release, .ioctl = bttv_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, .read = bttv_read, .mmap = bttv_mmap, diff --git a/drivers/media/video/bw-qcam.c b/drivers/media/video/bw-qcam.c index 0065d0c240d1..6bad93ef969f 100644 --- a/drivers/media/video/bw-qcam.c +++ b/drivers/media/video/bw-qcam.c @@ -875,6 +875,7 @@ static struct file_operations qcam_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = qcam_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .read = qcam_read, .llseek = no_llseek, }; diff --git a/drivers/media/video/c-qcam.c b/drivers/media/video/c-qcam.c index 75442ec49f35..9976db4f6da8 100644 --- a/drivers/media/video/c-qcam.c +++ b/drivers/media/video/c-qcam.c @@ -687,6 +687,7 @@ static struct file_operations qcam_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = qcam_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .read = qcam_read, .llseek = no_llseek, }; diff --git a/drivers/media/video/compat_ioctl32.c b/drivers/media/video/compat_ioctl32.c new file mode 100644 index 000000000000..42dc11c63c0d --- /dev/null +++ b/drivers/media/video/compat_ioctl32.c @@ -0,0 +1,318 @@ +#include +#include +#include + +#ifdef CONFIG_COMPAT +struct video_tuner32 { + compat_int_t tuner; + char name[32]; + compat_ulong_t rangelow, rangehigh; + u32 flags; /* It is really u32 in videodev.h */ + u16 mode, signal; +}; + +static int get_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user *up) +{ + int i; + + if(get_user(kp->tuner, &up->tuner)) + return -EFAULT; + for(i = 0; i < 32; i++) + __get_user(kp->name[i], &up->name[i]); + __get_user(kp->rangelow, &up->rangelow); + __get_user(kp->rangehigh, &up->rangehigh); + __get_user(kp->flags, &up->flags); + __get_user(kp->mode, &up->mode); + __get_user(kp->signal, &up->signal); + return 0; +} + +static int 
put_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user *up) +{ + int i; + + if(put_user(kp->tuner, &up->tuner)) + return -EFAULT; + for(i = 0; i < 32; i++) + __put_user(kp->name[i], &up->name[i]); + __put_user(kp->rangelow, &up->rangelow); + __put_user(kp->rangehigh, &up->rangehigh); + __put_user(kp->flags, &up->flags); + __put_user(kp->mode, &up->mode); + __put_user(kp->signal, &up->signal); + return 0; +} + +struct video_buffer32 { + compat_caddr_t base; + compat_int_t height, width, depth, bytesperline; +}; + +static int get_video_buffer32(struct video_buffer *kp, struct video_buffer32 __user *up) +{ + u32 tmp; + + if (get_user(tmp, &up->base)) + return -EFAULT; + + /* This is actually a physical address stored + * as a void pointer. + */ + kp->base = (void *)(unsigned long) tmp; + + __get_user(kp->height, &up->height); + __get_user(kp->width, &up->width); + __get_user(kp->depth, &up->depth); + __get_user(kp->bytesperline, &up->bytesperline); + return 0; +} + +static int put_video_buffer32(struct video_buffer *kp, struct video_buffer32 __user *up) +{ + u32 tmp = (u32)((unsigned long)kp->base); + + if(put_user(tmp, &up->base)) + return -EFAULT; + __put_user(kp->height, &up->height); + __put_user(kp->width, &up->width); + __put_user(kp->depth, &up->depth); + __put_user(kp->bytesperline, &up->bytesperline); + return 0; +} + +struct video_clip32 { + s32 x, y, width, height; /* Its really s32 in videodev.h */ + compat_caddr_t next; +}; + +struct video_window32 { + u32 x, y, width, height, chromakey, flags; + compat_caddr_t clips; + compat_int_t clipcount; +}; + +static int native_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int ret = -ENOIOCTLCMD; + + if (file->f_ops->unlocked_ioctl) + ret = file->f_ops->unlocked_ioctl(file, cmd, arg); + else if (file->f_ops->ioctl) { + lock_kernel(); + ret = file->f_ops->ioctl(file->f_dentry->d_inode, file, cmd, arg); + unlock_kernel(); + } + + return ret; +} + + +/* You get back everything 
except the clips... */ +static int put_video_window32(struct video_window *kp, struct video_window32 __user *up) +{ + if(put_user(kp->x, &up->x)) + return -EFAULT; + __put_user(kp->y, &up->y); + __put_user(kp->width, &up->width); + __put_user(kp->height, &up->height); + __put_user(kp->chromakey, &up->chromakey); + __put_user(kp->flags, &up->flags); + __put_user(kp->clipcount, &up->clipcount); + return 0; +} + +#define VIDIOCGTUNER32 _IOWR('v',4, struct video_tuner32) +#define VIDIOCSTUNER32 _IOW('v',5, struct video_tuner32) +#define VIDIOCGWIN32 _IOR('v',9, struct video_window32) +#define VIDIOCSWIN32 _IOW('v',10, struct video_window32) +#define VIDIOCGFBUF32 _IOR('v',11, struct video_buffer32) +#define VIDIOCSFBUF32 _IOW('v',12, struct video_buffer32) +#define VIDIOCGFREQ32 _IOR('v',14, u32) +#define VIDIOCSFREQ32 _IOW('v',15, u32) + +enum { + MaxClips = (~0U-sizeof(struct video_window))/sizeof(struct video_clip) +}; + +static int do_set_window(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct video_window32 __user *up = compat_ptr(arg); + struct video_window __user *vw; + struct video_clip __user *p; + int nclips; + u32 n; + + if (get_user(nclips, &up->clipcount)) + return -EFAULT; + + /* Peculiar interface... */ + if (nclips < 0) + nclips = VIDEO_CLIPMAP_SIZE; + + if (nclips > MaxClips) + return -ENOMEM; + + vw = compat_alloc_user_space(sizeof(struct video_window) + + nclips * sizeof(struct video_clip)); + + p = nclips ? 
(struct video_clip __user *)(vw + 1) : NULL; + + if (get_user(n, &up->x) || put_user(n, &vw->x) || + get_user(n, &up->y) || put_user(n, &vw->y) || + get_user(n, &up->width) || put_user(n, &vw->width) || + get_user(n, &up->height) || put_user(n, &vw->height) || + get_user(n, &up->chromakey) || put_user(n, &vw->chromakey) || + get_user(n, &up->flags) || put_user(n, &vw->flags) || + get_user(n, &up->clipcount) || put_user(n, &vw->clipcount) || + get_user(n, &up->clips) || put_user(p, &vw->clips)) + return -EFAULT; + + if (nclips) { + struct video_clip32 __user *u = compat_ptr(n); + int i; + if (!u) + return -EINVAL; + for (i = 0; i < nclips; i++, u++, p++) { + s32 v; + if (get_user(v, &u->x) || + put_user(v, &p->x) || + get_user(v, &u->y) || + put_user(v, &p->y) || + get_user(v, &u->width) || + put_user(v, &p->width) || + get_user(v, &u->height) || + put_user(v, &p->height) || + put_user(NULL, &p->next)) + return -EFAULT; + } + } + + return native_ioctl(file, VIDIOCSWIN, (unsigned long)p); +} + +static int do_video_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + union { + struct video_tuner vt; + struct video_buffer vb; + struct video_window vw; + unsigned long vx; + } karg; + mm_segment_t old_fs = get_fs(); + void __user *up = compat_ptr(arg); + int err = 0; + + /* First, convert the command. 
*/ + switch(cmd) { + case VIDIOCGTUNER32: cmd = VIDIOCGTUNER; break; + case VIDIOCSTUNER32: cmd = VIDIOCSTUNER; break; + case VIDIOCGWIN32: cmd = VIDIOCGWIN; break; + case VIDIOCGFBUF32: cmd = VIDIOCGFBUF; break; + case VIDIOCSFBUF32: cmd = VIDIOCSFBUF; break; + case VIDIOCGFREQ32: cmd = VIDIOCGFREQ; break; + case VIDIOCSFREQ32: cmd = VIDIOCSFREQ; break; + }; + + switch(cmd) { + case VIDIOCSTUNER: + case VIDIOCGTUNER: + err = get_video_tuner32(&karg.vt, up); + break; + + case VIDIOCSFBUF: + err = get_video_buffer32(&karg.vb, up); + break; + + case VIDIOCSFREQ: + err = get_user(karg.vx, (u32 __user *)up); + break; + }; + if(err) + goto out; + + set_fs(KERNEL_DS); + err = native_ioctl(file, cmd, (unsigned long)&karg); + set_fs(old_fs); + + if(err == 0) { + switch(cmd) { + case VIDIOCGTUNER: + err = put_video_tuner32(&karg.vt, up); + break; + + case VIDIOCGWIN: + err = put_video_window32(&karg.vw, up); + break; + + case VIDIOCGFBUF: + err = put_video_buffer32(&karg.vb, up); + break; + + case VIDIOCGFREQ: + err = put_user(((u32)karg.vx), (u32 __user *)up); + break; + }; + } +out: + return err; +} + +long v4l_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg) +{ + int ret = -ENOIOCTLCMD; + + if (!file->f_ops->ioctl) + return ret; + + switch (cmd) { + case VIDIOCSWIN32: + ret = do_set_window(file, cmd, arg); + break; + case VIDIOCGTUNER32: + case VIDIOCSTUNER32: + case VIDIOCGWIN32: + case VIDIOCGFBUF32: + case VIDIOCSFBUF32: + case VIDIOCGFREQ32: + case VIDIOCSFREQ32 + ret = do_video_ioctl(file, cmd, arg); + break; + + /* Little v, the video4linux ioctls (conflict?) */ + case VIDIOCGCAP: + case VIDIOCGCHAN: + case VIDIOCSCHAN: + case VIDIOCGPICT: + case VIDIOCSPICT: + case VIDIOCCAPTURE: + case VIDIOCKEY: + case VIDIOCGAUDIO: + case VIDIOCSAUDIO: + case VIDIOCSYNC: + case VIDIOCMCAPTURE: + case VIDIOCGMBUF: + case VIDIOCGUNIT: + case VIDIOCGCAPTURE: + case VIDIOCSCAPTURE: + + /* BTTV specific... 
*/ + case _IOW('v', BASE_VIDIOCPRIVATE+0, char [256]): + case _IOR('v', BASE_VIDIOCPRIVATE+1, char [256]): + case _IOR('v' , BASE_VIDIOCPRIVATE+2, unsigned int): + case _IOW('v' , BASE_VIDIOCPRIVATE+3, char [16]): /* struct bttv_pll_info */ + case _IOR('v' , BASE_VIDIOCPRIVATE+4, int): + case _IOR('v' , BASE_VIDIOCPRIVATE+5, int): + case _IOR('v' , BASE_VIDIOCPRIVATE+6, int): + case _IOR('v' , BASE_VIDIOCPRIVATE+7, int): + ret = native_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); + break; + + return ret; +} +#else +long v4l_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg) +{ + return -ENOIOCTLCMD; +} +#endif +EXPORT_SYMBOL_GPL(v4l_compat_ioctl32); diff --git a/drivers/media/video/cpia.c b/drivers/media/video/cpia.c index b7ec9bf45085..9f59541155d9 100644 --- a/drivers/media/video/cpia.c +++ b/drivers/media/video/cpia.c @@ -3807,6 +3807,7 @@ static struct file_operations cpia_fops = { .read = cpia_read, .mmap = cpia_mmap, .ioctl = cpia_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c index 24a48f8a48c1..bc025c46aedf 100644 --- a/drivers/media/video/cx88/cx88-video.c +++ b/drivers/media/video/cx88/cx88-video.c @@ -1740,6 +1740,7 @@ static struct file_operations video_fops = .poll = video_poll, .mmap = video_mmap, .ioctl = video_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; @@ -1767,6 +1768,7 @@ static struct file_operations radio_fops = .open = video_open, .release = video_release, .ioctl = radio_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c index 3f2a882bc20a..2869464aee0d 100644 --- a/drivers/media/video/meye.c +++ b/drivers/media/video/meye.c @@ -1754,6 +1754,7 @@ static struct file_operations meye_fops = { .release = meye_release, .mmap = meye_mmap, .ioctl = meye_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .poll = 
meye_poll, .llseek = no_llseek, }; diff --git a/drivers/media/video/pms.c b/drivers/media/video/pms.c index 2504207b2e3d..9e6448639480 100644 --- a/drivers/media/video/pms.c +++ b/drivers/media/video/pms.c @@ -883,6 +883,7 @@ static struct file_operations pms_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = pms_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .read = pms_read, .llseek = no_llseek, }; diff --git a/drivers/media/video/saa5249.c b/drivers/media/video/saa5249.c index a51c7bd96618..73b4f0e2abf0 100644 --- a/drivers/media/video/saa5249.c +++ b/drivers/media/video/saa5249.c @@ -702,6 +702,7 @@ static struct file_operations saa_fops = { .open = saa5249_open, .release = saa5249_release, .ioctl = saa5249_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/video/saa7134/saa7134-video.c b/drivers/media/video/saa7134/saa7134-video.c index 45c852df13ed..9b9e1e7f05ef 100644 --- a/drivers/media/video/saa7134/saa7134-video.c +++ b/drivers/media/video/saa7134/saa7134-video.c @@ -2262,6 +2262,7 @@ static struct file_operations video_fops = .poll = video_poll, .mmap = video_mmap, .ioctl = video_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; @@ -2271,6 +2272,7 @@ static struct file_operations radio_fops = .open = video_open, .release = video_release, .ioctl = radio_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/media/video/stradis.c b/drivers/media/video/stradis.c index d4497dbae05c..6ee54a45411f 100644 --- a/drivers/media/video/stradis.c +++ b/drivers/media/video/stradis.c @@ -1974,6 +1974,7 @@ static struct file_operations saa_fops = .open = saa_open, .release = saa_release, .ioctl = saa_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .read = saa_read, .llseek = no_llseek, .write = saa_write, diff --git a/drivers/media/video/w9966.c b/drivers/media/video/w9966.c index c318ba32fbaf..b7b0afffd214 100644 --- 
a/drivers/media/video/w9966.c +++ b/drivers/media/video/w9966.c @@ -187,6 +187,7 @@ static struct file_operations w9966_fops = { .open = video_exclusive_open, .release = video_exclusive_release, .ioctl = w9966_v4l_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .read = w9966_v4l_read, .llseek = no_llseek, }; diff --git a/drivers/media/video/zoran_driver.c b/drivers/media/video/zoran_driver.c index 4034f1b45366..15283f44e79f 100644 --- a/drivers/media/video/zoran_driver.c +++ b/drivers/media/video/zoran_driver.c @@ -4678,6 +4678,7 @@ static struct file_operations zoran_fops = { .open = zoran_open, .release = zoran_close, .ioctl = zoran_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, .read = zoran_read, .write = zoran_write, diff --git a/drivers/media/video/zr36120.c b/drivers/media/video/zr36120.c index 07286816d7df..d4c633b8a7f5 100644 --- a/drivers/media/video/zr36120.c +++ b/drivers/media/video/zr36120.c @@ -1490,6 +1490,7 @@ static struct video_device zr36120_template= .write = zoran_write, .poll = zoran_poll, .ioctl = zoran_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .mmap = zoran_mmap, .minor = -1, }; diff --git a/drivers/usb/media/dsbr100.c b/drivers/usb/media/dsbr100.c index 6a5700e9d428..25646804d5be 100644 --- a/drivers/usb/media/dsbr100.c +++ b/drivers/usb/media/dsbr100.c @@ -127,6 +127,7 @@ static struct file_operations usb_dsbr100_fops = { .open = usb_dsbr100_open, .release = usb_dsbr100_close, .ioctl = usb_dsbr100_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/usb/media/ov511.c b/drivers/usb/media/ov511.c index 3a0e8ce67ebe..8af665bbe330 100644 --- a/drivers/usb/media/ov511.c +++ b/drivers/usb/media/ov511.c @@ -4774,6 +4774,7 @@ static struct file_operations ov511_fops = { .read = ov51x_v4l1_read, .mmap = ov51x_v4l1_mmap, .ioctl = ov51x_v4l1_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git a/drivers/usb/media/pwc/pwc-if.c b/drivers/usb/media/pwc/pwc-if.c index 
09ca6128ac20..4f9b0dc6fd7b 100644 --- a/drivers/usb/media/pwc/pwc-if.c +++ b/drivers/usb/media/pwc/pwc-if.c @@ -154,6 +154,7 @@ static struct file_operations pwc_fops = { .poll = pwc_video_poll, .mmap = pwc_video_mmap, .ioctl = pwc_video_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; static struct video_device pwc_template = { diff --git a/drivers/usb/media/se401.c b/drivers/usb/media/se401.c index b2ae29af5940..2ba562285fda 100644 --- a/drivers/usb/media/se401.c +++ b/drivers/usb/media/se401.c @@ -1193,6 +1193,7 @@ static struct file_operations se401_fops = { .read = se401_read, .mmap = se401_mmap, .ioctl = se401_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; static struct video_device se401_template = { diff --git a/drivers/usb/media/stv680.c b/drivers/usb/media/stv680.c index 774038b352cd..b497a6a0a206 100644 --- a/drivers/usb/media/stv680.c +++ b/drivers/usb/media/stv680.c @@ -1343,6 +1343,7 @@ static struct file_operations stv680_fops = { .read = stv680_read, .mmap = stv680_mmap, .ioctl = stv680_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; static struct video_device stv680_template = { diff --git a/drivers/usb/media/usbvideo.c b/drivers/usb/media/usbvideo.c index 4bd113325ef9..63a72e550a1b 100644 --- a/drivers/usb/media/usbvideo.c +++ b/drivers/usb/media/usbvideo.c @@ -953,6 +953,7 @@ static struct file_operations usbvideo_fops = { .read = usbvideo_v4l_read, .mmap = usbvideo_v4l_mmap, .ioctl = usbvideo_v4l_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; static const struct video_device usbvideo_template = { diff --git a/drivers/usb/media/vicam.c b/drivers/usb/media/vicam.c index 1c73155c8d77..5df144073871 100644 --- a/drivers/usb/media/vicam.c +++ b/drivers/usb/media/vicam.c @@ -1236,6 +1236,7 @@ static struct file_operations vicam_fops = { .read = vicam_read, .mmap = vicam_mmap, .ioctl = vicam_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .llseek = no_llseek, }; diff --git 
a/drivers/usb/media/w9968cf.c b/drivers/usb/media/w9968cf.c index 3605a6f3067b..bff9434c8e55 100644 --- a/drivers/usb/media/w9968cf.c +++ b/drivers/usb/media/w9968cf.c @@ -3490,6 +3490,7 @@ static struct file_operations w9968cf_fops = { .release = w9968cf_release, .read = w9968cf_read, .ioctl = w9968cf_ioctl, + .compat_ioctl = v4l_compat_ioctl32, .mmap = w9968cf_mmap, .llseek = no_llseek, }; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 43a2508ac696..55d9a3a954cf 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -207,244 +207,6 @@ static int do_ext3_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg)); } -struct video_tuner32 { - compat_int_t tuner; - char name[32]; - compat_ulong_t rangelow, rangehigh; - u32 flags; /* It is really u32 in videodev.h */ - u16 mode, signal; -}; - -static int get_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user *up) -{ - int i; - - if(get_user(kp->tuner, &up->tuner)) - return -EFAULT; - for(i = 0; i < 32; i++) - __get_user(kp->name[i], &up->name[i]); - __get_user(kp->rangelow, &up->rangelow); - __get_user(kp->rangehigh, &up->rangehigh); - __get_user(kp->flags, &up->flags); - __get_user(kp->mode, &up->mode); - __get_user(kp->signal, &up->signal); - return 0; -} - -static int put_video_tuner32(struct video_tuner *kp, struct video_tuner32 __user *up) -{ - int i; - - if(put_user(kp->tuner, &up->tuner)) - return -EFAULT; - for(i = 0; i < 32; i++) - __put_user(kp->name[i], &up->name[i]); - __put_user(kp->rangelow, &up->rangelow); - __put_user(kp->rangehigh, &up->rangehigh); - __put_user(kp->flags, &up->flags); - __put_user(kp->mode, &up->mode); - __put_user(kp->signal, &up->signal); - return 0; -} - -struct video_buffer32 { - compat_caddr_t base; - compat_int_t height, width, depth, bytesperline; -}; - -static int get_video_buffer32(struct video_buffer *kp, struct video_buffer32 __user *up) -{ - u32 tmp; - - if (get_user(tmp, &up->base)) - 
return -EFAULT; - - /* This is actually a physical address stored - * as a void pointer. - */ - kp->base = (void *)(unsigned long) tmp; - - __get_user(kp->height, &up->height); - __get_user(kp->width, &up->width); - __get_user(kp->depth, &up->depth); - __get_user(kp->bytesperline, &up->bytesperline); - return 0; -} - -static int put_video_buffer32(struct video_buffer *kp, struct video_buffer32 __user *up) -{ - u32 tmp = (u32)((unsigned long)kp->base); - - if(put_user(tmp, &up->base)) - return -EFAULT; - __put_user(kp->height, &up->height); - __put_user(kp->width, &up->width); - __put_user(kp->depth, &up->depth); - __put_user(kp->bytesperline, &up->bytesperline); - return 0; -} - -struct video_clip32 { - s32 x, y, width, height; /* Its really s32 in videodev.h */ - compat_caddr_t next; -}; - -struct video_window32 { - u32 x, y, width, height, chromakey, flags; - compat_caddr_t clips; - compat_int_t clipcount; -}; - -/* You get back everything except the clips... */ -static int put_video_window32(struct video_window *kp, struct video_window32 __user *up) -{ - if(put_user(kp->x, &up->x)) - return -EFAULT; - __put_user(kp->y, &up->y); - __put_user(kp->width, &up->width); - __put_user(kp->height, &up->height); - __put_user(kp->chromakey, &up->chromakey); - __put_user(kp->flags, &up->flags); - __put_user(kp->clipcount, &up->clipcount); - return 0; -} - -#define VIDIOCGTUNER32 _IOWR('v',4, struct video_tuner32) -#define VIDIOCSTUNER32 _IOW('v',5, struct video_tuner32) -#define VIDIOCGWIN32 _IOR('v',9, struct video_window32) -#define VIDIOCSWIN32 _IOW('v',10, struct video_window32) -#define VIDIOCGFBUF32 _IOR('v',11, struct video_buffer32) -#define VIDIOCSFBUF32 _IOW('v',12, struct video_buffer32) -#define VIDIOCGFREQ32 _IOR('v',14, u32) -#define VIDIOCSFREQ32 _IOW('v',15, u32) - -enum { - MaxClips = (~0U-sizeof(struct video_window))/sizeof(struct video_clip) -}; - -static int do_set_window(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - struct video_window32 
__user *up = compat_ptr(arg); - struct video_window __user *vw; - struct video_clip __user *p; - int nclips; - u32 n; - - if (get_user(nclips, &up->clipcount)) - return -EFAULT; - - /* Peculiar interface... */ - if (nclips < 0) - nclips = VIDEO_CLIPMAP_SIZE; - - if (nclips > MaxClips) - return -ENOMEM; - - vw = compat_alloc_user_space(sizeof(struct video_window) + - nclips * sizeof(struct video_clip)); - - p = nclips ? (struct video_clip __user *)(vw + 1) : NULL; - - if (get_user(n, &up->x) || put_user(n, &vw->x) || - get_user(n, &up->y) || put_user(n, &vw->y) || - get_user(n, &up->width) || put_user(n, &vw->width) || - get_user(n, &up->height) || put_user(n, &vw->height) || - get_user(n, &up->chromakey) || put_user(n, &vw->chromakey) || - get_user(n, &up->flags) || put_user(n, &vw->flags) || - get_user(n, &up->clipcount) || put_user(n, &vw->clipcount) || - get_user(n, &up->clips) || put_user(p, &vw->clips)) - return -EFAULT; - - if (nclips) { - struct video_clip32 __user *u = compat_ptr(n); - int i; - if (!u) - return -EINVAL; - for (i = 0; i < nclips; i++, u++, p++) { - s32 v; - if (get_user(v, &u->x) || - put_user(v, &p->x) || - get_user(v, &u->y) || - put_user(v, &p->y) || - get_user(v, &u->width) || - put_user(v, &p->width) || - get_user(v, &u->height) || - put_user(v, &p->height) || - put_user(NULL, &p->next)) - return -EFAULT; - } - } - - return sys_ioctl(fd, VIDIOCSWIN, (unsigned long)p); -} - -static int do_video_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - union { - struct video_tuner vt; - struct video_buffer vb; - struct video_window vw; - unsigned long vx; - } karg; - mm_segment_t old_fs = get_fs(); - void __user *up = compat_ptr(arg); - int err = 0; - - /* First, convert the command. 
*/ - switch(cmd) { - case VIDIOCGTUNER32: cmd = VIDIOCGTUNER; break; - case VIDIOCSTUNER32: cmd = VIDIOCSTUNER; break; - case VIDIOCGWIN32: cmd = VIDIOCGWIN; break; - case VIDIOCGFBUF32: cmd = VIDIOCGFBUF; break; - case VIDIOCSFBUF32: cmd = VIDIOCSFBUF; break; - case VIDIOCGFREQ32: cmd = VIDIOCGFREQ; break; - case VIDIOCSFREQ32: cmd = VIDIOCSFREQ; break; - }; - - switch(cmd) { - case VIDIOCSTUNER: - case VIDIOCGTUNER: - err = get_video_tuner32(&karg.vt, up); - break; - - case VIDIOCSFBUF: - err = get_video_buffer32(&karg.vb, up); - break; - - case VIDIOCSFREQ: - err = get_user(karg.vx, (u32 __user *)up); - break; - }; - if(err) - goto out; - - set_fs(KERNEL_DS); - err = sys_ioctl(fd, cmd, (unsigned long)&karg); - set_fs(old_fs); - - if(err == 0) { - switch(cmd) { - case VIDIOCGTUNER: - err = put_video_tuner32(&karg.vt, up); - break; - - case VIDIOCGWIN: - err = put_video_window32(&karg.vw, up); - break; - - case VIDIOCGFBUF: - err = put_video_buffer32(&karg.vb, up); - break; - - case VIDIOCGFREQ: - err = put_user(((u32)karg.vx), (u32 __user *)up); - break; - }; - } -out: - return err; -} - struct compat_dmx_event { dmx_event_t event; compat_time_t timeStamp; @@ -3015,14 +2777,6 @@ COMPATIBLE_IOCTL(EXT3_IOC_GROUP_ADD) #ifdef CONFIG_JBD_DEBUG HANDLE_IOCTL(EXT3_IOC32_WAIT_FOR_READONLY, do_ext3_ioctl) #endif -HANDLE_IOCTL(VIDIOCGTUNER32, do_video_ioctl) -HANDLE_IOCTL(VIDIOCSTUNER32, do_video_ioctl) -HANDLE_IOCTL(VIDIOCGWIN32, do_video_ioctl) -HANDLE_IOCTL(VIDIOCSWIN32, do_set_window) -HANDLE_IOCTL(VIDIOCGFBUF32, do_video_ioctl) -HANDLE_IOCTL(VIDIOCSFBUF32, do_video_ioctl) -HANDLE_IOCTL(VIDIOCGFREQ32, do_video_ioctl) -HANDLE_IOCTL(VIDIOCSFREQ32, do_video_ioctl) /* One SMB ioctl needs translations. 
*/ #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t) HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid) diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index 119f9d064cc6..339878952f12 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -218,32 +218,6 @@ COMPATIBLE_IOCTL(VT_RESIZE) COMPATIBLE_IOCTL(VT_RESIZEX) COMPATIBLE_IOCTL(VT_LOCKSWITCH) COMPATIBLE_IOCTL(VT_UNLOCKSWITCH) -/* Little v */ -/* Little v, the video4linux ioctls (conflict?) */ -COMPATIBLE_IOCTL(VIDIOCGCAP) -COMPATIBLE_IOCTL(VIDIOCGCHAN) -COMPATIBLE_IOCTL(VIDIOCSCHAN) -COMPATIBLE_IOCTL(VIDIOCGPICT) -COMPATIBLE_IOCTL(VIDIOCSPICT) -COMPATIBLE_IOCTL(VIDIOCCAPTURE) -COMPATIBLE_IOCTL(VIDIOCKEY) -COMPATIBLE_IOCTL(VIDIOCGAUDIO) -COMPATIBLE_IOCTL(VIDIOCSAUDIO) -COMPATIBLE_IOCTL(VIDIOCSYNC) -COMPATIBLE_IOCTL(VIDIOCMCAPTURE) -COMPATIBLE_IOCTL(VIDIOCGMBUF) -COMPATIBLE_IOCTL(VIDIOCGUNIT) -COMPATIBLE_IOCTL(VIDIOCGCAPTURE) -COMPATIBLE_IOCTL(VIDIOCSCAPTURE) -/* BTTV specific... */ -COMPATIBLE_IOCTL(_IOW('v', BASE_VIDIOCPRIVATE+0, char [256])) -COMPATIBLE_IOCTL(_IOR('v', BASE_VIDIOCPRIVATE+1, char [256])) -COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+2, unsigned int)) -COMPATIBLE_IOCTL(_IOW('v' , BASE_VIDIOCPRIVATE+3, char [16])) /* struct bttv_pll_info */ -COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+4, int)) -COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+5, int)) -COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+6, int)) -COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+7, int)) /* Little p (/dev/rtc, /dev/envctrl, etc.) 
*/ COMPATIBLE_IOCTL(RTC_AIE_ON) COMPATIBLE_IOCTL(RTC_AIE_OFF) diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 1cded681eb6d..13f78ec4bf76 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -1117,6 +1117,9 @@ typedef int (*v4l2_kioctl)(struct inode *inode, struct file *file, unsigned int cmd, void *arg); int v4l_compat_translate_ioctl(struct inode *inode, struct file *file, int cmd, void *arg, v4l2_kioctl driver_ioctl); +/* 32 Bits compatibility layer for 64 bits processors */ +extern long v4l_compat_ioctl32(struct file *file, unsigned int cmd, + unsigned long arg); #endif /* __KERNEL__ */ #endif /* __LINUX_VIDEODEV2_H */ -- cgit v1.2.3-71-gd317 From f3c5987a386300abea9854b32814d0eab7af7841 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 9 Jan 2006 15:25:00 -0200 Subject: V4L (0987): Added Secam L' std on tda9887 and common macros moved to videodev2.h - Added SECAM L' video standard - Common std macros moved to videodev2.h Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/tda8290.c | 6 ------ drivers/media/video/tda9887.c | 9 +++++++++ include/linux/videodev2.h | 11 ++++++++++- 3 files changed, 19 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/video/tda8290.c b/drivers/media/video/tda8290.c index 61d94ddaff41..2498b76df429 100644 --- a/drivers/media/video/tda8290.c +++ b/drivers/media/video/tda8290.c @@ -398,14 +398,8 @@ static int tda8290_tune(struct i2c_client *c, u16 ifc, unsigned int freq) return 0; } - /*---------------------------------------------------------------------*/ -#define V4L2_STD_MN (V4L2_STD_PAL_M|V4L2_STD_PAL_N|V4L2_STD_PAL_Nc|V4L2_STD_NTSC) -#define V4L2_STD_B (V4L2_STD_PAL_B|V4L2_STD_PAL_B1|V4L2_STD_SECAM_B) -#define V4L2_STD_GH (V4L2_STD_PAL_G|V4L2_STD_PAL_H|V4L2_STD_SECAM_G|V4L2_STD_SECAM_H) -#define V4L2_STD_DK (V4L2_STD_PAL_DK|V4L2_STD_SECAM_DK) - static void set_audio(struct tuner *t) { char* mode; diff --git 
a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c index 7165a1b9625a..93bf10436b82 100644 --- a/drivers/media/video/tda9887.c +++ b/drivers/media/video/tda9887.c @@ -189,6 +189,15 @@ static struct tvnorm tvnorms[] = { .e = ( cGating_36 | cAudioIF_6_5 | cVideoIF_38_90 ), + },{ + .std = V4L2_STD_SECAM_LC, + .name = "SECAM-L'", + .b = ( cOutputPort2Inactive | + cPositiveAmTV | + cQSS ), + .e = ( cGating_36 | + cAudioIF_6_5 | + cVideoIF_33_90 ), },{ .std = V4L2_STD_SECAM_DK, .name = "SECAM-DK", diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 13f78ec4bf76..b2f5e864b397 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -642,6 +642,12 @@ typedef __u64 v4l2_std_id; #define V4L2_STD_ATSC_8_VSB ((v4l2_std_id)0x01000000) #define V4L2_STD_ATSC_16_VSB ((v4l2_std_id)0x02000000) +/* some merged standards */ +#define V4L2_STD_MN (V4L2_STD_PAL_M|V4L2_STD_PAL_N|V4L2_STD_PAL_Nc|V4L2_STD_NTSC) +#define V4L2_STD_B (V4L2_STD_PAL_B|V4L2_STD_PAL_B1|V4L2_STD_SECAM_B) +#define V4L2_STD_GH (V4L2_STD_PAL_G|V4L2_STD_PAL_H|V4L2_STD_SECAM_G|V4L2_STD_SECAM_H) +#define V4L2_STD_DK (V4L2_STD_PAL_DK|V4L2_STD_SECAM_DK) + /* some common needed stuff */ #define V4L2_STD_PAL_BG (V4L2_STD_PAL_B |\ V4L2_STD_PAL_B1 |\ @@ -662,7 +668,8 @@ typedef __u64 v4l2_std_id; V4L2_STD_SECAM_G |\ V4L2_STD_SECAM_H |\ V4L2_STD_SECAM_DK |\ - V4L2_STD_SECAM_L) + V4L2_STD_SECAM_L |\ + V4L2_STD_SECAM_LC) #define V4L2_STD_525_60 (V4L2_STD_PAL_M |\ V4L2_STD_PAL_60 |\ @@ -1117,10 +1124,12 @@ typedef int (*v4l2_kioctl)(struct inode *inode, struct file *file, unsigned int cmd, void *arg); int v4l_compat_translate_ioctl(struct inode *inode, struct file *file, int cmd, void *arg, v4l2_kioctl driver_ioctl); + /* 32 Bits compatibility layer for 64 bits processors */ extern long v4l_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg); + #endif /* __KERNEL__ */ #endif /* __LINUX_VIDEODEV2_H */ -- cgit v1.2.3-71-gd317 From 
36cb557a2f64513e2fdc1a542167e5e8a6c1c67e Mon Sep 17 00:00:00 2001 From: Andrew de Quincey Date: Mon, 9 Jan 2006 15:25:07 -0200 Subject: DVB (2444): Implement frontend-specific tuning and the ability to disable zigzag - Implement frontend-specific tuning and the ability to disable zigzag Signed-off-by: Andrew de Quincey Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb/bt8xx/dst.c | 52 +++-- drivers/media/dvb/dvb-core/dvb_frontend.c | 307 +++++++++++++++++------------- drivers/media/dvb/dvb-core/dvb_frontend.h | 11 +- include/linux/dvb/frontend.h | 10 + 4 files changed, 228 insertions(+), 152 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/dvb/bt8xx/dst.c b/drivers/media/dvb/bt8xx/dst.c index 8977c7a313df..3a2ff1cc24b7 100644 --- a/drivers/media/dvb/bt8xx/dst.c +++ b/drivers/media/dvb/bt8xx/dst.c @@ -1341,30 +1341,40 @@ static int dst_read_snr(struct dvb_frontend *fe, u16 *snr) return 0; } -static int dst_set_frontend(struct dvb_frontend *fe, struct dvb_frontend_parameters *p) +static int dst_set_frontend(struct dvb_frontend* fe, + struct dvb_frontend_parameters* p, + unsigned int mode_flags, + int *delay, + fe_status_t *status) { struct dst_state *state = fe->demodulator_priv; - dst_set_freq(state, p->frequency); - dprintk(verbose, DST_DEBUG, 1, "Set Frequency=[%d]", p->frequency); + if (p != NULL) { + dst_set_freq(state, p->frequency); + dprintk(verbose, DST_DEBUG, 1, "Set Frequency=[%d]", p->frequency); - if (state->dst_type == DST_TYPE_IS_SAT) { - if (state->type_flags & DST_TYPE_HAS_OBS_REGS) - dst_set_inversion(state, p->inversion); - dst_set_fec(state, p->u.qpsk.fec_inner); - dst_set_symbolrate(state, p->u.qpsk.symbol_rate); - dst_set_polarization(state); - dprintk(verbose, DST_DEBUG, 1, "Set Symbolrate=[%d]", p->u.qpsk.symbol_rate); - - } else if (state->dst_type == DST_TYPE_IS_TERR) - dst_set_bandwidth(state, p->u.ofdm.bandwidth); - else if (state->dst_type == DST_TYPE_IS_CABLE) { - dst_set_fec(state, p->u.qam.fec_inner); - 
dst_set_symbolrate(state, p->u.qam.symbol_rate); - dst_set_modulation(state, p->u.qam.modulation); + if (state->dst_type == DST_TYPE_IS_SAT) { + if (state->type_flags & DST_TYPE_HAS_OBS_REGS) + dst_set_inversion(state, p->inversion); + dst_set_fec(state, p->u.qpsk.fec_inner); + dst_set_symbolrate(state, p->u.qpsk.symbol_rate); + dst_set_polarization(state); + dprintk(verbose, DST_DEBUG, 1, "Set Symbolrate=[%d]", p->u.qpsk.symbol_rate); + + } else if (state->dst_type == DST_TYPE_IS_TERR) + dst_set_bandwidth(state, p->u.ofdm.bandwidth); + else if (state->dst_type == DST_TYPE_IS_CABLE) { + dst_set_fec(state, p->u.qam.fec_inner); + dst_set_symbolrate(state, p->u.qam.symbol_rate); + dst_set_modulation(state, p->u.qam.modulation); + } + dst_write_tuna(fe); } - dst_write_tuna(fe); + if (!(mode_flags & FE_TUNE_MODE_ONESHOT)) + dst_read_status(fe, status); + + *delay = HZ/10; return 0; } @@ -1445,7 +1455,7 @@ static struct dvb_frontend_ops dst_dvbt_ops = { .release = dst_release, .init = dst_init, - .set_frontend = dst_set_frontend, + .tune = dst_set_frontend, .get_frontend = dst_get_frontend, .read_status = dst_read_status, .read_signal_strength = dst_read_signal_strength, @@ -1469,7 +1479,7 @@ static struct dvb_frontend_ops dst_dvbs_ops = { .release = dst_release, .init = dst_init, - .set_frontend = dst_set_frontend, + .tune = dst_set_frontend, .get_frontend = dst_get_frontend, .read_status = dst_read_status, .read_signal_strength = dst_read_signal_strength, @@ -1496,7 +1506,7 @@ static struct dvb_frontend_ops dst_dvbc_ops = { .release = dst_release, .init = dst_init, - .set_frontend = dst_set_frontend, + .tune = dst_set_frontend, .get_frontend = dst_get_frontend, .read_status = dst_read_status, .read_signal_strength = dst_read_signal_strength, diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c index 95ea5095e07e..9b5fa540e1e7 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.c +++ 
b/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -92,6 +92,7 @@ static DECLARE_MUTEX(frontend_mutex); struct dvb_frontend_private { + /* thread/frontend values */ struct dvb_device *dvbdev; struct dvb_frontend_parameters parameters; struct dvb_fe_events events; @@ -100,20 +101,25 @@ struct dvb_frontend_private { wait_queue_head_t wait_queue; pid_t thread_pid; unsigned long release_jiffies; - int state; - int bending; - int lnb_drift; - int inversion; - int auto_step; - int auto_sub_step; - int started_auto_step; - int min_delay; - int max_drift; - int step_size; - int exit; - int wakeup; + unsigned int exit; + unsigned int wakeup; fe_status_t status; - fe_sec_tone_mode_t tone; + unsigned int tune_mode_flags; + unsigned int delay; + + /* swzigzag values */ + unsigned int state; + unsigned int bending; + int lnb_drift; + unsigned int inversion; + unsigned int auto_step; + unsigned int auto_sub_step; + unsigned int started_auto_step; + unsigned int min_delay; + unsigned int max_drift; + unsigned int step_size; + int quality; + unsigned int check_wrapped; }; @@ -208,21 +214,21 @@ static void dvb_frontend_init(struct dvb_frontend *fe) fe->ops->init(fe); } -static void update_delay(int *quality, int *delay, int min_delay, int locked) +static void dvb_frontend_swzigzag_update_delay(struct dvb_frontend_private *fepriv, int locked) { - int q2; + int q2; - dprintk ("%s\n", __FUNCTION__); + dprintk ("%s\n", __FUNCTION__); - if (locked) - (*quality) = (*quality * 220 + 36*256) / 256; - else - (*quality) = (*quality * 220 + 0) / 256; + if (locked) + (fepriv->quality) = (fepriv->quality * 220 + 36*256) / 256; + else + (fepriv->quality) = (fepriv->quality * 220 + 0) / 256; - q2 = *quality - 128; - q2 *= q2; + q2 = fepriv->quality - 128; + q2 *= q2; - *delay = min_delay + q2 * HZ / (128*128); + fepriv->delay = fepriv->min_delay + q2 * HZ / (128*128); } /** @@ -232,7 +238,7 @@ static void update_delay(int *quality, int *delay, int min_delay, int locked) * @param check_wrapped Checks 
if an iteration has completed. DO NOT SET ON THE FIRST ATTEMPT * @returns Number of complete iterations that have been performed. */ -static int dvb_frontend_autotune(struct dvb_frontend *fe, int check_wrapped) +static int dvb_frontend_swzigzag_autotune(struct dvb_frontend *fe, int check_wrapped) { int autoinversion; int ready = 0; @@ -321,6 +327,129 @@ static int dvb_frontend_autotune(struct dvb_frontend *fe, int check_wrapped) return 0; } +static void dvb_frontend_swzigzag(struct dvb_frontend *fe) +{ + fe_status_t s; + struct dvb_frontend_private *fepriv = fe->frontend_priv; + + /* if we've got no parameters, just keep idling */ + if (fepriv->state & FESTATE_IDLE) { + fepriv->delay = 3*HZ; + fepriv->quality = 0; + return; + } + + /* in SCAN mode, we just set the frontend when asked and leave it alone */ + if (fepriv->tune_mode_flags & FE_TUNE_MODE_ONESHOT) { + if (fepriv->state & FESTATE_RETUNE) { + if (fe->ops->set_frontend) + fe->ops->set_frontend(fe, &fepriv->parameters); + fepriv->state = FESTATE_TUNED; + } + fepriv->delay = 3*HZ; + fepriv->quality = 0; + return; + } + + /* get the frontend status */ + if (fepriv->state & FESTATE_RETUNE) { + s = 0; + } else { + if (fe->ops->read_status) + fe->ops->read_status(fe, &s); + if (s != fepriv->status) { + dvb_frontend_add_event(fe, s); + fepriv->status = s; + } + } + + /* if we're not tuned, and we have a lock, move to the TUNED state */ + if ((fepriv->state & FESTATE_WAITFORLOCK) && (s & FE_HAS_LOCK)) { + dvb_frontend_swzigzag_update_delay(fepriv, s & FE_HAS_LOCK); + fepriv->state = FESTATE_TUNED; + + /* if we're tuned, then we have determined the correct inversion */ + if ((!(fe->ops->info.caps & FE_CAN_INVERSION_AUTO)) && + (fepriv->parameters.inversion == INVERSION_AUTO)) { + fepriv->parameters.inversion = fepriv->inversion; + } + return; + } + + /* if we are tuned already, check we're still locked */ + if (fepriv->state & FESTATE_TUNED) { + dvb_frontend_swzigzag_update_delay(fepriv, s & FE_HAS_LOCK); + + /* 
we're tuned, and the lock is still good... */ + if (s & FE_HAS_LOCK) { + return; + } else { /* if we _WERE_ tuned, but now don't have a lock */ + fepriv->state = FESTATE_ZIGZAG_FAST; + fepriv->started_auto_step = fepriv->auto_step; + fepriv->check_wrapped = 0; + } + } + + /* don't actually do anything if we're in the LOSTLOCK state, + * the frontend is set to FE_CAN_RECOVER, and the max_drift is 0 */ + if ((fepriv->state & FESTATE_LOSTLOCK) && + (fe->ops->info.caps & FE_CAN_RECOVER) && (fepriv->max_drift == 0)) { + dvb_frontend_swzigzag_update_delay(fepriv, s & FE_HAS_LOCK); + return; + } + + /* don't do anything if we're in the DISEQC state, since this + * might be someone with a motorized dish controlled by DISEQC. + * If its actually a re-tune, there will be a SET_FRONTEND soon enough. */ + if (fepriv->state & FESTATE_DISEQC) { + dvb_frontend_swzigzag_update_delay(fepriv, s & FE_HAS_LOCK); + return; + } + + /* if we're in the RETUNE state, set everything up for a brand + * new scan, keeping the current inversion setting, as the next + * tune is _very_ likely to require the same */ + if (fepriv->state & FESTATE_RETUNE) { + fepriv->lnb_drift = 0; + fepriv->auto_step = 0; + fepriv->auto_sub_step = 0; + fepriv->started_auto_step = 0; + fepriv->check_wrapped = 0; + } + + /* fast zigzag. */ + if ((fepriv->state & FESTATE_SEARCHING_FAST) || (fepriv->state & FESTATE_RETUNE)) { + fepriv->delay = fepriv->min_delay; + + /* peform a tune */ + if (dvb_frontend_swzigzag_autotune(fe, fepriv->check_wrapped)) { + /* OK, if we've run out of trials at the fast speed. + * Drop back to slow for the _next_ attempt */ + fepriv->state = FESTATE_SEARCHING_SLOW; + fepriv->started_auto_step = fepriv->auto_step; + return; + } + fepriv->check_wrapped = 1; + + /* if we've just retuned, enter the ZIGZAG_FAST state. 
+ * This ensures we cannot return from an + * FE_SET_FRONTEND ioctl before the first frontend tune + * occurs */ + if (fepriv->state & FESTATE_RETUNE) { + fepriv->state = FESTATE_TUNING_FAST; + } + } + + /* slow zigzag */ + if (fepriv->state & FESTATE_SEARCHING_SLOW) { + dvb_frontend_swzigzag_update_delay(fepriv, s & FE_HAS_LOCK); + + /* Note: don't bother checking for wrapping; we stay in this + * state until we get a lock */ + dvb_frontend_swzigzag_autotune(fe, 0); + } +} + static int dvb_frontend_is_exiting(struct dvb_frontend *fe) { struct dvb_frontend_private *fepriv = fe->frontend_priv; @@ -330,7 +459,7 @@ static int dvb_frontend_is_exiting(struct dvb_frontend *fe) if (fepriv->dvbdev->writers == 1) if (time_after(jiffies, fepriv->release_jiffies + - dvb_shutdown_timeout * HZ)) + dvb_shutdown_timeout * HZ)) return 1; return 0; @@ -355,18 +484,14 @@ static void dvb_frontend_wakeup(struct dvb_frontend *fe) wake_up_interruptible(&fepriv->wait_queue); } -/* - * FIXME: use linux/kthread.h - */ static int dvb_frontend_thread(void *data) { struct dvb_frontend *fe = data; struct dvb_frontend_private *fepriv = fe->frontend_priv; unsigned long timeout; char name [15]; - int quality = 0, delay = 3*HZ; fe_status_t s; - int check_wrapped = 0; + struct dvb_frontend_parameters *params; dprintk("%s\n", __FUNCTION__); @@ -377,6 +502,9 @@ static int dvb_frontend_thread(void *data) sigfillset(&current->blocked); unlock_kernel(); + fepriv->check_wrapped = 0; + fepriv->quality = 0; + fepriv->delay = 3*HZ; fepriv->status = 0; dvb_frontend_init(fe); fepriv->wakeup = 0; @@ -386,7 +514,7 @@ static int dvb_frontend_thread(void *data) timeout = wait_event_interruptible_timeout(fepriv->wait_queue, dvb_frontend_should_wakeup(fe), - delay); + fepriv->delay); if (0 != dvb_frontend_is_exiting(fe)) { /* got signal or quitting */ break; @@ -397,108 +525,22 @@ static int dvb_frontend_thread(void *data) if (down_interruptible(&fepriv->sem)) break; - /* if we've got no parameters, just keep idling */ - 
if (fepriv->state & FESTATE_IDLE) { - delay = 3*HZ; - quality = 0; - continue; - } + /* do an iteration of the tuning loop */ + if (fe->ops->tune) { + /* have we been asked to retune? */ + params = NULL; + if (fepriv->state & FESTATE_RETUNE) { + params = &fepriv->parameters; + fepriv->state = FESTATE_TUNED; + } - /* get the frontend status */ - if (fepriv->state & FESTATE_RETUNE) { - s = 0; - } else { - if (fe->ops->read_status) - fe->ops->read_status(fe, &s); + fe->ops->tune(fe, params, fepriv->tune_mode_flags, &fepriv->delay, &s); if (s != fepriv->status) { dvb_frontend_add_event(fe, s); fepriv->status = s; } - } - /* if we're not tuned, and we have a lock, move to the TUNED state */ - if ((fepriv->state & FESTATE_WAITFORLOCK) && (s & FE_HAS_LOCK)) { - update_delay(&quality, &delay, fepriv->min_delay, s & FE_HAS_LOCK); - fepriv->state = FESTATE_TUNED; - - /* if we're tuned, then we have determined the correct inversion */ - if ((!(fe->ops->info.caps & FE_CAN_INVERSION_AUTO)) && - (fepriv->parameters.inversion == INVERSION_AUTO)) { - fepriv->parameters.inversion = fepriv->inversion; - } - continue; - } - - /* if we are tuned already, check we're still locked */ - if (fepriv->state & FESTATE_TUNED) { - update_delay(&quality, &delay, fepriv->min_delay, s & FE_HAS_LOCK); - - /* we're tuned, and the lock is still good... 
*/ - if (s & FE_HAS_LOCK) - continue; - else { /* if we _WERE_ tuned, but now don't have a lock */ - fepriv->state = FESTATE_ZIGZAG_FAST; - fepriv->started_auto_step = fepriv->auto_step; - check_wrapped = 0; - } - } - - /* don't actually do anything if we're in the LOSTLOCK state, - * the frontend is set to FE_CAN_RECOVER, and the max_drift is 0 */ - if ((fepriv->state & FESTATE_LOSTLOCK) && - (fe->ops->info.caps & FE_CAN_RECOVER) && (fepriv->max_drift == 0)) { - update_delay(&quality, &delay, fepriv->min_delay, s & FE_HAS_LOCK); - continue; - } - - /* don't do anything if we're in the DISEQC state, since this - * might be someone with a motorized dish controlled by DISEQC. - * If its actually a re-tune, there will be a SET_FRONTEND soon enough. */ - if (fepriv->state & FESTATE_DISEQC) { - update_delay(&quality, &delay, fepriv->min_delay, s & FE_HAS_LOCK); - continue; - } - - /* if we're in the RETUNE state, set everything up for a brand - * new scan, keeping the current inversion setting, as the next - * tune is _very_ likely to require the same */ - if (fepriv->state & FESTATE_RETUNE) { - fepriv->lnb_drift = 0; - fepriv->auto_step = 0; - fepriv->auto_sub_step = 0; - fepriv->started_auto_step = 0; - check_wrapped = 0; - } - - /* fast zigzag. */ - if ((fepriv->state & FESTATE_SEARCHING_FAST) || (fepriv->state & FESTATE_RETUNE)) { - delay = fepriv->min_delay; - - /* peform a tune */ - if (dvb_frontend_autotune(fe, check_wrapped)) { - /* OK, if we've run out of trials at the fast speed. - * Drop back to slow for the _next_ attempt */ - fepriv->state = FESTATE_SEARCHING_SLOW; - fepriv->started_auto_step = fepriv->auto_step; - continue; - } - check_wrapped = 1; - - /* if we've just retuned, enter the ZIGZAG_FAST state. 
- * This ensures we cannot return from an - * FE_SET_FRONTEND ioctl before the first frontend tune - * occurs */ - if (fepriv->state & FESTATE_RETUNE) { - fepriv->state = FESTATE_TUNING_FAST; - } - } - - /* slow zigzag */ - if (fepriv->state & FESTATE_SEARCHING_SLOW) { - update_delay(&quality, &delay, fepriv->min_delay, s & FE_HAS_LOCK); - - /* Note: don't bother checking for wrapping; we stay in this - * state until we get a lock */ - dvb_frontend_autotune(fe, 0); + } else { + dvb_frontend_swzigzag(fe); } } @@ -733,7 +775,6 @@ static int dvb_frontend_ioctl(struct inode *inode, struct file *file, err = fe->ops->set_tone(fe, (fe_sec_tone_mode_t) parg); fepriv->state = FESTATE_DISEQC; fepriv->status = 0; - fepriv->tone = (fe_sec_tone_mode_t) parg; } break; @@ -891,6 +932,10 @@ static int dvb_frontend_ioctl(struct inode *inode, struct file *file, err = fe->ops->get_frontend(fe, (struct dvb_frontend_parameters*) parg); } break; + + case FE_SET_FRONTEND_TUNE_MODE: + fepriv->tune_mode_flags = (unsigned int) parg; + break; }; up (&fepriv->sem); @@ -932,6 +977,9 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) /* empty event queue */ fepriv->events.eventr = fepriv->events.eventw = 0; + + /* normal tune mode when opened R/W */ + fepriv->tune_mode_flags &= ~FE_TUNE_MODE_ONESHOT; } return ret; @@ -990,7 +1038,6 @@ int dvb_register_frontend(struct dvb_adapter* dvb, init_MUTEX (&fepriv->events.sem); fe->dvb = dvb; fepriv->inversion = INVERSION_OFF; - fepriv->tone = SEC_TONE_OFF; printk ("DVB: registering frontend %i (%s)...\n", fe->dvb->num, diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.h b/drivers/media/dvb/dvb-core/dvb_frontend.h index 1e0840d02f1f..48c3f81be912 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.h +++ b/drivers/media/dvb/dvb-core/dvb_frontend.h @@ -58,10 +58,19 @@ struct dvb_frontend_ops { int (*init)(struct dvb_frontend* fe); int (*sleep)(struct dvb_frontend* fe); + /* if this is set, it overrides the default swzigzag */ + int 
(*tune)(struct dvb_frontend* fe, + struct dvb_frontend_parameters* params, + unsigned int mode_flags, + int *delay, + fe_status_t *status); + + /* these two are only used for the swzigzag code */ int (*set_frontend)(struct dvb_frontend* fe, struct dvb_frontend_parameters* params); - int (*get_frontend)(struct dvb_frontend* fe, struct dvb_frontend_parameters* params); int (*get_tune_settings)(struct dvb_frontend* fe, struct dvb_frontend_tune_settings* settings); + int (*get_frontend)(struct dvb_frontend* fe, struct dvb_frontend_parameters* params); + int (*read_status)(struct dvb_frontend* fe, fe_status_t* status); int (*read_ber)(struct dvb_frontend* fe, u32* ber); int (*read_signal_strength)(struct dvb_frontend* fe, u16* strength); diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h index d41df7047ed7..c8cbd90ba375 100644 --- a/include/linux/dvb/frontend.h +++ b/include/linux/dvb/frontend.h @@ -240,6 +240,15 @@ struct dvb_frontend_event { }; +/** + * When set, this flag will disable any zigzagging or other "normal" tuning + * behaviour. Additionally, there will be no automatic monitoring of the lock + * status, and hence no frontend events will be generated. If a frontend device + * is closed, this flag will be automatically turned off when the device is + * reopened read-write. 
+ */ +#define FE_TUNE_MODE_ONESHOT 0x01 + #define FE_GET_INFO _IOR('o', 61, struct dvb_frontend_info) @@ -260,6 +269,7 @@ struct dvb_frontend_event { #define FE_SET_FRONTEND _IOW('o', 76, struct dvb_frontend_parameters) #define FE_GET_FRONTEND _IOR('o', 77, struct dvb_frontend_parameters) +#define FE_SET_FRONTEND_TUNE_MODE _IO('o', 81) /* unsigned int */ #define FE_GET_EVENT _IOR('o', 78, struct dvb_frontend_event) #define FE_DISHNETWORK_SEND_LEGACY_CMD _IO('o', 80) /* unsigned int */ -- cgit v1.2.3-71-gd317 From 9bb13a6dc3a6f68c990264838ff0493d900c48d7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 9 Jan 2006 15:25:37 -0200 Subject: V4L/DVB (3233): Fixed API to set I2S speed control - Created a new ioctl to control I2S speed. Old calls to an inadequate V4L2 API replaced. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/em28xx/em28xx-cards.c | 8 ++------ drivers/media/video/em28xx/em28xx-video.c | 2 ++ drivers/media/video/em28xx/em28xx.h | 2 ++ drivers/media/video/msp3400.c | 30 ++++++++++++++++++++++-------- include/linux/videodev2.h | 1 - include/media/v4l2-common.h | 7 +++++++ 6 files changed, 35 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c index 57779e63f35d..58f7b4194a0d 100644 --- a/drivers/media/video/em28xx/em28xx-cards.c +++ b/drivers/media/video/em28xx/em28xx-cards.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "msp3400.h" #include "em28xx.h" @@ -261,7 +262,6 @@ void em28xx_card_setup(struct em28xx *dev) /* request some modules */ if (dev->model == EM2820_BOARD_HAUPPAUGE_WINTV_USB_2) { struct tveeprom tv; - struct v4l2_audioout ao; #ifdef CONFIG_MODULES request_module("tveeprom"); request_module("ir-kbd-i2c"); @@ -274,12 +274,8 @@ void em28xx_card_setup(struct em28xx *dev) dev->tuner_type= tv.tuner_type; if (tv.audio_processor == AUDIO_CHIP_MSP34XX) { + dev->i2s_speed=2048000; dev->has_msp34xx=1; 
- memset (&ao,0,sizeof(ao)); - - ao.index=2; - ao.mode=V4L2_AUDMODE_32BITS; - em28xx_i2c_call_clients(dev, VIDIOC_S_AUDOUT, &ao); } else dev->has_msp34xx=0; } diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c index 5e831fccf3fd..0b5557c479ae 100644 --- a/drivers/media/video/em28xx/em28xx-video.c +++ b/drivers/media/video/em28xx/em28xx-video.c @@ -287,6 +287,8 @@ static void video_mux(struct em28xx *dev, int index) em28xx_videodbg("Setting input index=%d, vmux=%d, amux=%d\n",index,input,dev->ctl_ainput); if (dev->has_msp34xx) { + if (dev->i2s_speed) + em28xx_i2c_call_clients(dev, VIDIOC_INT_I2S_CLOCK_FREQ, &dev->i2s_speed); em28xx_i2c_call_clients(dev, VIDIOC_S_AUDIO, &dev->ctl_ainput); ainput = EM28XX_AUDIO_SRC_TUNER; em28xx_audio_source(dev, ainput); diff --git a/drivers/media/video/em28xx/em28xx.h b/drivers/media/video/em28xx/em28xx.h index 5c7a41ce69f3..ffa9acc9be37 100644 --- a/drivers/media/video/em28xx/em28xx.h +++ b/drivers/media/video/em28xx/em28xx.h @@ -216,6 +216,8 @@ struct em28xx { unsigned int has_msp34xx:1; unsigned int has_tda9887:1; + u32 i2s_speed; /* I2S speed for audio digital stream */ + enum em28xx_decoder decoder; int tuner_type; /* type of the tuner */ diff --git a/drivers/media/video/msp3400.c b/drivers/media/video/msp3400.c index fd0589352822..11235c1ac5c6 100644 --- a/drivers/media/video/msp3400.c +++ b/drivers/media/video/msp3400.c @@ -54,6 +54,7 @@ #include #include +#include #include "msp3400.h" /* ---------------------------------------------------------------------- */ @@ -2104,23 +2105,36 @@ static int msp_command(struct i2c_client *client, unsigned int cmd, void *arg) if (a->index<0||a->index>2) return -EINVAL; - if (a->index==2) { - if (a->mode == V4L2_AUDMODE_32BITS) - msp->i2s_mode=1; - else - msp->i2s_mode=0; - } - msp3400_dbg("Setting audio out on msp34xx to input %i, mode %i\n",a->index,msp->i2s_mode); + msp3400_dbg("Setting audio out on msp34xx to input %i\n",a->index); 
msp3400c_set_scart(client,msp->in_scart,a->index+1); break; } + case VIDIOC_INT_I2S_CLOCK_FREQ: + { + u32 *a=(u32 *)arg; + + msp3400_dbg("Setting I2S speed to %d\n",*a); + + switch (*a) { + case 1024000: + msp->i2s_mode=0; + break; + case 2048000: + msp->i2s_mode=1; + break; + default: + return -EINVAL; + } + break; + } + case VIDIOC_QUERYCTRL: { struct v4l2_queryctrl *qc = arg; int i; - msp3400_dbg("VIDIOC_QUERYCTRL"); + msp3400_dbg("VIDIOC_QUERYCTRL\n"); for (i = 0; i < ARRAY_SIZE(msp34xx_qctrl); i++) if (qc->id && qc->id == msp34xx_qctrl[i].id) { diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index b2f5e864b397..6ac7c1f7902f 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -895,7 +895,6 @@ struct v4l2_audio /* Flags for the 'mode' field */ #define V4L2_AUDMODE_AVL 0x00001 -#define V4L2_AUDMODE_32BITS 0x00002 struct v4l2_audioout { diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h index d3fd48157eb8..2f2402996409 100644 --- a/include/media/v4l2-common.h +++ b/include/media/v4l2-common.h @@ -107,4 +107,11 @@ enum v4l2_chip_ident { be made. */ #define VIDIOC_INT_G_CHIP_IDENT _IOR ('d', 107, enum v4l2_chip_ident *) +/* Sets I2S speed in bps. This is used to provide a standard way to select I2S + clock used by driving digital audio streams at some board designs. + Usual values for the frequency are 1024000 and 2048000. + If the frequency is not supported, then -EINVAL is returned. */ +#define VIDIOC_INT_I2S_CLOCK_FREQ _IOW ('d', 108, u32) + + #endif /* V4L2_COMMON_H_ */ -- cgit v1.2.3-71-gd317 From 21dcd8ccd76e80118f524b1a730c35ab1c46c09e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 9 Jan 2006 15:25:37 -0200 Subject: V4L/DVB (3234): Included advanced debug option to tvp5150.c - Included advanced debug option to tvp5150.c - Now, advanced debug info is the first item at V4L menu. 
Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/Kconfig | 16 +++++++++------- drivers/media/video/tvp5150.c | 24 ++++++++++++++++++++++++ include/linux/i2c-id.h | 1 + 3 files changed, 34 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig index c89cc0a922ee..2fe260fff85d 100644 --- a/drivers/media/video/Kconfig +++ b/drivers/media/video/Kconfig @@ -7,6 +7,15 @@ menu "Video For Linux" comment "Video Adapters" +config VIDEO_ADV_DEBUG + bool "Enable advanced debug functionality" + depends on VIDEO_DEV + default n + ---help--- + Say Y here to enable advanced debugging functionality on some + V4L devices. + In doubt, say N. + config VIDEO_BT848 tristate "BT848 Video For Linux" depends on VIDEO_DEV && PCI && I2C @@ -344,11 +353,4 @@ config VIDEO_DECODER Say Y here to compile drivers for SAA7115, SAA7127 and CX25840 video decoders. -config VIDEO_ADV_DEBUG - bool "Enable advanced debug functionality" - depends on VIDEO_DEV && VIDEO_DECODER && EXPERIMENTAL - ---help--- - Say Y here to enable advanced debugging functionality in the - SAA7115, SAA7127 and CX25840 video decoders. 
- endmenu diff --git a/drivers/media/video/tvp5150.c b/drivers/media/video/tvp5150.c index 9ed839d688eb..07ad675cd58e 100644 --- a/drivers/media/video/tvp5150.c +++ b/drivers/media/video/tvp5150.c @@ -850,6 +850,30 @@ static int tvp5150_command(struct i2c_client *c, *(v4l2_std_id *)arg = decoder->norm; break; +#ifdef CONFIG_VIDEO_ADV_DEBUG + case VIDIOC_INT_G_REGISTER: + { + struct v4l2_register *reg = arg; + + if (reg->i2c_id != I2C_DRIVERID_TVP5150) + return -EINVAL; + reg->val = tvp5150_read(c, reg->reg & 0xff); + break; + } + + case VIDIOC_INT_S_REGISTER: + { + struct v4l2_register *reg = arg; + + if (reg->i2c_id != I2C_DRIVERID_TVP5150) + return -EINVAL; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + tvp5150_write(c, reg->reg & 0xff, reg->val & 0xff); + break; + } +#endif + case DECODER_DUMP: dump_reg(c); break; diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index fb46f8d56999..6ff2d365895f 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -103,6 +103,7 @@ #define I2C_DRIVERID_SAA711X 73 /* saa711x video encoders */ #define I2C_DRIVERID_AKITAIOEXP 74 /* IO Expander on Sharp SL-C1000 */ #define I2C_DRIVERID_INFRARED 75 /* I2C InfraRed on Video boards */ +#define I2C_DRIVERID_TVP5150 76 /* TVP5150 video decoder */ #define I2C_DRIVERID_I2CDEV 900 #define I2C_DRIVERID_ARP 902 /* SMBus ARP Client */ -- cgit v1.2.3-71-gd317 From 5e453dc757385ec892a818e4e3b5de027987ced9 Mon Sep 17 00:00:00 2001 From: Michael Krufky Date: Mon, 9 Jan 2006 15:32:31 -0200 Subject: V4L/DVB (3269): ioctls cleanups. 
- Now, all internal ioctls are at v4l2-common.h - removed unused ioctl at saa6752hs.h - all debug ioctl code moved to v4l2-common.c - removed duplicated stuff from other cards Signed-off-by: Michael Krufky Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/bttv-cards.c | 1 + drivers/media/video/bttv-driver.c | 28 +--- drivers/media/video/bttv-i2c.c | 1 + drivers/media/video/cx88/cx88-blackbird.c | 3 +- drivers/media/video/cx88/cx88-core.c | 56 +------ drivers/media/video/cx88/cx88-dvb.c | 1 + drivers/media/video/cx88/cx88-i2c.c | 1 + drivers/media/video/cx88/cx88-video.c | 7 +- drivers/media/video/cx88/cx88.h | 1 - drivers/media/video/em28xx/em28xx-core.c | 53 ------- drivers/media/video/em28xx/em28xx-i2c.c | 1 + drivers/media/video/em28xx/em28xx-video.c | 2 +- drivers/media/video/em28xx/em28xx.h | 2 - drivers/media/video/mxb.c | 1 + drivers/media/video/saa7134/saa7134-cards.c | 1 + drivers/media/video/saa7134/saa7134-core.c | 72 --------- drivers/media/video/saa7134/saa7134-dvb.c | 1 + drivers/media/video/saa7134/saa7134-empress.c | 3 +- drivers/media/video/saa7134/saa7134-i2c.c | 1 + drivers/media/video/saa7134/saa7134-oss.c | 40 ++++- drivers/media/video/saa7134/saa7134-video.c | 5 +- drivers/media/video/saa7134/saa7134.h | 1 - drivers/media/video/tda9887.c | 2 +- drivers/media/video/tuner-core.c | 12 +- drivers/media/video/tvaudio.c | 1 + drivers/media/video/v4l2-common.c | 219 ++++++++++++++++++++------ include/linux/video_decoder.h | 2 + include/linux/videodev2.h | 1 - include/media/audiochip.h | 5 - include/media/tuner.h | 4 - include/media/v4l2-common.h | 25 +++ 31 files changed, 267 insertions(+), 286 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/video/bttv-cards.c b/drivers/media/video/bttv-cards.c index 440f635e020f..1621ab133d23 100644 --- a/drivers/media/video/bttv-cards.c +++ b/drivers/media/video/bttv-cards.c @@ -38,6 +38,7 @@ #include #include "bttvp.h" +#include /* fwd decl */ static void 
boot_msp34xx(struct bttv *btv, int pin); diff --git a/drivers/media/video/bttv-driver.c b/drivers/media/video/bttv-driver.c index 69a147b85f1a..f3de85251719 100644 --- a/drivers/media/video/bttv-driver.c +++ b/drivers/media/video/bttv-driver.c @@ -35,6 +35,7 @@ #include #include #include "bttvp.h" +#include #include @@ -1520,14 +1521,6 @@ static struct videobuf_queue_ops bttv_video_qops = { .buf_release = buffer_release, }; -static const char *v4l1_ioctls[] = { - "?", "CGAP", "GCHAN", "SCHAN", "GTUNER", "STUNER", "GPICT", "SPICT", - "CCAPTURE", "GWIN", "SWIN", "GFBUF", "SFBUF", "KEY", "GFREQ", - "SFREQ", "GAUDIO", "SAUDIO", "SYNC", "MCAPTURE", "GMBUF", "GUNIT", - "GCAPTURE", "SCAPTURE", "SPLAYMODE", "SWRITEMODE", "GPLAYINFO", - "SMICROCODE", "GVBIFMT", "SVBIFMT" }; -#define V4L1_IOCTLS ARRAY_SIZE(v4l1_ioctls) - static int bttv_common_ioctls(struct bttv *btv, unsigned int cmd, void *arg) { switch (cmd) { @@ -2216,22 +2209,9 @@ static int bttv_do_ioctl(struct inode *inode, struct file *file, unsigned long flags; int retval = 0; - if (bttv_debug > 1) { - switch (_IOC_TYPE(cmd)) { - case 'v': - printk("bttv%d: ioctl 0x%x (v4l1, VIDIOC%s)\n", - btv->c.nr, cmd, (_IOC_NR(cmd) < V4L1_IOCTLS) ? 
- v4l1_ioctls[_IOC_NR(cmd)] : "???"); - break; - case 'V': - printk("bttv%d: ioctl 0x%x (v4l2, %s)\n", - btv->c.nr, cmd, v4l2_ioctl_names[_IOC_NR(cmd)]); - break; - default: - printk("bttv%d: ioctl 0x%x (???)\n", - btv->c.nr, cmd); - } - } + if (bttv_debug > 1) + v4l_print_ioctl(btv->c.name, cmd); + if (btv->errors) bttv_reinit_bt848(btv); diff --git a/drivers/media/video/bttv-i2c.c b/drivers/media/video/bttv-i2c.c index a8873f48c808..fd66d386fa7d 100644 --- a/drivers/media/video/bttv-i2c.c +++ b/drivers/media/video/bttv-i2c.c @@ -30,6 +30,7 @@ #include #include "bttvp.h" +#include #include #include diff --git a/drivers/media/video/cx88/cx88-blackbird.c b/drivers/media/video/cx88/cx88-blackbird.c index 5a7f940565cc..a49062119313 100644 --- a/drivers/media/video/cx88/cx88-blackbird.c +++ b/drivers/media/video/cx88/cx88-blackbird.c @@ -32,6 +32,7 @@ #include #include "cx88.h" +#include MODULE_DESCRIPTION("driver for cx2388x/cx23416 based mpeg encoder cards"); MODULE_AUTHOR("Jelle Foks , Gerd Knorr [SuSE Labs]"); @@ -1374,7 +1375,7 @@ static int mpeg_do_ioctl(struct inode *inode, struct file *file, struct cx88_core *core = dev->core; if (debug > 1) - cx88_print_ioctl(core->name,cmd); + v4l_print_ioctl(core->name,cmd); switch (cmd) { diff --git a/drivers/media/video/cx88/cx88-core.c b/drivers/media/video/cx88/cx88-core.c index bb6eb54e19ce..fc814d198694 100644 --- a/drivers/media/video/cx88/cx88-core.c +++ b/drivers/media/video/cx88/cx88-core.c @@ -34,6 +34,7 @@ #include #include "cx88.h" +#include MODULE_DESCRIPTION("v4l2 driver module for cx2388x based TV cards"); MODULE_AUTHOR("Gerd Knorr [SuSE Labs]"); @@ -76,60 +77,6 @@ static unsigned int cx88_devcount; static LIST_HEAD(cx88_devlist); static DECLARE_MUTEX(devlist); -/* ------------------------------------------------------------------ */ -/* debug help functions */ - -static const char *v4l1_ioctls[] = { - "0", "CGAP", "GCHAN", "SCHAN", "GTUNER", "STUNER", "GPICT", "SPICT", - "CCAPTURE", "GWIN", "SWIN", "GFBUF", 
"SFBUF", "KEY", "GFREQ", - "SFREQ", "GAUDIO", "SAUDIO", "SYNC", "MCAPTURE", "GMBUF", "GUNIT", - "GCAPTURE", "SCAPTURE", "SPLAYMODE", "SWRITEMODE", "GPLAYINFO", - "SMICROCODE", "GVBIFMT", "SVBIFMT" }; -#define V4L1_IOCTLS ARRAY_SIZE(v4l1_ioctls) - -static const char *v4l2_ioctls[] = { - "QUERYCAP", "1", "ENUM_PIXFMT", "ENUM_FBUFFMT", "G_FMT", "S_FMT", - "G_COMP", "S_COMP", "REQBUFS", "QUERYBUF", "G_FBUF", "S_FBUF", - "G_WIN", "S_WIN", "PREVIEW", "QBUF", "16", "DQBUF", "STREAMON", - "STREAMOFF", "G_PERF", "G_PARM", "S_PARM", "G_STD", "S_STD", - "ENUMSTD", "ENUMINPUT", "G_CTRL", "S_CTRL", "G_TUNER", "S_TUNER", - "G_FREQ", "S_FREQ", "G_AUDIO", "S_AUDIO", "35", "QUERYCTRL", - "QUERYMENU", "G_INPUT", "S_INPUT", "ENUMCVT", "41", "42", "43", - "44", "45", "G_OUTPUT", "S_OUTPUT", "ENUMOUTPUT", "G_AUDOUT", - "S_AUDOUT", "ENUMFX", "G_EFFECT", "S_EFFECT", "G_MODULATOR", - "S_MODULATOR" -}; -#define V4L2_IOCTLS ARRAY_SIZE(v4l2_ioctls) - -void cx88_print_ioctl(char *name, unsigned int cmd) -{ - char *dir; - - switch (_IOC_DIR(cmd)) { - case _IOC_NONE: dir = "--"; break; - case _IOC_READ: dir = "r-"; break; - case _IOC_WRITE: dir = "-w"; break; - case _IOC_READ | _IOC_WRITE: dir = "rw"; break; - default: dir = "??"; break; - } - switch (_IOC_TYPE(cmd)) { - case 'v': - printk(KERN_DEBUG "%s: ioctl 0x%08x (v4l1, %s, VIDIOC%s)\n", - name, cmd, dir, (_IOC_NR(cmd) < V4L1_IOCTLS) ? - v4l1_ioctls[_IOC_NR(cmd)] : "???"); - break; - case 'V': - printk(KERN_DEBUG "%s: ioctl 0x%08x (v4l2, %s, VIDIOC_%s)\n", - name, cmd, dir, (_IOC_NR(cmd) < V4L2_IOCTLS) ? 
- v4l2_ioctls[_IOC_NR(cmd)] : "???"); - break; - default: - printk(KERN_DEBUG "%s: ioctl 0x%08x (???, %s, #%d)\n", - name, cmd, dir, _IOC_NR(cmd)); - } -} - -/* ------------------------------------------------------------------ */ #define NO_SYNC_LINE (-1U) static u32* cx88_risc_field(u32 *rp, struct scatterlist *sglist, @@ -1208,7 +1155,6 @@ void cx88_core_put(struct cx88_core *core, struct pci_dev *pci) /* ------------------------------------------------------------------ */ -EXPORT_SYMBOL(cx88_print_ioctl); EXPORT_SYMBOL(cx88_print_irqbits); EXPORT_SYMBOL(cx88_core_irq); diff --git a/drivers/media/video/cx88/cx88-dvb.c b/drivers/media/video/cx88/cx88-dvb.c index 201050478711..c63f20fdff48 100644 --- a/drivers/media/video/cx88/cx88-dvb.c +++ b/drivers/media/video/cx88/cx88-dvb.c @@ -31,6 +31,7 @@ #include "cx88.h" #include "dvb-pll.h" +#include #ifdef HAVE_MT352 # include "mt352.h" diff --git a/drivers/media/video/cx88/cx88-i2c.c b/drivers/media/video/cx88/cx88-i2c.c index c6492089ee1a..f720901e9638 100644 --- a/drivers/media/video/cx88/cx88-i2c.c +++ b/drivers/media/video/cx88/cx88-i2c.c @@ -30,6 +30,7 @@ #include #include "cx88.h" +#include static unsigned int i2c_debug = 0; module_param(i2c_debug, int, 0644); diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c index b76abb9b8961..9a02515fe18b 100644 --- a/drivers/media/video/cx88/cx88-video.c +++ b/drivers/media/video/cx88/cx88-video.c @@ -33,6 +33,7 @@ #include #include "cx88.h" +#include /* Include V4L1 specific functions. 
Should be removed soon */ #include @@ -1118,7 +1119,7 @@ static int video_do_ioctl(struct inode *inode, struct file *file, int err; if (video_debug > 1) - cx88_print_ioctl(core->name,cmd); + v4l_print_ioctl(core->name,cmd); switch (cmd) { /* --- capabilities ------------------------------------------ */ @@ -1254,7 +1255,7 @@ int cx88_do_ioctl(struct inode *inode, struct file *file, int radio, dprintk( 1, "CORE IOCTL: 0x%x\n", cmd ); if (video_debug > 1) - cx88_print_ioctl(core->name,cmd); + v4l_print_ioctl(core->name,cmd); switch (cmd) { /* ---------- tv norms ---------- */ @@ -1474,7 +1475,7 @@ static int radio_do_ioctl(struct inode *inode, struct file *file, struct cx88_core *core = dev->core; if (video_debug > 1) - cx88_print_ioctl(core->name,cmd); + v4l_print_ioctl(core->name,cmd); switch (cmd) { case VIDIOC_QUERYCAP: diff --git a/drivers/media/video/cx88/cx88.h b/drivers/media/video/cx88/cx88.h index 6d370d1b333f..022ef13c45bc 100644 --- a/drivers/media/video/cx88/cx88.h +++ b/drivers/media/video/cx88/cx88.h @@ -461,7 +461,6 @@ struct cx8802_dev { extern void cx88_print_irqbits(char *name, char *tag, char **strings, u32 bits, u32 mask); -extern void cx88_print_ioctl(char *name, unsigned int cmd); extern int cx88_core_irq(struct cx88_core *core, u32 status); extern void cx88_wakeup(struct cx88_core *core, diff --git a/drivers/media/video/em28xx/em28xx-core.c b/drivers/media/video/em28xx/em28xx-core.c index c0db0e9d2cea..dff3893f32fd 100644 --- a/drivers/media/video/em28xx/em28xx-core.c +++ b/drivers/media/video/em28xx/em28xx-core.c @@ -63,59 +63,6 @@ static int alt = EM28XX_PINOUT; module_param(alt, int, 0644); MODULE_PARM_DESC(alt, "alternate setting to use for video endpoint"); -/* ------------------------------------------------------------------ */ -/* debug help functions */ - -static const char *v4l1_ioctls[] = { - "0", "CGAP", "GCHAN", "SCHAN", "GTUNER", "STUNER", "GPICT", "SPICT", - "CCAPTURE", "GWIN", "SWIN", "GFBUF", "SFBUF", "KEY", "GFREQ", - 
"SFREQ", "GAUDIO", "SAUDIO", "SYNC", "MCAPTURE", "GMBUF", "GUNIT", - "GCAPTURE", "SCAPTURE", "SPLAYMODE", "SWRITEMODE", "GPLAYINFO", - "SMICROCODE", "GVBIFMT", "SVBIFMT" }; -#define V4L1_IOCTLS ARRAY_SIZE(v4l1_ioctls) - -static const char *v4l2_ioctls[] = { - "QUERYCAP", "1", "ENUM_PIXFMT", "ENUM_FBUFFMT", "G_FMT", "S_FMT", - "G_COMP", "S_COMP", "REQBUFS", "QUERYBUF", "G_FBUF", "S_FBUF", - "G_WIN", "S_WIN", "PREVIEW", "QBUF", "16", "DQBUF", "STREAMON", - "STREAMOFF", "G_PERF", "G_PARM", "S_PARM", "G_STD", "S_STD", - "ENUMSTD", "ENUMINPUT", "G_CTRL", "S_CTRL", "G_TUNER", "S_TUNER", - "G_FREQ", "S_FREQ", "G_AUDIO", "S_AUDIO", "35", "QUERYCTRL", - "QUERYMENU", "G_INPUT", "S_INPUT", "ENUMCVT", "41", "42", "43", - "44", "45", "G_OUTPUT", "S_OUTPUT", "ENUMOUTPUT", "G_AUDOUT", - "S_AUDOUT", "ENUMFX", "G_EFFECT", "S_EFFECT", "G_MODULATOR", - "S_MODULATOR" -}; -#define V4L2_IOCTLS ARRAY_SIZE(v4l2_ioctls) - -void em28xx_print_ioctl(char *name, unsigned int cmd) -{ - char *dir; - - switch (_IOC_DIR(cmd)) { - case _IOC_NONE: dir = "--"; break; - case _IOC_READ: dir = "r-"; break; - case _IOC_WRITE: dir = "-w"; break; - case _IOC_READ | _IOC_WRITE: dir = "rw"; break; - default: dir = "??"; break; - } - switch (_IOC_TYPE(cmd)) { - case 'v': - printk(KERN_DEBUG "%s: ioctl 0x%08x (v4l1, %s, VIDIOC%s)\n", - name, cmd, dir, (_IOC_NR(cmd) < V4L1_IOCTLS) ? - v4l1_ioctls[_IOC_NR(cmd)] : "???"); - break; - case 'V': - printk(KERN_DEBUG "%s: ioctl 0x%08x (v4l2, %s, VIDIOC_%s)\n", - name, cmd, dir, (_IOC_NR(cmd) < V4L2_IOCTLS) ? 
- v4l2_ioctls[_IOC_NR(cmd)] : "???"); - break; - default: - printk(KERN_DEBUG "%s: ioctl 0x%08x (???, %s, #%d)\n", - name, cmd, dir, _IOC_NR(cmd)); - } -} - /* * em28xx_request_buffers() diff --git a/drivers/media/video/em28xx/em28xx-i2c.c b/drivers/media/video/em28xx/em28xx-i2c.c index 5385338efbf4..0591a705b7a1 100644 --- a/drivers/media/video/em28xx/em28xx-i2c.c +++ b/drivers/media/video/em28xx/em28xx-i2c.c @@ -28,6 +28,7 @@ #include #include "em28xx.h" +#include #include /* ----------------------------------------------------------- */ diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c index 0b5557c479ae..fdc255918dde 100644 --- a/drivers/media/video/em28xx/em28xx-video.c +++ b/drivers/media/video/em28xx/em28xx-video.c @@ -1269,7 +1269,7 @@ static int em28xx_video_do_ioctl(struct inode *inode, struct file *filp, return -ENODEV; if (video_debug > 1) - em28xx_print_ioctl(dev->name,cmd); + v4l_print_ioctl(dev->name,cmd); switch (cmd) { diff --git a/drivers/media/video/em28xx/em28xx.h b/drivers/media/video/em28xx/em28xx.h index ffa9acc9be37..f99ee8eb5577 100644 --- a/drivers/media/video/em28xx/em28xx.h +++ b/drivers/media/video/em28xx/em28xx.h @@ -295,8 +295,6 @@ void em28xx_set_ir(struct em28xx * dev,struct IR_i2c *ir); /* Provided by em28xx-core.c */ -void em28xx_print_ioctl(char *name, unsigned int cmd); - u32 em28xx_request_buffers(struct em28xx *dev, u32 count); void em28xx_queue_unusedframes(struct em28xx *dev); void em28xx_release_buffers(struct em28xx *dev); diff --git a/drivers/media/video/mxb.c b/drivers/media/video/mxb.c index d04793fb80fc..91681aa6c657 100644 --- a/drivers/media/video/mxb.c +++ b/drivers/media/video/mxb.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "mxb.h" #include "tea6415c.h" diff --git a/drivers/media/video/saa7134/saa7134-cards.c b/drivers/media/video/saa7134/saa7134-cards.c index 73f2525bc764..991829eb15da 100644 --- a/drivers/media/video/saa7134/saa7134-cards.c 
+++ b/drivers/media/video/saa7134/saa7134-cards.c @@ -25,6 +25,7 @@ #include "saa7134-reg.h" #include "saa7134.h" +#include /* commly used strings */ static char name_mute[] = "mute"; diff --git a/drivers/media/video/saa7134/saa7134-core.c b/drivers/media/video/saa7134/saa7134-core.c index 0bdbd99d0ae6..d4be1fd20a36 100644 --- a/drivers/media/video/saa7134/saa7134-core.c +++ b/drivers/media/video/saa7134/saa7134-core.c @@ -95,77 +95,6 @@ int (*dmasound_exit)(struct saa7134_dev *dev); #define dprintk(fmt, arg...) if (core_debug) \ printk(KERN_DEBUG "%s/core: " fmt, dev->name , ## arg) -/* ------------------------------------------------------------------ */ -/* debug help functions */ - -static const char *v4l1_ioctls[] = { - "0", "GCAP", "GCHAN", "SCHAN", "GTUNER", "STUNER", "GPICT", "SPICT", - "CCAPTURE", "GWIN", "SWIN", "GFBUF", "SFBUF", "KEY", "GFREQ", - "SFREQ", "GAUDIO", "SAUDIO", "SYNC", "MCAPTURE", "GMBUF", "GUNIT", - "GCAPTURE", "SCAPTURE", "SPLAYMODE", "SWRITEMODE", "GPLAYINFO", - "SMICROCODE", "GVBIFMT", "SVBIFMT" }; -#define V4L1_IOCTLS ARRAY_SIZE(v4l1_ioctls) - -static const char *v4l2_ioctls[] = { - "QUERYCAP", "1", "ENUM_PIXFMT", "ENUM_FBUFFMT", "G_FMT", "S_FMT", - "G_COMP", "S_COMP", "REQBUFS", "QUERYBUF", "G_FBUF", "S_FBUF", - "G_WIN", "S_WIN", "PREVIEW", "QBUF", "16", "DQBUF", "STREAMON", - "STREAMOFF", "G_PERF", "G_PARM", "S_PARM", "G_STD", "S_STD", - "ENUMSTD", "ENUMINPUT", "G_CTRL", "S_CTRL", "G_TUNER", "S_TUNER", - "G_FREQ", "S_FREQ", "G_AUDIO", "S_AUDIO", "35", "QUERYCTRL", - "QUERYMENU", "G_INPUT", "S_INPUT", "ENUMCVT", "41", "42", "43", - "44", "45", "G_OUTPUT", "S_OUTPUT", "ENUMOUTPUT", "G_AUDOUT", - "S_AUDOUT", "ENUMFX", "G_EFFECT", "S_EFFECT", "G_MODULATOR", - "S_MODULATOR" -}; -#define V4L2_IOCTLS ARRAY_SIZE(v4l2_ioctls) - -static const char *osspcm_ioctls[] = { - "RESET", "SYNC", "SPEED", "STEREO", "GETBLKSIZE", "SETFMT", - "CHANNELS", "?", "POST", "SUBDIVIDE", "SETFRAGMENT", "GETFMTS", - "GETOSPACE", "GETISPACE", "NONBLOCK", "GETCAPS", 
"GET/SETTRIGGER", - "GETIPTR", "GETOPTR", "MAPINBUF", "MAPOUTBUF", "SETSYNCRO", - "SETDUPLEX", "GETODELAY" -}; -#define OSSPCM_IOCTLS ARRAY_SIZE(v4l2_ioctls) - -void saa7134_print_ioctl(char *name, unsigned int cmd) -{ - char *dir; - - switch (_IOC_DIR(cmd)) { - case _IOC_NONE: dir = "--"; break; - case _IOC_READ: dir = "r-"; break; - case _IOC_WRITE: dir = "-w"; break; - case _IOC_READ | _IOC_WRITE: dir = "rw"; break; - default: dir = "??"; break; - } - switch (_IOC_TYPE(cmd)) { - case 'v': - printk(KERN_DEBUG "%s: ioctl 0x%08x (v4l1, %s, VIDIOC%s)\n", - name, cmd, dir, (_IOC_NR(cmd) < V4L1_IOCTLS) ? - v4l1_ioctls[_IOC_NR(cmd)] : "???"); - break; - case 'V': - printk(KERN_DEBUG "%s: ioctl 0x%08x (v4l2, %s, VIDIOC_%s)\n", - name, cmd, dir, (_IOC_NR(cmd) < V4L2_IOCTLS) ? - v4l2_ioctls[_IOC_NR(cmd)] : "???"); - break; - case 'P': - printk(KERN_DEBUG "%s: ioctl 0x%08x (oss dsp, %s, SNDCTL_DSP_%s)\n", - name, cmd, dir, (_IOC_NR(cmd) < OSSPCM_IOCTLS) ? - osspcm_ioctls[_IOC_NR(cmd)] : "???"); - break; - case 'M': - printk(KERN_DEBUG "%s: ioctl 0x%08x (oss mixer, %s, #%d)\n", - name, cmd, dir, _IOC_NR(cmd)); - break; - default: - printk(KERN_DEBUG "%s: ioctl 0x%08x (???, %s, #%d)\n", - name, cmd, dir, _IOC_NR(cmd)); - } -} - void saa7134_track_gpio(struct saa7134_dev *dev, char *msg) { unsigned long mode,status; @@ -1173,7 +1102,6 @@ module_exit(saa7134_fini); /* ----------------------------------------------------------- */ -EXPORT_SYMBOL(saa7134_print_ioctl); EXPORT_SYMBOL(saa7134_i2c_call_clients); EXPORT_SYMBOL(saa7134_devlist); EXPORT_SYMBOL(saa7134_boards); diff --git a/drivers/media/video/saa7134/saa7134-dvb.c b/drivers/media/video/saa7134/saa7134-dvb.c index e016480c3468..399f9952596c 100644 --- a/drivers/media/video/saa7134/saa7134-dvb.c +++ b/drivers/media/video/saa7134/saa7134-dvb.c @@ -31,6 +31,7 @@ #include "saa7134-reg.h" #include "saa7134.h" +#include #ifdef HAVE_MT352 # include "mt352.h" diff --git a/drivers/media/video/saa7134/saa7134-empress.c 
b/drivers/media/video/saa7134/saa7134-empress.c index 575f3e835f91..bd4c389d4c37 100644 --- a/drivers/media/video/saa7134/saa7134-empress.c +++ b/drivers/media/video/saa7134/saa7134-empress.c @@ -29,6 +29,7 @@ #include "saa7134.h" #include +#include /* ------------------------------------------------------------------ */ @@ -163,7 +164,7 @@ static int ts_do_ioctl(struct inode *inode, struct file *file, struct saa7134_dev *dev = file->private_data; if (debug > 1) - saa7134_print_ioctl(dev->name,cmd); + v4l_print_ioctl(dev->name,cmd); switch (cmd) { case VIDIOC_QUERYCAP: { diff --git a/drivers/media/video/saa7134/saa7134-i2c.c b/drivers/media/video/saa7134/saa7134-i2c.c index 7283caa0484b..6162550c4136 100644 --- a/drivers/media/video/saa7134/saa7134-i2c.c +++ b/drivers/media/video/saa7134/saa7134-i2c.c @@ -30,6 +30,7 @@ #include "saa7134-reg.h" #include "saa7134.h" +#include /* ----------------------------------------------------------- */ diff --git a/drivers/media/video/saa7134/saa7134-oss.c b/drivers/media/video/saa7134/saa7134-oss.c index 8badd2a9cb2f..7448e386a804 100644 --- a/drivers/media/video/saa7134/saa7134-oss.c +++ b/drivers/media/video/saa7134/saa7134-oss.c @@ -373,6 +373,42 @@ static ssize_t dsp_write(struct file *file, const char __user *buffer, return -EINVAL; } +static const char *osspcm_ioctls[] = { + "RESET", "SYNC", "SPEED", "STEREO", "GETBLKSIZE", "SETFMT", + "CHANNELS", "?", "POST", "SUBDIVIDE", "SETFRAGMENT", "GETFMTS", + "GETOSPACE", "GETISPACE", "NONBLOCK", "GETCAPS", "GET/SETTRIGGER", + "GETIPTR", "GETOPTR", "MAPINBUF", "MAPOUTBUF", "SETSYNCRO", + "SETDUPLEX", "GETODELAY" +}; +#define OSSPCM_IOCTLS ARRAY_SIZE(osspcm_ioctls) + +static void saa7134_oss_print_ioctl(char *name, unsigned int cmd) +{ + char *dir; + + switch (_IOC_DIR(cmd)) { + case _IOC_NONE: dir = "--"; break; + case _IOC_READ: dir = "r-"; break; + case _IOC_WRITE: dir = "-w"; break; + case _IOC_READ | _IOC_WRITE: dir = "rw"; break; + default: dir = "??"; break; + } + switch 
(_IOC_TYPE(cmd)) { + case 'P': + printk(KERN_DEBUG "%s: ioctl 0x%08x (oss dsp, %s, SNDCTL_DSP_%s)\n", + name, cmd, dir, (_IOC_NR(cmd) < OSSPCM_IOCTLS) ? + osspcm_ioctls[_IOC_NR(cmd)] : "???"); + break; + case 'M': + printk(KERN_DEBUG "%s: ioctl 0x%08x (oss mixer, %s, #%d)\n", + name, cmd, dir, _IOC_NR(cmd)); + break; + default: + printk(KERN_DEBUG "%s: ioctl 0x%08x (???, %s, #%d)\n", + name, cmd, dir, _IOC_NR(cmd)); + } +} + static int dsp_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { @@ -382,7 +418,7 @@ static int dsp_ioctl(struct inode *inode, struct file *file, int val = 0; if (debug > 1) - saa7134_print_ioctl(dev->name,cmd); + saa7134_oss_print_ioctl(dev->name,cmd); switch (cmd) { case OSS_GETVERSION: return put_user(SOUND_VERSION, p); @@ -678,7 +714,7 @@ static int mixer_ioctl(struct inode *inode, struct file *file, int __user *p = argp; if (debug > 1) - saa7134_print_ioctl(dev->name,cmd); + saa7134_oss_print_ioctl(dev->name,cmd); switch (cmd) { case OSS_GETVERSION: return put_user(SOUND_VERSION, p); diff --git a/drivers/media/video/saa7134/saa7134-video.c b/drivers/media/video/saa7134/saa7134-video.c index 9b9e1e7f05ef..adfa8fe49a11 100644 --- a/drivers/media/video/saa7134/saa7134-video.c +++ b/drivers/media/video/saa7134/saa7134-video.c @@ -29,6 +29,7 @@ #include "saa7134-reg.h" #include "saa7134.h" +#include /* Include V4L1 specific functions. 
Should be removed soon */ #include @@ -1689,7 +1690,7 @@ static int video_do_ioctl(struct inode *inode, struct file *file, int err; if (video_debug > 1) - saa7134_print_ioctl(dev->name,cmd); + v4l_print_ioctl(dev->name,cmd); switch (cmd) { case VIDIOC_S_CTRL: @@ -2142,7 +2143,7 @@ static int radio_do_ioctl(struct inode *inode, struct file *file, struct saa7134_dev *dev = fh->dev; if (video_debug > 1) - saa7134_print_ioctl(dev->name,cmd); + v4l_print_ioctl(dev->name,cmd); switch (cmd) { case VIDIOC_QUERYCAP: { diff --git a/drivers/media/video/saa7134/saa7134.h b/drivers/media/video/saa7134/saa7134.h index 2f28e83102fd..18978a484ddb 100644 --- a/drivers/media/video/saa7134/saa7134.h +++ b/drivers/media/video/saa7134/saa7134.h @@ -546,7 +546,6 @@ struct saa7134_dev { extern struct list_head saa7134_devlist; -void saa7134_print_ioctl(char *name, unsigned int cmd); void saa7134_track_gpio(struct saa7134_dev *dev, char *msg); #define SAA7134_PGTABLE_SIZE 4096 diff --git a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c index 9ae43a8cea52..f64baa4b0025 100644 --- a/drivers/media/video/tda9887.c +++ b/drivers/media/video/tda9887.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c index df994311251e..fd18a882668e 100644 --- a/drivers/media/video/tuner-core.c +++ b/drivers/media/video/tuner-core.c @@ -20,6 +20,7 @@ #include #include +#include #include #include "msp3400.h" @@ -545,6 +546,9 @@ static int tuner_command(struct i2c_client *client, unsigned int cmd, void *arg) { struct tuner *t = i2c_get_clientdata(client); + if (tuner_debug>1) + v4l_i2c_print_ioctl(&(t->i2c),cmd); + switch (cmd) { /* --- configuration --- */ case TUNER_SET_TYPE_ADDR: @@ -575,9 +579,6 @@ static int tuner_command(struct i2c_client *client, unsigned int cmd, void *arg) /* Should be implemented, since bttv calls it */ tuner_dbg("VIDIOCSAUDIO not implemented.\n"); - break; - case MSP_SET_MATRIX: 
- case TDA9887_SET_CONFIG: break; /* --- v4l ioctls --- */ /* take care: bttv does userspace copying, we'll get a @@ -764,11 +765,6 @@ static int tuner_command(struct i2c_client *client, unsigned int cmd, void *arg) case VIDIOC_LOG_STATUS: tuner_status(client); break; - default: - tuner_dbg("Unimplemented IOCTL 0x%08x(dir=%d,tp='%c',nr=%d,sz=%d)\n", - cmd, _IOC_DIR(cmd), _IOC_TYPE(cmd), - _IOC_NR(cmd), _IOC_SIZE(cmd)); - break; } return 0; diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c index ed6a843dd34a..fec620073aa3 100644 --- a/drivers/media/video/tvaudio.c +++ b/drivers/media/video/tvaudio.c @@ -31,6 +31,7 @@ #include #include +#include #include "tvaudio.h" diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c index 62a7d636ef11..5dbd7c1b362a 100644 --- a/drivers/media/video/v4l2-common.c +++ b/drivers/media/video/v4l2-common.c @@ -58,6 +58,8 @@ #include #include #include +#include +#include #ifdef CONFIG_KMOD #include @@ -190,55 +192,174 @@ char *v4l2_type_names[] = { [V4L2_BUF_TYPE_VBI_OUTPUT] = "vbi-out", }; -char *v4l2_ioctl_names[256] = { - [0 ... 
255] = "UNKNOWN", - [_IOC_NR(VIDIOC_QUERYCAP)] = "VIDIOC_QUERYCAP", - [_IOC_NR(VIDIOC_RESERVED)] = "VIDIOC_RESERVED", - [_IOC_NR(VIDIOC_ENUM_FMT)] = "VIDIOC_ENUM_FMT", - [_IOC_NR(VIDIOC_G_FMT)] = "VIDIOC_G_FMT", - [_IOC_NR(VIDIOC_S_FMT)] = "VIDIOC_S_FMT", - [_IOC_NR(VIDIOC_REQBUFS)] = "VIDIOC_REQBUFS", - [_IOC_NR(VIDIOC_QUERYBUF)] = "VIDIOC_QUERYBUF", - [_IOC_NR(VIDIOC_G_FBUF)] = "VIDIOC_G_FBUF", - [_IOC_NR(VIDIOC_S_FBUF)] = "VIDIOC_S_FBUF", - [_IOC_NR(VIDIOC_OVERLAY)] = "VIDIOC_OVERLAY", - [_IOC_NR(VIDIOC_QBUF)] = "VIDIOC_QBUF", - [_IOC_NR(VIDIOC_DQBUF)] = "VIDIOC_DQBUF", - [_IOC_NR(VIDIOC_STREAMON)] = "VIDIOC_STREAMON", - [_IOC_NR(VIDIOC_STREAMOFF)] = "VIDIOC_STREAMOFF", - [_IOC_NR(VIDIOC_G_PARM)] = "VIDIOC_G_PARM", - [_IOC_NR(VIDIOC_S_PARM)] = "VIDIOC_S_PARM", - [_IOC_NR(VIDIOC_G_STD)] = "VIDIOC_G_STD", - [_IOC_NR(VIDIOC_S_STD)] = "VIDIOC_S_STD", - [_IOC_NR(VIDIOC_ENUMSTD)] = "VIDIOC_ENUMSTD", - [_IOC_NR(VIDIOC_ENUMINPUT)] = "VIDIOC_ENUMINPUT", - [_IOC_NR(VIDIOC_G_CTRL)] = "VIDIOC_G_CTRL", - [_IOC_NR(VIDIOC_S_CTRL)] = "VIDIOC_S_CTRL", - [_IOC_NR(VIDIOC_G_TUNER)] = "VIDIOC_G_TUNER", - [_IOC_NR(VIDIOC_S_TUNER)] = "VIDIOC_S_TUNER", - [_IOC_NR(VIDIOC_G_AUDIO)] = "VIDIOC_G_AUDIO", - [_IOC_NR(VIDIOC_S_AUDIO)] = "VIDIOC_S_AUDIO", - [_IOC_NR(VIDIOC_QUERYCTRL)] = "VIDIOC_QUERYCTRL", - [_IOC_NR(VIDIOC_QUERYMENU)] = "VIDIOC_QUERYMENU", - [_IOC_NR(VIDIOC_G_INPUT)] = "VIDIOC_G_INPUT", - [_IOC_NR(VIDIOC_S_INPUT)] = "VIDIOC_S_INPUT", - [_IOC_NR(VIDIOC_G_OUTPUT)] = "VIDIOC_G_OUTPUT", - [_IOC_NR(VIDIOC_S_OUTPUT)] = "VIDIOC_S_OUTPUT", - [_IOC_NR(VIDIOC_ENUMOUTPUT)] = "VIDIOC_ENUMOUTPUT", - [_IOC_NR(VIDIOC_G_AUDOUT)] = "VIDIOC_G_AUDOUT", - [_IOC_NR(VIDIOC_S_AUDOUT)] = "VIDIOC_S_AUDOUT", - [_IOC_NR(VIDIOC_G_MODULATOR)] = "VIDIOC_G_MODULATOR", - [_IOC_NR(VIDIOC_S_MODULATOR)] = "VIDIOC_S_MODULATOR", - [_IOC_NR(VIDIOC_G_FREQUENCY)] = "VIDIOC_G_FREQUENCY", - [_IOC_NR(VIDIOC_S_FREQUENCY)] = "VIDIOC_S_FREQUENCY", - [_IOC_NR(VIDIOC_CROPCAP)] = "VIDIOC_CROPCAP", - [_IOC_NR(VIDIOC_G_CROP)] 
= "VIDIOC_G_CROP", - [_IOC_NR(VIDIOC_S_CROP)] = "VIDIOC_S_CROP", - [_IOC_NR(VIDIOC_G_JPEGCOMP)] = "VIDIOC_G_JPEGCOMP", - [_IOC_NR(VIDIOC_S_JPEGCOMP)] = "VIDIOC_S_JPEGCOMP", - [_IOC_NR(VIDIOC_QUERYSTD)] = "VIDIOC_QUERYSTD", - [_IOC_NR(VIDIOC_TRY_FMT)] = "VIDIOC_TRY_FMT", +/* ------------------------------------------------------------------ */ +/* debug help functions */ + +#ifdef HAVE_V4L1 +static const char *v4l1_ioctls[] = { + [_IOC_NR(VIDIOCGCAP)] = "VIDIOCGCAP", + [_IOC_NR(VIDIOCGCHAN)] = "VIDIOCGCHAN", + [_IOC_NR(VIDIOCSCHAN)] = "VIDIOCSCHAN", + [_IOC_NR(VIDIOCGTUNER)] = "VIDIOCGTUNER", + [_IOC_NR(VIDIOCSTUNER)] = "VIDIOCSTUNER", + [_IOC_NR(VIDIOCGPICT)] = "VIDIOCGPICT", + [_IOC_NR(VIDIOCSPICT)] = "VIDIOCSPICT", + [_IOC_NR(VIDIOCCAPTURE)] = "VIDIOCCAPTURE", + [_IOC_NR(VIDIOCGWIN)] = "VIDIOCGWIN", + [_IOC_NR(VIDIOCSWIN)] = "VIDIOCSWIN", + [_IOC_NR(VIDIOCGFBUF)] = "VIDIOCGFBUF", + [_IOC_NR(VIDIOCSFBUF)] = "VIDIOCSFBUF", + [_IOC_NR(VIDIOCKEY)] = "VIDIOCKEY", + [_IOC_NR(VIDIOCGFREQ)] = "VIDIOCGFREQ", + [_IOC_NR(VIDIOCSFREQ)] = "VIDIOCSFREQ", + [_IOC_NR(VIDIOCGAUDIO)] = "VIDIOCGAUDIO", + [_IOC_NR(VIDIOCSAUDIO)] = "VIDIOCSAUDIO", + [_IOC_NR(VIDIOCSYNC)] = "VIDIOCSYNC", + [_IOC_NR(VIDIOCMCAPTURE)] = "VIDIOCMCAPTURE", + [_IOC_NR(VIDIOCGMBUF)] = "VIDIOCGMBUF", + [_IOC_NR(VIDIOCGUNIT)] = "VIDIOCGUNIT", + [_IOC_NR(VIDIOCGCAPTURE)] = "VIDIOCGCAPTURE", + [_IOC_NR(VIDIOCSCAPTURE)] = "VIDIOCSCAPTURE", + [_IOC_NR(VIDIOCSPLAYMODE)] = "VIDIOCSPLAYMODE", + [_IOC_NR(VIDIOCSWRITEMODE)] = "VIDIOCSWRITEMODE", + [_IOC_NR(VIDIOCGPLAYINFO)] = "VIDIOCGPLAYINFO", + [_IOC_NR(VIDIOCSMICROCODE)] = "VIDIOCSMICROCODE", + [_IOC_NR(VIDIOCGVBIFMT)] = "VIDIOCGVBIFMT", + [_IOC_NR(VIDIOCSVBIFMT)] = "VIDIOCSVBIFMT" }; +#define V4L1_IOCTLS ARRAY_SIZE(v4l1_ioctls) +#endif + +static const char *v4l2_ioctls[] = { + [_IOC_NR(VIDIOC_QUERYCAP)] = "VIDIOC_QUERYCAP", + [_IOC_NR(VIDIOC_RESERVED)] = "VIDIOC_RESERVED", + [_IOC_NR(VIDIOC_ENUM_FMT)] = "VIDIOC_ENUM_FMT", + [_IOC_NR(VIDIOC_G_FMT)] = "VIDIOC_G_FMT", 
+ [_IOC_NR(VIDIOC_S_FMT)] = "VIDIOC_S_FMT", + [_IOC_NR(VIDIOC_G_MPEGCOMP)] = "VIDIOC_G_MPEGCOMP", + [_IOC_NR(VIDIOC_S_MPEGCOMP)] = "VIDIOC_S_MPEGCOMP", + [_IOC_NR(VIDIOC_REQBUFS)] = "VIDIOC_REQBUFS", + [_IOC_NR(VIDIOC_QUERYBUF)] = "VIDIOC_QUERYBUF", + [_IOC_NR(VIDIOC_G_FBUF)] = "VIDIOC_G_FBUF", + [_IOC_NR(VIDIOC_S_FBUF)] = "VIDIOC_S_FBUF", + [_IOC_NR(VIDIOC_OVERLAY)] = "VIDIOC_OVERLAY", + [_IOC_NR(VIDIOC_QBUF)] = "VIDIOC_QBUF", + [_IOC_NR(VIDIOC_DQBUF)] = "VIDIOC_DQBUF", + [_IOC_NR(VIDIOC_STREAMON)] = "VIDIOC_STREAMON", + [_IOC_NR(VIDIOC_STREAMOFF)] = "VIDIOC_STREAMOFF", + [_IOC_NR(VIDIOC_G_PARM)] = "VIDIOC_G_PARM", + [_IOC_NR(VIDIOC_S_PARM)] = "VIDIOC_S_PARM", + [_IOC_NR(VIDIOC_G_STD)] = "VIDIOC_G_STD", + [_IOC_NR(VIDIOC_S_STD)] = "VIDIOC_S_STD", + [_IOC_NR(VIDIOC_ENUMSTD)] = "VIDIOC_ENUMSTD", + [_IOC_NR(VIDIOC_ENUMINPUT)] = "VIDIOC_ENUMINPUT", + [_IOC_NR(VIDIOC_G_CTRL)] = "VIDIOC_G_CTRL", + [_IOC_NR(VIDIOC_S_CTRL)] = "VIDIOC_S_CTRL", + [_IOC_NR(VIDIOC_G_TUNER)] = "VIDIOC_G_TUNER", + [_IOC_NR(VIDIOC_S_TUNER)] = "VIDIOC_S_TUNER", + [_IOC_NR(VIDIOC_G_AUDIO)] = "VIDIOC_G_AUDIO", + [_IOC_NR(VIDIOC_S_AUDIO)] = "VIDIOC_S_AUDIO", + [_IOC_NR(VIDIOC_QUERYCTRL)] = "VIDIOC_QUERYCTRL", + [_IOC_NR(VIDIOC_QUERYMENU)] = "VIDIOC_QUERYMENU", + [_IOC_NR(VIDIOC_G_INPUT)] = "VIDIOC_G_INPUT", + [_IOC_NR(VIDIOC_S_INPUT)] = "VIDIOC_S_INPUT", + [_IOC_NR(VIDIOC_G_OUTPUT)] = "VIDIOC_G_OUTPUT", + [_IOC_NR(VIDIOC_S_OUTPUT)] = "VIDIOC_S_OUTPUT", + [_IOC_NR(VIDIOC_ENUMOUTPUT)] = "VIDIOC_ENUMOUTPUT", + [_IOC_NR(VIDIOC_G_AUDOUT)] = "VIDIOC_G_AUDOUT", + [_IOC_NR(VIDIOC_S_AUDOUT)] = "VIDIOC_S_AUDOUT", + [_IOC_NR(VIDIOC_G_MODULATOR)] = "VIDIOC_G_MODULATOR", + [_IOC_NR(VIDIOC_S_MODULATOR)] = "VIDIOC_S_MODULATOR", + [_IOC_NR(VIDIOC_G_FREQUENCY)] = "VIDIOC_G_FREQUENCY", + [_IOC_NR(VIDIOC_S_FREQUENCY)] = "VIDIOC_S_FREQUENCY", + [_IOC_NR(VIDIOC_CROPCAP)] = "VIDIOC_CROPCAP", + [_IOC_NR(VIDIOC_G_CROP)] = "VIDIOC_G_CROP", + [_IOC_NR(VIDIOC_S_CROP)] = "VIDIOC_S_CROP", + [_IOC_NR(VIDIOC_G_JPEGCOMP)] = 
"VIDIOC_G_JPEGCOMP", + [_IOC_NR(VIDIOC_S_JPEGCOMP)] = "VIDIOC_S_JPEGCOMP", + [_IOC_NR(VIDIOC_QUERYSTD)] = "VIDIOC_QUERYSTD", + [_IOC_NR(VIDIOC_TRY_FMT)] = "VIDIOC_TRY_FMT", + [_IOC_NR(VIDIOC_ENUMAUDIO)] = "VIDIOC_ENUMAUDIO", + [_IOC_NR(VIDIOC_ENUMAUDOUT)] = "VIDIOC_ENUMAUDOUT", + [_IOC_NR(VIDIOC_G_PRIORITY)] = "VIDIOC_G_PRIORITY", + [_IOC_NR(VIDIOC_S_PRIORITY)] = "VIDIOC_S_PRIORITY", +#if 1 + [_IOC_NR(VIDIOC_G_SLICED_VBI_CAP)] = "VIDIOC_G_SLICED_VBI_CAP", +#endif + [_IOC_NR(VIDIOC_LOG_STATUS)] = "VIDIOC_LOG_STATUS" +}; +#define V4L2_IOCTLS ARRAY_SIZE(v4l2_ioctls) + +static const char *v4l2_int_ioctls[] = { +#ifdef HAVE_VIDEO_DECODER + [_IOC_NR(DECODER_GET_CAPABILITIES)] = "DECODER_GET_CAPABILITIES", + [_IOC_NR(DECODER_GET_STATUS)] = "DECODER_GET_STATUS", + [_IOC_NR(DECODER_SET_NORM)] = "DECODER_SET_NORM", + [_IOC_NR(DECODER_SET_INPUT)] = "DECODER_SET_INPUT", + [_IOC_NR(DECODER_SET_OUTPUT)] = "DECODER_SET_OUTPUT", + [_IOC_NR(DECODER_ENABLE_OUTPUT)] = "DECODER_ENABLE_OUTPUT", + [_IOC_NR(DECODER_SET_PICTURE)] = "DECODER_SET_PICTURE", + [_IOC_NR(DECODER_SET_GPIO)] = "DECODER_SET_GPIO", + [_IOC_NR(DECODER_INIT)] = "DECODER_INIT", + [_IOC_NR(DECODER_SET_VBI_BYPASS)] = "DECODER_SET_VBI_BYPASS", + [_IOC_NR(DECODER_DUMP)] = "DECODER_DUMP", +#endif + [_IOC_NR(AUDC_SET_RADIO)] = "AUDC_SET_RADIO", + [_IOC_NR(AUDC_SET_INPUT)] = "AUDC_SET_INPUT", + + [_IOC_NR(TUNER_SET_TYPE_ADDR)] = "TUNER_SET_TYPE_ADDR", + [_IOC_NR(TUNER_SET_STANDBY)] = "TUNER_SET_STANDBY", + [_IOC_NR(TDA9887_SET_CONFIG)] = "TDA9887_SET_CONFIG", + + [_IOC_NR(VIDIOC_INT_S_REGISTER)] = "VIDIOC_INT_S_REGISTER", + [_IOC_NR(VIDIOC_INT_G_REGISTER)] = "VIDIOC_INT_G_REGISTER", + [_IOC_NR(VIDIOC_INT_RESET)] = "VIDIOC_INT_RESET", + [_IOC_NR(VIDIOC_INT_AUDIO_CLOCK_FREQ)] = "VIDIOC_INT_AUDIO_CLOCK_FREQ", + [_IOC_NR(VIDIOC_INT_DECODE_VBI_LINE)] = "VIDIOC_INT_DECODE_VBI_LINE", + [_IOC_NR(VIDIOC_INT_S_VBI_DATA)] = "VIDIOC_INT_S_VBI_DATA", + [_IOC_NR(VIDIOC_INT_G_VBI_DATA)] = "VIDIOC_INT_G_VBI_DATA", + 
[_IOC_NR(VIDIOC_INT_G_CHIP_IDENT)] = "VIDIOC_INT_G_CHIP_IDENT", + [_IOC_NR(VIDIOC_INT_I2S_CLOCK_FREQ)] = "VIDIOC_INT_I2S_CLOCK_FREQ" +}; +#define V4L2_INT_IOCTLS ARRAY_SIZE(v4l2_int_ioctls) + +/* Common ioctl debug function. This function can be used by + external ioctl messages as well as internal V4L ioctl */ +void v4l_printk_ioctl(unsigned int cmd) +{ + char *dir; + + switch (_IOC_DIR(cmd)) { + case _IOC_NONE: dir = "--"; break; + case _IOC_READ: dir = "r-"; break; + case _IOC_WRITE: dir = "-w"; break; + case _IOC_READ | _IOC_WRITE: dir = "rw"; break; + default: dir = "*ERR*"; break; + } + switch (_IOC_TYPE(cmd)) { + case 'd': + printk("v4l2_int ioctl %s, dir=%s (0x%08x)\n", + (_IOC_NR(cmd) < V4L2_INT_IOCTLS) ? + v4l2_int_ioctls[_IOC_NR(cmd)] : "UNKNOWN", dir, cmd); + break; +#ifdef HAVE_V4L1 + case 'v': + printk("v4l1 ioctl %s, dir=%s (0x%08x)\n", + (_IOC_NR(cmd) < V4L1_IOCTLS) ? + v4l1_ioctls[_IOC_NR(cmd)] : "UNKNOWN", dir, cmd); + break; +#endif + case 'V': + printk("v4l2 ioctl %s, dir=%s (0x%08x)\n", + (_IOC_NR(cmd) < V4L2_IOCTLS) ? 
+ v4l2_ioctls[_IOC_NR(cmd)] : "UNKNOWN", dir, cmd); + break; + + default: + printk("unknown ioctl '%c', dir=%s, #%d (0x%08x)\n", + _IOC_TYPE(cmd), dir, _IOC_NR(cmd), cmd); + } +} /* ----------------------------------------------------------------- */ @@ -253,7 +374,7 @@ EXPORT_SYMBOL(v4l2_prio_check); EXPORT_SYMBOL(v4l2_field_names); EXPORT_SYMBOL(v4l2_type_names); -EXPORT_SYMBOL(v4l2_ioctl_names); +EXPORT_SYMBOL(v4l_printk_ioctl); /* * Local variables: diff --git a/include/linux/video_decoder.h b/include/linux/video_decoder.h index 0e9e48b83e3b..121e26da2c18 100644 --- a/include/linux/video_decoder.h +++ b/include/linux/video_decoder.h @@ -1,6 +1,8 @@ #ifndef _LINUX_VIDEO_DECODER_H #define _LINUX_VIDEO_DECODER_H +#define HAVE_VIDEO_DECODER 1 + struct video_decoder_capability { /* this name is too long */ __u32 flags; #define VIDEO_DECODER_PAL 1 /* can decode PAL signal */ diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 6ac7c1f7902f..ce40675324bd 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -1116,7 +1116,6 @@ int v4l2_prio_check(struct v4l2_prio_state *global, enum v4l2_priority *local); /* names for fancy debug output */ extern char *v4l2_field_names[]; extern char *v4l2_type_names[]; -extern char *v4l2_ioctl_names[]; /* Compatibility layer interface -- v4l1-compat module */ typedef int (*v4l2_kioctl)(struct inode *inode, struct file *file, diff --git a/include/media/audiochip.h b/include/media/audiochip.h index 411f09fc4574..295d256ee811 100644 --- a/include/media/audiochip.h +++ b/include/media/audiochip.h @@ -23,11 +23,6 @@ enum audiochip { /* ---------------------------------------------------------------------- */ -/* v4l device was opened in Radio mode */ -#define AUDC_SET_RADIO _IO('m',2) -/* select from TV,radio,extern,MUTE */ -#define AUDC_SET_INPUT _IOW('m',17,int) - /* audio inputs */ #define AUDIO_TUNER 0x00 #define AUDIO_RADIO 0x01 diff --git a/include/media/tuner.h b/include/media/tuner.h index 
c5f034ecb002..6a8ef189e5fa 100644 --- a/include/media/tuner.h +++ b/include/media/tuner.h @@ -115,10 +115,6 @@ #define TUNER_PHILIPS_TUV1236D 68 /* ATI HDTV Wonder */ #define TUNER_TNF_5335MF 69 /* Sabrent Bt848 */ -#define TUNER_SET_TYPE_ADDR _IOW('T',3,int) -#define TUNER_SET_STANDBY _IOW('T',4,int) -#define TDA9887_SET_CONFIG _IOW('t',5,int) - /* tv card specific */ #define TDA9887_PRESENT (1<<0) #define TDA9887_PORT1_INACTIVE (1<<1) diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h index 90248d29ed0a..9ee616261d66 100644 --- a/include/media/v4l2-common.h +++ b/include/media/v4l2-common.h @@ -63,6 +63,20 @@ enum v4l2_chip_ident { V4L2_IDENT_CX25843 = 243, }; +/* audio ioctls */ +/* v4l device was opened in Radio mode */ +#define AUDC_SET_RADIO _IO('d',88) +/* select from TV,radio,extern,MUTE */ +#define AUDC_SET_INPUT _IOW('d',89,int) + +/* tuner ioctls */ +/* Sets tuner type and its I2C addr */ +#define TUNER_SET_TYPE_ADDR _IOW('d',90,int) +/* Puts tuner on powersaving state, disabling it, except for i2c */ +#define TUNER_SET_STANDBY _IOW('d',91,int) +/* Sets tda9887 specific stuff, like port1, port2 and qss */ +#define TDA9887_SET_CONFIG _IOW('d',92,int) + /* only implemented if CONFIG_VIDEO_ADV_DEBUG is defined */ #define VIDIOC_INT_S_REGISTER _IOR ('d', 100, struct v4l2_register) #define VIDIOC_INT_G_REGISTER _IOWR('d', 101, struct v4l2_register) @@ -108,5 +122,16 @@ enum v4l2_chip_ident { If the frequency is not supported, then -EINVAL is returned. 
*/ #define VIDIOC_INT_I2S_CLOCK_FREQ _IOW ('d', 108, u32) +/* Prints used ioctl */ +extern void v4l_printk_ioctl(unsigned int cmd); + +#define v4l_print_ioctl(name,cmd) do {\ + printk(KERN_DEBUG "%s: ", name); \ + v4l_printk_ioctl(cmd); } while (0) + +#define v4l_i2c_print_ioctl(client,cmd) do {\ + printk(KERN_DEBUG "%s %d-%04x: ", (client)->driver->name, \ + i2c_adapter_id((client)->adapter),(client)->addr); \ + v4l_printk_ioctl(cmd); } while (0) #endif /* V4L2_COMMON_H_ */ -- cgit v1.2.3-71-gd317 From 677517771b7b6efaf8617e70f655b16f3cafcc9b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 8 Jan 2006 22:19:16 +0300 Subject: [PATCH] rcu: uninline __rcu_pending() __rcu_pending() is rather fat and called twice from rcu_pending(). rcu_pending() has multiple callers, and not that small too. This patch uninlines both of them. Signed-off-by: Oleg Nesterov Acked-by: Paul E. McKenney Signed-off-by: Linus Torvalds --- include/linux/rcupdate.h | 31 +------------------------------ kernel/rcupdate.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 51747cd88d1a..a1d26cb28925 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -125,36 +125,7 @@ static inline void rcu_bh_qsctr_inc(int cpu) rdp->passed_quiesc = 1; } -static inline int __rcu_pending(struct rcu_ctrlblk *rcp, - struct rcu_data *rdp) -{ - /* This cpu has pending rcu entries and the grace period - * for them has completed. 
- */ - if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) - return 1; - - /* This cpu has no pending entries, but there are new entries */ - if (!rdp->curlist && rdp->nxtlist) - return 1; - - /* This cpu has finished callbacks to invoke */ - if (rdp->donelist) - return 1; - - /* The rcu core waits for a quiescent state from the cpu */ - if (rdp->quiescbatch != rcp->cur || rdp->qs_pending) - return 1; - - /* nothing to do */ - return 0; -} - -static inline int rcu_pending(int cpu) -{ - return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) || - __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); -} +extern int rcu_pending(int cpu); /** * rcu_read_lock - mark the beginning of an RCU read-side critical section. diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 30b0bba03859..ccc45d49ce71 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -429,6 +429,36 @@ static void rcu_process_callbacks(unsigned long unused) &__get_cpu_var(rcu_bh_data)); } +static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) +{ + /* This cpu has pending rcu entries and the grace period + * for them has completed. 
+ */ + if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) + return 1; + + /* This cpu has no pending entries, but there are new entries */ + if (!rdp->curlist && rdp->nxtlist) + return 1; + + /* This cpu has finished callbacks to invoke */ + if (rdp->donelist) + return 1; + + /* The rcu core waits for a quiescent state from the cpu */ + if (rdp->quiescbatch != rcp->cur || rdp->qs_pending) + return 1; + + /* nothing to do */ + return 0; +} + +int rcu_pending(int cpu) +{ + return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) || + __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); +} + void rcu_check_callbacks(int cpu, int user) { if (user || -- cgit v1.2.3-71-gd317 From ffeff788d6b10e342b4a887f28d339dfec1737f6 Mon Sep 17 00:00:00 2001 From: Kristen Accardi Date: Wed, 2 Nov 2005 16:24:32 -0800 Subject: [PATCH] pci: store PCI_INTERRUPT_PIN in pci_dev Store the value of the INTERRUPT_PIN in the pci_dev structure so that it can be retrieved later. Signed-off-by: Kristen Carlson Accardi Signed-off-by: Greg Kroah-Hartman --- drivers/pci/probe.c | 1 + include/linux/pci.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index fce2cb2112d8..2f82e63323f3 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -571,6 +571,7 @@ static void pci_read_irq(struct pci_dev *dev) unsigned char irq; pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq); + dev->pin = irq; if (irq) pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq); dev->irq = irq; diff --git a/include/linux/pci.h b/include/linux/pci.h index de690ca73d58..b32f70fe2dbd 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -98,6 +98,7 @@ struct pci_dev { unsigned int class; /* 3 bytes: (base,sub,prog-if) */ u8 hdr_type; /* PCI header type (`multi' flag masked out) */ u8 rom_base_reg; /* which config register controls the ROM */ + u8 pin; /* which interrupt pin this device uses */ struct pci_driver *driver; 
/* which driver has allocated this device */ u64 dma_mask; /* Mask of the bits of bus address this -- cgit v1.2.3-71-gd317 From ac7dc65ac0b945270548414491efa9c4357417d9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 13 Dec 2005 18:09:16 +1100 Subject: [PATCH] PCI: Export pci_cfg_space_size The powerpc PCI code sets up the PCI tree without doing config space accesses in most cases, from the firmware tree. However, it still wants to call pci_cfg_space_size() under some conditions, thus it needs to be made non-static (though I don't see a point to export it to modules). Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- drivers/pci/probe.c | 2 +- include/linux/pci.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 3c9834d80850..adfad4fd6a13 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -717,7 +717,7 @@ static void pci_release_dev(struct device *dev) * reading the dword at 0x100 which must either be 0 or a valid extended * capability header. */ -static int pci_cfg_space_size(struct pci_dev *dev) +int pci_cfg_space_size(struct pci_dev *dev) { int pos; u32 status; diff --git a/include/linux/pci.h b/include/linux/pci.h index b32f70fe2dbd..d0e003926744 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -449,6 +449,7 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), void *userdata); +int pci_cfg_space_size(struct pci_dev *dev); /* kmem_cache style wrapper around pci_alloc_consistent() */ -- cgit v1.2.3-71-gd317 From 392a1ce761bc3b3a5d642ee341c1ff082cbb71f0 Mon Sep 17 00:00:00 2001 From: linas Date: Wed, 16 Nov 2005 17:10:41 -0600 Subject: [PATCH] PCI Error Recovery: header file patch Various PCI bus errors can be signaled by newer PCI controllers. 
Recovering from those errors requires an infrastructure to notify affected device drivers of the error, and a way of walking through a reset sequence. This patch adds a set of callbacks to be used by error recovery routines to notify device drivers of the various stages of recovery. Signed-off-by: Linas Vepstas Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index d0e003926744..0a44072383ec 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -78,6 +78,23 @@ typedef int __bitwise pci_power_t; #define PCI_UNKNOWN ((pci_power_t __force) 5) #define PCI_POWER_ERROR ((pci_power_t __force) -1) +/** The pci_channel state describes connectivity between the CPU and + * the pci device. If some PCI bus between here and the pci device + * has crashed or locked up, this info is reflected here. + */ +typedef unsigned int __bitwise pci_channel_state_t; + +enum pci_channel_state { + /* I/O channel is in normal state */ + pci_channel_io_normal = (__force pci_channel_state_t) 1, + + /* I/O to channel is blocked */ + pci_channel_io_frozen = (__force pci_channel_state_t) 2, + + /* PCI card is dead */ + pci_channel_io_perm_failure = (__force pci_channel_state_t) 3, +}; + /* * The pci_dev structure is used to describe PCI devices. */ @@ -111,6 +128,7 @@ struct pci_dev { this is D0-D3, D0 being fully functional, and D3 being off. */ + pci_channel_state_t error_state; /* current connectivity state */ struct device dev; /* Generic device interface */ /* device is compatible with these IDs */ @@ -233,6 +251,54 @@ struct pci_dynids { unsigned int use_driver_data:1; /* pci_driver->driver_data is used */ }; +/* ---------------------------------------------------------------- */ +/** PCI Error Recovery System (PCI-ERS). 
If a PCI device driver provides + * a set fof callbacks in struct pci_error_handlers, then that device driver + * will be notified of PCI bus errors, and will be driven to recovery + * when an error occurs. + */ + +typedef unsigned int __bitwise pci_ers_result_t; + +enum pci_ers_result { + /* no result/none/not supported in device driver */ + PCI_ERS_RESULT_NONE = (__force pci_ers_result_t) 1, + + /* Device driver can recover without slot reset */ + PCI_ERS_RESULT_CAN_RECOVER = (__force pci_ers_result_t) 2, + + /* Device driver wants slot to be reset. */ + PCI_ERS_RESULT_NEED_RESET = (__force pci_ers_result_t) 3, + + /* Device has completely failed, is unrecoverable */ + PCI_ERS_RESULT_DISCONNECT = (__force pci_ers_result_t) 4, + + /* Device driver is fully recovered and operational */ + PCI_ERS_RESULT_RECOVERED = (__force pci_ers_result_t) 5, +}; + +/* PCI bus error event callbacks */ +struct pci_error_handlers +{ + /* PCI bus error detected on this device */ + pci_ers_result_t (*error_detected)(struct pci_dev *dev, + enum pci_channel_state error); + + /* MMIO has been re-enabled, but not DMA */ + pci_ers_result_t (*mmio_enabled)(struct pci_dev *dev); + + /* PCI Express link has been reset */ + pci_ers_result_t (*link_reset)(struct pci_dev *dev); + + /* PCI slot has been reset */ + pci_ers_result_t (*slot_reset)(struct pci_dev *dev); + + /* Device driver may resume normal operations */ + void (*resume)(struct pci_dev *dev); +}; + +/* ---------------------------------------------------------------- */ + struct module; struct pci_driver { struct list_head node; @@ -245,6 +311,7 @@ struct pci_driver { int (*enable_wake) (struct pci_dev *dev, pci_power_t state, int enable); /* Enable wake event */ void (*shutdown) (struct pci_dev *dev); + struct pci_error_handlers *err_handler; struct device_driver driver; struct pci_dynids dynids; }; -- cgit v1.2.3-71-gd317 From 788ee7b09883515f3a72a8f2a980df5e94f37e2c Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 9 Jan 2006 
21:12:17 +0000 Subject: [MMC] Add DATA_MULTI flag Some hosts need to know that a transfer will be multi-block. Add a data flag to indicate multiple data block transfers. Signed-off-by: Russell King --- drivers/mmc/mmc_block.c | 8 +++++++- include/linux/mmc/mmc.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c index 198561d21710..f43cdfcbfd2c 100644 --- a/drivers/mmc/mmc_block.c +++ b/drivers/mmc/mmc_block.c @@ -200,7 +200,13 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) brq.data.flags |= MMC_DATA_WRITE; brq.data.blocks = 1; } - brq.mrq.stop = brq.data.blocks > 1 ? &brq.stop : NULL; + + if (brq.data.blocks > 1) { + brq.data.flags |= MMC_DATA_MULTI; + brq.mrq.stop = &brq.stop; + } else { + brq.mrq.stop = NULL; + } brq.data.sg = mq->sg; brq.data.sg_len = blk_rq_map_sg(req->q, req, brq.data.sg); diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index aef6042f8f0b..86d491b3b73f 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -64,6 +64,7 @@ struct mmc_data { #define MMC_DATA_WRITE (1 << 8) #define MMC_DATA_READ (1 << 9) #define MMC_DATA_STREAM (1 << 10) +#define MMC_DATA_MULTI (1 << 11) unsigned int bytes_xfered; -- cgit v1.2.3-71-gd317 From 5cb1454b862ab3040b78364d58330262fea1ddba Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 5 Nov 2005 16:58:14 +1100 Subject: [CRYPTO] Allow multiple implementations of the same algorithm This is the first step on the road towards asynchronous support in the Crypto API. It adds support for having multiple crypto_alg objects for the same algorithm registered in the system. For example, each device driver would register a crypto_alg object for each algorithm that it supports. While at the same time the user may load software implementations of those same algorithms. 
Users of the Crypto API may then select a specific implementation by name, or choose any implementation for a given algorithm with the highest priority. The priority field is a 32-bit signed integer. In future it will be possible to modify it from user-space. This also provides a solution to the problem of selecting amongst various AES implementations, that is, aes vs. aes-i586 vs. aes-padlock. Signed-off-by: Herbert Xu --- crypto/api.c | 52 ++++++++++++++++++++++++++++++++++++++++++++------ crypto/internal.h | 6 ++++++ crypto/proc.c | 6 +++--- include/linux/crypto.h | 5 +++++ 4 files changed, 60 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/crypto/api.c b/crypto/api.c index 40ae42e9b6a6..2715afdf678c 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -3,6 +3,7 @@ * * Copyright (c) 2002 James Morris * Copyright (c) 2002 David S. Miller (davem@redhat.com) + * Copyright (c) 2005 Herbert Xu * * Portions derived from Cryptoapi, by Alexander Kjeldaas * and Nettle, by Niels Möller. 
@@ -18,9 +19,11 @@ #include #include #include +#include #include #include #include +#include #include "internal.h" LIST_HEAD(crypto_alg_list); @@ -39,6 +42,7 @@ static inline void crypto_alg_put(struct crypto_alg *alg) static struct crypto_alg *crypto_alg_lookup(const char *name) { struct crypto_alg *q, *alg = NULL; + int best = -1; if (!name) return NULL; @@ -46,11 +50,23 @@ static struct crypto_alg *crypto_alg_lookup(const char *name) down_read(&crypto_alg_sem); list_for_each_entry(q, &crypto_alg_list, cra_list) { - if (!(strcmp(q->cra_name, name))) { - if (crypto_alg_get(q)) - alg = q; + int exact, fuzzy; + + exact = !strcmp(q->cra_driver_name, name); + fuzzy = !strcmp(q->cra_name, name); + if (!exact && !(fuzzy && q->cra_priority > best)) + continue; + + if (unlikely(!crypto_alg_get(q))) + continue; + + best = q->cra_priority; + if (alg) + crypto_alg_put(alg); + alg = q; + + if (exact) break; - } } up_read(&crypto_alg_sem); @@ -207,9 +223,26 @@ void crypto_free_tfm(struct crypto_tfm *tfm) kfree(tfm); } +static inline int crypto_set_driver_name(struct crypto_alg *alg) +{ + static const char suffix[] = "-generic"; + char *driver_name = (char *)alg->cra_driver_name; + int len; + + if (*driver_name) + return 0; + + len = strlcpy(driver_name, alg->cra_name, CRYPTO_MAX_ALG_NAME); + if (len + sizeof(suffix) > CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + + memcpy(driver_name + len, suffix, sizeof(suffix)); + return 0; +} + int crypto_register_alg(struct crypto_alg *alg) { - int ret = 0; + int ret; struct crypto_alg *q; if (alg->cra_alignmask & (alg->cra_alignmask + 1)) @@ -220,11 +253,18 @@ int crypto_register_alg(struct crypto_alg *alg) if (alg->cra_blocksize > PAGE_SIZE) return -EINVAL; + + if (alg->cra_priority < 0) + return -EINVAL; + ret = crypto_set_driver_name(alg); + if (unlikely(ret)) + return ret; + down_write(&crypto_alg_sem); list_for_each_entry(q, &crypto_alg_list, cra_list) { - if (!(strcmp(q->cra_name, alg->cra_name))) { + if 
(!strcmp(q->cra_driver_name, alg->cra_driver_name)) { ret = -EEXIST; goto out; } diff --git a/crypto/internal.h b/crypto/internal.h index 37aa652ce5ce..959e602909a6 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -2,6 +2,7 @@ * Cryptographic API. * * Copyright (c) 2002 James Morris + * Copyright (c) 2005 Herbert Xu * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -16,10 +17,15 @@ #include #include #include +#include #include +#include #include #include +extern struct list_head crypto_alg_list; +extern struct rw_semaphore crypto_alg_sem; + extern enum km_type crypto_km_types[]; static inline enum km_type crypto_kmap_type(int out) diff --git a/crypto/proc.c b/crypto/proc.c index 630ba91c08f1..c0a5dd7ce2cc 100644 --- a/crypto/proc.c +++ b/crypto/proc.c @@ -4,6 +4,7 @@ * Procfs information. * * Copyright (c) 2002 James Morris + * Copyright (c) 2005 Herbert Xu * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -18,9 +19,6 @@ #include #include "internal.h" -extern struct list_head crypto_alg_list; -extern struct rw_semaphore crypto_alg_sem; - static void *c_start(struct seq_file *m, loff_t *pos) { struct list_head *v; @@ -53,7 +51,9 @@ static int c_show(struct seq_file *m, void *p) struct crypto_alg *alg = (struct crypto_alg *)p; seq_printf(m, "name : %s\n", alg->cra_name); + seq_printf(m, "driver : %s\n", alg->cra_driver_name); seq_printf(m, "module : %s\n", module_name(alg->cra_module)); + seq_printf(m, "priority : %d\n", alg->cra_priority); switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_CIPHER: diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 3c89df6e7768..d88bf8aa8b47 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -3,6 +3,7 @@ * * Copyright (c) 2002 James Morris * Copyright (c) 2002 David 
S. Miller (davem@redhat.com) + * Copyright (c) 2005 Herbert Xu * * Portions derived from Cryptoapi, by Alexander Kjeldaas * and Nettle, by Niels Möller. @@ -126,7 +127,11 @@ struct crypto_alg { unsigned int cra_blocksize; unsigned int cra_ctxsize; unsigned int cra_alignmask; + + int cra_priority; + const char cra_name[CRYPTO_MAX_ALG_NAME]; + const char cra_driver_name[CRYPTO_MAX_ALG_NAME]; union { struct cipher_alg cipher; -- cgit v1.2.3-71-gd317 From 7225b3fd0b6e224235fc50a69f70479ff96d5602 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Mon, 9 Jan 2006 22:51:46 +0000 Subject: [MMC] Indicate that R1/R1b contains command opcode Some controllers actually check the first byte of the response (most don't). This byte contains the command opcode for R1/R1b and all 1:s for other types. The difference must be indicated to the controller so it knows which reply to expect. Signed-off-by: Pierre Ossman --- include/linux/mmc/mmc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 86d491b3b73f..ccd3e13de1e8 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -27,14 +27,15 @@ struct mmc_command { #define MMC_RSP_MASK (3 << 0) #define MMC_RSP_CRC (1 << 3) /* expect valid crc */ #define MMC_RSP_BUSY (1 << 4) /* card may send busy */ +#define MMC_RSP_OPCODE (1 << 5) /* response contains opcode */ /* * These are the response types, and correspond to valid bit * patterns of the above flags. One additional valid pattern * is all zeros, which means we don't expect a response. 
*/ -#define MMC_RSP_R1 (MMC_RSP_SHORT|MMC_RSP_CRC) -#define MMC_RSP_R1B (MMC_RSP_SHORT|MMC_RSP_CRC|MMC_RSP_BUSY) +#define MMC_RSP_R1 (MMC_RSP_SHORT|MMC_RSP_CRC|MMC_RSP_OPCODE) +#define MMC_RSP_R1B (MMC_RSP_SHORT|MMC_RSP_CRC|MMC_RSP_OPCODE|MMC_RSP_BUSY) #define MMC_RSP_R2 (MMC_RSP_LONG|MMC_RSP_CRC) #define MMC_RSP_R3 (MMC_RSP_SHORT) #define MMC_RSP_R6 (MMC_RSP_SHORT|MMC_RSP_CRC) -- cgit v1.2.3-71-gd317 From 540695886075964c0e5295bea8e4793e8765d010 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 10 Jan 2006 00:09:36 +0100 Subject: s/assoicated/associated/ Signed-off-by: Adrian Bunk --- include/linux/elevator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 4a6f50e31c73..23fe746a1d51 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -66,7 +66,7 @@ struct elevator_type }; /* - * each queue has an elevator_queue assoicated with it + * each queue has an elevator_queue associated with it */ struct elevator_queue { -- cgit v1.2.3-71-gd317 From 711a660dc2064013a2b0167ee67389707fc9cac3 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Mon, 9 Jan 2006 15:59:17 -0800 Subject: [PATCH] mutex subsystem, add typecheck_fn(type, function) add typecheck_fn(type, function) to do type-checking of function pointers. Modified-by: Ingo Molnar (made it typeof() based, instead of typedef based.) Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ca7ff8fdd090..d0e6ca3b00ef 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -286,6 +286,15 @@ extern void dump_stack(void); 1; \ }) +/* + * Check at compile time that 'function' is a certain type, or is a pointer + * to that type (needs to use typedef for the function type.) 
+ */ +#define typecheck_fn(type,function) \ +({ typeof(type) __tmp = function; \ + (void)__tmp; \ +}) + #endif /* __KERNEL__ */ #define SI_LOAD_SHIFT 16 -- cgit v1.2.3-71-gd317 From 6053ee3b32e3437e8c1e72687850f436e779bd49 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 9 Jan 2006 15:59:19 -0800 Subject: [PATCH] mutex subsystem, core mutex implementation, core files: just the basic subsystem, no users of it. Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven --- include/linux/mutex.h | 119 ++++++++++++++++++ kernel/Makefile | 2 +- kernel/mutex.c | 325 ++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/mutex.h | 35 ++++++ 4 files changed, 480 insertions(+), 1 deletion(-) create mode 100644 include/linux/mutex.h create mode 100644 kernel/mutex.c create mode 100644 kernel/mutex.h (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h new file mode 100644 index 000000000000..9bce0fee68d4 --- /dev/null +++ b/include/linux/mutex.h @@ -0,0 +1,119 @@ +/* + * Mutexes: blocking mutual exclusion locks + * + * started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar + * + * This file contains the main data structure and API definitions. + */ +#ifndef __LINUX_MUTEX_H +#define __LINUX_MUTEX_H + +#include +#include + +#include + +/* + * Simple, straightforward mutexes with strict semantics: + * + * - only one task can hold the mutex at a time + * - only the owner can unlock the mutex + * - multiple unlocks are not permitted + * - recursive locking is not permitted + * - a mutex object must be initialized via the API + * - a mutex object must not be initialized via memset or copying + * - task may not exit with mutex held + * - memory areas where held locks reside must not be freed + * - held mutexes must not be reinitialized + * - mutexes may not be used in irq contexts + * + * These semantics are fully enforced when DEBUG_MUTEXES is + * enabled. 
Furthermore, besides enforcing the above rules, the mutex + * debugging code also implements a number of additional features + * that make lock debugging easier and faster: + * + * - uses symbolic names of mutexes, whenever they are printed in debug output + * - point-of-acquire tracking, symbolic lookup of function names + * - list of all locks held in the system, printout of them + * - owner tracking + * - detects self-recursing locks and prints out all relevant info + * - detects multi-task circular deadlocks and prints out all affected + * locks and tasks (and only those tasks) + */ +struct mutex { + /* 1: unlocked, 0: locked, negative: locked, possible waiters */ + atomic_t count; + spinlock_t wait_lock; + struct list_head wait_list; +#ifdef CONFIG_DEBUG_MUTEXES + struct thread_info *owner; + struct list_head held_list; + unsigned long acquire_ip; + const char *name; + void *magic; +#endif +}; + +/* + * This is the control structure for tasks blocked on mutex, + * which resides on the blocked task's kernel stack: + */ +struct mutex_waiter { + struct list_head list; + struct task_struct *task; +#ifdef CONFIG_DEBUG_MUTEXES + struct mutex *lock; + void *magic; +#endif +}; + +#ifdef CONFIG_DEBUG_MUTEXES +# include +#else +# define __DEBUG_MUTEX_INITIALIZER(lockname) +# define mutex_init(mutex) __mutex_init(mutex, NULL) +# define mutex_destroy(mutex) do { } while (0) +# define mutex_debug_show_all_locks() do { } while (0) +# define mutex_debug_show_held_locks(p) do { } while (0) +# define mutex_debug_check_no_locks_held(task) do { } while (0) +# define mutex_debug_check_no_locks_freed(from, to) do { } while (0) +#endif + +#define __MUTEX_INITIALIZER(lockname) \ + { .count = ATOMIC_INIT(1) \ + , .wait_lock = SPIN_LOCK_UNLOCKED \ + , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ + __DEBUG_MUTEX_INITIALIZER(lockname) } + +#define DEFINE_MUTEX(mutexname) \ + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) + +extern void fastcall __mutex_init(struct mutex 
*lock, const char *name); + +/*** + * mutex_is_locked - is the mutex locked + * @lock: the mutex to be queried + * + * Returns 1 if the mutex is locked, 0 if unlocked. + */ +static inline int fastcall mutex_is_locked(struct mutex *lock) +{ + return atomic_read(&lock->count) != 1; +} + +/* + * See kernel/mutex.c for detailed documentation of these APIs. + * Also see Documentation/mutex-design.txt. + */ +extern void fastcall mutex_lock(struct mutex *lock); +extern int fastcall mutex_lock_interruptible(struct mutex *lock); +/* + * NOTE: mutex_trylock() follows the spin_trylock() convention, + * not the down_trylock() convention! + */ +extern int fastcall mutex_trylock(struct mutex *lock); +extern void fastcall mutex_unlock(struct mutex *lock); + +#endif diff --git a/kernel/Makefile b/kernel/Makefile index 4f5a1453093a..de580b4d54a4 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -7,7 +7,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ sysctl.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o intermodule.o extable.o params.o posix-timers.o \ - kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o + kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o diff --git a/kernel/mutex.c b/kernel/mutex.c new file mode 100644 index 000000000000..7eb960661441 --- /dev/null +++ b/kernel/mutex.c @@ -0,0 +1,325 @@ +/* + * kernel/mutex.c + * + * Mutexes: blocking mutual exclusion locks + * + * Started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar + * + * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and + * David Howells for suggestions and improvements. + * + * Also see Documentation/mutex-design.txt. 
+ */ +#include +#include +#include +#include +#include + +/* + * In the DEBUG case we are using the "NULL fastpath" for mutexes, + * which forces all calls into the slowpath: + */ +#ifdef CONFIG_DEBUG_MUTEXES +# include "mutex-debug.h" +# include +#else +# include "mutex.h" +# include +#endif + +/*** + * mutex_init - initialize the mutex + * @lock: the mutex to be initialized + * + * Initialize the mutex to unlocked state. + * + * It is not allowed to initialize an already locked mutex. + */ +void fastcall __mutex_init(struct mutex *lock, const char *name) +{ + atomic_set(&lock->count, 1); + spin_lock_init(&lock->wait_lock); + INIT_LIST_HEAD(&lock->wait_list); + + debug_mutex_init(lock, name); +} + +EXPORT_SYMBOL(__mutex_init); + +/* + * We split the mutex lock/unlock logic into separate fastpath and + * slowpath functions, to reduce the register pressure on the fastpath. + * We also put the fastpath first in the kernel image, to make sure the + * branch is predicted by the CPU as default-untaken. + */ +static void fastcall noinline __sched +__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__); + +/*** + * mutex_lock - acquire the mutex + * @lock: the mutex to be acquired + * + * Lock the mutex exclusively for this task. If the mutex is not + * available right now, it will sleep until it can get it. + * + * The mutex must later on be released by the same task that + * acquired it. Recursive locking is not allowed. The task + * may not exit without first unlocking the mutex. Also, kernel + * memory where the mutex resides must not be freed with + * the mutex still locked. The mutex must first be initialized + * (or statically defined) before it can be locked. memset()-ing + * the mutex to 0 is not allowed. + * + * ( The CONFIG_DEBUG_MUTEXES .config option turns on debugging + * checks that will enforce the restrictions and will also do + * deadlock debugging. ) + * + * This function is similar to (but not equivalent to) down(). 
+ */ +void fastcall __sched mutex_lock(struct mutex *lock) +{ + /* + * The locking fastpath is the 1->0 transition from + * 'unlocked' into 'locked' state. + * + * NOTE: if asm/mutex.h is included, then some architectures + * rely on mutex_lock() having _no other code_ here but this + * fastpath. That allows the assembly fastpath to do + * tail-merging optimizations. (If you want to put testcode + * here, do it under #ifndef CONFIG_DEBUG_MUTEXES.) + */ + __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); +} + +EXPORT_SYMBOL(mutex_lock); + +static void fastcall noinline __sched +__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__); + +/*** + * mutex_unlock - release the mutex + * @lock: the mutex to be released + * + * Unlock a mutex that has been locked by this task previously. + * + * This function must not be used in interrupt context. Unlocking + * of a not locked mutex is not allowed. + * + * This function is similar to (but not equivalent to) up(). 
+ */ + __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath); +} + +EXPORT_SYMBOL(mutex_unlock); + +/* + * Lock a mutex (possibly interruptible), slowpath: + */ +static inline int __sched +__mutex_lock_common(struct mutex *lock, long state __IP_DECL__) +{ + struct task_struct *task = current; + struct mutex_waiter waiter; + unsigned int old_val; + + debug_mutex_init_waiter(&waiter); + + spin_lock_mutex(&lock->wait_lock); + + debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip); + + /* add waiting tasks to the end of the waitqueue (FIFO): */ + list_add_tail(&waiter.list, &lock->wait_list); + waiter.task = task; + + for (;;) { + /* + * Lets try to take the lock again - this is needed even if + * we get here for the first time (shortly after failing to + * acquire the lock), to make sure that we get a wakeup once + * it's unlocked. Later on, if we sleep, this is the + * operation that gives us the lock. We xchg it to -1, so + * that when we release the lock, we properly wake up the + * other waiters: + */ + old_val = atomic_xchg(&lock->count, -1); + if (old_val == 1) + break; + + /* + * got a signal? (This code gets eliminated in the + * TASK_UNINTERRUPTIBLE case.) + */ + if (unlikely(state == TASK_INTERRUPTIBLE && + signal_pending(task))) { + mutex_remove_waiter(lock, &waiter, task->thread_info); + spin_unlock_mutex(&lock->wait_lock); + + debug_mutex_free_waiter(&waiter); + return -EINTR; + } + __set_task_state(task, state); + + /* didnt get the lock, go to sleep: */ + spin_unlock_mutex(&lock->wait_lock); + schedule(); + spin_lock_mutex(&lock->wait_lock); + } + + /* got the lock - rejoice! 
*/ + mutex_remove_waiter(lock, &waiter, task->thread_info); + debug_mutex_set_owner(lock, task->thread_info __IP__); + + /* set it to 0 if there are no waiters left: */ + if (likely(list_empty(&lock->wait_list))) + atomic_set(&lock->count, 0); + + spin_unlock_mutex(&lock->wait_lock); + + debug_mutex_free_waiter(&waiter); + + DEBUG_WARN_ON(list_empty(&lock->held_list)); + DEBUG_WARN_ON(lock->owner != task->thread_info); + + return 0; +} + +static void fastcall noinline __sched +__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__) +{ + struct mutex *lock = container_of(lock_count, struct mutex, count); + + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE __IP__); +} + +/* + * Release the lock, slowpath: + */ +static fastcall noinline void +__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) +{ + struct mutex *lock = container_of(lock_count, struct mutex, count); + + DEBUG_WARN_ON(lock->owner != current_thread_info()); + + spin_lock_mutex(&lock->wait_lock); + + /* + * some architectures leave the lock unlocked in the fastpath failure + * case, others need to leave it locked. In the later case we have to + * unlock it here + */ + if (__mutex_slowpath_needs_to_unlock()) + atomic_set(&lock->count, 1); + + debug_mutex_unlock(lock); + + if (!list_empty(&lock->wait_list)) { + /* get the first entry from the wait-list: */ + struct mutex_waiter *waiter = + list_entry(lock->wait_list.next, + struct mutex_waiter, list); + + debug_mutex_wake_waiter(lock, waiter); + + wake_up_process(waiter->task); + } + + debug_mutex_clear_owner(lock); + + spin_unlock_mutex(&lock->wait_lock); +} + +/* + * Here come the less common (and hence less performance-critical) APIs: + * mutex_lock_interruptible() and mutex_trylock(). 
+ */ +static int fastcall noinline __sched +__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__); + +/*** + * mutex_lock_interruptible - acquire the mutex, interruptible + * @lock: the mutex to be acquired + * + * Lock the mutex like mutex_lock(), and return 0 if the mutex has + * been acquired or sleep until the mutex becomes available. If a + * signal arrives while waiting for the lock then this function + * returns -EINTR. + * + * This function is similar to (but not equivalent to) down_interruptible(). + */ +int fastcall __sched mutex_lock_interruptible(struct mutex *lock) +{ + /* NOTE: no other code must be here - see mutex_lock() */ + return __mutex_fastpath_lock_retval + (&lock->count, __mutex_lock_interruptible_slowpath); +} + +EXPORT_SYMBOL(mutex_lock_interruptible); + +static int fastcall noinline __sched +__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__) +{ + struct mutex *lock = container_of(lock_count, struct mutex, count); + + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE __IP__); +} + +/* + * Spinlock based trylock, we take the spinlock and check whether we + * can get the lock: + */ +static inline int __mutex_trylock_slowpath(atomic_t *lock_count) +{ + struct mutex *lock = container_of(lock_count, struct mutex, count); + int prev; + + spin_lock_mutex(&lock->wait_lock); + + prev = atomic_xchg(&lock->count, -1); + if (likely(prev == 1)) + debug_mutex_set_owner(lock, current_thread_info() __RET_IP__); + /* Set it back to 0 if there are no waiters: */ + if (likely(list_empty(&lock->wait_list))) + atomic_set(&lock->count, 0); + + spin_unlock_mutex(&lock->wait_lock); + + return prev == 1; +} + +/*** + * mutex_trylock - try acquire the mutex, without waiting + * @lock: the mutex to be acquired + * + * Try to acquire the mutex atomically. Returns 1 if the mutex + * has been acquired successfully, and 0 on contention. 
+ * + * NOTE: this function follows the spin_trylock() convention, so + * it is negated to the down_trylock() return values! Be careful + * about this when converting semaphore users to mutexes. + * + * This function must not be used in interrupt context. The + * mutex must be released by the same task that acquired it. + */ +int fastcall mutex_trylock(struct mutex *lock) +{ + return __mutex_fastpath_trylock(&lock->count, + __mutex_trylock_slowpath); +} + +EXPORT_SYMBOL(mutex_trylock); + + + diff --git a/kernel/mutex.h b/kernel/mutex.h new file mode 100644 index 000000000000..00fe84e7b672 --- /dev/null +++ b/kernel/mutex.h @@ -0,0 +1,35 @@ +/* + * Mutexes: blocking mutual exclusion locks + * + * started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar + * + * This file contains mutex debugging related internal prototypes, for the + * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: + */ + +#define spin_lock_mutex(lock) spin_lock(lock) +#define spin_unlock_mutex(lock) spin_unlock(lock) +#define mutex_remove_waiter(lock, waiter, ti) \ + __list_del((waiter)->list.prev, (waiter)->list.next) + +#define DEBUG_WARN_ON(c) do { } while (0) +#define debug_mutex_set_owner(lock, new_owner) do { } while (0) +#define debug_mutex_clear_owner(lock) do { } while (0) +#define debug_mutex_init_waiter(waiter) do { } while (0) +#define debug_mutex_wake_waiter(lock, waiter) do { } while (0) +#define debug_mutex_free_waiter(waiter) do { } while (0) +#define debug_mutex_add_waiter(lock, waiter, ti, ip) do { } while (0) +#define debug_mutex_unlock(lock) do { } while (0) +#define debug_mutex_init(lock, name) do { } while (0) + +/* + * Return-address parameters/declarations. 
They are very useful for + * debugging, but add overhead in the !DEBUG case - so we go the + * trouble of using this not too elegant but zero-cost solution: + */ +#define __IP_DECL__ +#define __IP__ +#define __RET_IP__ + -- cgit v1.2.3-71-gd317 From 408894ee4dd4debfdedd472eb4d8414892fc90f6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 9 Jan 2006 15:59:20 -0800 Subject: [PATCH] mutex subsystem, debugging code mutex implementation - add debugging code. Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven --- include/linux/mutex-debug.h | 21 ++ include/linux/sched.h | 5 + kernel/Makefile | 1 + kernel/fork.c | 4 + kernel/mutex-debug.c | 464 ++++++++++++++++++++++++++++++++++++++++++++ kernel/mutex-debug.h | 134 +++++++++++++ lib/Kconfig.debug | 8 + 7 files changed, 637 insertions(+) create mode 100644 include/linux/mutex-debug.h create mode 100644 kernel/mutex-debug.c create mode 100644 kernel/mutex-debug.h (limited to 'include/linux') diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h new file mode 100644 index 000000000000..0ccd8f983b50 --- /dev/null +++ b/include/linux/mutex-debug.h @@ -0,0 +1,21 @@ +#ifndef __LINUX_MUTEX_DEBUG_H +#define __LINUX_MUTEX_DEBUG_H + +/* + * Mutexes - debugging helpers: + */ + +#define __DEBUG_MUTEX_INITIALIZER(lockname) \ + , .held_list = LIST_HEAD_INIT(lockname.held_list), \ + .name = #lockname , .magic = &lockname + +#define mutex_init(sem) __mutex_init(sem, __FUNCTION__) + +extern void FASTCALL(mutex_destroy(struct mutex *lock)); + +extern void mutex_debug_show_all_locks(void); +extern void mutex_debug_show_held_locks(struct task_struct *filter); +extern void mutex_debug_check_no_locks_held(struct task_struct *task); +extern void mutex_debug_check_no_locks_freed(const void *from, const void *to); + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 78eb92ae4d94..85b53f87c703 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -817,6 +817,11 @@ struct task_struct { 
/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */ spinlock_t proc_lock; +#ifdef CONFIG_DEBUG_MUTEXES + /* mutex deadlock detection */ + struct mutex_waiter *blocked_on; +#endif + /* journalling filesystem info */ void *journal_info; diff --git a/kernel/Makefile b/kernel/Makefile index de580b4d54a4..a940bac02837 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,6 +9,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ rcupdate.o intermodule.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o +obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += cpu.o spinlock.o diff --git a/kernel/fork.c b/kernel/fork.c index 72e3252c6763..b18d64554feb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -979,6 +979,10 @@ static task_t *copy_process(unsigned long clone_flags, } #endif +#ifdef CONFIG_DEBUG_MUTEXES + p->blocked_on = NULL; /* not blocked yet */ +#endif + p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c new file mode 100644 index 000000000000..4fcb051a8b9e --- /dev/null +++ b/kernel/mutex-debug.c @@ -0,0 +1,464 @@ +/* + * kernel/mutex-debug.c + * + * Debugging code for mutexes + * + * Started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar + * + * lock debugging, locking tree, deadlock detection started by: + * + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey + * Released under the General Public License (GPL). + */ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "mutex-debug.h" + +/* + * We need a global lock when we walk through the multi-process + * lock tree. Only used in the deadlock-debugging case. 
+ */ +DEFINE_SPINLOCK(debug_mutex_lock); + +/* + * All locks held by all tasks, in a single global list: + */ +LIST_HEAD(debug_mutex_held_locks); + +/* + * In the debug case we carry the caller's instruction pointer into + * other functions, but we dont want the function argument overhead + * in the nondebug case - hence these macros: + */ +#define __IP_DECL__ , unsigned long ip +#define __IP__ , ip +#define __RET_IP__ , (unsigned long)__builtin_return_address(0) + +/* + * "mutex debugging enabled" flag. We turn it off when we detect + * the first problem because we dont want to recurse back + * into the tracing code when doing error printk or + * executing a BUG(): + */ +int debug_mutex_on = 1; + +static void printk_task(struct task_struct *p) +{ + if (p) + printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio); + else + printk(""); +} + +static void printk_ti(struct thread_info *ti) +{ + if (ti) + printk_task(ti->task); + else + printk(""); +} + +static void printk_task_short(struct task_struct *p) +{ + if (p) + printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio); + else + printk(""); +} + +static void printk_lock(struct mutex *lock, int print_owner) +{ + printk(" [%p] {%s}\n", lock, lock->name); + + if (print_owner && lock->owner) { + printk(".. held by: "); + printk_ti(lock->owner); + printk("\n"); + } + if (lock->owner) { + printk("... 
acquired at: "); + print_symbol("%s\n", lock->acquire_ip); + } +} + +/* + * printk locks held by a task: + */ +static void show_task_locks(struct task_struct *p) +{ + switch (p->state) { + case TASK_RUNNING: printk("R"); break; + case TASK_INTERRUPTIBLE: printk("S"); break; + case TASK_UNINTERRUPTIBLE: printk("D"); break; + case TASK_STOPPED: printk("T"); break; + case EXIT_ZOMBIE: printk("Z"); break; + case EXIT_DEAD: printk("X"); break; + default: printk("?"); break; + } + printk_task(p); + if (p->blocked_on) { + struct mutex *lock = p->blocked_on->lock; + + printk(" blocked on mutex:"); + printk_lock(lock, 1); + } else + printk(" (not blocked on mutex)\n"); +} + +/* + * printk all locks held in the system (if filter == NULL), + * or all locks belonging to a single task (if filter != NULL): + */ +void show_held_locks(struct task_struct *filter) +{ + struct list_head *curr, *cursor = NULL; + struct mutex *lock; + struct thread_info *t; + unsigned long flags; + int count = 0; + + if (filter) { + printk("------------------------------\n"); + printk("| showing all locks held by: | ("); + printk_task_short(filter); + printk("):\n"); + printk("------------------------------\n"); + } else { + printk("---------------------------\n"); + printk("| showing all locks held: |\n"); + printk("---------------------------\n"); + } + + /* + * Play safe and acquire the global trace lock. We + * cannot printk with that lock held so we iterate + * very carefully: + */ +next: + debug_spin_lock_save(&debug_mutex_lock, flags); + list_for_each(curr, &debug_mutex_held_locks) { + if (cursor && curr != cursor) + continue; + lock = list_entry(curr, struct mutex, held_list); + t = lock->owner; + if (filter && (t != filter->thread_info)) + continue; + count++; + cursor = curr->next; + debug_spin_lock_restore(&debug_mutex_lock, flags); + + printk("\n#%03d: ", count); + printk_lock(lock, filter ? 
0 : 1); + goto next; + } + debug_spin_lock_restore(&debug_mutex_lock, flags); + printk("\n"); +} + +void mutex_debug_show_all_locks(void) +{ + struct task_struct *g, *p; + int count = 10; + int unlock = 1; + + printk("\nShowing all blocking locks in the system:\n"); + + /* + * Here we try to get the tasklist_lock as hard as possible, + * if not successful after 2 seconds we ignore it (but keep + * trying). This is to enable a debug printout even if a + * tasklist_lock-holding task deadlocks or crashes. + */ +retry: + if (!read_trylock(&tasklist_lock)) { + if (count == 10) + printk("hm, tasklist_lock locked, retrying... "); + if (count) { + count--; + printk(" #%d", 10-count); + mdelay(200); + goto retry; + } + printk(" ignoring it.\n"); + unlock = 0; + } + if (count != 10) + printk(" locked it.\n"); + + do_each_thread(g, p) { + show_task_locks(p); + if (!unlock) + if (read_trylock(&tasklist_lock)) + unlock = 1; + } while_each_thread(g, p); + + printk("\n"); + show_held_locks(NULL); + printk("=============================================\n\n"); + + if (unlock) + read_unlock(&tasklist_lock); +} + +static void report_deadlock(struct task_struct *task, struct mutex *lock, + struct mutex *lockblk, unsigned long ip) +{ + printk("\n%s/%d is trying to acquire this lock:\n", + current->comm, current->pid); + printk_lock(lock, 1); + printk("... trying at: "); + print_symbol("%s\n", ip); + show_held_locks(current); + + if (lockblk) { + printk("but %s/%d is deadlocking current task %s/%d!\n\n", + task->comm, task->pid, current->comm, current->pid); + printk("\n%s/%d is blocked on this lock:\n", + task->comm, task->pid); + printk_lock(lockblk, 1); + + show_held_locks(task); + + printk("\n%s/%d's [blocked] stackdump:\n\n", + task->comm, task->pid); + show_stack(task, NULL); + } + + printk("\n%s/%d's [current] stackdump:\n\n", + current->comm, current->pid); + dump_stack(); + mutex_debug_show_all_locks(); + printk("[ turning off deadlock detection. Please report this. 
]\n\n"); + local_irq_disable(); +} + +/* + * Recursively check for mutex deadlocks: + */ +static int check_deadlock(struct mutex *lock, int depth, + struct thread_info *ti, unsigned long ip) +{ + struct mutex *lockblk; + struct task_struct *task; + + if (!debug_mutex_on) + return 0; + + ti = lock->owner; + if (!ti) + return 0; + + task = ti->task; + lockblk = NULL; + if (task->blocked_on) + lockblk = task->blocked_on->lock; + + /* Self-deadlock: */ + if (current == task) { + DEBUG_OFF(); + if (depth) + return 1; + printk("\n==========================================\n"); + printk( "[ BUG: lock recursion deadlock detected! |\n"); + printk( "------------------------------------------\n"); + report_deadlock(task, lock, NULL, ip); + return 0; + } + + /* Ugh, something corrupted the lock data structure? */ + if (depth > 20) { + DEBUG_OFF(); + printk("\n===========================================\n"); + printk( "[ BUG: infinite lock dependency detected!? |\n"); + printk( "-------------------------------------------\n"); + report_deadlock(task, lock, lockblk, ip); + return 0; + } + + /* Recursively check for dependencies: */ + if (lockblk && check_deadlock(lockblk, depth+1, ti, ip)) { + printk("\n============================================\n"); + printk( "[ BUG: circular locking deadlock detected! 
]\n"); + printk( "--------------------------------------------\n"); + report_deadlock(task, lock, lockblk, ip); + return 0; + } + return 0; +} + +/* + * Called when a task exits, this function checks whether the + * task is holding any locks, and reports the first one if so: + */ +void mutex_debug_check_no_locks_held(struct task_struct *task) +{ + struct list_head *curr, *next; + struct thread_info *t; + unsigned long flags; + struct mutex *lock; + + if (!debug_mutex_on) + return; + + debug_spin_lock_save(&debug_mutex_lock, flags); + list_for_each_safe(curr, next, &debug_mutex_held_locks) { + lock = list_entry(curr, struct mutex, held_list); + t = lock->owner; + if (t != task->thread_info) + continue; + list_del_init(curr); + DEBUG_OFF(); + debug_spin_lock_restore(&debug_mutex_lock, flags); + + printk("BUG: %s/%d, lock held at task exit time!\n", + task->comm, task->pid); + printk_lock(lock, 1); + if (lock->owner != task->thread_info) + printk("exiting task is not even the owner??\n"); + return; + } + debug_spin_lock_restore(&debug_mutex_lock, flags); +} + +/* + * Called when kernel memory is freed (or unmapped), or if a mutex + * is destroyed or reinitialized - this code checks whether there is + * any held lock in the memory range of <from> to <to>: + */ +void mutex_debug_check_no_locks_freed(const void *from, const void *to) +{ + struct list_head *curr, *next; + unsigned long flags; + struct mutex *lock; + void *lock_addr; + + if (!debug_mutex_on) + return; + + debug_spin_lock_save(&debug_mutex_lock, flags); + list_for_each_safe(curr, next, &debug_mutex_held_locks) { + lock = list_entry(curr, struct mutex, held_list); + lock_addr = lock; + if (lock_addr < from || lock_addr >= to) + continue; + list_del_init(curr); + DEBUG_OFF(); + debug_spin_lock_restore(&debug_mutex_lock, flags); + + printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n", + current->comm, current->pid, lock, from, to); + dump_stack(); + printk_lock(lock, 1); + if (lock->owner != current_thread_info()) + 
printk("freeing task is not even the owner??\n"); + return; + } + debug_spin_lock_restore(&debug_mutex_lock, flags); +} + +/* + * Must be called with lock->wait_lock held. + */ +void debug_mutex_set_owner(struct mutex *lock, + struct thread_info *new_owner __IP_DECL__) +{ + lock->owner = new_owner; + DEBUG_WARN_ON(!list_empty(&lock->held_list)); + if (debug_mutex_on) { + list_add_tail(&lock->held_list, &debug_mutex_held_locks); + lock->acquire_ip = ip; + } +} + +void debug_mutex_init_waiter(struct mutex_waiter *waiter) +{ + memset(waiter, 0x11, sizeof(*waiter)); + waiter->magic = waiter; + INIT_LIST_HEAD(&waiter->list); +} + +void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) +{ + SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock)); + DEBUG_WARN_ON(list_empty(&lock->wait_list)); + DEBUG_WARN_ON(waiter->magic != waiter); + DEBUG_WARN_ON(list_empty(&waiter->list)); +} + +void debug_mutex_free_waiter(struct mutex_waiter *waiter) +{ + DEBUG_WARN_ON(!list_empty(&waiter->list)); + memset(waiter, 0x22, sizeof(*waiter)); +} + +void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, + struct thread_info *ti __IP_DECL__) +{ + SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock)); + check_deadlock(lock, 0, ti, ip); + /* Mark the current thread as blocked on the lock: */ + ti->task->blocked_on = waiter; + waiter->lock = lock; +} + +void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, + struct thread_info *ti) +{ + DEBUG_WARN_ON(list_empty(&waiter->list)); + DEBUG_WARN_ON(waiter->task != ti->task); + DEBUG_WARN_ON(ti->task->blocked_on != waiter); + ti->task->blocked_on = NULL; + + list_del_init(&waiter->list); + waiter->task = NULL; +} + +void debug_mutex_unlock(struct mutex *lock) +{ + DEBUG_WARN_ON(lock->magic != lock); + DEBUG_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); + DEBUG_WARN_ON(lock->owner != current_thread_info()); + if (debug_mutex_on) { + DEBUG_WARN_ON(list_empty(&lock->held_list)); 
+ list_del_init(&lock->held_list); + } +} + +void debug_mutex_init(struct mutex *lock, const char *name) +{ + /* + * Make sure we are not reinitializing a held lock: + */ + mutex_debug_check_no_locks_freed((void *)lock, (void *)(lock + 1)); + lock->owner = NULL; + INIT_LIST_HEAD(&lock->held_list); + lock->name = name; + lock->magic = lock; +} + +/*** + * mutex_destroy - mark a mutex unusable + * @lock: the mutex to be destroyed + * + * This function marks the mutex uninitialized, and any subsequent + * use of the mutex is forbidden. The mutex must not be locked when + * this function is called. + */ +void fastcall mutex_destroy(struct mutex *lock) +{ + DEBUG_WARN_ON(mutex_is_locked(lock)); + lock->magic = NULL; +} + +EXPORT_SYMBOL_GPL(mutex_destroy); + diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h new file mode 100644 index 000000000000..fd384050acb1 --- /dev/null +++ b/kernel/mutex-debug.h @@ -0,0 +1,134 @@ +/* + * Mutexes: blocking mutual exclusion locks + * + * started by Ingo Molnar: + * + * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar + * + * This file contains mutex debugging related internal declarations, + * prototypes and inline functions, for the CONFIG_DEBUG_MUTEXES case. + * More details are in kernel/mutex-debug.c. + */ + +extern spinlock_t debug_mutex_lock; +extern struct list_head debug_mutex_held_locks; +extern int debug_mutex_on; + +/* + * In the debug case we carry the caller's instruction pointer into + * other functions, but we dont want the function argument overhead + * in the nondebug case - hence these macros: + */ +#define __IP_DECL__ , unsigned long ip +#define __IP__ , ip +#define __RET_IP__ , (unsigned long)__builtin_return_address(0) + +/* + * This must be called with lock->wait_lock held. 
+ */ +extern void debug_mutex_set_owner(struct mutex *lock, + struct thread_info *new_owner __IP_DECL__); + +static inline void debug_mutex_clear_owner(struct mutex *lock) +{ + lock->owner = NULL; +} + +extern void debug_mutex_init_waiter(struct mutex_waiter *waiter); +extern void debug_mutex_wake_waiter(struct mutex *lock, + struct mutex_waiter *waiter); +extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); +extern void debug_mutex_add_waiter(struct mutex *lock, + struct mutex_waiter *waiter, + struct thread_info *ti __IP_DECL__); +extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, + struct thread_info *ti); +extern void debug_mutex_unlock(struct mutex *lock); +extern void debug_mutex_init(struct mutex *lock, const char *name); + +#define debug_spin_lock(lock) \ + do { \ + local_irq_disable(); \ + if (debug_mutex_on) \ + spin_lock(lock); \ + } while (0) + +#define debug_spin_unlock(lock) \ + do { \ + if (debug_mutex_on) \ + spin_unlock(lock); \ + local_irq_enable(); \ + preempt_check_resched(); \ + } while (0) + +#define debug_spin_lock_save(lock, flags) \ + do { \ + local_irq_save(flags); \ + if (debug_mutex_on) \ + spin_lock(lock); \ + } while (0) + +#define debug_spin_lock_restore(lock, flags) \ + do { \ + if (debug_mutex_on) \ + spin_unlock(lock); \ + local_irq_restore(flags); \ + preempt_check_resched(); \ + } while (0) + +#define spin_lock_mutex(lock) \ + do { \ + struct mutex *l = container_of(lock, struct mutex, wait_lock); \ + \ + DEBUG_WARN_ON(in_interrupt()); \ + debug_spin_lock(&debug_mutex_lock); \ + spin_lock(lock); \ + DEBUG_WARN_ON(l->magic != l); \ + } while (0) + +#define spin_unlock_mutex(lock) \ + do { \ + spin_unlock(lock); \ + debug_spin_unlock(&debug_mutex_lock); \ + } while (0) + +#define DEBUG_OFF() \ +do { \ + if (debug_mutex_on) { \ + debug_mutex_on = 0; \ + console_verbose(); \ + if (spin_is_locked(&debug_mutex_lock)) \ + spin_unlock(&debug_mutex_lock); \ + } \ +} while (0) + +#define 
DEBUG_BUG() \ +do { \ + if (debug_mutex_on) { \ + DEBUG_OFF(); \ + BUG(); \ + } \ +} while (0) + +#define DEBUG_WARN_ON(c) \ +do { \ + if (unlikely(c && debug_mutex_on)) { \ + DEBUG_OFF(); \ + WARN_ON(1); \ + } \ +} while (0) + +# define DEBUG_BUG_ON(c) \ +do { \ + if (unlikely(c)) \ + DEBUG_BUG(); \ +} while (0) + +#ifdef CONFIG_SMP +# define SMP_DEBUG_WARN_ON(c) DEBUG_WARN_ON(c) +# define SMP_DEBUG_BUG_ON(c) DEBUG_BUG_ON(c) +#else +# define SMP_DEBUG_WARN_ON(c) do { } while (0) +# define SMP_DEBUG_BUG_ON(c) do { } while (0) +#endif + diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c48260fb8fd9..1fcd856edec1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -95,6 +95,14 @@ config DEBUG_PREEMPT if kernel code uses it in a preemption-unsafe way. Also, the kernel will detect preemption count underflows. +config DEBUG_MUTEXES + bool "Mutex debugging, deadlock detection" + default y + depends on DEBUG_KERNEL + help + This allows mutex semantics violations and mutex related deadlocks + (lockups) to be detected and reported automatically. + config DEBUG_SPINLOCK bool "Spinlock debugging" depends on DEBUG_KERNEL -- cgit v1.2.3-71-gd317 From de5097c2e73f826302cd8957c225b3725e0c7553 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 9 Jan 2006 15:59:21 -0800 Subject: [PATCH] mutex subsystem, more debugging code more mutex debugging: check for held locks during memory freeing, task exit, enable sysrq printouts, etc. 
Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven --- arch/i386/mm/pageattr.c | 4 ++++ drivers/char/sysrq.c | 19 +++++++++++++++++++ include/linux/mm.h | 4 ++++ kernel/exit.c | 5 +++++ kernel/sched.c | 1 + mm/page_alloc.c | 3 +++ mm/slab.c | 1 + 7 files changed, 37 insertions(+) (limited to 'include/linux') diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index c30a16df6440..e8a53552b13d 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c @@ -222,6 +222,10 @@ void kernel_map_pages(struct page *page, int numpages, int enable) { if (PageHighMem(page)) return; + if (!enable) + mutex_debug_check_no_locks_freed(page_address(page), + page_address(page+numpages)); + /* the return value is ignored - the calls cannot fail, * large pages are disabled at boot time. */ diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 145275ebdd7e..5765f672e853 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -153,6 +153,21 @@ static struct sysrq_key_op sysrq_mountro_op = { /* END SYNC SYSRQ HANDLERS BLOCK */ +#ifdef CONFIG_DEBUG_MUTEXES + +static void +sysrq_handle_showlocks(int key, struct pt_regs *pt_regs, struct tty_struct *tty) +{ + mutex_debug_show_all_locks(); +} + +static struct sysrq_key_op sysrq_showlocks_op = { + .handler = sysrq_handle_showlocks, + .help_msg = "show-all-locks(D)", + .action_msg = "Show Locks Held", +}; + +#endif /* SHOW SYSRQ HANDLERS BLOCK */ @@ -294,7 +309,11 @@ static struct sysrq_key_op *sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = { #else /* c */ NULL, #endif +#ifdef CONFIG_DEBUG_MUTEXES +/* d */ &sysrq_showlocks_op, +#else /* d */ NULL, +#endif /* e */ &sysrq_term_op, /* f */ &sysrq_moom_op, /* g */ NULL, diff --git a/include/linux/mm.h b/include/linux/mm.h index df80e63903b5..3f1fafc0245e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -13,6 +13,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -1024,6 +1025,9 @@ static inline void vm_stat_account(struct 
mm_struct *mm, static inline void kernel_map_pages(struct page *page, int numpages, int enable) { + if (!PageHighMem(page) && !enable) + mutex_debug_check_no_locks_freed(page_address(page), + page_address(page + numpages)); } #endif diff --git a/kernel/exit.c b/kernel/exit.c index caceabf3f230..309a46fa16f8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -869,6 +870,10 @@ fastcall NORET_TYPE void do_exit(long code) mpol_free(tsk->mempolicy); tsk->mempolicy = NULL; #endif + /* + * If DEBUG_MUTEXES is on, make sure we are holding no locks: + */ + mutex_debug_check_no_locks_held(tsk); /* PF_DEAD causes final put_task_struct after we schedule. */ preempt_disable(); diff --git a/kernel/sched.c b/kernel/sched.c index 92733091154c..34a945bcc022 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4386,6 +4386,7 @@ void show_state(void) } while_each_thread(g, p); read_unlock(&tasklist_lock); + mutex_debug_show_all_locks(); } /** diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e0e84924171b..a5e6891f7bb6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -415,6 +415,9 @@ static void __free_pages_ok(struct page *page, unsigned int order) int reserved = 0; arch_free_page(page, order); + if (!PageHighMem(page)) + mutex_debug_check_no_locks_freed(page_address(page), + page_address(page+(1< Date: Mon, 9 Jan 2006 15:59:24 -0800 Subject: [PATCH] mutex subsystem, semaphore to mutex: VFS, ->i_sem This patch converts the inode semaphore to a mutex. I have tested it on XFS and compiled as much as one can consider on an ia64. Anyway your luck with it might be different. 
Modified-by: Ingo Molnar (finished the conversion) Signed-off-by: Jes Sorensen Signed-off-by: Ingo Molnar --- arch/powerpc/platforms/cell/spufs/inode.c | 12 ++--- drivers/block/loop.c | 4 +- drivers/char/mem.c | 4 +- drivers/isdn/capi/capifs.c | 6 +-- drivers/md/dm.c | 4 +- drivers/md/md.c | 8 +-- drivers/pci/proc.c | 4 +- drivers/usb/core/inode.c | 28 +++++------ drivers/usb/gadget/file_storage.c | 4 +- drivers/usb/gadget/inode.c | 4 +- fs/affs/inode.c | 4 +- fs/autofs/root.c | 4 +- fs/autofs4/root.c | 4 +- fs/binfmt_misc.c | 12 ++--- fs/block_dev.c | 4 +- fs/buffer.c | 6 +-- fs/cifs/cifsfs.c | 6 +-- fs/cifs/inode.c | 8 +-- fs/coda/dir.c | 4 +- fs/coda/file.c | 8 +-- fs/configfs/dir.c | 54 ++++++++++---------- fs/configfs/file.c | 4 +- fs/configfs/inode.c | 6 +-- fs/debugfs/inode.c | 8 +-- fs/devfs/base.c | 22 ++++----- fs/devpts/inode.c | 8 +-- fs/direct-io.c | 30 +++++------ fs/dquot.c | 16 +++--- fs/exportfs/expfs.c | 12 ++--- fs/ext2/acl.c | 10 ++-- fs/ext2/ext2.h | 2 +- fs/ext2/super.c | 4 +- fs/ext2/xattr.c | 2 +- fs/ext3/acl.c | 10 ++-- fs/ext3/super.c | 4 +- fs/ext3/xattr.c | 2 +- fs/fat/dir.c | 4 +- fs/fat/file.c | 4 +- fs/fifo.c | 6 +-- fs/fuse/file.c | 4 +- fs/hfs/inode.c | 4 +- fs/hfsplus/bitmap.c | 8 +-- fs/hfsplus/inode.c | 4 +- fs/hpfs/dir.c | 6 +-- fs/hppfs/hppfs_kern.c | 6 +-- fs/hugetlbfs/inode.c | 4 +- fs/inode.c | 2 +- fs/jffs/inode-v23.c | 2 +- fs/jfs/jfs_incore.h | 4 +- fs/libfs.c | 8 +-- fs/namei.c | 82 +++++++++++++++---------------- fs/namespace.c | 12 ++--- fs/nfs/dir.c | 10 ++-- fs/nfsd/nfs4recover.c | 20 ++++---- fs/nfsd/vfs.c | 12 ++--- fs/ntfs/attrib.c | 4 +- fs/ntfs/dir.c | 8 +-- fs/ntfs/file.c | 18 +++---- fs/ntfs/index.c | 6 +-- fs/ntfs/inode.c | 8 +-- fs/ntfs/namei.c | 6 +-- fs/ntfs/quota.c | 6 +-- fs/ntfs/super.c | 16 +++--- fs/ocfs2/alloc.c | 24 ++++----- fs/ocfs2/cluster/nodemanager.c | 2 +- fs/ocfs2/dir.c | 4 +- fs/ocfs2/file.c | 8 +-- fs/ocfs2/inode.c | 12 ++--- fs/ocfs2/journal.c | 14 +++--- fs/ocfs2/localalloc.c | 6 +-- 
fs/open.c | 24 ++++----- fs/pipe.c | 44 ++++++++--------- fs/quota.c | 6 +-- fs/read_write.c | 4 +- fs/readdir.c | 4 +- fs/reiserfs/file.c | 10 ++-- fs/reiserfs/inode.c | 14 +++--- fs/reiserfs/ioctl.c | 4 +- fs/reiserfs/super.c | 4 +- fs/reiserfs/tail_conversion.c | 2 +- fs/reiserfs/xattr.c | 34 ++++++------- fs/reiserfs/xattr_acl.c | 6 +-- fs/relayfs/inode.c | 12 ++--- fs/sysfs/dir.c | 31 ++++++------ fs/sysfs/file.c | 17 +++---- fs/sysfs/inode.c | 8 ++- fs/sysfs/symlink.c | 5 +- fs/ufs/super.c | 6 +-- fs/xattr.c | 8 +-- fs/xfs/linux-2.6/xfs_iops.c | 2 +- fs/xfs/linux-2.6/xfs_lrw.c | 18 +++---- fs/xfs/xfs_dmapi.h | 14 +++--- include/linux/ext3_fs_i.h | 2 +- include/linux/fs.h | 7 +-- include/linux/jffs2_fs_i.h | 4 +- include/linux/nfsd/nfsfh.h | 6 +-- include/linux/pipe_fs_i.h | 2 +- include/linux/reiserfs_fs.h | 2 +- ipc/mqueue.c | 8 +-- kernel/cpuset.c | 10 ++-- mm/filemap.c | 30 +++++------ mm/filemap_xip.c | 6 +-- mm/memory.c | 4 +- mm/msync.c | 2 +- mm/rmap.c | 8 +-- mm/shmem.c | 6 +-- mm/swapfile.c | 8 +-- mm/truncate.c | 2 +- net/sunrpc/rpc_pipe.c | 58 +++++++++++----------- net/unix/af_unix.c | 4 +- security/inode.c | 8 +-- sound/core/oss/pcm_oss.c | 2 - sound/core/seq/seq_memory.c | 4 -- 113 files changed, 563 insertions(+), 573 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 1f3507c75e90..d2ba358c6e38 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -137,7 +137,7 @@ spufs_delete_inode(struct inode *inode) static void spufs_prune_dir(struct dentry *dir) { struct dentry *dentry, *tmp; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) { spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); @@ -154,7 +154,7 @@ static void spufs_prune_dir(struct dentry *dir) } } shrink_dcache_parent(dir); - up(&dir->d_inode->i_sem); + 
mutex_unlock(&dir->d_inode->i_mutex); } static int spufs_rmdir(struct inode *root, struct dentry *dir_dentry) @@ -162,15 +162,15 @@ static int spufs_rmdir(struct inode *root, struct dentry *dir_dentry) struct spu_context *ctx; /* remove all entries */ - down(&root->i_sem); + mutex_lock(&root->i_mutex); spufs_prune_dir(dir_dentry); - up(&root->i_sem); + mutex_unlock(&root->i_mutex); /* We have to give up the mm_struct */ ctx = SPUFS_I(dir_dentry->d_inode)->i_ctx; spu_forget(ctx); - /* XXX Do we need to hold i_sem here ? */ + /* XXX Do we need to hold i_mutex here ? */ return simple_rmdir(root, dir_dentry); } @@ -330,7 +330,7 @@ long spufs_create_thread(struct nameidata *nd, out_dput: dput(dentry); out_dir: - up(&nd->dentry->d_inode->i_sem); + mutex_unlock(&nd->dentry->d_inode->i_mutex); out: return ret; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a452b13620a2..bed9ad76c04c 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -215,7 +215,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, unsigned offset, bv_offs; int len, ret; - down(&mapping->host->i_sem); + mutex_lock(&mapping->host->i_mutex); index = pos >> PAGE_CACHE_SHIFT; offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1); bv_offs = bvec->bv_offset; @@ -278,7 +278,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, } ret = 0; out: - up(&mapping->host->i_sem); + mutex_unlock(&mapping->host->i_mutex); return ret; unlock: unlock_page(page); diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 5b2d18035073..704c3c07f0ab 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -741,7 +741,7 @@ static loff_t memory_lseek(struct file * file, loff_t offset, int orig) { loff_t ret; - down(&file->f_dentry->d_inode->i_sem); + mutex_lock(&file->f_dentry->d_inode->i_mutex); switch (orig) { case 0: file->f_pos = offset; @@ -756,7 +756,7 @@ static loff_t memory_lseek(struct file * file, loff_t offset, int orig) default: ret = -EINVAL; } - 
up(&file->f_dentry->d_inode->i_sem); + mutex_unlock(&file->f_dentry->d_inode->i_mutex); return ret; } diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c index 207cae366256..0a37aded4b54 100644 --- a/drivers/isdn/capi/capifs.c +++ b/drivers/isdn/capi/capifs.c @@ -138,7 +138,7 @@ static struct dentry *get_node(int num) { char s[10]; struct dentry *root = capifs_root; - down(&root->d_inode->i_sem); + mutex_lock(&root->d_inode->i_mutex); return lookup_one_len(s, root, sprintf(s, "%d", num)); } @@ -159,7 +159,7 @@ void capifs_new_ncci(unsigned int number, dev_t device) dentry = get_node(number); if (!IS_ERR(dentry) && !dentry->d_inode) d_instantiate(dentry, inode); - up(&capifs_root->d_inode->i_sem); + mutex_unlock(&capifs_root->d_inode->i_mutex); } void capifs_free_ncci(unsigned int number) @@ -175,7 +175,7 @@ void capifs_free_ncci(unsigned int number) } dput(dentry); } - up(&capifs_root->d_inode->i_sem); + mutex_unlock(&capifs_root->d_inode->i_mutex); } static int __init capifs_init(void) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0e481512f918..5c210b0a4cb0 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -837,9 +837,9 @@ static void __set_size(struct mapped_device *md, sector_t size) { set_capacity(md->disk, size); - down(&md->suspended_bdev->bd_inode->i_sem); + mutex_lock(&md->suspended_bdev->bd_inode->i_mutex); i_size_write(md->suspended_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); - up(&md->suspended_bdev->bd_inode->i_sem); + mutex_unlock(&md->suspended_bdev->bd_inode->i_mutex); } static int __bind(struct mapped_device *md, struct dm_table *t) diff --git a/drivers/md/md.c b/drivers/md/md.c index e423a16ba3c9..0302723fa21f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3460,9 +3460,9 @@ static int update_size(mddev_t *mddev, unsigned long size) bdev = bdget_disk(mddev->gendisk, 0); if (bdev) { - down(&bdev->bd_inode->i_sem); + mutex_lock(&bdev->bd_inode->i_mutex); i_size_write(bdev->bd_inode, mddev->array_size << 10); - 
up(&bdev->bd_inode->i_sem); + mutex_unlock(&bdev->bd_inode->i_mutex); bdput(bdev); } } @@ -3486,9 +3486,9 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks) bdev = bdget_disk(mddev->gendisk, 0); if (bdev) { - down(&bdev->bd_inode->i_sem); + mutex_lock(&bdev->bd_inode->i_mutex); i_size_write(bdev->bd_inode, mddev->array_size << 10); - up(&bdev->bd_inode->i_sem); + mutex_unlock(&bdev->bd_inode->i_mutex); bdput(bdev); } } diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 9eb465727fce..9cb6dd0834be 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -25,7 +25,7 @@ proc_bus_pci_lseek(struct file *file, loff_t off, int whence) loff_t new = -1; struct inode *inode = file->f_dentry->d_inode; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); switch (whence) { case 0: new = off; @@ -41,7 +41,7 @@ proc_bus_pci_lseek(struct file *file, loff_t off, int whence) new = -EINVAL; else file->f_pos = new; - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return new; } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 4ddc453023a2..3cf945cc5b9a 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -184,13 +184,13 @@ static void update_bus(struct dentry *bus) bus->d_inode->i_gid = busgid; bus->d_inode->i_mode = S_IFDIR | busmode; - down(&bus->d_inode->i_sem); + mutex_lock(&bus->d_inode->i_mutex); list_for_each_entry(dev, &bus->d_subdirs, d_u.d_child) if (dev->d_inode) update_dev(dev); - up(&bus->d_inode->i_sem); + mutex_unlock(&bus->d_inode->i_mutex); } static void update_sb(struct super_block *sb) @@ -201,7 +201,7 @@ static void update_sb(struct super_block *sb) if (!root) return; - down(&root->d_inode->i_sem); + mutex_lock(&root->d_inode->i_mutex); list_for_each_entry(bus, &root->d_subdirs, d_u.d_child) { if (bus->d_inode) { @@ -219,7 +219,7 @@ static void update_sb(struct super_block *sb) } } - up(&root->d_inode->i_sem); + mutex_unlock(&root->d_inode->i_mutex); } static int remount(struct super_block *sb, 
int *flags, char *data) @@ -333,10 +333,10 @@ static int usbfs_empty (struct dentry *dentry) static int usbfs_unlink (struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); dentry->d_inode->i_nlink--; dput(dentry); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); d_delete(dentry); return 0; } @@ -346,7 +346,7 @@ static int usbfs_rmdir(struct inode *dir, struct dentry *dentry) int error = -ENOTEMPTY; struct inode * inode = dentry->d_inode; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); dentry_unhash(dentry); if (usbfs_empty(dentry)) { dentry->d_inode->i_nlink -= 2; @@ -355,7 +355,7 @@ static int usbfs_rmdir(struct inode *dir, struct dentry *dentry) dir->i_nlink--; error = 0; } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); if (!error) d_delete(dentry); dput(dentry); @@ -380,7 +380,7 @@ static loff_t default_file_lseek (struct file *file, loff_t offset, int orig) { loff_t retval = -EINVAL; - down(&file->f_dentry->d_inode->i_sem); + mutex_lock(&file->f_dentry->d_inode->i_mutex); switch(orig) { case 0: if (offset > 0) { @@ -397,7 +397,7 @@ static loff_t default_file_lseek (struct file *file, loff_t offset, int orig) default: break; } - up(&file->f_dentry->d_inode->i_sem); + mutex_unlock(&file->f_dentry->d_inode->i_mutex); return retval; } @@ -480,7 +480,7 @@ static int fs_create_by_name (const char *name, mode_t mode, } *dentry = NULL; - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); *dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(dentry)) { if ((mode & S_IFMT) == S_IFDIR) @@ -489,7 +489,7 @@ static int fs_create_by_name (const char *name, mode_t mode, error = usbfs_create (parent->d_inode, *dentry, mode); } else error = PTR_ERR(dentry); - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); return error; } @@ -528,7 +528,7 @@ static void fs_remove_file (struct dentry *dentry) if (!parent || !parent->d_inode) 
return; - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); if (usbfs_positive(dentry)) { if (dentry->d_inode) { if (S_ISDIR(dentry->d_inode->i_mode)) @@ -538,7 +538,7 @@ static void fs_remove_file (struct dentry *dentry) dput(dentry); } } - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); } /* --------------------------------------------------------------------- */ diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c index 0cea9782d7d4..de59c58896d6 100644 --- a/drivers/usb/gadget/file_storage.c +++ b/drivers/usb/gadget/file_storage.c @@ -1891,7 +1891,7 @@ static int fsync_sub(struct lun *curlun) return -EINVAL; inode = filp->f_dentry->d_inode; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); current->flags |= PF_SYNCWRITE; rc = filemap_fdatawrite(inode->i_mapping); err = filp->f_op->fsync(filp, filp->f_dentry, 1); @@ -1901,7 +1901,7 @@ static int fsync_sub(struct lun *curlun) if (!rc) rc = err; current->flags &= ~PF_SYNCWRITE; - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); VLDBG(curlun, "fdatasync -> %d\n", rc); return rc; } diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 5c40980a5bd9..c6c279de832e 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -1562,10 +1562,10 @@ restart: spin_unlock_irq (&dev->lock); /* break link to dcache */ - down (&parent->i_sem); + mutex_lock (&parent->i_mutex); d_delete (dentry); dput (dentry); - up (&parent->i_sem); + mutex_unlock (&parent->i_mutex); /* fds may still be open */ goto restart; diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 9ebe881c6786..44d439cb69f4 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -244,10 +244,10 @@ affs_put_inode(struct inode *inode) pr_debug("AFFS: put_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); affs_free_prealloc(inode); if (atomic_read(&inode->i_count) == 1) { - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (inode->i_size != 
AFFS_I(inode)->mmu_private) affs_truncate(inode); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); } } diff --git a/fs/autofs/root.c b/fs/autofs/root.c index a1ab1c0ed215..808134a5a2fa 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -229,9 +229,9 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr dentry->d_flags |= DCACHE_AUTOFS_PENDING; d_add(dentry, NULL); - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); autofs_revalidate(dentry, nd); - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); /* * If we are still pending, check if we had to handle diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 2241405ffc41..541b19e6fec9 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -489,9 +489,9 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s d_add(dentry, NULL); if (dentry->d_op && dentry->d_op->d_revalidate) { - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); (dentry->d_op->d_revalidate)(dentry, nd); - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); } /* diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 2568eb41cb3a..9ccc7d8275b8 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -588,11 +588,11 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer, case 2: set_bit(Enabled, &e->flags); break; case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root); - down(&root->d_inode->i_sem); + mutex_lock(&root->d_inode->i_mutex); kill_node(e); - up(&root->d_inode->i_sem); + mutex_unlock(&root->d_inode->i_mutex); dput(root); break; default: return res; @@ -622,7 +622,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, return PTR_ERR(e); root = dget(sb->s_root); - down(&root->d_inode->i_sem); + mutex_lock(&root->d_inode->i_mutex); dentry = lookup_one_len(e->name, root, strlen(e->name)); err = PTR_ERR(dentry); if (IS_ERR(dentry)) @@ -658,7 +658,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, out2: 
dput(dentry); out: - up(&root->d_inode->i_sem); + mutex_unlock(&root->d_inode->i_mutex); dput(root); if (err) { @@ -703,12 +703,12 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer, case 1: enabled = 0; break; case 2: enabled = 1; break; case 3: root = dget(file->f_vfsmnt->mnt_sb->s_root); - down(&root->d_inode->i_sem); + mutex_lock(&root->d_inode->i_mutex); while (!list_empty(&entries)) kill_node(list_entry(entries.next, Node, list)); - up(&root->d_inode->i_sem); + mutex_unlock(&root->d_inode->i_mutex); dput(root); default: return res; } diff --git a/fs/block_dev.c b/fs/block_dev.c index e0df94c37b7e..6e50346fb1ee 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -202,7 +202,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin) loff_t size; loff_t retval; - down(&bd_inode->i_sem); + mutex_lock(&bd_inode->i_mutex); size = i_size_read(bd_inode); switch (origin) { @@ -219,7 +219,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin) } retval = offset; } - up(&bd_inode->i_sem); + mutex_unlock(&bd_inode->i_mutex); return retval; } diff --git a/fs/buffer.c b/fs/buffer.c index 55f0975a9b15..6466bc8a3dc7 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -352,11 +352,11 @@ static long do_fsync(unsigned int fd, int datasync) * We need to protect against concurrent writers, * which could cause livelocks in fsync_buffers_list */ - down(&mapping->host->i_sem); + mutex_lock(&mapping->host->i_mutex); err = file->f_op->fsync(file, file->f_dentry, datasync); if (!ret) ret = err; - up(&mapping->host->i_sem); + mutex_unlock(&mapping->host->i_mutex); err = filemap_fdatawait(mapping); if (!ret) ret = err; @@ -2338,7 +2338,7 @@ int generic_commit_write(struct file *file, struct page *page, __block_commit_write(inode,page,from,to); /* * No need to use i_size_read() here, the i_size - * cannot change under us because we hold i_sem. + * cannot change under us because we hold i_mutex. 
*/ if (pos > inode->i_size) { i_size_write(inode, pos); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 2a13a2bac8f1..e10213b7541e 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -860,9 +860,9 @@ static int cifs_oplock_thread(void * dummyarg) DeleteOplockQEntry(oplock_item); /* can not grab inode sem here since it would deadlock when oplock received on delete - since vfs_unlink holds the i_sem across + since vfs_unlink holds the i_mutex across the call */ - /* down(&inode->i_sem);*/ + /* mutex_lock(&inode->i_mutex);*/ if (S_ISREG(inode->i_mode)) { rc = filemap_fdatawrite(inode->i_mapping); if(CIFS_I(inode)->clientCanCacheRead == 0) { @@ -871,7 +871,7 @@ static int cifs_oplock_thread(void * dummyarg) } } else rc = 0; - /* up(&inode->i_sem);*/ + /* mutex_unlock(&inode->i_mutex);*/ if (rc) CIFS_I(inode)->write_behind_rc = rc; cFYI(1,("Oplock flush inode %p rc %d",inode,rc)); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9558f51bca55..3ebce9430f4a 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1040,9 +1040,9 @@ int cifs_revalidate(struct dentry *direntry) } /* can not grab this sem since kernel filesys locking documentation - indicates i_sem may be taken by the kernel on lookup and rename - which could deadlock if we grab the i_sem here as well */ -/* down(&direntry->d_inode->i_sem);*/ + indicates i_mutex may be taken by the kernel on lookup and rename + which could deadlock if we grab the i_mutex here as well */ +/* mutex_lock(&direntry->d_inode->i_mutex);*/ /* need to write out dirty pages here */ if (direntry->d_inode->i_mapping) { /* do we need to lock inode until after invalidate completes @@ -1066,7 +1066,7 @@ int cifs_revalidate(struct dentry *direntry) } } } -/* up(&direntry->d_inode->i_sem); */ +/* mutex_unlock(&direntry->d_inode->i_mutex); */ kfree(full_path); FreeXid(xid); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 2391766e9c7c..8f1a517f8b4e 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -453,7 +453,7 @@ int 
coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir) coda_vfs_stat.readdir++; host_inode = host_file->f_dentry->d_inode; - down(&host_inode->i_sem); + mutex_lock(&host_inode->i_mutex); host_file->f_pos = coda_file->f_pos; if (!host_file->f_op->readdir) { @@ -475,7 +475,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir) } out: coda_file->f_pos = host_file->f_pos; - up(&host_inode->i_sem); + mutex_unlock(&host_inode->i_mutex); return ret; } diff --git a/fs/coda/file.c b/fs/coda/file.c index e6bc022568f3..30b4630bd735 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -77,14 +77,14 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo return -EINVAL; host_inode = host_file->f_dentry->d_inode; - down(&coda_inode->i_sem); + mutex_lock(&coda_inode->i_mutex); ret = host_file->f_op->write(host_file, buf, count, ppos); coda_inode->i_size = host_inode->i_size; coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9; coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC; - up(&coda_inode->i_sem); + mutex_unlock(&coda_inode->i_mutex); return ret; } @@ -272,9 +272,9 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync) if (host_file->f_op && host_file->f_op->fsync) { host_dentry = host_file->f_dentry; host_inode = host_dentry->d_inode; - down(&host_inode->i_sem); + mutex_lock(&host_inode->i_mutex); err = host_file->f_op->fsync(host_file, host_dentry, datasync); - up(&host_inode->i_sem); + mutex_unlock(&host_inode->i_mutex); } if ( !err && !datasync ) { diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index e48b539243a1..b668ec61527e 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -288,10 +288,10 @@ static struct dentry * configfs_lookup(struct inode *dir, /* * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are - * attributes and are removed by rmdir(). We recurse, taking i_sem + * attributes and are removed by rmdir(). 
We recurse, taking i_mutex * on all children that are candidates for default detach. If the * result is clean, then configfs_detach_group() will handle dropping - * i_sem. If there is an error, the caller will clean up the i_sem + * i_mutex. If there is an error, the caller will clean up the i_mutex * holders via configfs_detach_rollback(). */ static int configfs_detach_prep(struct dentry *dentry) @@ -309,8 +309,8 @@ static int configfs_detach_prep(struct dentry *dentry) if (sd->s_type & CONFIGFS_NOT_PINNED) continue; if (sd->s_type & CONFIGFS_USET_DEFAULT) { - down(&sd->s_dentry->d_inode->i_sem); - /* Mark that we've taken i_sem */ + mutex_lock(&sd->s_dentry->d_inode->i_mutex); + /* Mark that we've taken i_mutex */ sd->s_type |= CONFIGFS_USET_DROPPING; ret = configfs_detach_prep(sd->s_dentry); @@ -327,7 +327,7 @@ out: } /* - * Walk the tree, dropping i_sem wherever CONFIGFS_USET_DROPPING is + * Walk the tree, dropping i_mutex wherever CONFIGFS_USET_DROPPING is * set. */ static void configfs_detach_rollback(struct dentry *dentry) @@ -341,7 +341,7 @@ static void configfs_detach_rollback(struct dentry *dentry) if (sd->s_type & CONFIGFS_USET_DROPPING) { sd->s_type &= ~CONFIGFS_USET_DROPPING; - up(&sd->s_dentry->d_inode->i_sem); + mutex_unlock(&sd->s_dentry->d_inode->i_mutex); } } } @@ -424,11 +424,11 @@ static void detach_groups(struct config_group *group) /* * From rmdir/unregister, a configfs_detach_prep() pass - * has taken our i_sem for us. Drop it. + * has taken our i_mutex for us. Drop it. * From mkdir/register cleanup, there is no sem held. */ if (sd->s_type & CONFIGFS_USET_DROPPING) - up(&child->d_inode->i_sem); + mutex_unlock(&child->d_inode->i_mutex); d_delete(child); dput(child); @@ -493,11 +493,11 @@ static int populate_groups(struct config_group *group) /* FYI, we're faking mkdir here * I'm not sure we need this semaphore, as we're called * from our parent's mkdir. 
That holds our parent's - * i_sem, so afaik lookup cannot continue through our + * i_mutex, so afaik lookup cannot continue through our * parent to find us, let alone mess with our tree. - * That said, taking our i_sem is closer to mkdir + * That said, taking our i_mutex is closer to mkdir * emulation, and shouldn't hurt. */ - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); for (i = 0; group->default_groups[i]; i++) { new_group = group->default_groups[i]; @@ -507,7 +507,7 @@ static int populate_groups(struct config_group *group) break; } - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); } if (ret) @@ -856,7 +856,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name) down_write(&configfs_rename_sem); parent = item->parent->dentry; - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); if (!IS_ERR(new_dentry)) { @@ -872,7 +872,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name) error = -EEXIST; dput(new_dentry); } - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); up_write(&configfs_rename_sem); return error; @@ -884,9 +884,9 @@ static int configfs_dir_open(struct inode *inode, struct file *file) struct dentry * dentry = file->f_dentry; struct configfs_dirent * parent_sd = dentry->d_fsdata; - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); file->private_data = configfs_new_dirent(parent_sd, NULL); - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); return file->private_data ? 
0 : -ENOMEM; @@ -897,9 +897,9 @@ static int configfs_dir_close(struct inode *inode, struct file *file) struct dentry * dentry = file->f_dentry; struct configfs_dirent * cursor = file->private_data; - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); list_del_init(&cursor->s_sibling); - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); release_configfs_dirent(cursor); @@ -975,7 +975,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin) { struct dentry * dentry = file->f_dentry; - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); switch (origin) { case 1: offset += file->f_pos; @@ -983,7 +983,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin) if (offset >= 0) break; default: - up(&file->f_dentry->d_inode->i_sem); + mutex_unlock(&file->f_dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { @@ -1007,7 +1007,7 @@ static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin) list_add_tail(&cursor->s_sibling, p); } } - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); return offset; } @@ -1037,7 +1037,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) sd = configfs_sb->s_root->d_fsdata; link_group(to_config_group(sd->s_element), group); - down(&configfs_sb->s_root->d_inode->i_sem); + mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); name.name = group->cg_item.ci_name; name.len = strlen(name.name); @@ -1057,7 +1057,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) else d_delete(dentry); - up(&configfs_sb->s_root->d_inode->i_sem); + mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); if (dentry) { dput(dentry); @@ -1079,18 +1079,18 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) return; } - down(&configfs_sb->s_root->d_inode->i_sem); - down(&dentry->d_inode->i_sem); + 
mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); + mutex_lock(&dentry->d_inode->i_mutex); if (configfs_detach_prep(dentry)) { printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); } configfs_detach_group(&group->cg_item); dentry->d_inode->i_flags |= S_DEAD; - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); d_delete(dentry); - up(&configfs_sb->s_root->d_inode->i_sem); + mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); dput(dentry); diff --git a/fs/configfs/file.c b/fs/configfs/file.c index af1ffc9a15c0..c26cd61f13af 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -336,9 +336,9 @@ int configfs_add_file(struct dentry * dir, const struct configfs_attribute * att umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG; int error = 0; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type); - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); return error; } diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 6b274c6d428f..6577c588de9d 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -122,7 +122,7 @@ const unsigned char * configfs_get_name(struct configfs_dirent *sd) /* * Unhashes the dentry corresponding to given configfs_dirent - * Called with parent inode's i_sem held. + * Called with parent inode's i_mutex held. 
*/ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) { @@ -145,7 +145,7 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name) struct configfs_dirent * sd; struct configfs_dirent * parent_sd = dir->d_fsdata; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { if (!sd->s_element) continue; @@ -156,7 +156,7 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name) break; } } - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index a86ac4aeaedb..d4f1a2cddd47 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -146,7 +146,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode, } *dentry = NULL; - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); *dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(dentry)) { if ((mode & S_IFMT) == S_IFDIR) @@ -155,7 +155,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode, error = debugfs_create(parent->d_inode, *dentry, mode); } else error = PTR_ERR(dentry); - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); return error; } @@ -273,7 +273,7 @@ void debugfs_remove(struct dentry *dentry) if (!parent || !parent->d_inode) return; - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); if (debugfs_positive(dentry)) { if (dentry->d_inode) { if (S_ISDIR(dentry->d_inode->i_mode)) @@ -283,7 +283,7 @@ void debugfs_remove(struct dentry *dentry) dput(dentry); } } - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); simple_release_fs(&debugfs_mount, &debugfs_mount_count); } EXPORT_SYMBOL_GPL(debugfs_remove); diff --git a/fs/devfs/base.c b/fs/devfs/base.c index 1274422a5384..b621521e09d4 100644 --- a/fs/devfs/base.c +++ b/fs/devfs/base.c @@ -2162,27 +2162,27 @@ static int devfs_d_revalidate_wait(struct 
dentry *dentry, struct nameidata *nd) * * make sure that * d_instantiate always runs under lock - * we release i_sem lock before going to sleep + * we release i_mutex lock before going to sleep * * unfortunately sometimes d_revalidate is called with - * and sometimes without i_sem lock held. The following checks + * and sometimes without i_mutex lock held. The following checks * attempt to deduce when we need to add (and drop resp.) lock * here. This relies on current (2.6.2) calling coventions: * - * lookup_hash is always run under i_sem and is passing NULL + * lookup_hash is always run under i_mutex and is passing NULL * as nd * - * open(...,O_CREATE,...) calls _lookup_hash under i_sem + * open(...,O_CREATE,...) calls _lookup_hash under i_mutex * and sets flags to LOOKUP_OPEN|LOOKUP_CREATE * * all other invocations of ->d_revalidate seem to happen - * outside of i_sem + * outside of i_mutex */ need_lock = nd && (!(nd->flags & LOOKUP_CREATE) || (nd->flags & LOOKUP_PARENT)); if (need_lock) - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); if (is_devfsd_or_child(fs_info)) { devfs_handle_t de = lookup_info->de; @@ -2221,9 +2221,9 @@ static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd) add_wait_queue(&lookup_info->wait_queue, &wait); read_unlock(&parent->u.dir.lock); /* at this point it is always (hopefully) locked */ - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); schedule(); - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); /* * This does not need nor should remove wait from wait_queue. * Wait queue head is never reused - nothing is ever added to it @@ -2238,7 +2238,7 @@ static int devfs_d_revalidate_wait(struct dentry *dentry, struct nameidata *nd) out: if (need_lock) - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); return 1; } /* End Function devfs_d_revalidate_wait */ @@ -2284,9 +2284,9 @@ static struct dentry *devfs_lookup(struct inode *dir, struct dentry *dentry, /* Unlock directory semaphore, which will release any waiters. 
They will get the hashed dentry, and may be forced to wait for revalidation */ - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); wait_for_devfsd_finished(fs_info); /* If I'm not devfsd, must wait */ - down(&dir->i_sem); /* Grab it again because them's the rules */ + mutex_lock(&dir->i_mutex); /* Grab it again because them's the rules */ de = lookup_info.de; /* If someone else has been so kind as to make the inode, we go home early */ diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index f2be44d4491f..bfb8a230bac9 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -130,7 +130,7 @@ static struct dentry *get_node(int num) { char s[12]; struct dentry *root = devpts_root; - down(&root->d_inode->i_sem); + mutex_lock(&root->d_inode->i_mutex); return lookup_one_len(s, root, sprintf(s, "%d", num)); } @@ -161,7 +161,7 @@ int devpts_pty_new(struct tty_struct *tty) if (!IS_ERR(dentry) && !dentry->d_inode) d_instantiate(dentry, inode); - up(&devpts_root->d_inode->i_sem); + mutex_unlock(&devpts_root->d_inode->i_mutex); return 0; } @@ -178,7 +178,7 @@ struct tty_struct *devpts_get_tty(int number) dput(dentry); } - up(&devpts_root->d_inode->i_sem); + mutex_unlock(&devpts_root->d_inode->i_mutex); return tty; } @@ -196,7 +196,7 @@ void devpts_pty_kill(int number) } dput(dentry); } - up(&devpts_root->d_inode->i_sem); + mutex_unlock(&devpts_root->d_inode->i_mutex); } static int __init init_devpts_fs(void) diff --git a/fs/direct-io.c b/fs/direct-io.c index 3931e7f1e6bf..30dbbd1df511 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -56,7 +56,7 @@ * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems. * This determines whether we need to do the fancy locking which prevents * direct-IO from being able to read uninitialised disk blocks. 
If its zero - * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_sem is + * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_mutex is * not held for the entire direct write (taken briefly, initially, during a * direct read though, but its never held for the duration of a direct-IO). */ @@ -930,7 +930,7 @@ out: } /* - * Releases both i_sem and i_alloc_sem + * Releases both i_mutex and i_alloc_sem */ static ssize_t direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, @@ -1062,11 +1062,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, /* * All block lookups have been performed. For READ requests - * we can let i_sem go now that its achieved its purpose + * we can let i_mutex go now that its achieved its purpose * of protecting us from looking up uninitialized blocks. */ if ((rw == READ) && (dio->lock_type == DIO_LOCKING)) - up(&dio->inode->i_sem); + mutex_unlock(&dio->inode->i_mutex); /* * OK, all BIOs are submitted, so we can decrement bio_count to truly @@ -1145,18 +1145,18 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, * The locking rules are governed by the dio_lock_type parameter. * * DIO_NO_LOCKING (no locking, for raw block device access) - * For writes, i_sem is not held on entry; it is never taken. + * For writes, i_mutex is not held on entry; it is never taken. * * DIO_LOCKING (simple locking for regular files) - * For writes we are called under i_sem and return with i_sem held, even though + * For writes we are called under i_mutex and return with i_mutex held, even though * it is internally dropped. - * For reads, i_sem is not held on entry, but it is taken and dropped before + * For reads, i_mutex is not held on entry, but it is taken and dropped before * returning. 
* * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of * uninitialised data, allowing parallel direct readers and writers) - * For writes we are called without i_sem, return without it, never touch it. - * For reads, i_sem is held on entry and will be released before returning. + * For writes we are called without i_mutex, return without it, never touch it. + * For reads, i_mutex is held on entry and will be released before returning. * * Additional i_alloc_sem locking requirements described inline below. */ @@ -1214,11 +1214,11 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, * For block device access DIO_NO_LOCKING is used, * neither readers nor writers do any locking at all * For regular files using DIO_LOCKING, - * readers need to grab i_sem and i_alloc_sem - * writers need to grab i_alloc_sem only (i_sem is already held) + * readers need to grab i_mutex and i_alloc_sem + * writers need to grab i_alloc_sem only (i_mutex is already held) * For regular files using DIO_OWN_LOCKING, * neither readers nor writers take any locks here - * (i_sem is already held and release for writers here) + * (i_mutex is already held and release for writers here) */ dio->lock_type = dio_lock_type; if (dio_lock_type != DIO_NO_LOCKING) { @@ -1228,7 +1228,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, mapping = iocb->ki_filp->f_mapping; if (dio_lock_type != DIO_OWN_LOCKING) { - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); reader_with_isem = 1; } @@ -1240,7 +1240,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, } if (dio_lock_type == DIO_OWN_LOCKING) { - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); reader_with_isem = 0; } } @@ -1266,7 +1266,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, out: if (reader_with_isem) - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); if (rw & WRITE) current->flags &= ~PF_SYNCWRITE; return retval; diff --git 
a/fs/dquot.c b/fs/dquot.c index 2a62b3dc20ec..cb6d5bfbdfd5 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -100,7 +100,7 @@ * operation is just reading pointers from inode (or not using them at all) the * read lock is enough. If pointers are altered function must hold write lock * (these locking rules also apply for S_NOQUOTA flag in the inode - note that - * for altering the flag i_sem is also needed). If operation is holding + * for altering the flag i_mutex is also needed). If operation is holding * reference to dquot in other way (e.g. quotactl ops) it must be guarded by * dqonoff_sem. * This locking assures that: @@ -117,9 +117,9 @@ * spinlock to internal buffers before writing. * * Lock ordering (including related VFS locks) is the following: - * i_sem > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem > + * i_mutex > dqonoff_sem > iprune_sem > journal_lock > dqptr_sem > * > dquot->dq_lock > dqio_sem - * i_sem on quota files is special (it's below dqio_sem) + * i_mutex on quota files is special (it's below dqio_sem) */ static DEFINE_SPINLOCK(dq_list_lock); @@ -1369,11 +1369,11 @@ int vfs_quota_off(struct super_block *sb, int type) /* If quota was reenabled in the meantime, we have * nothing to do */ if (!sb_has_quota_enabled(sb, cnt)) { - down(&toputinode[cnt]->i_sem); + mutex_lock(&toputinode[cnt]->i_mutex); toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | S_NOATIME | S_NOQUOTA); truncate_inode_pages(&toputinode[cnt]->i_data, 0); - up(&toputinode[cnt]->i_sem); + mutex_unlock(&toputinode[cnt]->i_mutex); mark_inode_dirty(toputinode[cnt]); iput(toputinode[cnt]); } @@ -1417,7 +1417,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id) write_inode_now(inode, 1); /* And now flush the block cache so that kernel sees the changes */ invalidate_bdev(sb->s_bdev, 0); - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); down(&dqopt->dqonoff_sem); if (sb_has_quota_enabled(sb, type)) { error = -EBUSY; @@ -1449,7 +1449,7 @@ static int 
vfs_quota_on_inode(struct inode *inode, int type, int format_id) goto out_file_init; } up(&dqopt->dqio_sem); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); set_enable_flags(dqopt, type); add_dquot_ref(sb, type); @@ -1470,7 +1470,7 @@ out_lock: inode->i_flags |= oldflags; up_write(&dqopt->dqptr_sem); } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); out_fmt: put_quota_format(fmt); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index c49d6254379a..5bfe40085fbc 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -177,9 +177,9 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, struct dentry *ppd; struct dentry *npd; - down(&pd->d_inode->i_sem); + mutex_lock(&pd->d_inode->i_mutex); ppd = CALL(nops,get_parent)(pd); - up(&pd->d_inode->i_sem); + mutex_unlock(&pd->d_inode->i_mutex); if (IS_ERR(ppd)) { err = PTR_ERR(ppd); @@ -201,9 +201,9 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, break; } dprintk("find_exported_dentry: found name: %s\n", nbuf); - down(&ppd->d_inode->i_sem); + mutex_lock(&ppd->d_inode->i_mutex); npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); - up(&ppd->d_inode->i_sem); + mutex_unlock(&ppd->d_inode->i_mutex); if (IS_ERR(npd)) { err = PTR_ERR(npd); dprintk("find_exported_dentry: lookup failed: %d\n", err); @@ -242,9 +242,9 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, struct dentry *nresult; err = CALL(nops,get_name)(target_dir, nbuf, result); if (!err) { - down(&target_dir->d_inode->i_sem); + mutex_lock(&target_dir->d_inode->i_mutex); nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf)); - up(&target_dir->d_inode->i_sem); + mutex_unlock(&target_dir->d_inode->i_mutex); if (!IS_ERR(nresult)) { if (nresult->d_inode) { dput(result); diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 6af2f4130290..239133d01d91 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -149,7 +149,7 @@ ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl, } /* - * 
inode->i_sem: don't care + * inode->i_mutex: don't care */ static struct posix_acl * ext2_get_acl(struct inode *inode, int type) @@ -211,7 +211,7 @@ ext2_get_acl(struct inode *inode, int type) } /* - * inode->i_sem: down + * inode->i_mutex: down */ static int ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) @@ -301,8 +301,8 @@ ext2_permission(struct inode *inode, int mask, struct nameidata *nd) /* * Initialize the ACLs of a new inode. Called from ext2_new_inode. * - * dir->i_sem: down - * inode->i_sem: up (access to inode is still exclusive) + * dir->i_mutex: down + * inode->i_mutex: up (access to inode is still exclusive) */ int ext2_init_acl(struct inode *inode, struct inode *dir) @@ -361,7 +361,7 @@ cleanup: * for directories) are added. There are no more bits available in the * file mode. * - * inode->i_sem: down + * inode->i_mutex: down */ int ext2_acl_chmod(struct inode *inode) diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index e977f8566d14..00de0a7312a2 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -53,7 +53,7 @@ struct ext2_inode_info { #ifdef CONFIG_EXT2_FS_XATTR /* * Extended attributes can be read independently of the main file - * data. Taking i_sem even when reading would cause contention + * data. Taking i_mutex even when reading would cause contention * between readers of EAs and writers of regular file data, so * instead we synchronize on xattr_sem when reading or changing * EAs. diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 522fa70dd8ea..8d6819846fc9 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1152,7 +1152,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, struct buffer_head tmp_bh; struct buffer_head *bh; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? 
sb->s_blocksize - offset : towrite; @@ -1189,7 +1189,7 @@ out: inode->i_version++; inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return len - towrite; } diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 0099462d4271..f7a3b5fee274 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -325,7 +325,7 @@ cleanup: /* * Inode operation listxattr() * - * dentry->d_inode->i_sem: don't care + * dentry->d_inode->i_mutex: don't care */ ssize_t ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 3ac38266fc9e..9ed132c96034 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -152,7 +152,7 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl, /* * Inode operation get_posix_acl(). * - * inode->i_sem: don't care + * inode->i_mutex: don't care */ static struct posix_acl * ext3_get_acl(struct inode *inode, int type) @@ -216,7 +216,7 @@ ext3_get_acl(struct inode *inode, int type) /* * Set the access or default ACL of an inode. * - * inode->i_sem: down unless called from ext3_new_inode + * inode->i_mutex: down unless called from ext3_new_inode */ static int ext3_set_acl(handle_t *handle, struct inode *inode, int type, @@ -306,8 +306,8 @@ ext3_permission(struct inode *inode, int mask, struct nameidata *nd) /* * Initialize the ACLs of a new inode. Called from ext3_new_inode. * - * dir->i_sem: down - * inode->i_sem: up (access to inode is still exclusive) + * dir->i_mutex: down + * inode->i_mutex: up (access to inode is still exclusive) */ int ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) @@ -368,7 +368,7 @@ cleanup: * for directories) are added. There are no more bits available in the * file mode. 
* - * inode->i_sem: down + * inode->i_mutex: down */ int ext3_acl_chmod(struct inode *inode) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 7c45acf94589..c3dbebdb9897 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2601,7 +2601,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, struct buffer_head *bh; handle_t *handle = journal_current_handle(); - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; @@ -2644,7 +2644,7 @@ out: inode->i_version++; inode->i_mtime = inode->i_ctime = CURRENT_TIME; ext3_mark_inode_dirty(handle, inode); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return len - towrite; } diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 430de9f63be3..238199d82ce5 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -140,7 +140,7 @@ ext3_xattr_handler(int name_index) /* * Inode operation listxattr() * - * dentry->d_inode->i_sem: don't care + * dentry->d_inode->i_mutex: don't care */ ssize_t ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index eef1b81aa294..db0de5c621c7 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -729,13 +729,13 @@ static int fat_dir_ioctl(struct inode * inode, struct file * filp, buf.dirent = d1; buf.result = 0; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); ret = -ENOENT; if (!IS_DEADDIR(inode)) { ret = __fat_readdir(inode, filp, &buf, fat_ioctl_filldir, short_only, both); } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); if (ret >= 0) ret = buf.result; return ret; diff --git a/fs/fat/file.c b/fs/fat/file.c index 9b07c328a6fc..d30876cf35f5 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -41,7 +41,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, if (err) return err; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (IS_RDONLY(inode)) { err = -EROFS; @@ -103,7 +103,7 @@ int 
fat_generic_ioctl(struct inode *inode, struct file *filp, MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; mark_inode_dirty(inode); up: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return err; } default: diff --git a/fs/fifo.c b/fs/fifo.c index 5455916241f0..923371b753ab 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -35,7 +35,7 @@ static int fifo_open(struct inode *inode, struct file *filp) int ret; ret = -ERESTARTSYS; - if (down_interruptible(PIPE_SEM(*inode))) + if (mutex_lock_interruptible(PIPE_MUTEX(*inode))) goto err_nolock_nocleanup; if (!inode->i_pipe) { @@ -119,7 +119,7 @@ static int fifo_open(struct inode *inode, struct file *filp) } /* Ok! */ - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return 0; err_rd: @@ -139,7 +139,7 @@ err: free_pipe_info(inode); err_nocleanup: - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); err_nolock_nocleanup: return ret; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 05dedddf4289..63d2980df5c9 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -560,9 +560,9 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, struct inode *inode = file->f_dentry->d_inode; ssize_t res; /* Don't allow parallel writes to the same file */ - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); res = fuse_direct_io(file, buf, count, ppos, 1); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return res; } diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index d499393a8ae7..050a49276499 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -547,13 +547,13 @@ static int hfs_file_release(struct inode *inode, struct file *file) if (atomic_read(&file->f_count) != 0) return 0; if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) { - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); hfs_file_truncate(inode); //if (inode->i_flags & S_DEAD) { // hfs_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL); // hfs_delete_inode(inode); //} - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); } return 0; } 
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index c7d316455fa0..9fb51632303c 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c @@ -29,7 +29,7 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma return size; dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); - down(&HFSPLUS_SB(sb).alloc_file->i_sem); + mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; page = read_cache_page(mapping, offset / PAGE_CACHE_BITS, (filler_t *)mapping->a_ops->readpage, NULL); @@ -143,7 +143,7 @@ done: sb->s_dirt = 1; dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); out: - up(&HFSPLUS_SB(sb).alloc_file->i_sem); + mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex); return start; } @@ -164,7 +164,7 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) if ((offset + count) > HFSPLUS_SB(sb).total_blocks) return -2; - down(&HFSPLUS_SB(sb).alloc_file->i_sem); + mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; pnr = offset / PAGE_CACHE_BITS; page = read_cache_page(mapping, pnr, (filler_t *)mapping->a_ops->readpage, NULL); @@ -215,7 +215,7 @@ out: kunmap(page); HFSPLUS_SB(sb).free_blocks += len; sb->s_dirt = 1; - up(&HFSPLUS_SB(sb).alloc_file->i_sem); + mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex); return 0; } diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index fc98583cf045..983bcd02ac1c 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -276,13 +276,13 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) if (atomic_read(&file->f_count) != 0) return 0; if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) { - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); hfsplus_file_truncate(inode); if (inode->i_flags & S_DEAD) { hfsplus_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL); hfsplus_delete_inode(inode); } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); } return 0; 
} diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 0217c3a04441..5591f9623aa2 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -32,19 +32,19 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence) /*printk("dir lseek\n");*/ if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok; - down(&i->i_sem); + mutex_lock(&i->i_mutex); pos = ((loff_t) hpfs_de_as_down_as_possible(s, hpfs_inode->i_dno) << 4) + 1; while (pos != new_off) { if (map_pos_dirent(i, &pos, &qbh)) hpfs_brelse4(&qbh); else goto fail; if (pos == 12) goto fail; } - up(&i->i_sem); + mutex_unlock(&i->i_mutex); ok: unlock_kernel(); return filp->f_pos = new_off; fail: - up(&i->i_sem); + mutex_unlock(&i->i_mutex); /*printk("illegal lseek: %016llx\n", new_off);*/ unlock_kernel(); return -ESPIPE; diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c index 52930915bad8..a44dc5897399 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs_kern.c @@ -171,12 +171,12 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, err = -ENOMEM; parent = HPPFS_I(ino)->proc_dentry; - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); proc_dentry = d_lookup(parent, &dentry->d_name); if(proc_dentry == NULL){ proc_dentry = d_alloc(parent, &dentry->d_name); if(proc_dentry == NULL){ - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); goto out; } new = (*parent->d_inode->i_op->lookup)(parent->d_inode, @@ -186,7 +186,7 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, proc_dentry = new; } } - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); if(IS_ERR(proc_dentry)) return(proc_dentry); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8c41315a6e42..ff1b7d108bd0 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -118,7 +118,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma_len = 
(loff_t)(vma->vm_end - vma->vm_start); - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); file_accessed(file); vma->vm_flags |= VM_HUGETLB | VM_RESERVED; vma->vm_ops = &hugetlb_vm_ops; @@ -133,7 +133,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) if (inode->i_size < len) inode->i_size = len; out: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return ret; } diff --git a/fs/inode.c b/fs/inode.c index fd568caf7f74..e08767fd57b0 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -192,7 +192,7 @@ void inode_init_once(struct inode *inode) INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_devices); - sema_init(&inode->i_sem, 1); + mutex_init(&inode->i_mutex); init_rwsem(&inode->i_alloc_sem); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); rwlock_init(&inode->i_data.tree_lock); diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 2559ee10beda..fc3855a1aef3 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -1415,7 +1415,7 @@ jffs_file_write(struct file *filp, const char *buf, size_t count, * This will never trigger with sane page sizes. leave it in * anyway, since I'm thinking about how to merge larger writes * (the current idea is to poke a thread that does the actual - * I/O and starts by doing a down(&inode->i_sem). then we + * I/O and starts by doing a mutex_lock(&inode->i_mutex). then we * would need to get the page cache pages and have a list of * I/O requests and do write-merging here. * -- prumpf diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index c0fd7b3eadc6..dc21a5bd54d4 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -58,7 +58,7 @@ struct jfs_inode_info { /* * rdwrlock serializes xtree between reads & writes and synchronizes * changes to special inodes. It's use would be redundant on - * directories since the i_sem taken in the VFS is sufficient. + * directories since the i_mutex taken in the VFS is sufficient. 
*/ struct rw_semaphore rdwrlock; /* @@ -68,7 +68,7 @@ struct jfs_inode_info { * inode is blocked in txBegin or TxBeginAnon */ struct semaphore commit_sem; - /* xattr_sem allows us to access the xattrs without taking i_sem */ + /* xattr_sem allows us to access the xattrs without taking i_mutex */ struct rw_semaphore xattr_sem; lid_t xtlid; /* lid of xtree lock on directory */ #ifdef CONFIG_JFS_POSIX_ACL diff --git a/fs/libfs.c b/fs/libfs.c index 9c50523382e7..63c020e6589e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -74,7 +74,7 @@ int dcache_dir_close(struct inode *inode, struct file *file) loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) { - down(&file->f_dentry->d_inode->i_sem); + mutex_lock(&file->f_dentry->d_inode->i_mutex); switch (origin) { case 1: offset += file->f_pos; @@ -82,7 +82,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) if (offset >= 0) break; default: - up(&file->f_dentry->d_inode->i_sem); + mutex_unlock(&file->f_dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { @@ -106,7 +106,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) spin_unlock(&dcache_lock); } } - up(&file->f_dentry->d_inode->i_sem); + mutex_unlock(&file->f_dentry->d_inode->i_mutex); return offset; } @@ -356,7 +356,7 @@ int simple_commit_write(struct file *file, struct page *page, /* * No need to use i_size_read() here, the i_size - * cannot change under us because we hold the i_sem. + * cannot change under us because we hold the i_mutex. 
*/ if (pos > inode->i_size) i_size_write(inode, pos); diff --git a/fs/namei.c b/fs/namei.c index 300eae088d5f..0a8f073435af 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -438,7 +438,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s struct dentry * result; struct inode *dir = parent->d_inode; - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); /* * First re-do the cached lookup just in case it was created * while we waited for the directory semaphore.. @@ -464,7 +464,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s else result = dentry; } - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); return result; } @@ -472,7 +472,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s * Uhhuh! Nasty case: the cache was re-populated while * we waited on the semaphore. Need to revalidate. */ - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); if (result->d_op && result->d_op->d_revalidate) { if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { dput(result); @@ -1366,7 +1366,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) struct dentry *p; if (p1 == p2) { - down(&p1->d_inode->i_sem); + mutex_lock(&p1->d_inode->i_mutex); return NULL; } @@ -1374,30 +1374,30 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) for (p = p1; p->d_parent != p; p = p->d_parent) { if (p->d_parent == p2) { - down(&p2->d_inode->i_sem); - down(&p1->d_inode->i_sem); + mutex_lock(&p2->d_inode->i_mutex); + mutex_lock(&p1->d_inode->i_mutex); return p; } } for (p = p2; p->d_parent != p; p = p->d_parent) { if (p->d_parent == p1) { - down(&p1->d_inode->i_sem); - down(&p2->d_inode->i_sem); + mutex_lock(&p1->d_inode->i_mutex); + mutex_lock(&p2->d_inode->i_mutex); return p; } } - down(&p1->d_inode->i_sem); - down(&p2->d_inode->i_sem); + mutex_lock(&p1->d_inode->i_mutex); + mutex_lock(&p2->d_inode->i_mutex); return NULL; } void unlock_rename(struct dentry *p1, struct 
dentry *p2) { - up(&p1->d_inode->i_sem); + mutex_unlock(&p1->d_inode->i_mutex); if (p1 != p2) { - up(&p2->d_inode->i_sem); + mutex_unlock(&p2->d_inode->i_mutex); up(&p1->d_inode->i_sb->s_vfs_rename_sem); } } @@ -1563,14 +1563,14 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) dir = nd->dentry; nd->flags &= ~LOOKUP_PARENT; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); path.dentry = lookup_hash(nd); path.mnt = nd->mnt; do_last: error = PTR_ERR(path.dentry); if (IS_ERR(path.dentry)) { - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); goto exit; } @@ -1579,7 +1579,7 @@ do_last: if (!IS_POSIXACL(dir->d_inode)) mode &= ~current->fs->umask; error = vfs_create(dir->d_inode, path.dentry, mode, nd); - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); dput(nd->dentry); nd->dentry = path.dentry; if (error) @@ -1593,7 +1593,7 @@ do_last: /* * It already exists. */ - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); error = -EEXIST; if (flag & O_EXCL) @@ -1665,7 +1665,7 @@ do_link: goto exit; } dir = nd->dentry; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); path.dentry = lookup_hash(nd); path.mnt = nd->mnt; __putname(nd->last.name); @@ -1680,13 +1680,13 @@ do_link: * Simple function to lookup and return a dentry and create it * if it doesn't exist. Is SMP-safe. * - * Returns with nd->dentry->d_inode->i_sem locked. + * Returns with nd->dentry->d_inode->i_mutex locked. */ struct dentry *lookup_create(struct nameidata *nd, int is_dir) { struct dentry *dentry = ERR_PTR(-EEXIST); - down(&nd->dentry->d_inode->i_sem); + mutex_lock(&nd->dentry->d_inode->i_mutex); /* * Yucky last component or no last component at all? 
* (foo/., foo/.., /////) @@ -1784,7 +1784,7 @@ asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev) } dput(dentry); } - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); path_release(&nd); out: putname(tmp); @@ -1836,7 +1836,7 @@ asmlinkage long sys_mkdir(const char __user * pathname, int mode) error = vfs_mkdir(nd.dentry->d_inode, dentry, mode); dput(dentry); } - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); path_release(&nd); out: putname(tmp); @@ -1885,7 +1885,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) DQUOT_INIT(dir); - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); dentry_unhash(dentry); if (d_mountpoint(dentry)) error = -EBUSY; @@ -1897,7 +1897,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_flags |= S_DEAD; } } - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); if (!error) { d_delete(dentry); } @@ -1932,14 +1932,14 @@ asmlinkage long sys_rmdir(const char __user * pathname) error = -EBUSY; goto exit1; } - down(&nd.dentry->d_inode->i_sem); + mutex_lock(&nd.dentry->d_inode->i_mutex); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { error = vfs_rmdir(nd.dentry->d_inode, dentry); dput(dentry); } - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); exit1: path_release(&nd); exit: @@ -1959,7 +1959,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) DQUOT_INIT(dir); - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); if (d_mountpoint(dentry)) error = -EBUSY; else { @@ -1967,7 +1967,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) if (!error) error = dir->i_op->unlink(dir, dentry); } - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); /* We don't d_delete() NFS sillyrenamed files--they still exist. 
*/ if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { @@ -1979,7 +1979,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) /* * Make sure that the actual truncation of the file will occur outside its - * directory's i_sem. Truncate can take a long time if there is a lot of + * directory's i_mutex. Truncate can take a long time if there is a lot of * writeout happening, and we don't want to prevent access to the directory * while waiting on the I/O. */ @@ -2001,7 +2001,7 @@ asmlinkage long sys_unlink(const char __user * pathname) error = -EISDIR; if (nd.last_type != LAST_NORM) goto exit1; - down(&nd.dentry->d_inode->i_sem); + mutex_lock(&nd.dentry->d_inode->i_mutex); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { @@ -2015,7 +2015,7 @@ asmlinkage long sys_unlink(const char __user * pathname) exit2: dput(dentry); } - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); if (inode) iput(inode); /* truncate the inode here */ exit1: @@ -2075,7 +2075,7 @@ asmlinkage long sys_symlink(const char __user * oldname, const char __user * new error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO); dput(dentry); } - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); path_release(&nd); out: putname(to); @@ -2113,10 +2113,10 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de if (error) return error; - down(&old_dentry->d_inode->i_sem); + mutex_lock(&old_dentry->d_inode->i_mutex); DQUOT_INIT(dir); error = dir->i_op->link(old_dentry, dir, new_dentry); - up(&old_dentry->d_inode->i_sem); + mutex_unlock(&old_dentry->d_inode->i_mutex); if (!error) fsnotify_create(dir, new_dentry->d_name.name); return error; @@ -2157,7 +2157,7 @@ asmlinkage long sys_link(const char __user * oldname, const char __user * newnam error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); dput(new_dentry); } - up(&nd.dentry->d_inode->i_sem); + 
mutex_unlock(&nd.dentry->d_inode->i_mutex); out_release: path_release(&nd); out: @@ -2178,7 +2178,7 @@ exit: * sb->s_vfs_rename_sem. We might be more accurate, but that's another * story. * c) we have to lock _three_ objects - parents and victim (if it exists). - * And that - after we got ->i_sem on parents (until then we don't know + * And that - after we got ->i_mutex on parents (until then we don't know * whether the target exists). Solution: try to be smart with locking * order for inodes. We rely on the fact that tree topology may change * only under ->s_vfs_rename_sem _and_ that parent of the object we @@ -2195,9 +2195,9 @@ exit: * stuff into VFS), but the former is not going away. Solution: the same * trick as in rmdir(). * e) conversion from fhandle to dentry may come in the wrong moment - when - * we are removing the target. Solution: we will have to grab ->i_sem + * we are removing the target. Solution: we will have to grab ->i_mutex * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on - * ->i_sem on parents, which works but leads to some truely excessive + * ->i_mutex on parents, which works but leads to some truely excessive * locking]. 
*/ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, @@ -2222,7 +2222,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, target = new_dentry->d_inode; if (target) { - down(&target->i_sem); + mutex_lock(&target->i_mutex); dentry_unhash(new_dentry); } if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) @@ -2232,7 +2232,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, if (target) { if (!error) target->i_flags |= S_DEAD; - up(&target->i_sem); + mutex_unlock(&target->i_mutex); if (d_unhashed(new_dentry)) d_rehash(new_dentry); dput(new_dentry); @@ -2255,7 +2255,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, dget(new_dentry); target = new_dentry->d_inode; if (target) - down(&target->i_sem); + mutex_lock(&target->i_mutex); if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) error = -EBUSY; else @@ -2266,7 +2266,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, d_move(old_dentry, new_dentry); } if (target) - up(&target->i_sem); + mutex_unlock(&target->i_mutex); dput(new_dentry); return error; } diff --git a/fs/namespace.c b/fs/namespace.c index 3e8fb61ad597..f0e353f5bc30 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -814,7 +814,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) return -ENOTDIR; err = -ENOENT; - down(&nd->dentry->d_inode->i_sem); + mutex_lock(&nd->dentry->d_inode->i_mutex); if (IS_DEADDIR(nd->dentry->d_inode)) goto out_unlock; @@ -826,7 +826,7 @@ static int graft_tree(struct vfsmount *mnt, struct nameidata *nd) if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) err = attach_recursive_mnt(mnt, nd, NULL); out_unlock: - up(&nd->dentry->d_inode->i_sem); + mutex_unlock(&nd->dentry->d_inode->i_mutex); if (!err) security_sb_post_addmount(mnt, nd); return err; @@ -962,7 +962,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name) goto out; err = -ENOENT; - 
down(&nd->dentry->d_inode->i_sem); + mutex_lock(&nd->dentry->d_inode->i_mutex); if (IS_DEADDIR(nd->dentry->d_inode)) goto out1; @@ -1004,7 +1004,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name) list_del_init(&old_nd.mnt->mnt_expire); spin_unlock(&vfsmount_lock); out1: - up(&nd->dentry->d_inode->i_sem); + mutex_unlock(&nd->dentry->d_inode->i_mutex); out: up_write(&namespace_sem); if (!err) @@ -1573,7 +1573,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root, user_nd.dentry = dget(current->fs->root); read_unlock(&current->fs->lock); down_write(&namespace_sem); - down(&old_nd.dentry->d_inode->i_sem); + mutex_lock(&old_nd.dentry->d_inode->i_mutex); error = -EINVAL; if (IS_MNT_SHARED(old_nd.mnt) || IS_MNT_SHARED(new_nd.mnt->mnt_parent) || @@ -1626,7 +1626,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root, path_release(&root_parent); path_release(&parent_nd); out2: - up(&old_nd.dentry->d_inode->i_sem); + mutex_unlock(&old_nd.dentry->d_inode->i_mutex); up_write(&namespace_sem); path_release(&user_nd); path_release(&old_nd); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e9255198f767..a1554bead692 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -194,7 +194,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) spin_unlock(&inode->i_lock); /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either - * through inode->i_sem or some other mechanism. + * through inode->i_mutex or some other mechanism.
*/ if (page->index == 0) invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1); @@ -573,7 +573,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) { - down(&filp->f_dentry->d_inode->i_sem); + mutex_lock(&filp->f_dentry->d_inode->i_mutex); switch (origin) { case 1: offset += filp->f_pos; @@ -589,7 +589,7 @@ loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; } out: - up(&filp->f_dentry->d_inode->i_sem); + mutex_unlock(&filp->f_dentry->d_inode->i_mutex); return offset; } @@ -1001,7 +1001,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) openflags &= ~(O_CREAT|O_TRUNC); /* - * Note: we're not holding inode->i_sem and so may be racing with + * Note: we're not holding inode->i_mutex and so may be racing with * operations that change the directory. We therefore save the * change attribute *before* we do the RPC call. */ @@ -1051,7 +1051,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) return dentry; if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR)) return NULL; - /* Note: caller is already holding the dir->i_sem! */ + /* Note: caller is already holding the dir->i_mutex! 
*/ dentry = d_alloc(parent, &name); if (dentry == NULL) return NULL; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 954cf893d50c..be963a133aaa 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -121,9 +121,9 @@ out: static void nfsd4_sync_rec_dir(void) { - down(&rec_dir.dentry->d_inode->i_sem); + mutex_lock(&rec_dir.dentry->d_inode->i_mutex); nfsd_sync_dir(rec_dir.dentry); - up(&rec_dir.dentry->d_inode->i_sem); + mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); } int @@ -143,7 +143,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) nfs4_save_user(&uid, &gid); /* lock the parent */ - down(&rec_dir.dentry->d_inode->i_sem); + mutex_lock(&rec_dir.dentry->d_inode->i_mutex); dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1); if (IS_ERR(dentry)) { @@ -159,7 +159,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) out_put: dput(dentry); out_unlock: - up(&rec_dir.dentry->d_inode->i_sem); + mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); if (status == 0) { clp->cl_firststate = 1; nfsd4_sync_rec_dir(); @@ -259,9 +259,9 @@ nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry) printk("nfsd4: non-file found in client recovery directory\n"); return -EINVAL; } - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); status = vfs_unlink(dir->d_inode, dentry); - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); return status; } @@ -274,9 +274,9 @@ nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry) * any regular files anyway, just in case the directory was created by * a kernel from the future.... */ nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); status = vfs_rmdir(dir->d_inode, dentry); - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); return status; } @@ -288,9 +288,9 @@ nfsd4_unlink_clid_dir(char *name, int namlen) dprintk("NFSD: nfsd4_unlink_clid_dir. 
name %.*s\n", namlen, name); - down(&rec_dir.dentry->d_inode->i_sem); + mutex_lock(&rec_dir.dentry->d_inode->i_mutex); dentry = lookup_one_len(name, rec_dir.dentry, namlen); - up(&rec_dir.dentry->d_inode->i_sem); + mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); return status; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index df4019f04560..bb36b4304491 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -390,12 +390,12 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) error = -EOPNOTSUPP; if (inode->i_op && inode->i_op->setxattr) { - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); security_inode_setxattr(dentry, key, buf, len, 0); error = inode->i_op->setxattr(dentry, key, buf, len, 0); if (!error) security_inode_post_setxattr(dentry, key, buf, len, 0); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); } out: kfree(buf); @@ -739,9 +739,9 @@ nfsd_sync(struct file *filp) int err; struct inode *inode = filp->f_dentry->d_inode; dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name); - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); err=nfsd_dosync(filp, filp->f_dentry, filp->f_op); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return err; } @@ -885,9 +885,9 @@ static void kill_suid(struct dentry *dentry) struct iattr ia; ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID; - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); notify_change(dentry, &ia); - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); } static inline int diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index eda056bac256..9480a0526cd3 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -1532,7 +1532,7 @@ int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a, * NOTE to self: No changes in the attribute list are required to move from * a resident to a non-resident attribute. * - * Locking: - The caller must hold i_sem on the inode. 
+ * Locking: - The caller must hold i_mutex on the inode. */ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size) { @@ -1728,7 +1728,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size) /* * This needs to be last since the address space operations ->readpage * and ->writepage can run concurrently with us as they are not - * serialized on i_sem. Note, we are not allowed to fail once we flip + * serialized on i_mutex. Note, we are not allowed to fail once we flip * this switch, which is another reason to do this last. */ NInoSetNonResident(ni); diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index 795c3d1930f5..b0690d4c8906 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -69,7 +69,7 @@ ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'), * work but we don't care for how quickly one can access them. This also fixes * the dcache aliasing issues. * - * Locking: - Caller must hold i_sem on the directory. + * Locking: - Caller must hold i_mutex on the directory. * - Each page cache page in the index allocation mapping must be * locked whilst being accessed otherwise we may find a corrupt * page due to it being under ->writepage at the moment which @@ -1085,11 +1085,11 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, * While this will return the names in random order this doesn't matter for * ->readdir but OTOH results in a faster ->readdir. * - * VFS calls ->readdir without BKL but with i_sem held. This protects the VFS + * VFS calls ->readdir without BKL but with i_mutex held. This protects the VFS * parts (e.g. ->f_pos and ->i_size, and it also protects against directory * modifications). * - * Locking: - Caller must hold i_sem on the directory. + * Locking: - Caller must hold i_mutex on the directory. 
* - Each page cache page in the index allocation mapping must be * locked whilst being accessed otherwise we may find a corrupt * page due to it being under ->writepage at the moment which @@ -1520,7 +1520,7 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp) * Note: In the past @filp could be NULL so we ignore it as we don't need it * anyway. * - * Locking: Caller must hold i_sem on the inode. + * Locking: Caller must hold i_mutex on the inode. * * TODO: We should probably also write all attribute/index inodes associated * with this inode but since we have no simple way of getting to them we ignore diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 727533891813..30f71acdc1cb 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -106,7 +106,7 @@ static int ntfs_file_open(struct inode *vi, struct file *filp) * this is the case, the necessary zeroing will also have happened and that all * metadata is self-consistent. * - * Locking: i_sem on the vfs inode corrseponsind to the ntfs inode @ni must be + * Locking: i_mutex on the vfs inode corrseponsind to the ntfs inode @ni must be * held by the caller. */ static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size, @@ -473,7 +473,7 @@ static inline int ntfs_submit_bh_for_read(struct buffer_head *bh) * @bytes: number of bytes to be written * * This is called for non-resident attributes from ntfs_file_buffered_write() - * with i_sem held on the inode (@pages[0]->mapping->host). There are + * with i_mutex held on the inode (@pages[0]->mapping->host). There are * @nr_pages pages in @pages which are locked but not kmap()ped. The source * data has not yet been copied into the @pages. 
* @@ -1637,7 +1637,7 @@ err_out: * @pos: byte position in file at which the write begins * @bytes: number of bytes to be written * - * This is called from ntfs_file_buffered_write() with i_sem held on the inode + * This is called from ntfs_file_buffered_write() with i_mutex held on the inode * (@pages[0]->mapping->host). There are @nr_pages pages in @pages which are * locked but not kmap()ped. The source data has already been copied into the * @page. ntfs_prepare_pages_for_non_resident_write() has been called before @@ -1814,7 +1814,7 @@ err_out: /** * ntfs_file_buffered_write - * - * Locking: The vfs is holding ->i_sem on the inode. + * Locking: The vfs is holding ->i_mutex on the inode. */ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, @@ -2196,9 +2196,9 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const char __user *buf, BUG_ON(iocb->ki_pos != pos); - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); ret = ntfs_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { int err = sync_page_range(inode, mapping, pos, ret); if (err < 0) @@ -2221,12 +2221,12 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov, struct kiocb kiocb; ssize_t ret; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); init_sync_kiocb(&kiocb, file); ret = ntfs_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); if (ret == -EIOCBQUEUED) ret = wait_on_sync_kiocb(&kiocb); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { int err = sync_page_range(inode, mapping, *ppos - ret, ret); if (err < 0) @@ -2269,7 +2269,7 @@ static ssize_t ntfs_file_write(struct file *file, const char __user *buf, * Note: In the past @filp could be NULL so we ignore it as we don't need it * anyway. 
* - * Locking: Caller must hold i_sem on the inode. + * Locking: Caller must hold i_mutex on the inode. * * TODO: We should probably also write all attribute/index inodes associated * with this inode but since we have no simple way of getting to them we ignore diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c index 8f2d5727546f..9f5427c2d105 100644 --- a/fs/ntfs/index.c +++ b/fs/ntfs/index.c @@ -32,7 +32,7 @@ * Allocate a new index context, initialize it with @idx_ni and return it. * Return NULL if allocation failed. * - * Locking: Caller must hold i_sem on the index inode. + * Locking: Caller must hold i_mutex on the index inode. */ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni) { @@ -50,7 +50,7 @@ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni) * * Release the index context @ictx, releasing all associated resources. * - * Locking: Caller must hold i_sem on the index inode. + * Locking: Caller must hold i_mutex on the index inode. */ void ntfs_index_ctx_put(ntfs_index_context *ictx) { @@ -106,7 +106,7 @@ void ntfs_index_ctx_put(ntfs_index_context *ictx) * or ntfs_index_entry_write() before the call to ntfs_index_ctx_put() to * ensure that the changes are written to disk. * - * Locking: - Caller must hold i_sem on the index inode. + * Locking: - Caller must hold i_mutex on the index inode. 
* - Each page cache page in the index allocation mapping must be * locked whilst being accessed otherwise we may find a corrupt * page due to it being under ->writepage at the moment which diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index b24f4c4b2c5c..bda7a08911a5 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2125,13 +2125,13 @@ void ntfs_put_inode(struct inode *vi) ntfs_inode *ni = NTFS_I(vi); if (NInoIndexAllocPresent(ni)) { struct inode *bvi = NULL; - down(&vi->i_sem); + mutex_lock(&vi->i_mutex); if (atomic_read(&vi->i_count) == 2) { bvi = ni->itype.index.bmp_ino; if (bvi) ni->itype.index.bmp_ino = NULL; } - up(&vi->i_sem); + mutex_unlock(&vi->i_mutex); if (bvi) iput(bvi); } @@ -2311,7 +2311,7 @@ static const char *es = " Leaving inconsistent metadata. Unmount and run " * * Returns 0 on success or -errno on error. * - * Called with ->i_sem held. In all but one case ->i_alloc_sem is held for + * Called with ->i_mutex held. In all but one case ->i_alloc_sem is held for * writing. The only case in the kernel where ->i_alloc_sem is not held is * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called * with the current i_size as the offset. The analogous place in NTFS is in @@ -2831,7 +2831,7 @@ void ntfs_truncate_vfs(struct inode *vi) { * We also abort all changes of user, group, and mode as we do not implement * the NTFS ACLs yet. * - * Called with ->i_sem held. For the ATTR_SIZE (i.e. ->truncate) case, also + * Called with ->i_mutex held. For the ATTR_SIZE (i.e. ->truncate) case, also * called with ->i_alloc_sem held for writing. * * Basically this is a copy of generic notify_change() and inode_setattr() diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 351dbc3b6e40..5ea9eb93af62 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -96,7 +96,7 @@ * name. We then convert the name to the current NLS code page, and proceed * searching for a dentry with this name, etc, as in case 2), above. 
* - * Locking: Caller must hold i_sem on the directory. + * Locking: Caller must hold i_mutex on the directory. */ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, struct nameidata *nd) @@ -254,7 +254,7 @@ handle_name: nls_name.hash = full_name_hash(nls_name.name, nls_name.len); /* - * Note: No need for dent->d_lock lock as i_sem is held on the + * Note: No need for dent->d_lock lock as i_mutex is held on the * parent inode. */ @@ -374,7 +374,7 @@ struct inode_operations ntfs_dir_inode_ops = { * The code is based on the ext3 ->get_parent() implementation found in * fs/ext3/namei.c::ext3_get_parent(). * - * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_sem down. + * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_mutex down. * * Return the dentry of the parent directory on success or the error code on * error (IS_ERR() is true). diff --git a/fs/ntfs/quota.c b/fs/ntfs/quota.c index 833df2a4e9fb..d0ef4182147b 100644 --- a/fs/ntfs/quota.c +++ b/fs/ntfs/quota.c @@ -48,7 +48,7 @@ BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol) ntfs_error(vol->sb, "Quota inodes are not open."); return FALSE; } - down(&vol->quota_q_ino->i_sem); + mutex_lock(&vol->quota_q_ino->i_mutex); ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino)); if (!ictx) { ntfs_error(vol->sb, "Failed to get index context."); @@ -98,7 +98,7 @@ BOOL ntfs_mark_quotas_out_of_date(ntfs_volume *vol) ntfs_index_entry_mark_dirty(ictx); set_done: ntfs_index_ctx_put(ictx); - up(&vol->quota_q_ino->i_sem); + mutex_unlock(&vol->quota_q_ino->i_mutex); /* * We set the flag so we do not try to mark the quotas out of date * again on remount. 
@@ -110,7 +110,7 @@ done: err_out: if (ictx) ntfs_index_ctx_put(ictx); - up(&vol->quota_q_ino->i_sem); + mutex_unlock(&vol->quota_q_ino->i_mutex); return FALSE; } diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 6c16db9e1a8a..280e383fc84e 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -1213,10 +1213,10 @@ static int check_windows_hibernation_status(ntfs_volume *vol) * Find the inode number for the hibernation file by looking up the * filename hiberfil.sys in the root directory. */ - down(&vol->root_ino->i_sem); + mutex_lock(&vol->root_ino->i_mutex); mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12, &name); - up(&vol->root_ino->i_sem); + mutex_unlock(&vol->root_ino->i_mutex); if (IS_ERR_MREF(mref)) { ret = MREF_ERR(mref); /* If the file does not exist, Windows is not hibernated. */ @@ -1307,10 +1307,10 @@ static BOOL load_and_init_quota(ntfs_volume *vol) * Find the inode number for the quota file by looking up the filename * $Quota in the extended system files directory $Extend. */ - down(&vol->extend_ino->i_sem); + mutex_lock(&vol->extend_ino->i_mutex); mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6, &name); - up(&vol->extend_ino->i_sem); + mutex_unlock(&vol->extend_ino->i_mutex); if (IS_ERR_MREF(mref)) { /* * If the file does not exist, quotas are disabled and have @@ -1390,10 +1390,10 @@ static BOOL load_and_init_usnjrnl(ntfs_volume *vol) * Find the inode number for the transaction log file by looking up the * filename $UsnJrnl in the extended system files directory $Extend. 
*/ - down(&vol->extend_ino->i_sem); + mutex_lock(&vol->extend_ino->i_mutex); mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8, &name); - up(&vol->extend_ino->i_sem); + mutex_unlock(&vol->extend_ino->i_mutex); if (IS_ERR_MREF(mref)) { /* * If the file does not exist, transaction logging is disabled, @@ -2312,9 +2312,9 @@ static void ntfs_put_super(struct super_block *sb) if (!list_empty(&sb->s_dirty)) { const char *s1, *s2; - down(&vol->mft_ino->i_sem); + mutex_lock(&vol->mft_ino->i_mutex); truncate_inode_pages(vol->mft_ino->i_mapping, 0); - up(&vol->mft_ino->i_sem); + mutex_unlock(&vol->mft_ino->i_mutex); write_inode_now(vol->mft_ino, 1); if (!list_empty(&sb->s_dirty)) { static const char *_s1 = "inodes"; diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 465f797451ee..6b9812db3779 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -966,7 +966,7 @@ static int ocfs2_truncate_log_append(struct ocfs2_super *osb, mlog_entry("start_blk = %"MLFu64", num_clusters = %u\n", start_blk, num_clusters); - BUG_ON(!down_trylock(&tl_inode->i_sem)); + BUG_ON(mutex_trylock(&tl_inode->i_mutex)); start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk); @@ -1108,7 +1108,7 @@ bail: return status; } -/* Expects you to already be holding tl_inode->i_sem */ +/* Expects you to already be holding tl_inode->i_mutex */ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) { int status; @@ -1123,7 +1123,7 @@ static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) mlog_entry_void(); - BUG_ON(!down_trylock(&tl_inode->i_sem)); + BUG_ON(mutex_trylock(&tl_inode->i_mutex)); di = (struct ocfs2_dinode *) tl_bh->b_data; tl = &di->id2.i_dealloc; @@ -1198,9 +1198,9 @@ int ocfs2_flush_truncate_log(struct ocfs2_super *osb) int status; struct inode *tl_inode = osb->osb_tl_inode; - down(&tl_inode->i_sem); + mutex_lock(&tl_inode->i_mutex); status = __ocfs2_flush_truncate_log(osb); - up(&tl_inode->i_sem); + mutex_unlock(&tl_inode->i_mutex); return status; } @@ 
-1363,7 +1363,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, mlog(0, "cleanup %u records from %"MLFu64"\n", num_recs, tl_copy->i_blkno); - down(&tl_inode->i_sem); + mutex_lock(&tl_inode->i_mutex); for(i = 0; i < num_recs; i++) { if (ocfs2_truncate_log_needs_flush(osb)) { status = __ocfs2_flush_truncate_log(osb); @@ -1395,7 +1395,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, } bail_up: - up(&tl_inode->i_sem); + mutex_unlock(&tl_inode->i_mutex); mlog_exit(status); return status; @@ -1840,7 +1840,7 @@ start: mlog(0, "clusters_to_del = %u in this pass\n", clusters_to_del); - down(&tl_inode->i_sem); + mutex_lock(&tl_inode->i_mutex); tl_sem = 1; /* ocfs2_truncate_log_needs_flush guarantees us at least one * record is free for use. If there isn't any, we flush to get @@ -1875,7 +1875,7 @@ start: goto bail; } - up(&tl_inode->i_sem); + mutex_unlock(&tl_inode->i_mutex); tl_sem = 0; ocfs2_commit_trans(handle); @@ -1890,7 +1890,7 @@ bail: ocfs2_schedule_truncate_log_flush(osb, 1); if (tl_sem) - up(&tl_inode->i_sem); + mutex_unlock(&tl_inode->i_mutex); if (handle) ocfs2_commit_trans(handle); @@ -1994,7 +1994,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, goto bail; } - down(&ext_alloc_inode->i_sem); + mutex_lock(&ext_alloc_inode->i_mutex); (*tc)->tc_ext_alloc_inode = ext_alloc_inode; status = ocfs2_meta_lock(ext_alloc_inode, @@ -2026,7 +2026,7 @@ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) if (tc->tc_ext_alloc_locked) ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1); - up(&tc->tc_ext_alloc_inode->i_sem); + mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex); iput(tc->tc_ext_alloc_inode); } diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 5fd60c105913..cf7828f23361 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -653,7 +653,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g struct config_group 
*o2hb_group = NULL, *ret = NULL; void *defs = NULL; - /* this runs under the parent dir's i_sem; there can be only + /* this runs under the parent dir's i_mutex; there can be only * one caller in here at a time */ if (o2nm_single_cluster) goto out; /* ENOSPC */ diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 856e20ae8263..57158fa75d91 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -202,7 +202,7 @@ bail: } /* - * NOTE: this should always be called with parent dir i_sem taken. + * NOTE: this should always be called with parent dir i_mutex taken. */ int ocfs2_find_files_on_disk(const char *name, int namelen, @@ -245,7 +245,7 @@ leave: * Return 0 if the name does not exist * Return -EEXIST if the directory contains the name * - * Callers should have i_sem + a cluster lock on dir + * Callers should have i_mutex + a cluster lock on dir */ int ocfs2_check_dir_for_entry(struct inode *dir, const char *name, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 72ae9e3306f4..ca5f9f90d794 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -492,7 +492,7 @@ restart_all: } /* blocks peope in read/write from reading our allocation - * until we're done changing it. We depend on i_sem to block + * until we're done changing it. We depend on i_mutex to block * other extend/truncate calls while we're here. Ordering wrt * start_trans is important here -- always do it before! 
*/ down_write(&OCFS2_I(inode)->ip_alloc_sem); @@ -958,8 +958,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, filp->f_flags &= ~O_DIRECT; #endif - down(&inode->i_sem); - /* to match setattr's i_sem -> i_alloc_sem -> rw_lock ordering */ + mutex_lock(&inode->i_mutex); + /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ if (filp->f_flags & O_DIRECT) { have_alloc_sem = 1; down_read(&inode->i_alloc_sem); @@ -1123,7 +1123,7 @@ out: up_read(&inode->i_alloc_sem); if (rw_level != -1) ocfs2_rw_unlock(inode, rw_level); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); mlog_exit(ret); return ret; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index a91ba4dec936..d4ecc0627716 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -485,10 +485,10 @@ static int ocfs2_remove_inode(struct inode *inode, goto bail; } - down(&inode_alloc_inode->i_sem); + mutex_lock(&inode_alloc_inode->i_mutex); status = ocfs2_meta_lock(inode_alloc_inode, NULL, &inode_alloc_bh, 1); if (status < 0) { - up(&inode_alloc_inode->i_sem); + mutex_unlock(&inode_alloc_inode->i_mutex); mlog_errno(status); goto bail; @@ -536,7 +536,7 @@ bail_commit: ocfs2_commit_trans(handle); bail_unlock: ocfs2_meta_unlock(inode_alloc_inode, 1); - up(&inode_alloc_inode->i_sem); + mutex_unlock(&inode_alloc_inode->i_mutex); brelse(inode_alloc_bh); bail: iput(inode_alloc_inode); @@ -567,10 +567,10 @@ static int ocfs2_wipe_inode(struct inode *inode, /* Lock the orphan dir. The lock will be held for the entire * delete_inode operation. We do this now to avoid races with * recovery completion on other nodes. 
*/ - down(&orphan_dir_inode->i_sem); + mutex_lock(&orphan_dir_inode->i_mutex); status = ocfs2_meta_lock(orphan_dir_inode, NULL, &orphan_dir_bh, 1); if (status < 0) { - up(&orphan_dir_inode->i_sem); + mutex_unlock(&orphan_dir_inode->i_mutex); mlog_errno(status); goto bail; @@ -593,7 +593,7 @@ static int ocfs2_wipe_inode(struct inode *inode, bail_unlock_dir: ocfs2_meta_unlock(orphan_dir_inode, 1); - up(&orphan_dir_inode->i_sem); + mutex_unlock(&orphan_dir_inode->i_mutex); brelse(orphan_dir_bh); bail: iput(orphan_dir_inode); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 04428042e5e5..303c8d96457f 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -216,7 +216,7 @@ void ocfs2_handle_add_inode(struct ocfs2_journal_handle *handle, atomic_inc(&inode->i_count); /* we're obviously changing it... */ - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); /* sanity check */ BUG_ON(OCFS2_I(inode)->ip_handle); @@ -241,7 +241,7 @@ static void ocfs2_handle_unlock_inodes(struct ocfs2_journal_handle *handle) OCFS2_I(inode)->ip_handle = NULL; list_del_init(&OCFS2_I(inode)->ip_handle_list); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); iput(inode); } } @@ -1433,10 +1433,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, goto out; } - down(&orphan_dir_inode->i_sem); + mutex_lock(&orphan_dir_inode->i_mutex); status = ocfs2_meta_lock(orphan_dir_inode, NULL, NULL, 0); if (status < 0) { - up(&orphan_dir_inode->i_sem); + mutex_unlock(&orphan_dir_inode->i_mutex); mlog_errno(status); goto out; } @@ -1451,7 +1451,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, if (!bh) status = -EINVAL; if (status < 0) { - up(&orphan_dir_inode->i_sem); + mutex_unlock(&orphan_dir_inode->i_mutex); if (bh) brelse(bh); mlog_errno(status); @@ -1465,7 +1465,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, if (!ocfs2_check_dir_entry(orphan_dir_inode, de, bh, local)) { - up(&orphan_dir_inode->i_sem); + mutex_unlock(&orphan_dir_inode->i_mutex); 
status = -EINVAL; mlog_errno(status); brelse(bh); @@ -1509,7 +1509,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, } brelse(bh); } - up(&orphan_dir_inode->i_sem); + mutex_unlock(&orphan_dir_inode->i_mutex); ocfs2_meta_unlock(orphan_dir_inode, 0); have_disk_lock = 0; diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index fe373a2101d9..149b35181666 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -334,7 +334,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, goto bail; } - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &alloc_bh, 0, inode); @@ -367,7 +367,7 @@ bail: brelse(alloc_bh); if (inode) { - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); iput(inode); } @@ -446,7 +446,7 @@ bail: /* * make sure we've got at least bitswanted contiguous bits in the - * local alloc. You lose them when you drop i_sem. + * local alloc. You lose them when you drop i_mutex. * * We will add ourselves to the transaction passed in, but may start * our own in order to shift windows. 
diff --git a/fs/open.c b/fs/open.c index 75f3329e8a67..a3b3a9b5c2ff 100644 --- a/fs/open.c +++ b/fs/open.c @@ -211,9 +211,9 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, newattrs.ia_valid |= ATTR_FILE; } - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); err = notify_change(dentry, &newattrs); - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); return err; } @@ -398,9 +398,9 @@ asmlinkage long sys_utime(char __user * filename, struct utimbuf __user * times) (error = vfs_permission(&nd, MAY_WRITE)) != 0) goto dput_and_out; } - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); error = notify_change(nd.dentry, &newattrs); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); dput_and_out: path_release(&nd); out: @@ -451,9 +451,9 @@ long do_utimes(char __user * filename, struct timeval * times) (error = vfs_permission(&nd, MAY_WRITE)) != 0) goto dput_and_out; } - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); error = notify_change(nd.dentry, &newattrs); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); dput_and_out: path_release(&nd); out: @@ -620,13 +620,13 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) err = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out_putf; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; err = notify_change(dentry, &newattrs); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); out_putf: fput(file); @@ -654,13 +654,13 @@ asmlinkage long sys_chmod(const char __user * filename, mode_t mode) if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto dput_and_out; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 
error = notify_change(nd.dentry, &newattrs); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); dput_and_out: path_release(&nd); @@ -696,9 +696,9 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group) } if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); error = notify_change(dentry, &newattrs); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); out: return error; } diff --git a/fs/pipe.c b/fs/pipe.c index 66aa0b938d6a..acb030b61fb0 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -44,10 +44,10 @@ void pipe_wait(struct inode * inode) * is considered a noninteractive wait: */ prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE); - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); schedule(); finish_wait(PIPE_WAIT(*inode), &wait); - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); } static inline int @@ -136,7 +136,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov, do_wakeup = 0; ret = 0; - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); info = inode->i_pipe; for (;;) { int bufs = info->nrbufs; @@ -200,7 +200,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov, } pipe_wait(inode); } - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); /* Signal writers asynchronously that there is more room. 
*/ if (do_wakeup) { wake_up_interruptible(PIPE_WAIT(*inode)); @@ -237,7 +237,7 @@ pipe_writev(struct file *filp, const struct iovec *_iov, do_wakeup = 0; ret = 0; - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); info = inode->i_pipe; if (!PIPE_READERS(*inode)) { @@ -341,7 +341,7 @@ pipe_writev(struct file *filp, const struct iovec *_iov, PIPE_WAITING_WRITERS(*inode)--; } out: - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); if (do_wakeup) { wake_up_interruptible(PIPE_WAIT(*inode)); kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); @@ -381,7 +381,7 @@ pipe_ioctl(struct inode *pino, struct file *filp, switch (cmd) { case FIONREAD: - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); info = inode->i_pipe; count = 0; buf = info->curbuf; @@ -390,7 +390,7 @@ pipe_ioctl(struct inode *pino, struct file *filp, count += info->bufs[buf].len; buf = (buf+1) & (PIPE_BUFFERS-1); } - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return put_user(count, (int __user *)arg); default: return -EINVAL; @@ -433,7 +433,7 @@ pipe_poll(struct file *filp, poll_table *wait) static int pipe_release(struct inode *inode, int decr, int decw) { - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); PIPE_READERS(*inode) -= decr; PIPE_WRITERS(*inode) -= decw; if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) { @@ -443,7 +443,7 @@ pipe_release(struct inode *inode, int decr, int decw) kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT); } - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return 0; } @@ -454,9 +454,9 @@ pipe_read_fasync(int fd, struct file *filp, int on) struct inode *inode = filp->f_dentry->d_inode; int retval; - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); if (retval < 0) return retval; @@ -471,9 +471,9 @@ 
pipe_write_fasync(int fd, struct file *filp, int on) struct inode *inode = filp->f_dentry->d_inode; int retval; - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); if (retval < 0) return retval; @@ -488,14 +488,14 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on) struct inode *inode = filp->f_dentry->d_inode; int retval; - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode)); if (retval >= 0) retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode)); - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); if (retval < 0) return retval; @@ -534,9 +534,9 @@ pipe_read_open(struct inode *inode, struct file *filp) { /* We could have perhaps used atomic_t, but this and friends below are the only places. So it doesn't seem worthwhile. */ - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); PIPE_READERS(*inode)++; - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return 0; } @@ -544,9 +544,9 @@ pipe_read_open(struct inode *inode, struct file *filp) static int pipe_write_open(struct inode *inode, struct file *filp) { - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); PIPE_WRITERS(*inode)++; - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return 0; } @@ -554,12 +554,12 @@ pipe_write_open(struct inode *inode, struct file *filp) static int pipe_rdwr_open(struct inode *inode, struct file *filp) { - down(PIPE_SEM(*inode)); + mutex_lock(PIPE_MUTEX(*inode)); if (filp->f_mode & FMODE_READ) PIPE_READERS(*inode)++; if (filp->f_mode & FMODE_WRITE) PIPE_WRITERS(*inode)++; - up(PIPE_SEM(*inode)); + mutex_unlock(PIPE_MUTEX(*inode)); return 0; } diff --git a/fs/quota.c b/fs/quota.c index 612e04db4b93..d14d872646d4 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -168,7 +168,7 @@ static void quota_sync_sb(struct super_block *sb, int type) 
sync_blockdev(sb->s_bdev); /* Now when everything is written we can discard the pagecache so - * that userspace sees the changes. We need i_sem and so we could + * that userspace sees the changes. We need i_mutex and so we could * not do it inside dqonoff_sem. Moreover we need to be carefull * about races with quotaoff() (that is the reason why we have own * reference to inode). */ @@ -184,9 +184,9 @@ static void quota_sync_sb(struct super_block *sb, int type) up(&sb_dqopt(sb)->dqonoff_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (discard[cnt]) { - down(&discard[cnt]->i_sem); + mutex_lock(&discard[cnt]->i_mutex); truncate_inode_pages(&discard[cnt]->i_data, 0); - up(&discard[cnt]->i_sem); + mutex_unlock(&discard[cnt]->i_mutex); iput(discard[cnt]); } } diff --git a/fs/read_write.c b/fs/read_write.c index df3468a22fea..3f7a1a62165f 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -33,7 +33,7 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) long long retval; struct inode *inode = file->f_mapping->host; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); switch (origin) { case 2: offset += inode->i_size; @@ -49,7 +49,7 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) } retval = offset; } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return retval; } diff --git a/fs/readdir.c b/fs/readdir.c index b03579bc0210..b6109329b607 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -30,13 +30,13 @@ int vfs_readdir(struct file *file, filldir_t filler, void *buf) if (res) goto out; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); res = -ENOENT; if (!IS_DEADDIR(inode)) { res = file->f_op->readdir(file, buf, filler); file_accessed(file); } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); out: return res; } diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 7892a865b58a..127e7d2cabdd 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -49,7 +49,7 @@ static int reiserfs_file_release(struct 
inode *inode, struct file *filp) } reiserfs_write_lock(inode->i_sb); - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); /* freeing preallocation only involves relogging blocks that * are already in the current transaction. preallocation gets * freed at the end of each transaction, so it is impossible for @@ -100,7 +100,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) err = reiserfs_truncate_file(inode, 0); } out: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); reiserfs_write_unlock(inode->i_sb); return err; } @@ -1342,7 +1342,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t if (unlikely(!access_ok(VERIFY_READ, buf, count))) return -EFAULT; - down(&inode->i_sem); // locks the entire file for just us + mutex_lock(&inode->i_mutex); // locks the entire file for just us pos = *ppos; @@ -1532,12 +1532,12 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t generic_osync_inode(inode, file->f_mapping, OSYNC_METADATA | OSYNC_DATA); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); reiserfs_async_progress_wait(inode->i_sb); return (already_written != 0) ? already_written : res; out: - up(&inode->i_sem); // unlock the file on exit. + mutex_unlock(&inode->i_mutex); // unlock the file on exit. return res; } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a5e3a0ddbe53..ffa34b861bdb 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -40,12 +40,12 @@ void reiserfs_delete_inode(struct inode *inode) /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. 
*/ if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); reiserfs_delete_xattrs(inode); if (journal_begin(&th, inode->i_sb, jbegin_count)) { - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); goto out; } reiserfs_update_inode_transaction(inode); @@ -59,11 +59,11 @@ void reiserfs_delete_inode(struct inode *inode) DQUOT_FREE_INODE(inode); if (journal_end(&th, inode->i_sb, jbegin_count)) { - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); goto out; } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); /* check return value from reiserfs_delete_object after * ending the transaction @@ -551,7 +551,7 @@ static int convert_tail_for_hole(struct inode *inode, /* we don't have to make sure the conversion did not happen while ** we were locking the page because anyone that could convert - ** must first take i_sem. + ** must first take i_mutex. ** ** We must fix the tail page for writing because it might have buffers ** that are mapped, but have a block number of 0. This indicates tail @@ -586,7 +586,7 @@ static inline int _allocate_block(struct reiserfs_transaction_handle *th, BUG_ON(!th->t_trans_id); #ifdef REISERFS_PREALLOCATE - if (!(flags & GET_BLOCK_NO_ISEM)) { + if (!(flags & GET_BLOCK_NO_IMUX)) { return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, path, block); } @@ -2318,7 +2318,7 @@ static int map_block_for_writepage(struct inode *inode, /* this is where we fill in holes in the file. 
*/ if (use_get_block) { retval = reiserfs_get_block(inode, block, bh_result, - GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM + GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX | GET_BLOCK_NO_DANGLE); if (!retval) { if (!buffer_mapped(bh_result) diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 81fc00285f60..ba8bf8df6dc7 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -120,7 +120,7 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp) /* we need to make sure nobody is changing the file size beneath ** us */ - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); write_from = inode->i_size & (blocksize - 1); /* if we are on a block boundary, we are already unpacked. */ @@ -156,7 +156,7 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp) page_cache_release(page); out: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); reiserfs_write_unlock(inode->i_sb); return retval; } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 42afb5bef111..397d9590c8f2 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2211,7 +2211,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, size_t towrite = len; struct buffer_head tmp_bh, *bh; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; @@ -2250,7 +2250,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, inode->i_version++; inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return len - towrite; } diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c index c92e124f628e..196e971c03c9 100644 --- a/fs/reiserfs/tail_conversion.c +++ b/fs/reiserfs/tail_conversion.c @@ -205,7 +205,7 @@ int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_in 1) * p_s_sb->s_blocksize; pos1 = pos; - // we are protected by i_sem. 
The tail can not disapper, not + // we are protected by i_mutex. The tail can not disapper, not // append can be done either // we are in truncate or packing tail in file_release diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 02091eaac0b4..f1895f0a278e 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -67,11 +67,11 @@ static struct dentry *create_xa_root(struct super_block *sb) goto out; } else if (!xaroot->d_inode) { int err; - down(&privroot->d_inode->i_sem); + mutex_lock(&privroot->d_inode->i_mutex); err = privroot->d_inode->i_op->mkdir(privroot->d_inode, xaroot, 0700); - up(&privroot->d_inode->i_sem); + mutex_unlock(&privroot->d_inode->i_mutex); if (err) { dput(xaroot); @@ -219,7 +219,7 @@ static struct dentry *get_xa_file_dentry(const struct inode *inode, } else if (flags & XATTR_REPLACE || flags & FL_READONLY) { goto out; } else { - /* inode->i_sem is down, so nothing else can try to create + /* inode->i_mutex is down, so nothing else can try to create * the same xattr */ err = xadir->d_inode->i_op->create(xadir->d_inode, xafile, 0700 | S_IFREG, NULL); @@ -268,7 +268,7 @@ static struct file *open_xa_file(const struct inode *inode, const char *name, * and don't mess with f->f_pos, but the idea is the same. Do some * action on each and every entry in the directory. * - * we're called with i_sem held, so there are no worries about the directory + * we're called with i_mutex held, so there are no worries about the directory * changing underneath us. 
*/ static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir) @@ -426,7 +426,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf) int res = -ENOTDIR; if (!file->f_op || !file->f_op->readdir) goto out; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); // down(&inode->i_zombie); res = -ENOENT; if (!IS_DEADDIR(inode)) { @@ -435,7 +435,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf) unlock_kernel(); } // up(&inode->i_zombie); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); out: return res; } @@ -480,7 +480,7 @@ static inline __u32 xattr_hash(const char *msg, int len) /* Generic extended attribute operations that can be used by xa plugins */ /* - * inode->i_sem: down + * inode->i_mutex: down */ int reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, @@ -535,7 +535,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, /* Resize it so we're ok to write there */ newattrs.ia_size = buffer_size; newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; - down(&xinode->i_sem); + mutex_lock(&xinode->i_mutex); err = notify_change(fp->f_dentry, &newattrs); if (err) goto out_filp; @@ -598,7 +598,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, } out_filp: - up(&xinode->i_sem); + mutex_unlock(&xinode->i_mutex); fput(fp); out: @@ -606,7 +606,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, } /* - * inode->i_sem: down + * inode->i_mutex: down */ int reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer, @@ -793,7 +793,7 @@ reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen, } -/* This is called w/ inode->i_sem downed */ +/* This is called w/ inode->i_mutex downed */ int reiserfs_delete_xattrs(struct inode *inode) { struct file *fp; @@ -946,7 +946,7 @@ int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs) /* * Inode operation getxattr() - * 
Preliminary locking: we down dentry->d_inode->i_sem + * Preliminary locking: we down dentry->d_inode->i_mutex */ ssize_t reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, @@ -970,7 +970,7 @@ reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, /* * Inode operation setxattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_mutex down */ int reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, @@ -1008,7 +1008,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, /* * Inode operation removexattr() * - * dentry->d_inode->i_sem down + * dentry->d_inode->i_mutex down */ int reiserfs_removexattr(struct dentry *dentry, const char *name) { @@ -1091,7 +1091,7 @@ reiserfs_listxattr_filler(void *buf, const char *name, int namelen, /* * Inode operation listxattr() * - * Preliminary locking: we down dentry->d_inode->i_sem + * Preliminary locking: we down dentry->d_inode->i_mutex */ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) { @@ -1289,9 +1289,9 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) if (!IS_ERR(dentry)) { if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) { struct inode *inode = dentry->d_parent->d_inode; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); err = inode->i_op->mkdir(inode, dentry, 0700); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); if (err) { dput(dentry); dentry = NULL; diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index a47ac9aac8b2..2dc953504cc0 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -174,7 +174,7 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size) /* * Inode operation get_posix_acl(). 
* - * inode->i_sem: down + * inode->i_mutex: down * BKL held [before 2.5.x] */ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) @@ -237,7 +237,7 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) /* * Inode operation set_posix_acl(). * - * inode->i_sem: down + * inode->i_mutex: down * BKL held [before 2.5.x] */ static int @@ -312,7 +312,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) return error; } -/* dir->i_sem: down, +/* dir->i_mutex: locked, * inode is new and not released into the wild yet */ int reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry, diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c index 7b7f2cb5f0e1..383523011aad 100644 --- a/fs/relayfs/inode.c +++ b/fs/relayfs/inode.c @@ -109,7 +109,7 @@ static struct dentry *relayfs_create_entry(const char *name, } parent = dget(parent); - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); d = lookup_one_len(name, parent, strlen(name)); if (IS_ERR(d)) { d = NULL; @@ -139,7 +139,7 @@ release_mount: simple_release_fs(&relayfs_mount, &relayfs_mount_count); exit: - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); dput(parent); return d; } @@ -204,7 +204,7 @@ int relayfs_remove(struct dentry *dentry) return -EINVAL; parent = dget(parent); - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); if (dentry->d_inode) { if (S_ISDIR(dentry->d_inode->i_mode)) error = simple_rmdir(parent->d_inode, dentry); @@ -215,7 +215,7 @@ int relayfs_remove(struct dentry *dentry) } if (!error) dput(dentry); - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); dput(parent); if (!error) @@ -476,7 +476,7 @@ static ssize_t relay_file_read(struct file *filp, ssize_t ret = 0; void *from; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if(!relay_file_read_avail(buf, *ppos)) goto out; @@ -494,7 +494,7 @@ static ssize_t relay_file_read(struct file *filp, 
relay_file_read_consume(buf, read_start, count); *ppos = relay_file_read_end_pos(buf, read_start, count); out: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return ret; } diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index d36780382176..49bd219275db 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -99,7 +99,7 @@ static int create_dir(struct kobject * k, struct dentry * p, int error; umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; - down(&p->d_inode->i_sem); + mutex_lock(&p->d_inode->i_mutex); *d = lookup_one_len(n, p, strlen(n)); if (!IS_ERR(*d)) { error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR); @@ -122,7 +122,7 @@ static int create_dir(struct kobject * k, struct dentry * p, dput(*d); } else error = PTR_ERR(*d); - up(&p->d_inode->i_sem); + mutex_unlock(&p->d_inode->i_mutex); return error; } @@ -246,7 +246,7 @@ static void remove_dir(struct dentry * d) struct dentry * parent = dget(d->d_parent); struct sysfs_dirent * sd; - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); d_delete(d); sd = d->d_fsdata; list_del_init(&sd->s_sibling); @@ -257,7 +257,7 @@ static void remove_dir(struct dentry * d) pr_debug(" o %s removing done (%d)\n",d->d_name.name, atomic_read(&d->d_count)); - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); dput(parent); } @@ -286,7 +286,7 @@ void sysfs_remove_dir(struct kobject * kobj) return; pr_debug("sysfs %s: removing dir\n",dentry->d_name.name); - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); parent_sd = dentry->d_fsdata; list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED)) @@ -295,7 +295,7 @@ void sysfs_remove_dir(struct kobject * kobj) sysfs_drop_dentry(sd, dentry); sysfs_put(sd); } - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); remove_dir(dentry); /** @@ -318,7 +318,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) 
down_write(&sysfs_rename_sem); parent = kobj->parent->dentry; - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); if (!IS_ERR(new_dentry)) { @@ -334,7 +334,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) error = -EEXIST; dput(new_dentry); } - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); up_write(&sysfs_rename_sem); return error; @@ -345,9 +345,9 @@ static int sysfs_dir_open(struct inode *inode, struct file *file) struct dentry * dentry = file->f_dentry; struct sysfs_dirent * parent_sd = dentry->d_fsdata; - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); file->private_data = sysfs_new_dirent(parent_sd, NULL); - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); return file->private_data ? 0 : -ENOMEM; @@ -358,9 +358,9 @@ static int sysfs_dir_close(struct inode *inode, struct file *file) struct dentry * dentry = file->f_dentry; struct sysfs_dirent * cursor = file->private_data; - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); list_del_init(&cursor->s_sibling); - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); release_sysfs_dirent(cursor); @@ -436,7 +436,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin) { struct dentry * dentry = file->f_dentry; - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); switch (origin) { case 1: offset += file->f_pos; @@ -444,7 +444,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin) if (offset >= 0) break; default: - up(&file->f_dentry->d_inode->i_sem); + mutex_unlock(&file->f_dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { @@ -468,7 +468,7 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin) list_add_tail(&cursor->s_sibling, p); } } - up(&dentry->d_inode->i_sem); + 
mutex_unlock(&dentry->d_inode->i_mutex); return offset; } @@ -483,4 +483,3 @@ struct file_operations sysfs_dir_operations = { EXPORT_SYMBOL_GPL(sysfs_create_dir); EXPORT_SYMBOL_GPL(sysfs_remove_dir); EXPORT_SYMBOL_GPL(sysfs_rename_dir); - diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 4013d7905e84..d0e3d8495165 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -364,9 +364,9 @@ int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type) umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG; int error = 0; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); error = sysfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type); - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); return error; } @@ -398,7 +398,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr) struct dentry * victim; int res = -ENOENT; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); victim = lookup_one_len(attr->name, dir, strlen(attr->name)); if (!IS_ERR(victim)) { /* make sure dentry is really there */ @@ -420,7 +420,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr) */ dput(victim); } - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); return res; } @@ -441,22 +441,22 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) struct iattr newattrs; int res = -ENOENT; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); victim = lookup_one_len(attr->name, dir, strlen(attr->name)); if (!IS_ERR(victim)) { if (victim->d_inode && (victim->d_parent->d_inode == dir->d_inode)) { inode = victim->d_inode; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; res = notify_change(victim, &newattrs); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); } dput(victim); } - up(&dir->d_inode->i_sem); + 
mutex_unlock(&dir->d_inode->i_mutex); return res; } @@ -480,4 +480,3 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) EXPORT_SYMBOL_GPL(sysfs_create_file); EXPORT_SYMBOL_GPL(sysfs_remove_file); EXPORT_SYMBOL_GPL(sysfs_update_file); - diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 970a33f03299..c3133219941c 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -201,7 +201,7 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd) /* * Unhashes the dentry corresponding to given sysfs_dirent - * Called with parent inode's i_sem held. + * Called with parent inode's i_mutex held. */ void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) { @@ -232,7 +232,7 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name) /* no inode means this hasn't been made visible yet */ return; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { if (!sd->s_element) continue; @@ -243,7 +243,5 @@ void sysfs_hash_and_remove(struct dentry * dir, const char * name) break; } } - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); } - - diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index de402fa915f2..e38d6338a20d 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -86,9 +86,9 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char BUG_ON(!kobj || !kobj->dentry || !name); - down(&dentry->d_inode->i_sem); + mutex_lock(&dentry->d_inode->i_mutex); error = sysfs_add_link(dentry, name, target); - up(&dentry->d_inode->i_sem); + mutex_unlock(&dentry->d_inode->i_mutex); return error; } @@ -177,4 +177,3 @@ struct inode_operations sysfs_symlink_inode_operations = { EXPORT_SYMBOL_GPL(sysfs_create_link); EXPORT_SYMBOL_GPL(sysfs_remove_link); - diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 2ba11a9aa995..e9a42c711a9e 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1275,7 +1275,7 @@ static 
ssize_t ufs_quota_write(struct super_block *sb, int type, size_t towrite = len; struct buffer_head *bh; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; @@ -1297,7 +1297,7 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type, } out: if (len == towrite) { - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return err; } if (inode->i_size < off+len-towrite) @@ -1305,7 +1305,7 @@ out: inode->i_version++; inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return len - towrite; } diff --git a/fs/xattr.c b/fs/xattr.c index bcc2156d4d28..386a532ee5a9 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -51,7 +51,7 @@ setxattr(struct dentry *d, char __user *name, void __user *value, } } - down(&d->d_inode->i_sem); + mutex_lock(&d->d_inode->i_mutex); error = security_inode_setxattr(d, kname, kvalue, size, flags); if (error) goto out; @@ -73,7 +73,7 @@ setxattr(struct dentry *d, char __user *name, void __user *value, fsnotify_xattr(d); } out: - up(&d->d_inode->i_sem); + mutex_unlock(&d->d_inode->i_mutex); kfree(kvalue); return error; } @@ -323,9 +323,9 @@ removexattr(struct dentry *d, char __user *name) error = security_inode_removexattr(d, kname); if (error) goto out; - down(&d->d_inode->i_sem); + mutex_lock(&d->d_inode->i_mutex); error = d->d_inode->i_op->removexattr(d, kname); - up(&d->d_inode->i_sem); + mutex_unlock(&d->d_inode->i_mutex); if (!error) fsnotify_xattr(d); } diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 14215a7db59f..41c478bb1ffc 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -203,7 +203,7 @@ validate_fields( ip->i_nlink = va.va_nlink; ip->i_blocks = va.va_nblocks; - /* we're under i_sem so i_size can't change under us */ + /* we're under i_mutex so i_size can't change under us */ if (i_size_read(ip) != 
va.va_size) i_size_write(ip, va.va_size); } diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 279e9bc92aba..5675117ef227 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -254,7 +254,7 @@ xfs_read( } if (unlikely(ioflags & IO_ISDIRECT)) - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); xfs_ilock(ip, XFS_IOLOCK_SHARED); if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && @@ -286,7 +286,7 @@ xfs_read( unlock_isem: if (unlikely(ioflags & IO_ISDIRECT)) - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return ret; } @@ -655,7 +655,7 @@ relock: iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); } else { iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; @@ -686,7 +686,7 @@ start: int dmflags = FILP_DELAY_FLAG(file); if (need_isem) - dmflags |= DM_FLAGS_ISEM; + dmflags |= DM_FLAGS_IMUX; xfs_iunlock(xip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, @@ -772,7 +772,7 @@ retry: if (need_isem) { /* demote the lock now the cached pages are gone */ XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; @@ -817,14 +817,14 @@ retry: xfs_rwunlock(bdp, locktype); if (need_isem) - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (error) goto out_nounlocks; if (need_isem) - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); xfs_rwlock(bdp, locktype); pos = xip->i_d.di_size; ret = 0; @@ -926,7 +926,7 @@ retry: xfs_rwunlock(bdp, locktype); if (need_isem) - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); error = sync_page_range(inode, mapping, pos, ret); if (!error) @@ -938,7 +938,7 @@ retry: xfs_rwunlock(bdp, locktype); out_unlock_isem: if (need_isem) - up(&inode->i_sem); 
+ mutex_unlock(&inode->i_mutex); out_nounlocks: return -error; } diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index 864bf6955689..b4c7f2bc55a0 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h @@ -152,7 +152,7 @@ typedef enum { #define DM_FLAGS_NDELAY 0x001 /* return EAGAIN after dm_pending() */ #define DM_FLAGS_UNWANTED 0x002 /* event not in fsys dm_eventset_t */ -#define DM_FLAGS_ISEM 0x004 /* thread holds i_sem */ +#define DM_FLAGS_IMUX 0x004 /* thread holds i_mutex */ #define DM_FLAGS_IALLOCSEM_RD 0x010 /* thread holds i_alloc_sem rd */ #define DM_FLAGS_IALLOCSEM_WR 0x020 /* thread holds i_alloc_sem wr */ @@ -161,21 +161,21 @@ typedef enum { */ #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0) #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ - DM_FLAGS_ISEM : 0) -#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM) + DM_FLAGS_IMUX : 0) +#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX) #endif #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \ (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22)) #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ - DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_ISEM) -#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_ISEM) + DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_IMUX) +#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX) #endif #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21) #define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \ - 0 : DM_FLAGS_ISEM) -#define DM_SEM_FLAG_WR (DM_FLAGS_ISEM) + 0 : DM_FLAGS_IMUX) +#define DM_SEM_FLAG_WR (DM_FLAGS_IMUX) #endif diff --git a/include/linux/ext3_fs_i.h b/include/linux/ext3_fs_i.h index 2914f7b07156..e71dd98dbcae 100644 --- a/include/linux/ext3_fs_i.h +++ b/include/linux/ext3_fs_i.h @@ -87,7 +87,7 @@ struct ext3_inode_info { #ifdef CONFIG_EXT3_FS_XATTR /* * Extended attributes can be read independently of the main file - * data. Taking i_sem even when reading would cause contention + * data. 
Taking i_mutex even when reading would cause contention * between readers of EAs and writers of regular file data, so * instead we synchronize on xattr_sem when reading or changing * EAs. diff --git a/include/linux/fs.h b/include/linux/fs.h index 4c82219b0fae..01654b218e42 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -219,6 +219,7 @@ extern int dir_notify_enable; #include #include #include +#include #include #include @@ -484,7 +485,7 @@ struct inode { unsigned long i_blocks; unsigned short i_bytes; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ - struct semaphore i_sem; + struct mutex i_mutex; struct rw_semaphore i_alloc_sem; struct inode_operations *i_op; struct file_operations *i_fop; /* former ->i_op->default_file_ops */ @@ -1191,7 +1192,7 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc); * directory. The name should be stored in the @name (with the * understanding that it is already pointing to a a %NAME_MAX+1 sized * buffer. get_name() should return %0 on success, a negative error code - * or error. @get_name will be called without @parent->i_sem held. + * or error. @get_name will be called without @parent->i_mutex held. * * get_parent: * @get_parent should find the parent directory for the given @child which @@ -1213,7 +1214,7 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc); * nfsd_find_fh_dentry() in either the @obj or @parent parameters. * * Locking rules: - * get_parent is called with child->d_inode->i_sem down + * get_parent is called with child->d_inode->i_mutex down * get_name is not (which is possibly inconsistent) */ diff --git a/include/linux/jffs2_fs_i.h b/include/linux/jffs2_fs_i.h index ef85ab56302b..ad565bf9dcc1 100644 --- a/include/linux/jffs2_fs_i.h +++ b/include/linux/jffs2_fs_i.h @@ -8,11 +8,11 @@ #include struct jffs2_inode_info { - /* We need an internal semaphore similar to inode->i_sem. + /* We need an internal mutex similar to inode->i_mutex. 
Unfortunately, we can't used the existing one, because either the GC would deadlock, or we'd have to release it before letting GC proceed. Or we'd have to put ugliness - into the GC code so it didn't attempt to obtain the i_sem + into the GC code so it didn't attempt to obtain the i_mutex for the inode(s) which are already locked */ struct semaphore sem; diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index bb842ea41033..0798b7781a6e 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -294,7 +294,7 @@ fill_post_wcc(struct svc_fh *fhp) /* * Lock a file handle/inode * NOTE: both fh_lock and fh_unlock are done "by hand" in - * vfs.c:nfsd_rename as it needs to grab 2 i_sem's at once + * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once * so, any changes here should be reflected there. */ static inline void @@ -317,7 +317,7 @@ fh_lock(struct svc_fh *fhp) } inode = dentry->d_inode; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); fill_pre_wcc(fhp); fhp->fh_locked = 1; } @@ -333,7 +333,7 @@ fh_unlock(struct svc_fh *fhp) if (fhp->fh_locked) { fill_post_wcc(fhp); - up(&fhp->fh_dentry->d_inode->i_sem); + mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex); fhp->fh_locked = 0; } } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 1767073df26f..b12e59c75752 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -37,7 +37,7 @@ struct pipe_inode_info { memory allocation, whereas PIPE_BUF makes atomicity guarantees. 
*/ #define PIPE_SIZE PAGE_SIZE -#define PIPE_SEM(inode) (&(inode).i_sem) +#define PIPE_MUTEX(inode) (&(inode).i_mutex) #define PIPE_WAIT(inode) (&(inode).i_pipe->wait) #define PIPE_READERS(inode) ((inode).i_pipe->readers) #define PIPE_WRITERS(inode) ((inode).i_pipe->writers) diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 001ab82df051..e276c5ba2bb7 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -1857,7 +1857,7 @@ void padd_item(char *item, int total_length, int length); #define GET_BLOCK_CREATE 1 /* add anything you need to find block */ #define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ #define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ -#define GET_BLOCK_NO_ISEM 8 /* i_sem is not held, don't preallocate */ +#define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */ #define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */ int restart_transaction(struct reiserfs_transaction_handle *th, diff --git a/ipc/mqueue.c b/ipc/mqueue.c index c8943b53d8e6..a8aa6152eea6 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -660,7 +660,7 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode, if (fd < 0) goto out_putname; - down(&mqueue_mnt->mnt_root->d_inode->i_sem); + mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex); dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name)); if (IS_ERR(dentry)) { error = PTR_ERR(dentry); @@ -697,7 +697,7 @@ out_putfd: out_err: fd = error; out_upsem: - up(&mqueue_mnt->mnt_root->d_inode->i_sem); + mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex); out_putname: putname(name); return fd; @@ -714,7 +714,7 @@ asmlinkage long sys_mq_unlink(const char __user *u_name) if (IS_ERR(name)) return PTR_ERR(name); - down(&mqueue_mnt->mnt_root->d_inode->i_sem); + mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex); dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name)); if 
(IS_ERR(dentry)) { err = PTR_ERR(dentry); @@ -735,7 +735,7 @@ out_err: dput(dentry); out_unlock: - up(&mqueue_mnt->mnt_root->d_inode->i_sem); + mutex_unlock(&mqueue_mnt->mnt_root->d_inode->i_mutex); putname(name); if (inode) iput(inode); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index eab64e23bcae..2a75e44e1a41 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1513,7 +1513,7 @@ static int cpuset_add_file(struct dentry *dir, const struct cftype *cft) struct dentry *dentry; int error; - down(&dir->d_inode->i_sem); + mutex_lock(&dir->d_inode->i_mutex); dentry = cpuset_get_dentry(dir, cft->name); if (!IS_ERR(dentry)) { error = cpuset_create_file(dentry, 0644 | S_IFREG); @@ -1522,7 +1522,7 @@ static int cpuset_add_file(struct dentry *dir, const struct cftype *cft) dput(dentry); } else error = PTR_ERR(dentry); - up(&dir->d_inode->i_sem); + mutex_unlock(&dir->d_inode->i_mutex); return error; } @@ -1793,7 +1793,7 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode) /* * Release manage_sem before cpuset_populate_dir() because it - * will down() this new directory's i_sem and if we race with + * will down() this new directory's i_mutex and if we race with * another mkdir, we might deadlock. 
*/ up(&manage_sem); @@ -1812,7 +1812,7 @@ static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode) { struct cpuset *c_parent = dentry->d_parent->d_fsdata; - /* the vfs holds inode->i_sem already */ + /* the vfs holds inode->i_mutex already */ return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR); } @@ -1823,7 +1823,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) struct cpuset *parent; char *pathbuf = NULL; - /* the vfs holds both inode->i_sem already */ + /* the vfs holds both inode->i_mutex already */ down(&manage_sem); cpuset_update_task_memory_state(); diff --git a/mm/filemap.c b/mm/filemap.c index 478f4c74cc31..5fca2737c971 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -61,7 +61,7 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * ->swap_lock (exclusive_swap_page, others) * ->mapping->tree_lock * - * ->i_sem + * ->i_mutex * ->i_mmap_lock (truncate->unmap_mapping_range) * * ->mmap_sem @@ -73,9 +73,9 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * ->lock_page (access_process_vm) * * ->mmap_sem - * ->i_sem (msync) + * ->i_mutex (msync) * - * ->i_sem + * ->i_mutex * ->i_alloc_sem (various) * * ->inode_lock @@ -276,7 +276,7 @@ static int wait_on_page_writeback_range(struct address_space *mapping, * integrity" operation. It waits upon in-flight writeout before starting and * waiting upon new writeout. If there was an IO error, return it. * - * We need to re-take i_sem during the generic_osync_inode list walk because + * We need to re-take i_mutex during the generic_osync_inode list walk because * it is otherwise livelockable. 
*/ int sync_page_range(struct inode *inode, struct address_space *mapping, @@ -290,9 +290,9 @@ int sync_page_range(struct inode *inode, struct address_space *mapping, return 0; ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1); if (ret == 0) { - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); ret = generic_osync_inode(inode, mapping, OSYNC_METADATA); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); } if (ret == 0) ret = wait_on_page_writeback_range(mapping, start, end); @@ -301,7 +301,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping, EXPORT_SYMBOL(sync_page_range); /* - * Note: Holding i_sem across sync_page_range_nolock is not a good idea + * Note: Holding i_mutex across sync_page_range_nolock is not a good idea * as it forces O_SYNC writers to different parts of the same file * to be serialised right until io completion. */ @@ -1892,7 +1892,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, /* * Sync the fs metadata but not the minor inode changes and * of course not the data as we did direct DMA for the IO. - * i_sem is held, which protects generic_osync_inode() from + * i_mutex is held, which protects generic_osync_inode() from * livelocking. 
*/ if (written >= 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { @@ -2195,10 +2195,10 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf, BUG_ON(iocb->ki_pos != pos); - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { ssize_t err; @@ -2220,9 +2220,9 @@ ssize_t generic_file_write(struct file *file, const char __user *buf, struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count }; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); ret = __generic_file_write_nolock(file, &local_iov, 1, ppos); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { ssize_t err; @@ -2256,9 +2256,9 @@ ssize_t generic_file_writev(struct file *file, const struct iovec *iov, struct inode *inode = mapping->host; ssize_t ret; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); ret = __generic_file_write_nolock(file, iov, nr_segs, ppos); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { int err; @@ -2272,7 +2272,7 @@ ssize_t generic_file_writev(struct file *file, const struct iovec *iov, EXPORT_SYMBOL(generic_file_writev); /* - * Called under i_sem for writes to S_ISREG files. Returns -EIO if something + * Called under i_mutex for writes to S_ISREG files. Returns -EIO if something * went wrong during pagecache shootdown. */ static ssize_t diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 9cf687e4a29a..e2b34e95913e 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -338,7 +338,7 @@ __xip_file_write(struct file *filp, const char __user *buf, *ppos = pos; /* * No need to use i_size_read() here, the i_size - * cannot change under us because we hold i_sem. + * cannot change under us because we hold i_mutex. 
*/ if (pos > inode->i_size) { i_size_write(inode, pos); @@ -358,7 +358,7 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, loff_t pos; ssize_t ret; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (!access_ok(VERIFY_READ, buf, len)) { ret=-EFAULT; @@ -390,7 +390,7 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, out_backing: current->backing_dev_info = NULL; out_up: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return ret; } EXPORT_SYMBOL_GPL(xip_file_write); diff --git a/mm/memory.c b/mm/memory.c index 3944fec38012..7a11ddd5060f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1784,13 +1784,13 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) if (!inode->i_op || !inode->i_op->truncate_range) return -ENOSYS; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); down_write(&inode->i_alloc_sem); unmap_mapping_range(mapping, offset, (end - offset), 1); truncate_inode_pages_range(mapping, offset, end); inode->i_op->truncate_range(inode, offset, end); up_write(&inode->i_alloc_sem); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return 0; } diff --git a/mm/msync.c b/mm/msync.c index 1b5b6f662dcf..3563a56e1a51 100644 --- a/mm/msync.c +++ b/mm/msync.c @@ -137,7 +137,7 @@ static int msync_interval(struct vm_area_struct *vma, ret = filemap_fdatawrite(mapping); if (file->f_op && file->f_op->fsync) { /* - * We don't take i_sem here because mmap_sem + * We don't take i_mutex here because mmap_sem * is already held. 
*/ err = file->f_op->fsync(file,file->f_dentry,1); diff --git a/mm/rmap.c b/mm/rmap.c index 66ec43053a4d..dfbb89f99a15 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -20,13 +20,13 @@ /* * Lock ordering in mm: * - * inode->i_sem (while writing or truncating, not reading or faulting) + * inode->i_mutex (while writing or truncating, not reading or faulting) * inode->i_alloc_sem * * When a page fault occurs in writing from user to file, down_read - * of mmap_sem nests within i_sem; in sys_msync, i_sem nests within - * down_read of mmap_sem; i_sem and down_write of mmap_sem are never - * taken together; in truncation, i_sem is taken outermost. + * of mmap_sem nests within i_mutex; in sys_msync, i_mutex nests within + * down_read of mmap_sem; i_mutex and down_write of mmap_sem are never + * taken together; in truncation, i_mutex is taken outermost. * * mm->mmap_sem * page->flags PG_locked (lock_page) diff --git a/mm/shmem.c b/mm/shmem.c index a1f2f02af724..343b3c0937e5 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1370,7 +1370,7 @@ shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t if (!access_ok(VERIFY_READ, buf, count)) return -EFAULT; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); pos = *ppos; written = 0; @@ -1455,7 +1455,7 @@ shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t if (written) err = written; out: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return err; } @@ -1491,7 +1491,7 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ /* * We must evaluate after, since reads (unlike writes) - * are called without i_sem protection against truncate + * are called without i_mutex protection against truncate */ nr = PAGE_CACHE_SIZE; i_size = i_size_read(inode); diff --git a/mm/swapfile.c b/mm/swapfile.c index 80f948a2028b..6544565a7c0f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1187,9 +1187,9 @@ asmlinkage long sys_swapoff(const char __user * specialfile) 
set_blocksize(bdev, p->old_block_size); bd_release(bdev); } else { - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); inode->i_flags &= ~S_SWAPFILE; - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); } filp_close(swap_file, NULL); err = 0; @@ -1406,7 +1406,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) p->bdev = bdev; } else if (S_ISREG(inode->i_mode)) { p->bdev = inode->i_sb->s_bdev; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); did_down = 1; if (IS_SWAPFILE(inode)) { error = -EBUSY; @@ -1596,7 +1596,7 @@ out: if (did_down) { if (!error) inode->i_flags |= S_SWAPFILE; - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); } return error; } diff --git a/mm/truncate.c b/mm/truncate.c index b1a463d0fe71..6cb3fff25f67 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -196,7 +196,7 @@ EXPORT_SYMBOL(truncate_inode_pages_range); * @mapping: mapping to truncate * @lstart: offset from which to truncate * - * Called under (and serialised by) inode->i_sem. + * Called under (and serialised by) inode->i_mutex. 
*/ void truncate_inode_pages(struct address_space *mapping, loff_t lstart) { diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index e14c1cae7460..9764c80ab0b2 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -69,13 +69,13 @@ rpc_timeout_upcall_queue(void *data) struct rpc_inode *rpci = (struct rpc_inode *)data; struct inode *inode = &rpci->vfs_inode; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (rpci->ops == NULL) goto out; if (rpci->nreaders == 0 && !list_empty(&rpci->pipe)) __rpc_purge_upcall(inode, -ETIMEDOUT); out: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); } int @@ -84,7 +84,7 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) struct rpc_inode *rpci = RPC_I(inode); int res = -EPIPE; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (rpci->ops == NULL) goto out; if (rpci->nreaders) { @@ -100,7 +100,7 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) res = 0; } out: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); wake_up(&rpci->waitq); return res; } @@ -116,7 +116,7 @@ rpc_close_pipes(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (rpci->ops != NULL) { rpci->nreaders = 0; __rpc_purge_list(rpci, &rpci->in_upcall, -EPIPE); @@ -127,7 +127,7 @@ rpc_close_pipes(struct inode *inode) rpci->ops = NULL; } rpc_inode_setowner(inode, NULL); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); cancel_delayed_work(&rpci->queue_timeout); flush_scheduled_work(); } @@ -154,7 +154,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp) struct rpc_inode *rpci = RPC_I(inode); int res = -ENXIO; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (rpci->ops != NULL) { if (filp->f_mode & FMODE_READ) rpci->nreaders ++; @@ -162,7 +162,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp) rpci->nwriters ++; res = 0; } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return res; } @@ -172,7 +172,7 @@ 
rpc_pipe_release(struct inode *inode, struct file *filp) struct rpc_inode *rpci = RPC_I(inode); struct rpc_pipe_msg *msg; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (rpci->ops == NULL) goto out; msg = (struct rpc_pipe_msg *)filp->private_data; @@ -190,7 +190,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp) if (rpci->ops->release_pipe) rpci->ops->release_pipe(inode); out: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return 0; } @@ -202,7 +202,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) struct rpc_pipe_msg *msg; int res = 0; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); if (rpci->ops == NULL) { res = -EPIPE; goto out_unlock; @@ -229,7 +229,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) rpci->ops->destroy_msg(msg); } out_unlock: - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return res; } @@ -240,11 +240,11 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of struct rpc_inode *rpci = RPC_I(inode); int res; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); res = -EPIPE; if (rpci->ops != NULL) res = rpci->ops->downcall(filp, buf, len); - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); return res; } @@ -322,7 +322,7 @@ rpc_info_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *m = file->private_data; - down(&inode->i_sem); + mutex_lock(&inode->i_mutex); clnt = RPC_I(inode)->private; if (clnt) { atomic_inc(&clnt->cl_users); @@ -331,7 +331,7 @@ rpc_info_open(struct inode *inode, struct file *file) single_release(inode, file); ret = -EINVAL; } - up(&inode->i_sem); + mutex_unlock(&inode->i_mutex); } return ret; } @@ -491,7 +491,7 @@ rpc_depopulate(struct dentry *parent) struct dentry *dentry, *dvec[10]; int n = 0; - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); repeat: spin_lock(&dcache_lock); list_for_each_safe(pos, next, &parent->d_subdirs) { @@ -519,7 +519,7 @@ repeat: } while (n); 
goto repeat; } - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); } static int @@ -532,7 +532,7 @@ rpc_populate(struct dentry *parent, struct dentry *dentry; int mode, i; - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); for (i = start; i < eof; i++) { dentry = d_alloc_name(parent, files[i].name); if (!dentry) @@ -552,10 +552,10 @@ rpc_populate(struct dentry *parent, dir->i_nlink++; d_add(dentry, inode); } - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); return 0; out_bad: - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); printk(KERN_WARNING "%s: %s failed to populate directory %s\n", __FILE__, __FUNCTION__, parent->d_name.name); return -ENOMEM; @@ -609,7 +609,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd) if ((error = rpc_lookup_parent(path, nd)) != 0) return ERR_PTR(error); dir = nd->dentry->d_inode; - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); dentry = lookup_hash(nd); if (IS_ERR(dentry)) goto out_err; @@ -620,7 +620,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd) } return dentry; out_err: - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); rpc_release_path(nd); return dentry; } @@ -646,7 +646,7 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client) if (error) goto err_depopulate; out: - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); return dentry; err_depopulate: @@ -671,7 +671,7 @@ rpc_rmdir(char *path) if ((error = rpc_lookup_parent(path, &nd)) != 0) return error; dir = nd.dentry->d_inode; - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); dentry = lookup_hash(&nd); if (IS_ERR(dentry)) { error = PTR_ERR(dentry); @@ -681,7 +681,7 @@ rpc_rmdir(char *path) error = __rpc_rmdir(dir, dentry); dput(dentry); out_release: - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); return error; } @@ -710,7 +710,7 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags) rpci->ops = ops; inode_dir_notify(dir, DN_CREATE); out: - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); 
rpc_release_path(&nd); return dentry; err_dput: @@ -732,7 +732,7 @@ rpc_unlink(char *path) if ((error = rpc_lookup_parent(path, &nd)) != 0) return error; dir = nd.dentry->d_inode; - down(&dir->i_sem); + mutex_lock(&dir->i_mutex); dentry = lookup_hash(&nd); if (IS_ERR(dentry)) { error = PTR_ERR(dentry); @@ -746,7 +746,7 @@ rpc_unlink(char *path) dput(dentry); inode_dir_notify(dir, DN_DELETE); out_release: - up(&dir->i_sem); + mutex_unlock(&dir->i_mutex); rpc_release_path(&nd); return error; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5f6ae79b8b16..1b5989b1b670 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -784,7 +784,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0); if (err) goto out_mknod_dput; - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); dput(nd.dentry); nd.dentry = dentry; @@ -823,7 +823,7 @@ out: out_mknod_dput: dput(dentry); out_mknod_unlock: - up(&nd.dentry->d_inode->i_sem); + mutex_unlock(&nd.dentry->d_inode->i_mutex); path_release(&nd); out_mknod_parent: if (err==-EEXIST) diff --git a/security/inode.c b/security/inode.c index a5964502ae30..0f77b0223662 100644 --- a/security/inode.c +++ b/security/inode.c @@ -172,7 +172,7 @@ static int create_by_name(const char *name, mode_t mode, return -EFAULT; } - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); *dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(dentry)) { if ((mode & S_IFMT) == S_IFDIR) @@ -181,7 +181,7 @@ static int create_by_name(const char *name, mode_t mode, error = create(parent->d_inode, *dentry, mode); } else error = PTR_ERR(dentry); - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); return error; } @@ -302,7 +302,7 @@ void securityfs_remove(struct dentry *dentry) if (!parent || !parent->d_inode) return; - down(&parent->d_inode->i_sem); + mutex_lock(&parent->d_inode->i_mutex); if 
(positive(dentry)) { if (dentry->d_inode) { if (S_ISDIR(dentry->d_inode->i_mode)) @@ -312,7 +312,7 @@ void securityfs_remove(struct dentry *dentry) dput(dentry); } } - up(&parent->d_inode->i_sem); + mutex_unlock(&parent->d_inode->i_mutex); simple_release_fs(&mount, &mount_count); } EXPORT_SYMBOL_GPL(securityfs_remove); diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 16df1246a131..7fd072392c7e 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -2135,9 +2135,7 @@ static ssize_t snd_pcm_oss_write(struct file *file, const char __user *buf, size substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; if (substream == NULL) return -ENXIO; - up(&file->f_dentry->d_inode->i_sem); result = snd_pcm_oss_write1(substream, buf, count); - down(&file->f_dentry->d_inode->i_sem); #ifdef OSS_DEBUG printk("pcm_oss: write %li bytes (wrote %li bytes)\n", (long)count, (long)result); #endif diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index 9ee6c177db0c..40b4f679c80e 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -32,10 +32,6 @@ #include "seq_info.h" #include "seq_lock.h" -/* semaphore in struct file record */ -#define semaphore_of(fp) ((fp)->f_dentry->d_inode->i_sem) - - static inline int snd_seq_pool_available(struct snd_seq_pool *pool) { return pool->total_elements - atomic_read(&pool->counter); -- cgit v1.2.3-71-gd317 From 7892f2f48d165a34b0b8130c8a195dfd807b8cb6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 9 Jan 2006 15:59:25 -0800 Subject: [PATCH] mutex subsystem, semaphore to mutex: VFS, sb->s_lock This patch converts the superblock-lock semaphore to a mutex, affecting lock_super()/unlock_super(). Tested on ext3 and XFS. 
Signed-off-by: Ingo Molnar --- fs/ext3/super.c | 2 +- fs/ocfs2/super.c | 2 +- fs/super.c | 2 +- include/linux/fs.h | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/ext3/super.c b/fs/ext3/super.c index c3dbebdb9897..56bf76586019 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2150,7 +2150,7 @@ int ext3_force_commit(struct super_block *sb) static void ext3_write_super (struct super_block * sb) { - if (down_trylock(&sb->s_lock) == 0) + if (mutex_trylock(&sb->s_lock) != 0) BUG(); sb->s_dirt = 0; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 48bf7f0ce544..364d64bd5f10 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -169,7 +169,7 @@ static match_table_t tokens = { */ static void ocfs2_write_super(struct super_block *sb) { - if (down_trylock(&sb->s_lock) == 0) + if (mutex_trylock(&sb->s_lock) != 0) BUG(); sb->s_dirt = 0; } diff --git a/fs/super.c b/fs/super.c index 0a30e51692cf..c177b92419c5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -72,7 +72,7 @@ static struct super_block *alloc_super(void) INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); init_rwsem(&s->s_umount); - sema_init(&s->s_lock, 1); + mutex_init(&s->s_lock); down_write(&s->s_umount); s->s_count = S_BIAS; atomic_set(&s->s_active, 1); diff --git a/include/linux/fs.h b/include/linux/fs.h index 01654b218e42..92ae3e2067b0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -821,7 +821,7 @@ struct super_block { unsigned long s_magic; struct dentry *s_root; struct rw_semaphore s_umount; - struct semaphore s_lock; + struct mutex s_lock; int s_count; int s_syncing; int s_need_sync_fs; @@ -893,13 +893,13 @@ static inline int has_fs_excl(void) static inline void lock_super(struct super_block * sb) { get_fs_excl(); - down(&sb->s_lock); + mutex_lock(&sb->s_lock); } static inline void unlock_super(struct super_block * sb) { put_fs_excl(); - up(&sb->s_lock); + mutex_unlock(&sb->s_lock); } /* -- cgit v1.2.3-71-gd317 From 
f36d4024caa3790606e43228a574157c45b73b22 Mon Sep 17 00:00:00 2001 From: Aleksey Makarov Date: Mon, 9 Jan 2006 15:59:27 -0800 Subject: [PATCH] mutex subsystem, semaphore to completion: IDE ->gendev_rel_sem The patch changes semaphores that are initialized as locked to complete(). Source: MontaVista Software, Inc. Modified-by: Steven Rostedt The following patch is from Montavista. I modified it slightly. Semaphores are currently being used where it makes more sense for completions. This patch corrects that. Signed-off-by: Aleksey Makarov Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- drivers/ide/ide-probe.c | 4 ++-- drivers/ide/ide.c | 8 ++++---- include/linux/ide.h | 5 +++-- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 1ddaa71a8f45..7cb2d86601db 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -655,7 +655,7 @@ static void hwif_release_dev (struct device *dev) { ide_hwif_t *hwif = container_of(dev, ide_hwif_t, gendev); - up(&hwif->gendev_rel_sem); + complete(&hwif->gendev_rel_comp); } static void hwif_register (ide_hwif_t *hwif) @@ -1327,7 +1327,7 @@ static void drive_release_dev (struct device *dev) drive->queue = NULL; spin_unlock_irq(&ide_lock); - up(&drive->gendev_rel_sem); + complete(&drive->gendev_rel_comp); } /* diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index b069b13b75a7..ec5a4cb173b0 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -222,7 +222,7 @@ static void init_hwif_data(ide_hwif_t *hwif, unsigned int index) hwif->mwdma_mask = 0x80; /* disable all mwdma */ hwif->swdma_mask = 0x80; /* disable all swdma */ - sema_init(&hwif->gendev_rel_sem, 0); + init_completion(&hwif->gendev_rel_comp); default_hwif_iops(hwif); default_hwif_transport(hwif); @@ -245,7 +245,7 @@ static void init_hwif_data(ide_hwif_t *hwif, unsigned int index) drive->is_flash = 0; drive->vdma = 0; INIT_LIST_HEAD(&drive->list); - 
sema_init(&drive->gendev_rel_sem, 0); + init_completion(&drive->gendev_rel_comp); } } @@ -602,7 +602,7 @@ void ide_unregister(unsigned int index) } spin_unlock_irq(&ide_lock); device_unregister(&drive->gendev); - down(&drive->gendev_rel_sem); + wait_for_completion(&drive->gendev_rel_comp); spin_lock_irq(&ide_lock); } hwif->present = 0; @@ -662,7 +662,7 @@ void ide_unregister(unsigned int index) /* More messed up locking ... */ spin_unlock_irq(&ide_lock); device_unregister(&hwif->gendev); - down(&hwif->gendev_rel_sem); + wait_for_completion(&hwif->gendev_rel_comp); /* * Remove us from the kernel's knowledge diff --git a/include/linux/ide.h b/include/linux/ide.h index ef8d0cbb832f..9a8c05dbe4f3 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -638,7 +639,7 @@ typedef struct ide_drive_s { int crc_count; /* crc counter to reduce drive speed */ struct list_head list; struct device gendev; - struct semaphore gendev_rel_sem; /* to deal with device release() */ + struct completion gendev_rel_comp; /* to deal with device release() */ } ide_drive_t; #define to_ide_device(dev)container_of(dev, ide_drive_t, gendev) @@ -794,7 +795,7 @@ typedef struct hwif_s { unsigned sg_mapped : 1; /* sg_table and sg_nents are ready */ struct device gendev; - struct semaphore gendev_rel_sem; /* To deal with device release() */ + struct completion gendev_rel_comp; /* To deal with device release() */ void *hwif_data; /* extra hwif data */ -- cgit v1.2.3-71-gd317 From 11b751ae8c8ca3fa24c85bd5a3e51dd9f95cda17 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 9 Jan 2006 15:59:27 -0800 Subject: [PATCH] mutex subsystem, semaphore to completion: drivers/block/loop.c convert the block loop device from semaphores to completions. 
Signed-off-by: Ingo Molnar --- drivers/block/loop.c | 27 ++++++++++++--------------- include/linux/loop.h | 4 ++-- 2 files changed, 14 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index bed9ad76c04c..864729046e22 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -527,12 +527,12 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio) lo->lo_pending++; loop_add_bio(lo, old_bio); spin_unlock_irq(&lo->lo_lock); - up(&lo->lo_bh_mutex); + complete(&lo->lo_bh_done); return 0; out: if (lo->lo_pending == 0) - up(&lo->lo_bh_mutex); + complete(&lo->lo_bh_done); spin_unlock_irq(&lo->lo_lock); bio_io_error(old_bio, old_bio->bi_size); return 0; @@ -593,23 +593,20 @@ static int loop_thread(void *data) lo->lo_pending = 1; /* - * up sem, we are running + * complete it, we are running */ - up(&lo->lo_sem); + complete(&lo->lo_done); for (;;) { int pending; - /* - * interruptible just to not contribute to load avg - */ - if (down_interruptible(&lo->lo_bh_mutex)) + if (wait_for_completion_interruptible(&lo->lo_bh_done)) continue; spin_lock_irq(&lo->lo_lock); /* - * could be upped because of tear-down, not pending work + * could be completed because of tear-down, not pending work */ if (unlikely(!lo->lo_pending)) { spin_unlock_irq(&lo->lo_lock); @@ -632,7 +629,7 @@ static int loop_thread(void *data) break; } - up(&lo->lo_sem); + complete(&lo->lo_done); return 0; } @@ -843,7 +840,7 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file, set_blocksize(bdev, lo_blocksize); kernel_thread(loop_thread, lo, CLONE_KERNEL); - down(&lo->lo_sem); + wait_for_completion(&lo->lo_done); return 0; out_putf: @@ -909,10 +906,10 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) lo->lo_state = Lo_rundown; lo->lo_pending--; if (!lo->lo_pending) - up(&lo->lo_bh_mutex); + complete(&lo->lo_bh_done); spin_unlock_irq(&lo->lo_lock); - down(&lo->lo_sem); + 
wait_for_completion(&lo->lo_done); lo->lo_backing_file = NULL; @@ -1289,8 +1286,8 @@ static int __init loop_init(void) if (!lo->lo_queue) goto out_mem4; init_MUTEX(&lo->lo_ctl_mutex); - init_MUTEX_LOCKED(&lo->lo_sem); - init_MUTEX_LOCKED(&lo->lo_bh_mutex); + init_completion(&lo->lo_done); + init_completion(&lo->lo_bh_done); lo->lo_number = i; spin_lock_init(&lo->lo_lock); disk->major = LOOP_MAJOR; diff --git a/include/linux/loop.h b/include/linux/loop.h index 40f63c9879d2..f96506782ebe 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -58,9 +58,9 @@ struct loop_device { struct bio *lo_bio; struct bio *lo_biotail; int lo_state; - struct semaphore lo_sem; + struct completion lo_done; + struct completion lo_bh_done; struct semaphore lo_ctl_mutex; - struct semaphore lo_bh_mutex; int lo_pending; request_queue_t *lo_queue; -- cgit v1.2.3-71-gd317 From 8b4ad5e3ff94409973e824716c65568f0d97364c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 9 Jan 2006 21:38:23 -0800 Subject: [MUTEX]: linux/mutex-debug.h needs linux/linkage.h For FASTCALL() define. Signed-off-by: David S. Miller --- include/linux/mutex-debug.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h index 0ccd8f983b50..8138d9eb58ec 100644 --- a/include/linux/mutex-debug.h +++ b/include/linux/mutex-debug.h @@ -1,6 +1,8 @@ #ifndef __LINUX_MUTEX_DEBUG_H #define __LINUX_MUTEX_DEBUG_H +#include + /* * Mutexes - debugging helpers: */ -- cgit v1.2.3-71-gd317 From e329113ca437e44ec399b7ffe114ed36e84ccf5e Mon Sep 17 00:00:00 2001 From: Ben Gardner Date: Mon, 9 Jan 2006 20:51:29 -0800 Subject: [PATCH] i386: GPIO driver for AMD CS5535/CS5536 A simple driver for the CS5535 and CS5536 that allows a user-space program to manipulate GPIO pins. The CS5535/CS5536 chips are Geode processor companion devices. 
Signed-off-by: Ben Gardner Signed-off-by: Richard Knutsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/Kconfig | 9 ++ drivers/char/Makefile | 1 + drivers/char/cs5535_gpio.c | 250 ++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 9 ++ sound/pci/cs5535audio/cs5535audio.c | 2 +- 5 files changed, 270 insertions(+), 1 deletion(-) create mode 100644 drivers/char/cs5535_gpio.c (limited to 'include/linux') diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index dd7e6901c575..77286eb5826d 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -936,6 +936,15 @@ config SCx200_GPIO If compiled as a module, it will be called scx200_gpio. +config CS5535_GPIO + tristate "AMD CS5535/CS5536 GPIO (Geode Companion Device)" + depends on X86_32 + help + Give userspace access to the GPIO pins on the AMD CS5535 and + CS5536 Geode companion devices. + + If compiled as a module, it will be called cs5535_gpio. + config GPIO_VR41XX tristate "NEC VR4100 series General-purpose I/O Unit support" depends on CPU_VR41XX diff --git a/drivers/char/Makefile b/drivers/char/Makefile index d973d14d8f7f..503dd901d406 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -81,6 +81,7 @@ obj-$(CONFIG_PPDEV) += ppdev.o obj-$(CONFIG_NWBUTTON) += nwbutton.o obj-$(CONFIG_NWFLASH) += nwflash.o obj-$(CONFIG_SCx200_GPIO) += scx200_gpio.o +obj-$(CONFIG_CS5535_GPIO) += cs5535_gpio.o obj-$(CONFIG_GPIO_VR41XX) += vr41xx_giu.o obj-$(CONFIG_TANBAC_TB0219) += tb0219.o obj-$(CONFIG_TELCLOCK) += tlclk.o diff --git a/drivers/char/cs5535_gpio.c b/drivers/char/cs5535_gpio.c new file mode 100644 index 000000000000..5d72f50de1ac --- /dev/null +++ b/drivers/char/cs5535_gpio.c @@ -0,0 +1,250 @@ +/* + * AMD CS5535/CS5536 GPIO driver. + * Allows a user space process to play with the GPIO pins. 
+ * + * Copyright (c) 2005 Ben Gardner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define NAME "cs5535_gpio" + +MODULE_AUTHOR("Ben Gardner "); +MODULE_DESCRIPTION("AMD CS5535/CS5536 GPIO Pin Driver"); +MODULE_LICENSE("GPL"); + +static int major; +module_param(major, int, 0); +MODULE_PARM_DESC(major, "Major device number"); + +static ulong mask; +module_param(mask, ulong, 0); +MODULE_PARM_DESC(mask, "GPIO channel mask"); + +#define MSR_LBAR_GPIO 0x5140000C + +static u32 gpio_base; + +static struct pci_device_id divil_pci[] = { + { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_CS5535_ISA) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA) }, + { } /* NULL entry */ +}; + +static struct cdev cs5535_gpio_cdev; + +/* reserve 32 entries even though some aren't usable */ +#define CS5535_GPIO_COUNT 32 + +/* IO block size */ +#define CS5535_GPIO_SIZE 256 + +struct gpio_regmap { + u32 rd_offset; + u32 wr_offset; + char on; + char off; +}; +static struct gpio_regmap rm[] = +{ + { 0x30, 0x00, '1', '0' }, /* GPIOx_READ_BACK / GPIOx_OUT_VAL */ + { 0x20, 0x20, 'I', 'i' }, /* GPIOx_IN_EN */ + { 0x04, 0x04, 'O', 'o' }, /* GPIOx_OUT_EN */ + { 0x08, 0x08, 't', 'T' }, /* GPIOx_OUT_OD_EN */ + { 0x18, 0x18, 'P', 'p' }, /* GPIOx_OUT_PU_EN */ + { 0x1c, 0x1c, 'D', 'd' }, /* GPIOx_OUT_PD_EN */ +}; + + +/** + * Gets the register offset for the GPIO bank. 
+ * Low (0-15) starts at 0x00, high (16-31) starts at 0x80 + */ +static inline u32 cs5535_lowhigh_base(int reg) +{ + return (reg & 0x10) << 3; +} + +static ssize_t cs5535_gpio_write(struct file *file, const char __user *data, + size_t len, loff_t *ppos) +{ + u32 m = iminor(file->f_dentry->d_inode); + int i, j; + u32 base = gpio_base + cs5535_lowhigh_base(m); + u32 m0, m1; + char c; + + /** + * Creates the mask for atomic bit programming. + * The high 16 bits and the low 16 bits are used to set the mask. + * For example, GPIO 15 maps to 31,15: 0,1 => On; 1,0=> Off + */ + m1 = 1 << (m & 0x0F); + m0 = m1 << 16; + + for (i = 0; i < len; ++i) { + if (get_user(c, data+i)) + return -EFAULT; + + for (j = 0; j < ARRAY_SIZE(rm); j++) { + if (c == rm[j].on) { + outl(m1, base + rm[j].wr_offset); + break; + } else if (c == rm[j].off) { + outl(m0, base + rm[j].wr_offset); + break; + } + } + } + *ppos = 0; + return len; +} + +static ssize_t cs5535_gpio_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + u32 m = iminor(file->f_dentry->d_inode); + u32 base = gpio_base + cs5535_lowhigh_base(m); + int rd_bit = 1 << (m & 0x0f); + int i; + char ch; + ssize_t count = 0; + + if (*ppos >= ARRAY_SIZE(rm)) + return 0; + + for (i = *ppos; (i < (*ppos + len)) && (i < ARRAY_SIZE(rm)); i++) { + ch = (inl(base + rm[i].rd_offset) & rd_bit) ? 
+ rm[i].on : rm[i].off; + + if (put_user(ch, buf+count)) + return -EFAULT; + + count++; + } + + /* add a line-feed if there is room */ + if ((i == ARRAY_SIZE(rm)) && (count < len)) { + put_user('\n', buf + count); + count++; + } + + *ppos += count; + return count; +} + +static int cs5535_gpio_open(struct inode *inode, struct file *file) +{ + u32 m = iminor(inode); + + /* the mask says which pins are usable by this driver */ + if ((mask & (1 << m)) == 0) + return -EINVAL; + + return nonseekable_open(inode, file); +} + +static struct file_operations cs5535_gpio_fops = { + .owner = THIS_MODULE, + .write = cs5535_gpio_write, + .read = cs5535_gpio_read, + .open = cs5535_gpio_open +}; + +static int __init cs5535_gpio_init(void) +{ + dev_t dev_id; + u32 low, hi; + int retval; + + if (pci_dev_present(divil_pci) == 0) { + printk(KERN_WARNING NAME ": DIVIL not found\n"); + return -ENODEV; + } + + /* Grab the GPIO I/O range */ + rdmsr(MSR_LBAR_GPIO, low, hi); + + /* Check the mask and whether GPIO is enabled (sanity check) */ + if (hi != 0x0000f001) { + printk(KERN_WARNING NAME ": GPIO not enabled\n"); + return -ENODEV; + } + + /* Mask off the IO base address */ + gpio_base = low & 0x0000ff00; + + /** + * Some GPIO pins + * 31-29,23 : reserved (always mask out) + * 28 : Power Button + * 26 : PME# + * 22-16 : LPC + * 14,15 : SMBus + * 9,8 : UART1 + * 7 : PCI INTB + * 3,4 : UART2/DDC + * 2 : IDE_IRQ0 + * 0 : PCI INTA + * + * If a mask was not specified, be conservative and only allow: + * 1,2,5,6,10-13,24,25,27 + */ + if (mask != 0) + mask &= 0x1f7fffff; + else + mask = 0x0b003c66; + + if (request_region(gpio_base, CS5535_GPIO_SIZE, NAME) == 0) { + printk(KERN_ERR NAME ": can't allocate I/O for GPIO\n"); + return -ENODEV; + } + + if (major) { + dev_id = MKDEV(major, 0); + retval = register_chrdev_region(dev_id, CS5535_GPIO_COUNT, + NAME); + } else { + retval = alloc_chrdev_region(&dev_id, 0, CS5535_GPIO_COUNT, + NAME); + major = MAJOR(dev_id); + } + + if (retval) { + 
release_region(gpio_base, CS5535_GPIO_SIZE); + return -1; + } + + printk(KERN_DEBUG NAME ": base=%#x mask=%#lx major=%d\n", + gpio_base, mask, major); + + cdev_init(&cs5535_gpio_cdev, &cs5535_gpio_fops); + cdev_add(&cs5535_gpio_cdev, dev_id, CS5535_GPIO_COUNT); + + return 0; +} + +static void __exit cs5535_gpio_cleanup(void) +{ + dev_t dev_id = MKDEV(major, 0); + unregister_chrdev_region(dev_id, CS5535_GPIO_COUNT); + if (gpio_base != 0) + release_region(gpio_base, CS5535_GPIO_SIZE); +} + +module_init(cs5535_gpio_init); +module_exit(cs5535_gpio_cleanup); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c3caa93efb10..100eba0f4771 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -377,6 +377,7 @@ #define PCI_DEVICE_ID_NS_87560_USB 0x0012 #define PCI_DEVICE_ID_NS_83815 0x0020 #define PCI_DEVICE_ID_NS_83820 0x0022 +#define PCI_DEVICE_ID_NS_CS5535_ISA 0x002b #define PCI_DEVICE_ID_NS_CS5535_IDE 0x002d #define PCI_DEVICE_ID_NS_CS5535_AUDIO 0x002e #define PCI_DEVICE_ID_NS_CS5535_USB 0x002f @@ -500,6 +501,14 @@ #define PCI_DEVICE_ID_AMD_8111_AUDIO 0x746d #define PCI_DEVICE_ID_AMD_8151_0 0x7454 #define PCI_DEVICE_ID_AMD_8131_APIC 0x7450 +#define PCI_DEVICE_ID_AMD_CS5536_ISA 0x2090 +#define PCI_DEVICE_ID_AMD_CS5536_FLASH 0x2091 +#define PCI_DEVICE_ID_AMD_CS5536_AUDIO 0x2093 +#define PCI_DEVICE_ID_AMD_CS5536_OHC 0x2094 +#define PCI_DEVICE_ID_AMD_CS5536_EHC 0x2095 +#define PCI_DEVICE_ID_AMD_CS5536_UDC 0x2096 +#define PCI_DEVICE_ID_AMD_CS5536_UOC 0x2097 +#define PCI_DEVICE_ID_AMD_CS5536_IDE 0x209A #define PCI_DEVICE_ID_AMD_CS5536_IDE 0x209A diff --git a/sound/pci/cs5535audio/cs5535audio.c b/sound/pci/cs5535audio/cs5535audio.c index 202c7cf3e328..f36ede827479 100644 --- a/sound/pci/cs5535audio/cs5535audio.c +++ b/sound/pci/cs5535audio/cs5535audio.c @@ -385,7 +385,7 @@ static struct pci_driver driver = { static int __init alsa_card_cs5535audio_init(void) { - return pci_module_init(&driver); + return pci_register_driver(&driver); } static void 
__exit alsa_card_cs5535audio_exit(void) -- cgit v1.2.3-71-gd317 From 0ad42352c01788e41a33336577fdd270d8de55bb Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 9 Jan 2006 20:51:31 -0800 Subject: [PATCH] Add list_for_each_entry_safe_reverse() Add list_for_each_entry_safe_reverse() to linux/list.h This is needed by unmerged cachefs and by an as-yet-unreviewed device_shutdown() fix. Signed-off-by: David Howells Cc: Patrick Mochel Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 8e3388284530..945daa1f13dd 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -435,6 +435,20 @@ static inline void list_splice_init(struct list_head *list, &pos->member != (head); \ pos = n, n = list_entry(n->member.next, typeof(*n), member)) +/** + * list_for_each_entry_safe_reverse - iterate backwards over list of given type safe against + * removal of list entry + * @pos: the type * to use as a loop counter. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe_reverse(pos, n, head, member) \ + for (pos = list_entry((head)->prev, typeof(*pos), member), \ + n = list_entry(pos->member.prev, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.prev, typeof(*n), member)) + /** * list_for_each_rcu - iterate over an rcu-protected list * @pos: the &struct list_head to use as a loop counter. 
-- cgit v1.2.3-71-gd317 From df2e71fb9115a8d4f721fb1464db09adc8332bc5 Mon Sep 17 00:00:00 2001 From: "akpm@osdl.org" Date: Mon, 9 Jan 2006 20:51:37 -0800 Subject: [PATCH] dump_thread() cleanup ) From: Adrian Bunk - create one common dump_thread() prototype in kernel.h - dump_thread() is only used in fs/binfmt_aout.c and can therefore be removed on all architectures where CONFIG_BINFMT_AOUT is not available Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/alpha_ksyms.c | 1 - arch/alpha/mm/init.c | 1 + arch/arm26/kernel/armksyms.c | 1 - arch/cris/kernel/crisksyms.c | 2 -- arch/cris/kernel/process.c | 28 ---------------------- arch/frv/kernel/frv_ksyms.c | 2 -- arch/frv/kernel/process.c | 22 ------------------ arch/h8300/kernel/h8300_ksyms.c | 3 --- arch/h8300/kernel/process.c | 28 ---------------------- arch/m32r/kernel/m32r_ksyms.c | 3 --- arch/m32r/kernel/process.c | 8 ------- arch/m68k/kernel/m68k_ksyms.c | 2 -- arch/m68knommu/kernel/m68k_ksyms.c | 2 -- arch/m68knommu/kernel/process.c | 46 ------------------------------------- arch/s390/kernel/process.c | 21 ----------------- arch/sh/kernel/process.c | 20 ---------------- arch/sh/kernel/sh_ksyms.c | 2 -- arch/sh64/kernel/process.c | 20 ---------------- arch/sh64/kernel/sh_ksyms.c | 2 -- arch/sparc/kernel/sparc_ksyms.c | 2 -- arch/sparc64/kernel/binfmt_aout32.c | 2 -- arch/sparc64/kernel/sparc64_ksyms.c | 2 -- arch/v850/kernel/process.c | 24 ------------------- arch/v850/kernel/v850_ksyms.c | 2 -- fs/binfmt_aout.c | 2 -- fs/binfmt_flat.c | 2 -- include/asm-um/processor-generic.h | 1 - include/linux/kernel.h | 4 ++++ 28 files changed, 5 insertions(+), 250 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c index f3e98f837784..1898ea79d0e2 100644 --- a/arch/alpha/kernel/alpha_ksyms.c +++ b/arch/alpha/kernel/alpha_ksyms.c @@ -40,7 +40,6 @@ #include extern struct hwrpb_struct *hwrpb; -extern 
void dump_thread(struct pt_regs *, struct user *); extern spinlock_t rtc_lock; /* these are C runtime functions with special calling conventions: */ diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 90752f6d8867..486d7945583d 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -7,6 +7,7 @@ /* 2.3.x zone allocator, 1999 Andrea Arcangeli */ #include +#include #include #include #include diff --git a/arch/arm26/kernel/armksyms.c b/arch/arm26/kernel/armksyms.c index 35514b398e2e..811a6376c624 100644 --- a/arch/arm26/kernel/armksyms.c +++ b/arch/arm26/kernel/armksyms.c @@ -35,7 +35,6 @@ #include #include -extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu(struct pt_regs *, struct user_fp_struct *); extern void inswb(unsigned int port, void *to, int len); extern void outswb(unsigned int port, const void *to, int len); diff --git a/arch/cris/kernel/crisksyms.c b/arch/cris/kernel/crisksyms.c index 85833d704ebb..de39725da920 100644 --- a/arch/cris/kernel/crisksyms.c +++ b/arch/cris/kernel/crisksyms.c @@ -21,7 +21,6 @@ #include #include -extern void dump_thread(struct pt_regs *, struct user *); extern unsigned long get_cmos_time(void); extern void __Udiv(void); extern void __Umod(void); @@ -33,7 +32,6 @@ extern void __lshrdi3(void); extern void iounmap(volatile void * __iomem); /* Platform dependent support */ -EXPORT_SYMBOL(dump_thread); EXPORT_SYMBOL(kernel_thread); EXPORT_SYMBOL(get_cmos_time); EXPORT_SYMBOL(loops_per_usec); diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 7c80afb10460..4ab3e87115b6 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -257,34 +257,6 @@ void flush_thread(void) { } -/* - * fill in the user structure for a core dump.. - */ -void dump_thread(struct pt_regs * regs, struct user * dump) -{ -#if 0 - int i; - - /* changed the size calculations - should hopefully work better. 
lbt */ - dump->magic = CMAGIC; - dump->start_code = 0; - dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); - dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; - dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; - dump->u_dsize -= dump->u_tsize; - dump->u_ssize = 0; - for (i = 0; i < 8; i++) - dump->u_debugreg[i] = current->debugreg[i]; - - if (dump->start_stack < TASK_SIZE) - dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; - - dump->regs = *regs; - - dump->u_fpvalid = dump_fpu (regs, &dump->i387); -#endif -} - /* Fill in the fpu structure for a core dump. */ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) { diff --git a/arch/frv/kernel/frv_ksyms.c b/arch/frv/kernel/frv_ksyms.c index 5f118c89d091..0f1c6cbc4f50 100644 --- a/arch/frv/kernel/frv_ksyms.c +++ b/arch/frv/kernel/frv_ksyms.c @@ -18,7 +18,6 @@ #include #include -extern void dump_thread(struct pt_regs *, struct user *); extern long __memcpy_user(void *dst, const void *src, size_t count); extern long __memset_user(void *dst, const void *src, size_t count); @@ -27,7 +26,6 @@ extern long __memset_user(void *dst, const void *src, size_t count); EXPORT_SYMBOL(__ioremap); EXPORT_SYMBOL(iounmap); -EXPORT_SYMBOL(dump_thread); EXPORT_SYMBOL(strnlen); EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(strstr); diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 54a452136f00..c4488379ac3b 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -243,28 +243,6 @@ int copy_thread(int nr, unsigned long clone_flags, return 0; } /* end copy_thread() */ -/* - * fill in the user structure for a core dump.. - */ -void dump_thread(struct pt_regs *regs, struct user *dump) -{ -#if 0 - /* changed the size calculations - should hopefully work better. 
lbt */ - dump->magic = CMAGIC; - dump->start_code = 0; - dump->start_stack = user_stack(regs) & ~(PAGE_SIZE - 1); - dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; - dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; - dump->u_dsize -= dump->u_tsize; - dump->u_ssize = 0; - - if (dump->start_stack < TASK_SIZE) - dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; - - dump->regs = *(struct user_context *) regs; -#endif -} - /* * sys_execve() executes a new program. */ diff --git a/arch/h8300/kernel/h8300_ksyms.c b/arch/h8300/kernel/h8300_ksyms.c index 5a630233112f..3e0d80ea4464 100644 --- a/arch/h8300/kernel/h8300_ksyms.c +++ b/arch/h8300/kernel/h8300_ksyms.c @@ -22,11 +22,8 @@ //asmlinkage long long __lshrdi3 (long long, int); extern char h8300_debug_device[]; -extern void dump_thread(struct pt_regs *, struct user *); - /* platform dependent support */ -EXPORT_SYMBOL(dump_thread); EXPORT_SYMBOL(strnlen); EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(strstr); diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index fe21adf3e75e..585ed5efd0f7 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -207,34 +207,6 @@ int copy_thread(int nr, unsigned long clone_flags, return 0; } -/* - * fill in the user structure for a core dump.. - */ -void dump_thread(struct pt_regs * regs, struct user * dump) -{ -/* changed the size calculations - should hopefully work better. 
lbt */ - dump->magic = CMAGIC; - dump->start_code = 0; - dump->start_stack = rdusp() & ~(PAGE_SIZE - 1); - dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; - dump->u_dsize = ((unsigned long) (current->mm->brk + - (PAGE_SIZE-1))) >> PAGE_SHIFT; - dump->u_dsize -= dump->u_tsize; - dump->u_ssize = 0; - - dump->u_ar0 = (struct user_regs_struct *)(((int)(&dump->regs)) -((int)(dump))); - dump->regs.er0 = regs->er0; - dump->regs.er1 = regs->er1; - dump->regs.er2 = regs->er2; - dump->regs.er3 = regs->er3; - dump->regs.er4 = regs->er4; - dump->regs.er5 = regs->er5; - dump->regs.er6 = regs->er6; - dump->regs.orig_er0 = regs->orig_er0; - dump->regs.ccr = regs->ccr; - dump->regs.pc = regs->pc; -} - /* * sys_execve() executes a new program. */ diff --git a/arch/m32r/kernel/m32r_ksyms.c b/arch/m32r/kernel/m32r_ksyms.c index e5ec134d81d9..dbc8a392105f 100644 --- a/arch/m32r/kernel/m32r_ksyms.c +++ b/arch/m32r/kernel/m32r_ksyms.c @@ -18,8 +18,6 @@ #include #include -extern void dump_thread(struct pt_regs *, struct user *); - #if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) extern struct drive_info_struct drive_info; EXPORT_SYMBOL(drive_info); @@ -27,7 +25,6 @@ EXPORT_SYMBOL(drive_info); /* platform dependent support */ EXPORT_SYMBOL(boot_cpu_data); -EXPORT_SYMBOL(dump_thread); EXPORT_SYMBOL(dump_fpu); EXPORT_SYMBOL(__ioremap); EXPORT_SYMBOL(iounmap); diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 3bf55d92933f..2a1f250349b7 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -260,14 +260,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long spu, return 0; } -/* - * fill in the user structure for a core dump.. 
- */ -void dump_thread(struct pt_regs * regs, struct user * dump) -{ - /* M32R_FIXME */ -} - /* * Capture the user space registers if the task is not running (in user space) */ diff --git a/arch/m68k/kernel/m68k_ksyms.c b/arch/m68k/kernel/m68k_ksyms.c index 73e2f5e168dd..3d7f2000b714 100644 --- a/arch/m68k/kernel/m68k_ksyms.c +++ b/arch/m68k/kernel/m68k_ksyms.c @@ -23,8 +23,6 @@ asmlinkage long long __lshrdi3 (long long, int); asmlinkage long long __muldi3 (long long, long long); extern char m68k_debug_device[]; -extern void dump_thread(struct pt_regs *, struct user *); - /* platform dependent support */ EXPORT_SYMBOL(m68k_machtype); diff --git a/arch/m68knommu/kernel/m68k_ksyms.c b/arch/m68knommu/kernel/m68k_ksyms.c index b2c62eeb3bab..eddb8d3e130a 100644 --- a/arch/m68knommu/kernel/m68k_ksyms.c +++ b/arch/m68knommu/kernel/m68k_ksyms.c @@ -18,7 +18,6 @@ #include #include -extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu(struct pt_regs *, elf_fpregset_t *); /* platform dependent support */ @@ -26,7 +25,6 @@ extern int dump_fpu(struct pt_regs *, elf_fpregset_t *); EXPORT_SYMBOL(__ioremap); EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(dump_thread); EXPORT_SYMBOL(strnlen); EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(strstr); diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c index 82e7ec888806..8b3cf57ba706 100644 --- a/arch/m68knommu/kernel/process.c +++ b/arch/m68knommu/kernel/process.c @@ -275,52 +275,6 @@ int dump_fpu(struct pt_regs *regs, struct user_m68kfp_struct *fpu) return 1; } -/* - * fill in the user structure for a core dump.. - */ -void dump_thread(struct pt_regs * regs, struct user * dump) -{ - struct switch_stack *sw; - - /* changed the size calculations - should hopefully work better. 
lbt */ - dump->magic = CMAGIC; - dump->start_code = 0; - dump->start_stack = rdusp() & ~(PAGE_SIZE - 1); - dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; - dump->u_dsize = ((unsigned long) (current->mm->brk + - (PAGE_SIZE-1))) >> PAGE_SHIFT; - dump->u_dsize -= dump->u_tsize; - dump->u_ssize = 0; - - if (dump->start_stack < TASK_SIZE) - dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; - - dump->u_ar0 = (struct user_regs_struct *)((int)&dump->regs - (int)dump); - sw = ((struct switch_stack *)regs) - 1; - dump->regs.d1 = regs->d1; - dump->regs.d2 = regs->d2; - dump->regs.d3 = regs->d3; - dump->regs.d4 = regs->d4; - dump->regs.d5 = regs->d5; - dump->regs.d6 = sw->d6; - dump->regs.d7 = sw->d7; - dump->regs.a0 = regs->a0; - dump->regs.a1 = regs->a1; - dump->regs.a2 = regs->a2; - dump->regs.a3 = sw->a3; - dump->regs.a4 = sw->a4; - dump->regs.a5 = sw->a5; - dump->regs.a6 = sw->a6; - dump->regs.d0 = regs->d0; - dump->regs.orig_d0 = regs->orig_d0; - dump->regs.stkadj = regs->stkadj; - dump->regs.sr = regs->sr; - dump->regs.pc = regs->pc; - dump->regs.fmtvec = (regs->format << 12) | regs->vector; - /* dump floating point stuff */ - dump->u_fpvalid = dump_fpu (regs, &dump->m68kfp); -} - /* * Generic dumping code. Used for panic and debug. */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index a942bf2d58e9..7dd58f8ac6b5 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -352,27 +352,6 @@ int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs) return 1; } -/* - * fill in the user structure for a core dump.. - */ -void dump_thread(struct pt_regs * regs, struct user * dump) -{ - -/* changed the size calculations - should hopefully work better. 
lbt */ - dump->magic = CMAGIC; - dump->start_code = 0; - dump->start_stack = regs->gprs[15] & ~(PAGE_SIZE - 1); - dump->u_tsize = current->mm->end_code >> PAGE_SHIFT; - dump->u_dsize = (current->mm->brk + PAGE_SIZE - 1) >> PAGE_SHIFT; - dump->u_dsize -= dump->u_tsize; - dump->u_ssize = 0; - if (dump->start_stack < TASK_SIZE) - dump->u_ssize = (TASK_SIZE - dump->start_stack) >> PAGE_SHIFT; - memcpy(&dump->regs, regs, sizeof(s390_regs)); - dump_fpu (regs, &dump->regs.fp_regs); - dump->regs.per_info = current->thread.per_info; -} - unsigned long get_wchan(struct task_struct *p) { struct stack_frame *sf, *low, *high; diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index fd4f240b833d..8a2bea34ddd2 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -305,26 +305,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, return 0; } -/* - * fill in the user structure for a core dump.. - */ -void dump_thread(struct pt_regs * regs, struct user * dump) -{ - dump->magic = CMAGIC; - dump->start_code = current->mm->start_code; - dump->start_data = current->mm->start_data; - dump->start_stack = regs->regs[15] & ~(PAGE_SIZE - 1); - dump->u_tsize = (current->mm->end_code - dump->start_code) >> PAGE_SHIFT; - dump->u_dsize = (current->mm->brk + (PAGE_SIZE-1) - dump->start_data) >> PAGE_SHIFT; - dump->u_ssize = (current->mm->start_stack - dump->start_stack + - PAGE_SIZE - 1) >> PAGE_SHIFT; - /* Debug registers will come here. */ - - dump->regs = *regs; - - dump->u_fpvalid = dump_fpu(regs, &dump->fpu); -} - /* Tracing by user break controller. 
*/ static void ubc_set_tracing(int asid, unsigned long pc) diff --git a/arch/sh/kernel/sh_ksyms.c b/arch/sh/kernel/sh_ksyms.c index 6954fd62470a..1cf94a618be3 100644 --- a/arch/sh/kernel/sh_ksyms.c +++ b/arch/sh/kernel/sh_ksyms.c @@ -21,14 +21,12 @@ #include #include -extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu(struct pt_regs *, elf_fpregset_t *); extern struct hw_interrupt_type no_irq_type; EXPORT_SYMBOL(sh_mv); /* platform dependent support */ -EXPORT_SYMBOL(dump_thread); EXPORT_SYMBOL(dump_fpu); EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(enable_irq); diff --git a/arch/sh64/kernel/process.c b/arch/sh64/kernel/process.c index b95d04141855..419b5a710441 100644 --- a/arch/sh64/kernel/process.c +++ b/arch/sh64/kernel/process.c @@ -775,26 +775,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, return 0; } -/* - * fill in the user structure for a core dump.. - */ -void dump_thread(struct pt_regs * regs, struct user * dump) -{ - dump->magic = CMAGIC; - dump->start_code = current->mm->start_code; - dump->start_data = current->mm->start_data; - dump->start_stack = regs->regs[15] & ~(PAGE_SIZE - 1); - dump->u_tsize = (current->mm->end_code - dump->start_code) >> PAGE_SHIFT; - dump->u_dsize = (current->mm->brk + (PAGE_SIZE-1) - dump->start_data) >> PAGE_SHIFT; - dump->u_ssize = (current->mm->start_stack - dump->start_stack + - PAGE_SIZE - 1) >> PAGE_SHIFT; - /* Debug registers will come here. 
*/ - - dump->regs = *regs; - - dump->u_fpvalid = dump_fpu(regs, &dump->fpu); -} - asmlinkage int sys_fork(unsigned long r2, unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7, diff --git a/arch/sh64/kernel/sh_ksyms.c b/arch/sh64/kernel/sh_ksyms.c index 0b5497d70bd3..472b450e61be 100644 --- a/arch/sh64/kernel/sh_ksyms.c +++ b/arch/sh64/kernel/sh_ksyms.c @@ -29,7 +29,6 @@ #include #include -extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu(struct pt_regs *, elf_fpregset_t *); #if 0 @@ -41,7 +40,6 @@ EXPORT_SYMBOL(drive_info); #endif /* platform dependent support */ -EXPORT_SYMBOL(dump_thread); EXPORT_SYMBOL(dump_fpu); EXPORT_SYMBOL(iounmap); EXPORT_SYMBOL(enable_irq); diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c index 1c8fd0fd9305..0b0d492c953b 100644 --- a/arch/sparc/kernel/sparc_ksyms.c +++ b/arch/sparc/kernel/sparc_ksyms.c @@ -82,8 +82,6 @@ extern int __lshrdi3(int, int); extern int __muldi3(int, int); extern int __divdi3(int, int); -extern void dump_thread(struct pt_regs *, struct user *); - /* Private functions with odd calling conventions. 
*/ extern void ___atomic24_add(void); extern void ___atomic24_sub(void); diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c index edf52d06b280..202a80c24b6f 100644 --- a/arch/sparc64/kernel/binfmt_aout32.c +++ b/arch/sparc64/kernel/binfmt_aout32.c @@ -36,8 +36,6 @@ static int load_aout32_binary(struct linux_binprm *, struct pt_regs * regs); static int load_aout32_library(struct file*); static int aout32_core_dump(long signr, struct pt_regs * regs, struct file *file); -extern void dump_thread(struct pt_regs *, struct user *); - static struct linux_binfmt aout32_format = { NULL, THIS_MODULE, load_aout32_binary, load_aout32_library, aout32_core_dump, PAGE_SIZE diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index fb7a5370dbfc..d177d7e5c9d3 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -94,7 +94,6 @@ extern void (*prom_palette)(int); extern int __ashrdi3(int, int); -extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu (struct pt_regs * regs, elf_fpregset_t * fpregs); extern unsigned long phys_base; @@ -241,7 +240,6 @@ EXPORT_SYMBOL(io_remap_pfn_range); EXPORT_SYMBOL(_sigpause_common); EXPORT_SYMBOL(verify_compat_iovec); -EXPORT_SYMBOL(dump_thread); EXPORT_SYMBOL(dump_fpu); EXPORT_SYMBOL(pte_alloc_one_kernel); #ifndef CONFIG_SMP diff --git a/arch/v850/kernel/process.c b/arch/v850/kernel/process.c index 39cf247cdae4..062ffa0a9998 100644 --- a/arch/v850/kernel/process.c +++ b/arch/v850/kernel/process.c @@ -163,30 +163,6 @@ int copy_thread (int nr, unsigned long clone_flags, return 0; } -/* - * fill in the user structure for a core dump.. - */ -void dump_thread (struct pt_regs *regs, struct user *dump) -{ -#if 0 /* Later. 
XXX */ - dump->magic = CMAGIC; - dump->start_code = 0; - dump->start_stack = regs->gpr[GPR_SP]; - dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; - dump->u_dsize = ((unsigned long) (current->mm->brk + - (PAGE_SIZE-1))) >> PAGE_SHIFT; - dump->u_dsize -= dump->u_tsize; - dump->u_ssize = 0; - - if (dump->start_stack < TASK_SIZE) - dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; - - dump->u_ar0 = (struct user_regs_struct *)((int)&dump->regs - (int)dump); - dump->regs = *regs; - dump->u_fpvalid = 0; -#endif -} - /* * sys_execve() executes a new program. */ diff --git a/arch/v850/kernel/v850_ksyms.c b/arch/v850/kernel/v850_ksyms.c index 0ca64900dd91..8ffc29c1c89d 100644 --- a/arch/v850/kernel/v850_ksyms.c +++ b/arch/v850/kernel/v850_ksyms.c @@ -21,8 +21,6 @@ extern void *trap_table; EXPORT_SYMBOL (trap_table); /* platform dependent support */ -extern void dump_thread (struct pt_regs *, struct user *); -EXPORT_SYMBOL (dump_thread); EXPORT_SYMBOL (kernel_thread); EXPORT_SYMBOL (enable_irq); EXPORT_SYMBOL (disable_irq); diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 72011826f0cb..f312103434d4 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -33,8 +33,6 @@ static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); static int load_aout_library(struct file*); static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file); -extern void dump_thread(struct pt_regs *, struct user *); - static struct linux_binfmt aout_format = { .module = THIS_MODULE, .load_binary = load_aout_binary, diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 9d6625829b99..b72dc31a0970 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -77,8 +77,6 @@ static int load_flat_shared_library(int id, struct lib_info *p); static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs); static int flat_core_dump(long signr, struct pt_regs * regs, struct file *file); -extern void 
dump_thread(struct pt_regs *, struct user *); - static struct linux_binfmt flat_format = { .module = THIS_MODULE, .load_binary = load_flat_binary, diff --git a/include/asm-um/processor-generic.h b/include/asm-um/processor-generic.h index 075771c371f6..da07a69ce82a 100644 --- a/include/asm-um/processor-generic.h +++ b/include/asm-um/processor-generic.h @@ -89,7 +89,6 @@ extern struct task_struct *alloc_task_struct(void); extern void release_thread(struct task_struct *); extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); -extern void dump_thread(struct pt_regs *regs, struct user *u); static inline void prepare_to_copy(struct task_struct *tsk) { diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d0e6ca3b00ef..e6ee2d95da7a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -47,6 +47,8 @@ extern int console_printk[]; #define default_console_loglevel (console_printk[3]) struct completion; +struct pt_regs; +struct user; /** * might_sleep - annotation for functions that can sleep @@ -123,6 +125,8 @@ extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); extern int session_of_pgrp(int pgrp); +extern void dump_thread(struct pt_regs *regs, struct user *dump); + #ifdef CONFIG_PRINTK asmlinkage int vprintk(const char *fmt, va_list args) __attribute__ ((format (printf, 1, 0))); -- cgit v1.2.3-71-gd317 From cc57165874e938ef684d71ba7d36e7088b551489 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 9 Jan 2006 20:51:41 -0800 Subject: [PATCH] kdump: dynamic per cpu allocation of memory for saving cpu registers - In case of system crash, current state of cpu registers is saved in memory in elf note format. So far memory for storing elf notes was being allocated statically for NR_CPUS. - This patch introduces dynamic allocation of memory for storing elf notes. It uses alloc_percpu() interface. This should lead to better memory usage. 
- Introduced based on Andi Kleen's and Eric W. Biederman's suggestions. - This patch also moves memory allocation for elf notes from architecture dependent portion to architecture independent portion. Now crash_notes is architecture independent. The whole idea is that size of memory to be allocated per cpu (MAX_NOTE_BYTES) can be architecture dependent and allocation of this memory can be architecture independent. Signed-off-by: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/crash.c | 5 +++-- arch/ppc/kernel/machine_kexec.c | 6 ------ arch/s390/kernel/crash.c | 2 -- arch/x86_64/kernel/crash.c | 2 -- include/asm-i386/kexec.h | 3 --- include/asm-powerpc/kexec.h | 3 --- include/asm-s390/kexec.h | 3 --- include/asm-x86_64/kexec.h | 3 --- include/linux/kexec.h | 2 ++ kernel/kexec.c | 16 ++++++++++++++++ 10 files changed, 21 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 10fe6569751e..f1e65c2ead6e 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -25,7 +25,6 @@ #include -note_buf_t crash_notes[NR_CPUS]; /* This keeps a track of which one is crashing cpu. */ static int crashing_cpu; @@ -72,7 +71,9 @@ static void crash_save_this_cpu(struct pt_regs *regs, int cpu) * squirrelled away. ELF notes happen to provide * all of that that no need to invent something new. 
*/ - buf = &crash_notes[cpu][0]; + buf = (u32*)per_cpu_ptr(crash_notes, cpu); + if (!buf) + return; memset(&prstatus, 0, sizeof(prstatus)); prstatus.pr_pid = current->pid; elf_core_copy_regs(&prstatus.pr_reg, regs); diff --git a/arch/ppc/kernel/machine_kexec.c b/arch/ppc/kernel/machine_kexec.c index a882b0dbe8de..84d65a87191e 100644 --- a/arch/ppc/kernel/machine_kexec.c +++ b/arch/ppc/kernel/machine_kexec.c @@ -28,12 +28,6 @@ typedef NORET_TYPE void (*relocate_new_kernel_t)( const extern unsigned char relocate_new_kernel[]; const extern unsigned int relocate_new_kernel_size; -/* - * Provide a dummy crash_notes definition while crash dump arrives to ppc. - * This prevents breakage of crash_notes attribute in kernel/ksysfs.c. - */ -note_buf_t crash_notes[NR_CPUS]; - void machine_shutdown(void) { if (ppc_md.machine_shutdown) diff --git a/arch/s390/kernel/crash.c b/arch/s390/kernel/crash.c index 7bd169c58b0c..926cceeae0fa 100644 --- a/arch/s390/kernel/crash.c +++ b/arch/s390/kernel/crash.c @@ -10,8 +10,6 @@ #include #include -note_buf_t crash_notes[NR_CPUS]; - void machine_crash_shutdown(struct pt_regs *regs) { } diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c index 535e04466079..efe450760bbc 100644 --- a/arch/x86_64/kernel/crash.c +++ b/arch/x86_64/kernel/crash.c @@ -19,8 +19,6 @@ #include #include -note_buf_t crash_notes[NR_CPUS]; - void machine_crash_shutdown(struct pt_regs *regs) { /* This function is only called after the system diff --git a/include/asm-i386/kexec.h b/include/asm-i386/kexec.h index 6ed2a03e37b3..d80d446498fb 100644 --- a/include/asm-i386/kexec.h +++ b/include/asm-i386/kexec.h @@ -26,8 +26,5 @@ #define KEXEC_ARCH KEXEC_ARCH_386 #define MAX_NOTE_BYTES 1024 -typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; - -extern note_buf_t crash_notes[]; #endif /* _I386_KEXEC_H */ diff --git a/include/asm-powerpc/kexec.h b/include/asm-powerpc/kexec.h index 4263af3cadfd..e363752276ef 100644 --- a/include/asm-powerpc/kexec.h +++ 
b/include/asm-powerpc/kexec.h @@ -38,9 +38,6 @@ #ifdef CONFIG_KEXEC #define MAX_NOTE_BYTES 1024 -typedef u32 note_buf_t[MAX_NOTE_BYTES / sizeof(u32)]; - -extern note_buf_t crash_notes[]; #ifdef __powerpc64__ extern void kexec_smp_wait(void); /* get and clear naca physid, wait for diff --git a/include/asm-s390/kexec.h b/include/asm-s390/kexec.h index 54cf7d9f251c..b4809d98fe69 100644 --- a/include/asm-s390/kexec.h +++ b/include/asm-s390/kexec.h @@ -35,8 +35,5 @@ #define KEXEC_ARCH KEXEC_ARCH_S390 #define MAX_NOTE_BYTES 1024 -typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; - -extern note_buf_t crash_notes[]; #endif /*_S390_KEXEC_H */ diff --git a/include/asm-x86_64/kexec.h b/include/asm-x86_64/kexec.h index 42d2ff15c592..cea78543a574 100644 --- a/include/asm-x86_64/kexec.h +++ b/include/asm-x86_64/kexec.h @@ -26,8 +26,5 @@ #define KEXEC_ARCH KEXEC_ARCH_X86_64 #define MAX_NOTE_BYTES 1024 -typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; - -extern note_buf_t crash_notes[]; #endif /* _X86_64_KEXEC_H */ diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c8468472aec0..c1cd9b31159e 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -125,6 +125,8 @@ extern struct kimage *kexec_image; /* Location of a reserved region to hold the crash kernel. */ extern struct resource crashk_res; +typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; +extern note_buf_t *crash_notes; #else /* !CONFIG_KEXEC */ struct pt_regs; diff --git a/kernel/kexec.c b/kernel/kexec.c index 2c95848fbce8..1197de8b2a94 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -26,6 +26,9 @@ #include #include +/* Per cpu memory for storing cpu states in case of system crash. */ +note_buf_t* crash_notes; + /* Location of the reserved area for the crash kernel */ struct resource crashk_res = { .name = "Crash kernel", @@ -1060,3 +1063,16 @@ void crash_kexec(struct pt_regs *regs) xchg(&kexec_lock, 0); } } + +static int __init crash_notes_memory_init(void) +{ + /* Allocate memory for saving cpu registers. 
*/ + crash_notes = alloc_percpu(note_buf_t); + if (!crash_notes) { + printk("Kexec: Memory allocation for saving cpu register" + " states failed\n"); + return -ENOMEM; + } + return 0; +} +module_init(crash_notes_memory_init) -- cgit v1.2.3-71-gd317 From 720e1a9f1c3bfa9f72cded56962e7f092fefaaed Mon Sep 17 00:00:00 2001 From: "akpm@osdl.org" Date: Mon, 9 Jan 2006 20:51:51 -0800 Subject: [PATCH] kexec: increase max segment limit ) From: Vivek Goyal - In some cases, the number of segments, on a kexec load, exceeds the existing cap of 8. This patch increases the KEXEC_SEGMENT_MAX limit from 8 to 16. Signed-off-by: Rachita Kothiyal Signed-off-by: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c1cd9b31159e..94abc07cb164 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -41,7 +41,7 @@ typedef unsigned long kimage_entry_t; #define IND_DONE 0x4 #define IND_SOURCE 0x8 -#define KEXEC_SEGMENT_MAX 8 +#define KEXEC_SEGMENT_MAX 16 struct kexec_segment { void __user *buf; size_t bufsz; -- cgit v1.2.3-71-gd317 From 5be196e5f925dab2309530fabce69c2e562b9791 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2006 20:51:55 -0800 Subject: [PATCH] add vfs_* helpers for xattr operations Add vfs_getxattr, vfs_setxattr and vfs_removexattr helpers for common checks around invocation of the xattr methods. NFSD already was missing some of the checks and there will be more soon. Signed-off-by: Christoph Hellwig Cc: James Morris (James, I haven't touched selinux yet because it's doing various odd things and I'm not sure how it would interact with the security attribute fallbacks you added. Could you investigate whether it could use vfs_getxattr or if not add a __vfs_getxattr helper to share the bits it is fine with?) 
For NFSv4: instead of just converting it add an nfsd_getxattr helper for the code shared by NFSv2/3 and NFSv4 ACLs. In fact that code isn't even NFS-specific, but I'll wait for more users to pop up first before moving it to common code. Signed-off-by: Christoph Hellwig Acked-by: Dave Kleikamp Signed-off-by: Adrian Bunk Signed-off-by: Neil Brown Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/vfs.c | 125 +++++++++++++++++------------------------- fs/xattr.c | 146 ++++++++++++++++++++++++++++++++------------------ include/linux/xattr.h | 4 ++ 3 files changed, 145 insertions(+), 130 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index bb36b4304491..eef0576a7785 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -48,8 +48,8 @@ #include #include #include -#ifdef CONFIG_NFSD_V4 #include +#ifdef CONFIG_NFSD_V4 #include #include #include @@ -365,8 +365,30 @@ out_nfserr: goto out; } -#if defined(CONFIG_NFSD_V4) +#if defined(CONFIG_NFSD_V2_ACL) || \ + defined(CONFIG_NFSD_V3_ACL) || \ + defined(CONFIG_NFSD_V4) +static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) +{ + ssize_t buflen; + int error; + + buflen = vfs_getxattr(dentry, key, NULL, 0); + if (buflen <= 0) + return buflen; + *buf = kmalloc(buflen, GFP_KERNEL); + if (!*buf) + return -ENOMEM; + + error = vfs_getxattr(dentry, key, *buf, buflen); + if (error < 0) + return error; + return buflen; +} +#endif + +#if defined(CONFIG_NFSD_V4) static int set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) { @@ -374,7 +396,6 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) size_t buflen; char *buf = NULL; int error = 0; - struct inode *inode = dentry->d_inode; buflen = posix_acl_xattr_size(pacl->a_count); buf = kmalloc(buflen, GFP_KERNEL); @@ -388,15 +409,7 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) goto out; } - error = -EOPNOTSUPP; - if 
(inode->i_op && inode->i_op->setxattr) { - mutex_lock(&inode->i_mutex); - security_inode_setxattr(dentry, key, buf, len, 0); - error = inode->i_op->setxattr(dentry, key, buf, len, 0); - if (!error) - security_inode_post_setxattr(dentry, key, buf, len, 0); - mutex_unlock(&inode->i_mutex); - } + error = vfs_setxattr(dentry, key, buf, len, 0); out: kfree(buf); return error; @@ -455,44 +468,19 @@ out_nfserr: static struct posix_acl * _get_posix_acl(struct dentry *dentry, char *key) { - struct inode *inode = dentry->d_inode; - char *buf = NULL; - int buflen, error = 0; + void *buf = NULL; struct posix_acl *pacl = NULL; + int buflen; - error = -EOPNOTSUPP; - if (inode->i_op == NULL) - goto out_err; - if (inode->i_op->getxattr == NULL) - goto out_err; - - error = security_inode_getxattr(dentry, key); - if (error) - goto out_err; - - buflen = inode->i_op->getxattr(dentry, key, NULL, 0); - if (buflen <= 0) { - error = buflen < 0 ? buflen : -ENODATA; - goto out_err; - } - - buf = kmalloc(buflen, GFP_KERNEL); - if (buf == NULL) { - error = -ENOMEM; - goto out_err; - } - - error = inode->i_op->getxattr(dentry, key, buf, buflen); - if (error < 0) - goto out_err; + buflen = nfsd_getxattr(dentry, key, &buf); + if (!buflen) + buflen = -ENODATA; + if (buflen <= 0) + return ERR_PTR(buflen); pacl = posix_acl_from_xattr(buf, buflen); - out: kfree(buf); return pacl; - out_err: - pacl = ERR_PTR(error); - goto out; } int @@ -1884,39 +1872,25 @@ nfsd_get_posix_acl(struct svc_fh *fhp, int type) ssize_t size; struct posix_acl *acl; - if (!IS_POSIXACL(inode) || !inode->i_op || !inode->i_op->getxattr) + if (!IS_POSIXACL(inode)) + return ERR_PTR(-EOPNOTSUPP); + + switch (type) { + case ACL_TYPE_ACCESS: + name = POSIX_ACL_XATTR_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name = POSIX_ACL_XATTR_DEFAULT; + break; + default: return ERR_PTR(-EOPNOTSUPP); - switch(type) { - case ACL_TYPE_ACCESS: - name = POSIX_ACL_XATTR_ACCESS; - break; - case ACL_TYPE_DEFAULT: - name = POSIX_ACL_XATTR_DEFAULT; - 
break; - default: - return ERR_PTR(-EOPNOTSUPP); } - size = inode->i_op->getxattr(fhp->fh_dentry, name, NULL, 0); + size = nfsd_getxattr(fhp->fh_dentry, name, &value); + if (size < 0) + return ERR_PTR(size); - if (size < 0) { - acl = ERR_PTR(size); - goto getout; - } else if (size > 0) { - value = kmalloc(size, GFP_KERNEL); - if (!value) { - acl = ERR_PTR(-ENOMEM); - goto getout; - } - size = inode->i_op->getxattr(fhp->fh_dentry, name, value, size); - if (size < 0) { - acl = ERR_PTR(size); - goto getout; - } - } acl = posix_acl_from_xattr(value, size); - -getout: kfree(value); return acl; } @@ -1957,16 +1931,13 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) } else size = 0; - if (!fhp->fh_locked) - fh_lock(fhp); /* unlocking is done automatically */ if (size) - error = inode->i_op->setxattr(fhp->fh_dentry, name, - value, size, 0); + error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); else { if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) error = 0; else { - error = inode->i_op->removexattr(fhp->fh_dentry, name); + error = vfs_removexattr(fhp->fh_dentry, name); if (error == -ENODATA) error = 0; } diff --git a/fs/xattr.c b/fs/xattr.c index 386a532ee5a9..fee804e69a9a 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -19,6 +19,96 @@ #include #include + +int +vfs_setxattr(struct dentry *dentry, char *name, void *value, + size_t size, int flags) +{ + struct inode *inode = dentry->d_inode; + int error; + + mutex_lock(&inode->i_mutex); + error = security_inode_setxattr(dentry, name, value, size, flags); + if (error) + goto out; + error = -EOPNOTSUPP; + if (inode->i_op->setxattr) { + error = inode->i_op->setxattr(dentry, name, value, size, flags); + if (!error) { + fsnotify_xattr(dentry); + security_inode_post_setxattr(dentry, name, value, + size, flags); + } + } else if (!strncmp(name, XATTR_SECURITY_PREFIX, + sizeof XATTR_SECURITY_PREFIX - 1)) { + const char *suffix = name + sizeof XATTR_SECURITY_PREFIX - 1; + error = 
security_inode_setsecurity(inode, suffix, value, + size, flags); + if (!error) + fsnotify_xattr(dentry); + } +out: + mutex_unlock(&inode->i_mutex); + return error; +} +EXPORT_SYMBOL_GPL(vfs_setxattr); + +ssize_t +vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) +{ + struct inode *inode = dentry->d_inode; + int error; + + error = security_inode_getxattr(dentry, name); + if (error) + return error; + + if (inode->i_op->getxattr) + error = inode->i_op->getxattr(dentry, name, value, size); + else + error = -EOPNOTSUPP; + + if (!strncmp(name, XATTR_SECURITY_PREFIX, + sizeof XATTR_SECURITY_PREFIX - 1)) { + const char *suffix = name + sizeof XATTR_SECURITY_PREFIX - 1; + int ret = security_inode_getsecurity(inode, suffix, value, + size, error); + /* + * Only overwrite the return value if a security module + * is actually active. + */ + if (ret != -EOPNOTSUPP) + error = ret; + } + + return error; +} +EXPORT_SYMBOL_GPL(vfs_getxattr); + +int +vfs_removexattr(struct dentry *dentry, char *name) +{ + struct inode *inode = dentry->d_inode; + int error; + + if (!inode->i_op->removexattr) + return -EOPNOTSUPP; + + error = security_inode_removexattr(dentry, name); + if (error) + return error; + + mutex_lock(&inode->i_mutex); + error = inode->i_op->removexattr(dentry, name); + mutex_unlock(&inode->i_mutex); + + if (!error) + fsnotify_xattr(dentry); + return error; +} +EXPORT_SYMBOL_GPL(vfs_removexattr); + + /* * Extended attribute SET operations */ @@ -51,29 +141,7 @@ setxattr(struct dentry *d, char __user *name, void __user *value, } } - mutex_lock(&d->d_inode->i_mutex); - error = security_inode_setxattr(d, kname, kvalue, size, flags); - if (error) - goto out; - error = -EOPNOTSUPP; - if (d->d_inode->i_op && d->d_inode->i_op->setxattr) { - error = d->d_inode->i_op->setxattr(d, kname, kvalue, - size, flags); - if (!error) { - fsnotify_xattr(d); - security_inode_post_setxattr(d, kname, kvalue, - size, flags); - } - } else if (!strncmp(kname, 
XATTR_SECURITY_PREFIX, - sizeof XATTR_SECURITY_PREFIX - 1)) { - const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1; - error = security_inode_setsecurity(d->d_inode, suffix, kvalue, - size, flags); - if (!error) - fsnotify_xattr(d); - } -out: - mutex_unlock(&d->d_inode->i_mutex); + error = vfs_setxattr(d, kname, kvalue, size, flags); kfree(kvalue); return error; } @@ -147,22 +215,7 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) return -ENOMEM; } - error = security_inode_getxattr(d, kname); - if (error) - goto out; - error = -EOPNOTSUPP; - if (d->d_inode->i_op && d->d_inode->i_op->getxattr) - error = d->d_inode->i_op->getxattr(d, kname, kvalue, size); - - if (!strncmp(kname, XATTR_SECURITY_PREFIX, - sizeof XATTR_SECURITY_PREFIX - 1)) { - const char *suffix = kname + sizeof XATTR_SECURITY_PREFIX - 1; - int rv = security_inode_getsecurity(d->d_inode, suffix, kvalue, - size, error); - /* Security module active: overwrite error value */ - if (rv != -EOPNOTSUPP) - error = rv; - } + error = vfs_getxattr(d, kname, kvalue, size); if (error > 0) { if (size && copy_to_user(value, kvalue, error)) error = -EFAULT; @@ -171,7 +224,6 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) than XATTR_SIZE_MAX bytes. Not possible. 
*/ error = -E2BIG; } -out: kfree(kvalue); return error; } @@ -318,19 +370,7 @@ removexattr(struct dentry *d, char __user *name) if (error < 0) return error; - error = -EOPNOTSUPP; - if (d->d_inode->i_op && d->d_inode->i_op->removexattr) { - error = security_inode_removexattr(d, kname); - if (error) - goto out; - mutex_lock(&d->d_inode->i_mutex); - error = d->d_inode->i_op->removexattr(d, kname); - mutex_unlock(&d->d_inode->i_mutex); - if (!error) - fsnotify_xattr(d); - } -out: - return error; + return vfs_removexattr(d, kname); } asmlinkage long diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 23f9c61d9546..366f0ab4219f 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -25,6 +25,10 @@ struct xattr_handler { size_t size, int flags); }; +ssize_t vfs_getxattr(struct dentry *, char *, void *, size_t); +int vfs_setxattr(struct dentry *, char *, void *, size_t, int); +int vfs_removexattr(struct dentry *, char *); + ssize_t generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); int generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -- cgit v1.2.3-71-gd317 From e0ad7b073eb7317e5afe0385b02dcb1d52a1eedf Mon Sep 17 00:00:00 2001 From: "akpm@osdl.org" Date: Mon, 9 Jan 2006 20:51:56 -0800 Subject: [PATCH] move xattr permission checks into the VFS ) From: Christoph Hellwig The xattr code has rather complex permission checks because the rules are very different for different attribute namespaces. This patch moves as much as we can into the generic code. Currently all the major disk based filesystems duplicate these checks, while many minor filesystems or network filesystems lack some or all of them. To do this we need defines for the extended attribute names in common code, I moved them up from JFS which had the nicest definitions.
Signed-off-by: Christoph Hellwig Acked-by: Dave Kleikamp Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jfs/xattr.c | 15 ------------- fs/xattr.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++---- include/linux/xattr.h | 15 +++++++++++++ 3 files changed, 72 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 23aa5066b5a4..9dde36a1eb5d 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -83,21 +83,6 @@ struct ea_buffer { #define EA_NEW 0x0004 #define EA_MALLOC 0x0008 -/* Namespaces */ -#define XATTR_SYSTEM_PREFIX "system." -#define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1) - -#define XATTR_USER_PREFIX "user." -#define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1) - -#define XATTR_OS2_PREFIX "os2." -#define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1) - -/* XATTR_SECURITY_PREFIX is defined in include/linux/xattr.h */ -#define XATTR_SECURITY_PREFIX_LEN (sizeof (XATTR_SECURITY_PREFIX) - 1) - -#define XATTR_TRUSTED_PREFIX "trusted." -#define XATTR_TRUSTED_PREFIX_LEN (sizeof (XATTR_TRUSTED_PREFIX) - 1) /* * These three routines are used to recognize on-disk extended attributes diff --git a/fs/xattr.c b/fs/xattr.c index fee804e69a9a..80eca7d3d69f 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -20,6 +20,47 @@ #include +/* + * Check permissions for extended attribute access. This is a bit complicated + * because different namespaces have very different rules. + */ +static int +xattr_permission(struct inode *inode, const char *name, int mask) +{ + /* + * We can never set or remove an extended attribute on a read-only + * filesystem or on an immutable / append-only inode. + */ + if (mask & MAY_WRITE) { + if (IS_RDONLY(inode)) + return -EROFS; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + return -EPERM; + } + + /* + * No restriction for security.* and system.* from the VFS. Decision + * on these is left to the underlying filesystem / security module. 
+ */ + if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) || + !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return 0; + + /* + * The trusted.* namespace can only accessed by a privilegued user. + */ + if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) + return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); + + if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { + if (!S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return -EPERM; + } + + return permission(inode, mask, NULL); +} + int vfs_setxattr(struct dentry *dentry, char *name, void *value, size_t size, int flags) @@ -27,6 +68,10 @@ vfs_setxattr(struct dentry *dentry, char *name, void *value, struct inode *inode = dentry->d_inode; int error; + error = xattr_permission(inode, name, MAY_WRITE); + if (error) + return error; + mutex_lock(&inode->i_mutex); error = security_inode_setxattr(dentry, name, value, size, flags); if (error) @@ -40,8 +85,8 @@ vfs_setxattr(struct dentry *dentry, char *name, void *value, size, flags); } } else if (!strncmp(name, XATTR_SECURITY_PREFIX, - sizeof XATTR_SECURITY_PREFIX - 1)) { - const char *suffix = name + sizeof XATTR_SECURITY_PREFIX - 1; + XATTR_SECURITY_PREFIX_LEN)) { + const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; error = security_inode_setsecurity(inode, suffix, value, size, flags); if (!error) @@ -59,6 +104,10 @@ vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) struct inode *inode = dentry->d_inode; int error; + error = xattr_permission(inode, name, MAY_READ); + if (error) + return error; + error = security_inode_getxattr(dentry, name); if (error) return error; @@ -69,8 +118,8 @@ vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) error = -EOPNOTSUPP; if (!strncmp(name, XATTR_SECURITY_PREFIX, - sizeof XATTR_SECURITY_PREFIX - 1)) { - const char *suffix = name + sizeof XATTR_SECURITY_PREFIX - 1; + XATTR_SECURITY_PREFIX_LEN)) { + 
const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; int ret = security_inode_getsecurity(inode, suffix, value, size, error); /* @@ -94,6 +143,10 @@ vfs_removexattr(struct dentry *dentry, char *name) if (!inode->i_op->removexattr) return -EOPNOTSUPP; + error = xattr_permission(inode, name, MAY_WRITE); + if (error) + return error; + error = security_inode_removexattr(dentry, name); if (error) return error; diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 366f0ab4219f..cda8a96e2fa0 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -13,7 +13,22 @@ #define XATTR_CREATE 0x1 /* set value, fail if attr already exists */ #define XATTR_REPLACE 0x2 /* set value, fail if attr does not exist */ +/* Namespaces */ +#define XATTR_OS2_PREFIX "os2." +#define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1) + #define XATTR_SECURITY_PREFIX "security." +#define XATTR_SECURITY_PREFIX_LEN (sizeof (XATTR_SECURITY_PREFIX) - 1) + +#define XATTR_SYSTEM_PREFIX "system." +#define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1) + +#define XATTR_TRUSTED_PREFIX "trusted." +#define XATTR_TRUSTED_PREFIX_LEN (sizeof (XATTR_TRUSTED_PREFIX) - 1) + +#define XATTR_USER_PREFIX "user." +#define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1) + struct xattr_handler { char *prefix; -- cgit v1.2.3-71-gd317 From 870f481793b585323fbda3e87c54efc116f46351 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2006 20:52:01 -0800 Subject: [PATCH] replace inode_update_time with file_update_time To allow various options to work per-mount instead of per-sb we need a struct vfsmount when updating ctime and mtime. This preparation patch replaces the inode_update_time routine with a file_update_time routine so we can easily get at the vfsmount. (and the file makes more sense in this context anyway). Also get rid of the unused second argument - we always want to update the ctime when calling this routine.
Signed-off-by: Christoph Hellwig Cc: Al Viro Cc: Anton Altaparmakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 27 +++++++++++++++------------ fs/ncpfs/file.c | 2 +- fs/ntfs/file.c | 2 +- fs/ntfs/inode.c | 20 +++++++++++++++++++- fs/ocfs2/mmap.c | 8 ++------ fs/pipe.c | 2 +- fs/reiserfs/file.c | 2 +- fs/xfs/linux-2.6/xfs_lrw.c | 2 +- include/linux/fs.h | 2 +- mm/filemap.c | 2 +- mm/filemap_xip.c | 2 +- 11 files changed, 44 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index e08767fd57b0..e177769f3b41 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1204,16 +1204,20 @@ void update_atime(struct inode *inode) EXPORT_SYMBOL(update_atime); /** - * inode_update_time - update mtime and ctime time - * @inode: inode accessed - * @ctime_too: update ctime too + * file_update_time - update mtime and ctime time + * @file: file accessed * - * Update the mtime time on an inode and mark it for writeback. - * When ctime_too is specified update the ctime too. + * Update the mtime and ctime members of an inode and mark the inode + * for writeback. Note that this function is meant exclusively for + * usage in the file write path of filesystems, and filesystems may + * choose to explicitly ignore update via this function with the + * S_NOCTIME inode flag, e.g. for network filesystem where these + * timestamps are handled by the server. 
*/ -void inode_update_time(struct inode *inode, int ctime_too) +void file_update_time(struct file *file) { + struct inode *inode = file->f_dentry->d_inode; struct timespec now; int sync_it = 0; @@ -1227,16 +1231,15 @@ void inode_update_time(struct inode *inode, int ctime_too) sync_it = 1; inode->i_mtime = now; - if (ctime_too) { - if (!timespec_equal(&inode->i_ctime, &now)) - sync_it = 1; - inode->i_ctime = now; - } + if (!timespec_equal(&inode->i_ctime, &now)) + sync_it = 1; + inode->i_ctime = now; + if (sync_it) mark_inode_dirty_sync(inode); } -EXPORT_SYMBOL(inode_update_time); +EXPORT_SYMBOL(file_update_time); int inode_needs_sync(struct inode *inode) { diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 4947d9b11fc1..973b444d6914 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -262,7 +262,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * } vfree(bouncebuffer); - inode_update_time(inode, 1); + file_update_time(file); *ppos = pos; diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 30f71acdc1cb..fb413d3d8618 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2173,7 +2173,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, err = remove_suid(file->f_dentry); if (err) goto out; - inode_update_time(inode, 1); + file_update_time(file); written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos, count); out: diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index bda7a08911a5..ea1bd3feea1b 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2767,7 +2767,25 @@ unm_done: up_write(&ni->runlist.lock); done: /* Update the mtime and ctime on the base inode. */ - inode_update_time(VFS_I(base_ni), 1); + /* normally ->truncate shouldn't update ctime or mtime, + * but ntfs did before so it got a copy & paste version + * of file_update_time. one day someone should fix this + * for real. 
+ */ + if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) { + struct timespec now = current_fs_time(VFS_I(base_ni)->i_sb); + int sync_it = 0; + + if (!timespec_equal(&VFS_I(base_ni)->i_mtime, &now) || + !timespec_equal(&VFS_I(base_ni)->i_ctime, &now)) + sync_it = 1; + VFS_I(base_ni)->i_mtime = now; + VFS_I(base_ni)->i_ctime = now; + + if (sync_it) + mark_inode_dirty_sync(VFS_I(base_ni)); + } + if (likely(!err)) { NInoClearTruncateFailed(ni); ntfs_debug("Done."); diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index afdeec4b0eef..843cf9ddefe8 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -80,12 +80,8 @@ static struct vm_operations_struct ocfs2_file_vm_ops = { .nopage = ocfs2_nopage, }; -int ocfs2_mmap(struct file *file, - struct vm_area_struct *vma) +int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) { - struct address_space *mapping = file->f_dentry->d_inode->i_mapping; - struct inode *inode = mapping->host; - /* We don't want to support shared writable mappings yet. */ if (((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) && ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { @@ -95,7 +91,7 @@ int ocfs2_mmap(struct file *file, return -EINVAL; } - update_atime(inode); + file_accessed(file); vma->vm_ops = &ocfs2_file_vm_ops; return 0; } diff --git a/fs/pipe.c b/fs/pipe.c index acb030b61fb0..eef0f29e86ef 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -347,7 +347,7 @@ out: kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN); } if (ret > 0) - inode_update_time(inode, 1); /* mtime and ctime */ + file_update_time(filp); return ret; } diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 127e7d2cabdd..ad6fa964b0e7 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -1360,7 +1360,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t if (res) goto out; - inode_update_time(inode, 1); /* Both mtime and ctime */ + file_update_time(file); // Ok, we are done with all the checks. 
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 5675117ef227..885dfafeabee 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -713,7 +713,7 @@ start: } if (likely(!(ioflags & IO_INVIS))) { - inode_update_time(inode, 1); + file_update_time(file); xfs_ichgtime_fast(xip, inode, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 92ae3e2067b0..1feee2e7e47b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1716,7 +1716,7 @@ extern ssize_t simple_read_from_buffer(void __user *, size_t, loff_t *, const vo extern int inode_change_ok(struct inode *, struct iattr *); extern int __must_check inode_setattr(struct inode *, struct iattr *); -extern void inode_update_time(struct inode *inode, int ctime_too); +extern void file_update_time(struct file *file); static inline ino_t parent_ino(struct dentry *dentry) { diff --git a/mm/filemap.c b/mm/filemap.c index 5fca2737c971..96de772be487 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2108,7 +2108,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; - inode_update_time(inode, 1); + file_update_time(file); /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (unlikely(file->f_flags & O_DIRECT)) { diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index e2b34e95913e..b960ac8e5918 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -383,7 +383,7 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, if (ret) goto out_backing; - inode_update_time(inode, 1); + file_update_time(filp); ret = __xip_file_write (filp, buf, count, pos, ppos); -- cgit v1.2.3-71-gd317 From 869243a0f6143f76e7c847e707eee6ece9cbf821 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2006 20:52:03 -0800 Subject: [PATCH] remove update_atime All callers use touch_atime now which takes a vfsmount and allows us to implement per-mount noatime. 
Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 9 ++++++--- include/linux/fs.h | 10 +--------- 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index e177769f3b41..76980a9c92e7 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1176,17 +1176,20 @@ sector_t bmap(struct inode * inode, sector_t block) EXPORT_SYMBOL(bmap); /** - * update_atime - update the access time + * touch_atime - update the access time + * @mnt: mount the inode is accessed on * @inode: inode accessed * * Update the accessed time on an inode and mark it for writeback. * This function automatically handles read only file systems and media, * as well as the "noatime" flag and inode specific "noatime" markers. */ -void update_atime(struct inode *inode) +void touch_atime(struct vfsmount *mnt, struct dentry *dentry) { + struct inode *inode = dentry->d_inode; struct timespec now; + /* per-mountpoint checks will go here */ if (IS_NOATIME(inode)) return; if (IS_NODIRATIME(inode) && S_ISDIR(inode->i_mode)) @@ -1201,7 +1204,7 @@ void update_atime(struct inode *inode) } } -EXPORT_SYMBOL(update_atime); +EXPORT_SYMBOL(touch_atime); /** * file_update_time - update mtime and ctime time diff --git a/include/linux/fs.h b/include/linux/fs.h index 1feee2e7e47b..85c5656756b6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -235,9 +235,6 @@ struct kstatfs; struct vm_area_struct; struct vfsmount; -/* Used to be a macro which just called the function, now just a function */ -extern void update_atime (struct inode *); - extern void __init inode_init(unsigned long); extern void __init inode_init_early(void); extern void __init mnt_init(unsigned long); @@ -1118,12 +1115,7 @@ static inline void mark_inode_dirty_sync(struct inode *inode) __mark_inode_dirty(inode, I_DIRTY_SYNC); } -static inline void touch_atime(struct vfsmount *mnt, struct dentry *dentry) -{ - /* per-mountpoint checks will go 
here */ - update_atime(dentry->d_inode); -} - +extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); static inline void file_accessed(struct file *file) { if (!(file->f_flags & O_NOATIME)) -- cgit v1.2.3-71-gd317 From bdff071dbf911bf5d1dcaedfaafebb549d2fd969 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 9 Jan 2006 20:52:03 -0800 Subject: [PATCH] __deprecated_for_modules the lookup_hash() prototype This patch __deprecated_for_modules the lookup_hash() prototype. Signed-off-by: Adrian Bunk Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/namei.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 455660eafba9..b699e427c00c 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -74,7 +74,7 @@ extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); extern void release_open_intent(struct nameidata *); extern struct dentry * lookup_one_len(const char *, struct dentry *, int); -extern struct dentry * lookup_hash(struct nameidata *); +extern __deprecated_for_modules struct dentry * lookup_hash(struct nameidata *); extern int follow_down(struct vfsmount **, struct dentry **); extern int follow_up(struct vfsmount **, struct dentry **); -- cgit v1.2.3-71-gd317 From e6a6d2efcb7e7c87c5fe0395803da1453b29cbef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2006 20:52:14 -0800 Subject: [PATCH] sanitize building of fs/compat_ioctl.c Now that all these entries in the arch ioctl32.c files are gone [1], we can build fs/compat_ioctl.c as a normal object and kill tons of cruft. We need a special do_ioctl32_pointer handler for s390 so the compat_ptr call is done. This is not needed but harmless on all other architectures. Also remove some superfluous includes in fs/compat_ioctl.c Tested on ppc64.
[1] parisc still had its PPP handler left, which is not fully correct for ppp and besides that ppp uses the generic SIOCPRIV ioctl so it'd kick in for all netdevice users. We can introduce a proper handler in one of the next patch series by adding a compat_ioctl method to struct net_device but for now let's just kill it - parisc doesn't compile in mainline anyway and I don't want this to block this patchset. Signed-off-by: Christoph Hellwig Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/ia32/Makefile | 4 +--- arch/ia64/ia32/ia32_ioctl.c | 45 ------------------------------------- arch/mips/kernel/Makefile | 3 +-- arch/mips/kernel/ioctl32.c | 50 ----------------------------------------- arch/parisc/kernel/Makefile | 3 +-- arch/parisc/kernel/ioctl32.c | 41 --------------------------------- arch/powerpc/kernel/Makefile | 3 +-- arch/powerpc/kernel/ioctl32.c | 45 ------------------------------------- arch/s390/kernel/Makefile | 3 +-- arch/s390/kernel/compat_ioctl.c | 47 -------------------------------------- arch/sparc64/kernel/Makefile | 4 +--- arch/sparc64/kernel/ioctl32.c | 39 -------------------------------- arch/x86_64/ia32/Makefile | 4 +--- arch/x86_64/ia32/ia32_ioctl.c | 32 -------------------------- fs/Makefile | 2 +- fs/compat_ioctl.c | 37 +++++++++++++++++------------- include/linux/compat_ioctl.h | 8 ------- 17 files changed, 29 insertions(+), 341 deletions(-) delete mode 100644 arch/ia64/ia32/ia32_ioctl.c delete mode 100644 arch/mips/kernel/ioctl32.c delete mode 100644 arch/parisc/kernel/ioctl32.c delete mode 100644 arch/powerpc/kernel/ioctl32.c delete mode 100644 arch/s390/kernel/compat_ioctl.c delete mode 100644 arch/sparc64/kernel/ioctl32.c delete mode 100644 arch/x86_64/ia32/ia32_ioctl.c (limited to 'include/linux') diff --git a/arch/ia64/ia32/Makefile b/arch/ia64/ia32/Makefile index 2ed90da81166..61cb60affd95 100644 --- a/arch/ia64/ia32/Makefile +++ b/arch/ia64/ia32/Makefile @@ -2,11 +2,9 @@ # Makefile for
the ia32 kernel emulation subsystem. # -obj-y := ia32_entry.o sys_ia32.o ia32_ioctl.o ia32_signal.o \ +obj-y := ia32_entry.o sys_ia32.o ia32_signal.o \ ia32_support.o ia32_traps.o binfmt_elf32.o ia32_ldt.o -CFLAGS_ia32_ioctl.o += -Ifs/ - # Don't let GCC uses f16-f31 so that save_ia32_fpstate_live() and # restore_ia32_fpstate_live() can be sure the live register contain user-level state. CFLAGS_ia32_signal.o += -mfixed-range=f16-f31 diff --git a/arch/ia64/ia32/ia32_ioctl.c b/arch/ia64/ia32/ia32_ioctl.c deleted file mode 100644 index 88739394f6df..000000000000 --- a/arch/ia64/ia32/ia32_ioctl.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * IA32 Architecture-specific ioctl shim code - * - * Copyright (C) 2000 VA Linux Co - * Copyright (C) 2000 Don Dugger - * Copyright (C) 2001-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ - -#include /* argh, msdos_fs.h isn't self-contained... */ -#include -#include "ia32priv.h" - -#define INCLUDES -#include "compat_ioctl.c" - -#define IOCTL_NR(a) ((a) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) - -#define DO_IOCTL(fd, cmd, arg) ({ \ - int _ret; \ - mm_segment_t _old_fs = get_fs(); \ - \ - set_fs(KERNEL_DS); \ - _ret = sys_ioctl(fd, cmd, (unsigned long)arg); \ - set_fs(_old_fs); \ - _ret; \ -}) - -#define CODE -#include "compat_ioctl.c" - -#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL((cmd),sys_ioctl) -#define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl_trans_handler_t)(handler), NULL }, -#define IOCTL_TABLE_START \ - struct ioctl_trans ioctl_start[] = { -#define IOCTL_TABLE_END \ - }; - -IOCTL_TABLE_START -#define DECLARES -#include "compat_ioctl.c" -#include -IOCTL_TABLE_END - -int ioctl_table_size = ARRAY_SIZE(ioctl_start); diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 72f2126ad19d..f36c4f20ee8a 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -50,7 +50,7 @@ obj-$(CONFIG_MIPS_BOARDS_GEN) += irq-msc01.o obj-$(CONFIG_32BIT) += scall32-o32.o obj-$(CONFIG_64BIT) += scall64-64.o 
obj-$(CONFIG_BINFMT_IRIX) += binfmt_irix.o -obj-$(CONFIG_MIPS32_COMPAT) += ioctl32.o linux32.o signal32.o +obj-$(CONFIG_MIPS32_COMPAT) += linux32.o signal32.o obj-$(CONFIG_MIPS32_N32) += binfmt_elfn32.o scall64-n32.o signal_n32.o obj-$(CONFIG_MIPS32_O32) += binfmt_elfo32.o scall64-o32.o ptrace32.o @@ -60,6 +60,5 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_64BIT) += cpu-bugs64.o CFLAGS_cpu-bugs64.o = $(shell if $(CC) $(CFLAGS) -Wa,-mdaddi -c -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi) -CFLAGS_ioctl32.o += -Ifs/ EXTRA_AFLAGS := $(CFLAGS) diff --git a/arch/mips/kernel/ioctl32.c b/arch/mips/kernel/ioctl32.c deleted file mode 100644 index 9ea1fc748864..000000000000 --- a/arch/mips/kernel/ioctl32.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * ioctl32.c: Conversion between 32bit and 64bit native ioctls. - * - * Copyright (C) 2000 Silicon Graphics, Inc. - * Written by Ulf Carlsson (ulfc@engr.sgi.com) - * Copyright (C) 2000, 2004 Ralf Baechle - * Copyright (C) 2002, 2003 Maciej W. 
Rozycki - */ -#define INCLUDES -#include "compat_ioctl.c" - -#include -#include -#include -#include -#include - -#ifdef CONFIG_SIBYTE_TBPROF -#include -#endif - -#define A(__x) ((unsigned long)(__x)) - -long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); - -#define CODE -#include "compat_ioctl.c" - -#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL((cmd),sys_ioctl) -#define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl_trans_handler_t)(handler), NULL }, -#define IOCTL_TABLE_START \ - struct ioctl_trans ioctl_start[] = { -#define IOCTL_TABLE_END \ - }; - -IOCTL_TABLE_START - -#include -#define DECLARES -#include "compat_ioctl.c" - -/*HANDLE_IOCTL(RTC_IRQP_READ, w_long) -COMPATIBLE_IOCTL(RTC_IRQP_SET) -HANDLE_IOCTL(RTC_EPOCH_READ, w_long) -COMPATIBLE_IOCTL(RTC_EPOCH_SET) -*/ - -IOCTL_TABLE_END - -int ioctl_table_size = ARRAY_SIZE(ioctl_start); diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index 171f9c239f60..27827bc3717e 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -6,7 +6,6 @@ extra-y := init_task.o head.o vmlinux.lds AFLAGS_entry.o := -traditional AFLAGS_pacache.o := -traditional -CFLAGS_ioctl32.o := -Ifs/ obj-y := cache.o pacache.o setup.o traps.o time.o irq.o \ pa7300lc.o syscall.o entry.o sys_parisc.o firmware.o \ @@ -19,6 +18,6 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PA11) += pci-dma.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_64BIT) += binfmt_elf32.o sys_parisc32.o ioctl32.o signal32.o +obj-$(CONFIG_64BIT) += binfmt_elf32.o sys_parisc32.o signal32.o # only supported for PCX-W/U in 64-bit mode at the moment obj-$(CONFIG_64BIT) += perf.o perf_asm.o diff --git a/arch/parisc/kernel/ioctl32.c b/arch/parisc/kernel/ioctl32.c deleted file mode 100644 index 805f31486cf9..000000000000 --- a/arch/parisc/kernel/ioctl32.c +++ /dev/null @@ -1,41 +0,0 @@ -/* $Id: ioctl32.c,v 1.5 2002/10/18 00:21:43 varenet Exp $ - * ioctl32.c: Conversion between 32bit and 64bit native ioctls. 
- * - * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) - * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) - * - * These routines maintain argument size conversion between 32bit and 64bit - * ioctls. - */ - -#include - -#define INCLUDES -#include "compat_ioctl.c" - -#include -#include - -#define CODE -#include "compat_ioctl.c" - -#define HANDLE_IOCTL(cmd, handler) { cmd, (ioctl_trans_handler_t)handler, NULL }, -#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd, sys_ioctl) - -#define IOCTL_TABLE_START struct ioctl_trans ioctl_start[] = { -#define IOCTL_TABLE_END }; - -IOCTL_TABLE_START -#include - -#define DECLARES -#include "compat_ioctl.c" - -/* And these ioctls need translation */ -HANDLE_IOCTL(SIOCGPPPSTATS, dev_ifsioc) -HANDLE_IOCTL(SIOCGPPPCSTATS, dev_ifsioc) -HANDLE_IOCTL(SIOCGPPPVER, dev_ifsioc) - -IOCTL_TABLE_END - -int ioctl_table_size = ARRAY_SIZE(ioctl_start); diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 6e03b595b6c8..17ed5018288b 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -4,7 +4,6 @@ ifeq ($(CONFIG_PPC64),y) EXTRA_CFLAGS += -mno-minimal-toc -CFLAGS_ioctl32.o += -Ifs/ endif ifeq ($(CONFIG_PPC32),y) CFLAGS_prom_init.o += -fPIC @@ -16,7 +15,7 @@ obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ obj-y += vdso32/ obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ signal_64.o ptrace32.o systbl.o \ - paca.o ioctl32.o cpu_setup_power4.o \ + paca.o cpu_setup_power4.o \ firmware.o sysfs.o idle_64.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o diff --git a/arch/powerpc/kernel/ioctl32.c b/arch/powerpc/kernel/ioctl32.c deleted file mode 100644 index 0fa3d27fef01..000000000000 --- a/arch/powerpc/kernel/ioctl32.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * ioctl32.c: Conversion between 32bit and 64bit native ioctls. - * - * Based on sparc64 ioctl32.c by: - * - * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) - * Copyright (C) 1998 Eddie C. 
Dost (ecd@skynet.be) - * - * ppc64 changes: - * - * Copyright (C) 2000 Ken Aaker (kdaaker@rchland.vnet.ibm.com) - * Copyright (C) 2001 Anton Blanchard (antonb@au.ibm.com) - * - * These routines maintain argument size conversion between 32bit and 64bit - * ioctls. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#define INCLUDES -#include "compat_ioctl.c" -#include - -#define CODE -#include "compat_ioctl.c" - -#define HANDLE_IOCTL(cmd,handler) { cmd, (ioctl_trans_handler_t)handler, NULL }, -#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl) - -#define IOCTL_TABLE_START \ - struct ioctl_trans ioctl_start[] = { -#define IOCTL_TABLE_END \ - }; - -IOCTL_TABLE_START -#include -#define DECLARES -#include "compat_ioctl.c" - -IOCTL_TABLE_END - -int ioctl_table_size = ARRAY_SIZE(ioctl_start); diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 4865e4b49464..9269b5788fac 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -17,8 +17,7 @@ obj-$(CONFIG_MODULES) += s390_ksyms.o module.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ - compat_ioctl.o compat_wrapper.o \ - compat_exec_domain.o + compat_wrapper.o compat_exec_domain.o obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o obj-$(CONFIG_VIRT_TIMER) += vtime.o diff --git a/arch/s390/kernel/compat_ioctl.c b/arch/s390/kernel/compat_ioctl.c deleted file mode 100644 index d716b1768c99..000000000000 --- a/arch/s390/kernel/compat_ioctl.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * ioctl32.c: Conversion between 32bit and 64bit native ioctls. 
- * - * S390 version - * Copyright (C) 2000-2003 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Gerhard Tonn (ton@de.ibm.com) - * Arnd Bergmann (arndb@de.ibm.com) - * - * Original implementation from 32-bit Sparc compat code which is - * Copyright (C) 2000 Silicon Graphics, Inc. - * Written by Ulf Carlsson (ulfc@engr.sgi.com) - */ - -#include "compat_linux.h" -#define INCLUDES -#define CODE -#include "../../../fs/compat_ioctl.c" -#include -#include -#include -#include -#include "../../../drivers/s390/char/raw3270.h" - -static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd, - unsigned long arg, struct file *f) -{ - return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg)); -} - -static int do_ioctl32_ulong(unsigned int fd, unsigned int cmd, - unsigned long arg, struct file *f) -{ - return sys_ioctl(fd, cmd, arg); -} - -#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL((cmd),(ioctl_trans_handler_t)do_ioctl32_pointer) -#define ULONG_IOCTL(cmd) HANDLE_IOCTL((cmd),(ioctl_trans_handler_t)do_ioctl32_ulong) -#define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl_trans_handler_t)(handler), NULL }, - -struct ioctl_trans ioctl_start[] = { -/* architecture independent ioctls */ -#include -#define DECLARES -#include "../../../fs/compat_ioctl.c" -}; - -int ioctl_table_size = ARRAY_SIZE(ioctl_start); diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile index 6f00ab8b9d23..83d67eb18895 100644 --- a/arch/sparc64/kernel/Makefile +++ b/arch/sparc64/kernel/Makefile @@ -16,7 +16,7 @@ obj-y := process.o setup.o cpu.o idprom.o \ obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o pci_iommu.o \ pci_psycho.o pci_sabre.o pci_schizo.o obj-$(CONFIG_SMP) += smp.o trampoline.o -obj-$(CONFIG_SPARC32_COMPAT) += sys32.o sys_sparc32.o signal32.o ioctl32.o +obj-$(CONFIG_SPARC32_COMPAT) += sys32.o sys_sparc32.o signal32.o obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o obj-$(CONFIG_BINFMT_AOUT32) += binfmt_aout32.o obj-$(CONFIG_MODULES) += module.o @@ -40,5 +40,3 @@ endif 
head.o: head.S ttable.S itlb_base.S dtlb_base.S dtlb_backend.S dtlb_prot.S \ etrap.S rtrap.S winfixup.S entry.S - -CFLAGS_ioctl32.o += -Ifs/ diff --git a/arch/sparc64/kernel/ioctl32.c b/arch/sparc64/kernel/ioctl32.c deleted file mode 100644 index 196b208665a2..000000000000 --- a/arch/sparc64/kernel/ioctl32.c +++ /dev/null @@ -1,39 +0,0 @@ -/* $Id: ioctl32.c,v 1.136 2002/01/14 09:49:52 davem Exp $ - * ioctl32.c: Conversion between 32bit and 64bit native ioctls. - * - * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) - * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) - * Copyright (C) 2003 Pavel Machek (pavel@suse.cz) - * - * These routines maintain argument size conversion between 32bit and 64bit - * ioctls. - */ - -#define INCLUDES -#include "compat_ioctl.c" -#include - -#define CODE -#include "compat_ioctl.c" - -#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL((cmd),sys_ioctl) -#define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl_trans_handler_t)(handler), NULL }, -#define IOCTL_TABLE_START \ - struct ioctl_trans ioctl_start[] = { -#define IOCTL_TABLE_END \ - }; - -IOCTL_TABLE_START -#include -#define DECLARES -#include "compat_ioctl.c" -#if 0 -HANDLE_IOCTL(RTC32_IRQP_READ, do_rtc_ioctl) -HANDLE_IOCTL(RTC32_IRQP_SET, do_rtc_ioctl) -HANDLE_IOCTL(RTC32_EPOCH_READ, do_rtc_ioctl) -HANDLE_IOCTL(RTC32_EPOCH_SET, do_rtc_ioctl) -#endif -/* take care of sizeof(sizeof()) breakage */ -IOCTL_TABLE_END - -int ioctl_table_size = ARRAY_SIZE(ioctl_start); diff --git a/arch/x86_64/ia32/Makefile b/arch/x86_64/ia32/Makefile index f76217d8f579..051608d55920 100644 --- a/arch/x86_64/ia32/Makefile +++ b/arch/x86_64/ia32/Makefile @@ -2,8 +2,7 @@ # Makefile for the ia32 kernel emulation subsystem. 
# -obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_ioctl.o \ - ia32_signal.o tls32.o \ +obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \ ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o sysv-$(CONFIG_SYSVIPC) := ipc32.o @@ -29,4 +28,3 @@ $(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE AFLAGS_vsyscall-sysenter.o = -m32 AFLAGS_vsyscall-syscall.o = -m32 -CFLAGS_ia32_ioctl.o += -Ifs/ diff --git a/arch/x86_64/ia32/ia32_ioctl.c b/arch/x86_64/ia32/ia32_ioctl.c deleted file mode 100644 index e11cc5699352..000000000000 --- a/arch/x86_64/ia32/ia32_ioctl.c +++ /dev/null @@ -1,32 +0,0 @@ -/* $Id: ia32_ioctl.c,v 1.25 2002/10/11 07:17:06 ak Exp $ - * ioctl32.c: Conversion between 32bit and 64bit native ioctls. - * - * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) - * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) - * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs - * - * These routines maintain argument size conversion between 32bit and 64bit - * ioctls. 
- */ - -#define INCLUDES -#include -#include "compat_ioctl.c" -#include - -#define CODE -#include "compat_ioctl.c" - - -#define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl_trans_handler_t)(handler) }, -#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl) - -struct ioctl_trans ioctl_start[] = { -#include -#define DECLARES -#include "compat_ioctl.c" -/* take care of sizeof(sizeof()) breakage */ -}; - -int ioctl_table_size = ARRAY_SIZE(ioctl_start); - diff --git a/fs/Makefile b/fs/Makefile index 35e9aec608e4..1db711319c80 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -14,7 +14,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ obj-$(CONFIG_INOTIFY) += inotify.o obj-$(CONFIG_EPOLL) += eventpoll.o -obj-$(CONFIG_COMPAT) += compat.o +obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o nfsd-$(CONFIG_NFSD) := nfsctl.o obj-y += $(nfsd-y) $(nfsd-m) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index b9aeacc11c8f..890bc30fbe20 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -10,7 +10,6 @@ * ioctls. */ -#ifdef INCLUDES #include #include #include @@ -81,13 +80,9 @@ #include #include -/* Ugly hack. */ -#undef __KERNEL__ #include -#define __KERNEL__ #include -#include #include #include #include @@ -95,7 +90,6 @@ #include #include -#include #include #include #include @@ -128,11 +122,6 @@ #include #include -#undef INCLUDES -#endif - -#ifdef CODE - /* Aiee. 
Someone does not find a difference between int and long */ #define EXT2_IOC32_GETFLAGS _IOR('f', 1, int) #define EXT2_IOC32_SETFLAGS _IOW('f', 2, int) @@ -148,6 +137,12 @@ #define EXT2_IOC32_GETVERSION _IOR('v', 1, int) #define EXT2_IOC32_SETVERSION _IOW('v', 2, int) +static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd, + unsigned long arg, struct file *f) +{ + return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg)); +} + static int w_long(unsigned int fd, unsigned int cmd, unsigned long arg) { mm_segment_t old_fs = get_fs(); @@ -2705,10 +2700,20 @@ static int do_ncp_setprivatedata(unsigned int fd, unsigned int cmd, unsigned lon } #endif -#undef CODE -#endif +#define HANDLE_IOCTL(cmd,handler) \ + { (cmd), (ioctl_trans_handler_t)(handler) }, + +/* pointer to compatible structure or no argument */ +#define COMPATIBLE_IOCTL(cmd) \ + { (cmd), do_ioctl32_pointer }, + +/* argument is an unsigned long integer, not a pointer */ +#define ULONG_IOCTL(cmd) \ + { (cmd), (ioctl_trans_handler_t)sys_ioctl }, -#ifdef DECLARES + +struct ioctl_trans ioctl_start[] = { +#include HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob) HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob) #ifdef CONFIG_NET @@ -2921,6 +2926,6 @@ HANDLE_IOCTL(DMX_GET_EVENT, do_dmx_get_event) HANDLE_IOCTL(VIDEO_GET_EVENT, do_video_get_event) HANDLE_IOCTL(VIDEO_STILLPICTURE, do_video_stillpicture) HANDLE_IOCTL(VIDEO_SET_SPU_PALETTE, do_video_set_spu_palette) +}; -#undef DECLARES -#endif +int ioctl_table_size = ARRAY_SIZE(ioctl_start); diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index 339878952f12..8fad50f8e389 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -2,14 +2,6 @@ * compatible types passed or none at all... Please include * only stuff that is compatible on *all architectures*. 
*/ -#ifndef COMPATIBLE_IOCTL /* pointer to compatible structure or no argument */ -#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL((cmd),(ioctl_trans_handler_t)sys_ioctl) -#endif - -#ifndef ULONG_IOCTL /* argument is an unsigned long integer, not a pointer */ -#define ULONG_IOCTL(cmd) HANDLE_IOCTL((cmd),(ioctl_trans_handler_t)sys_ioctl) -#endif - COMPATIBLE_IOCTL(0x4B50) /* KDGHWCLK - not in the kernel, but don't complain */ COMPATIBLE_IOCTL(0x4B51) /* KDSHWCLK - not in the kernel, but don't complain */ -- cgit v1.2.3-71-gd317 From fc33a7bb9c6dd8f6e4a014976200f8fdabb3a45c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Jan 2006 20:52:17 -0800 Subject: [PATCH] per-mountpoint noatime/nodiratime Turn noatime and nodiratime into per-mount instead of per-sb flags. After all the preparations this is a rather trivial patch. The mount code needs to treat the two options as per-mount instead of per-superblock, and touch_atime needs to be changed to check the new MNT_ flags in addition to the MS_ flags that are kept for filesystems that are always noatime/nodiratime but not user settable anymore. Besides that core code only nfs needed an update because it's leaving atime updates to the server and thus sets the S_NOATIME flag on every inode, but needs to know whether it's a real noatime mount for a getattr optimization. While we're at it I've killed the IS_NOATIME/IS_NODIRATIME macros that were only used by touch_atime. 
Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 17 +++++++++++++---- fs/namespace.c | 12 +++++++++--- fs/nfs/inode.c | 17 +++++++++++++---- fs/xfs/linux-2.6/xfs_iops.c | 3 +++ include/linux/fs.h | 5 +---- include/linux/mount.h | 8 +++++--- 6 files changed, 44 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index 76980a9c92e7..108138d4e909 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -22,6 +22,7 @@ #include #include #include +#include /* * This is needed for the following functions: @@ -1189,12 +1190,20 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry) struct inode *inode = dentry->d_inode; struct timespec now; - /* per-mountpoint checks will go here */ - if (IS_NOATIME(inode)) + if (IS_RDONLY(inode)) return; - if (IS_NODIRATIME(inode) && S_ISDIR(inode->i_mode)) + + if ((inode->i_flags & S_NOATIME) || + (inode->i_sb->s_flags & MS_NOATIME) || + ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))) return; - if (IS_RDONLY(inode)) + + /* + * We may have a NULL vfsmount when coming from NFSD + */ + if (mnt && + ((mnt->mnt_flags & MNT_NOATIME) || + ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))) return; now = current_fs_time(inode->i_sb); diff --git a/fs/namespace.c b/fs/namespace.c index f0e353f5bc30..2ca6145f43d6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -355,14 +355,14 @@ static int show_vfsmnt(struct seq_file *m, void *v) { MS_SYNCHRONOUS, ",sync" }, { MS_DIRSYNC, ",dirsync" }, { MS_MANDLOCK, ",mand" }, - { MS_NOATIME, ",noatime" }, - { MS_NODIRATIME, ",nodiratime" }, { 0, NULL } }; static struct proc_fs_info mnt_info[] = { { MNT_NOSUID, ",nosuid" }, { MNT_NODEV, ",nodev" }, { MNT_NOEXEC, ",noexec" }, + { MNT_NOATIME, ",noatime" }, + { MNT_NODIRATIME, ",nodiratime" }, { 0, NULL } }; struct proc_fs_info *fs_infop; @@ -1286,7 +1286,13 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, 
mnt_flags |= MNT_NODEV; if (flags & MS_NOEXEC) mnt_flags |= MNT_NOEXEC; - flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE); + if (flags & MS_NOATIME) + mnt_flags |= MNT_NOATIME; + if (flags & MS_NODIRATIME) + mnt_flags |= MNT_NODIRATIME; + + flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | + MS_NOATIME | MS_NODIRATIME); /* ... and get the mountpoint */ retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3e4ba9cb7f80..a77ee95b7efb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -950,11 +950,20 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) /* Flush out writes to the server in order to update c/mtime */ nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT); - if (__IS_FLG(inode, MS_NOATIME)) - need_atime = 0; - else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode)) + + /* + * We may force a getattr if the user cares about atime. + * + * Note that we only have to check the vfsmount flags here: + * - NFS always sets S_NOATIME by so checking it would give a + * bogus result + * - NFS never sets MS_NOATIME or MS_NODIRATIME so there is + * no point in checking those. + */ + if ((mnt->mnt_flags & MNT_NOATIME) || + ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) need_atime = 0; - /* We may force a getattr if the user cares about atime */ + if (need_atime) err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); else diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 41c478bb1ffc..97fb1470cf28 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -54,6 +54,9 @@ #include #include +#define IS_NOATIME(inode) ((inode->i_sb->s_flags & MS_NOATIME) || \ + (S_ISDIR(inode->i_mode) && inode->i_sb->s_flags & MS_NODIRATIME)) + /* * Change the requested timestamp in the given inode. 
* We don't lock across timestamp updates, and we don't log them but diff --git a/include/linux/fs.h b/include/linux/fs.h index 85c5656756b6..d1e370d25f7b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -114,8 +114,7 @@ extern int dir_notify_enable; /* * Superblock flags that can be altered by MS_REMOUNT */ -#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_NOATIME|\ - MS_NODIRATIME) +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK) /* * Old magic mount flag and mask @@ -161,8 +160,6 @@ extern int dir_notify_enable; #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) -#define IS_NOATIME(inode) (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME)) -#define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME) #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) diff --git a/include/linux/mount.h b/include/linux/mount.h index b98a709f1794..b7472ae91fa4 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -20,10 +20,12 @@ #define MNT_NOSUID 0x01 #define MNT_NODEV 0x02 #define MNT_NOEXEC 0x04 -#define MNT_SHARED 0x10 /* if the vfsmount is a shared mount */ -#define MNT_UNBINDABLE 0x20 /* if the vfsmount is a unbindable mount */ +#define MNT_NOATIME 0x08 +#define MNT_NODIRATIME 0x10 -#define MNT_PNODE_MASK (MNT_SHARED | MNT_UNBINDABLE) +#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ +#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ +#define MNT_PNODE_MASK 0x3000 /* propogation flag mask */ struct vfsmount { struct list_head mnt_hash; -- cgit v1.2.3-71-gd317 From 5cca7619a562c9d98a3a0123dc878d79bf3c8fb3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2006 20:52:20 -0800 Subject: [PATCH] hrtimer: move div_long_long_rem out of jiffies.h move div_long_long_rem() from jiffies.h into a new 
calc64.h include file, as it is a general math function useful for other things than the jiffy code. Convert it to an inline function Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/calc64.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/jiffies.h | 18 ++++++------------ 2 files changed, 55 insertions(+), 12 deletions(-) create mode 100644 include/linux/calc64.h (limited to 'include/linux') diff --git a/include/linux/calc64.h b/include/linux/calc64.h new file mode 100644 index 000000000000..ebf4b8f38d88 --- /dev/null +++ b/include/linux/calc64.h @@ -0,0 +1,49 @@ +#ifndef _LINUX_CALC64_H +#define _LINUX_CALC64_H + +#include +#include + +/* + * This is a generic macro which is used when the architecture + * specific div64.h does not provide a optimized one. + * + * The 64bit dividend is divided by the divisor (data type long), the + * result is returned and the remainder stored in the variable + * referenced by remainder (data type long *). In contrast to the + * do_div macro the dividend is kept intact. + */ +#ifndef div_long_long_rem +#define div_long_long_rem(dividend, divisor, remainder) \ + do_div_llr((dividend), divisor, remainder) + +static inline unsigned long do_div_llr(const long long dividend, + const long divisor, long *remainder) +{ + u64 result = dividend; + + *(remainder) = do_div(result, divisor); + return (unsigned long) result; +} +#endif + +/* + * Sign aware variation of the above. On some architectures a + * negative dividend leads to an divide overflow exception, which + * is avoided by the sign check. 
+ */ +static inline long div_long_long_rem_signed(const long long dividend, + const long divisor, long *remainder) +{ + long res; + + if (unlikely(dividend < 0)) { + res = -div_long_long_rem(-dividend, divisor, remainder); + *remainder = -(*remainder); + } else + res = div_long_long_rem(dividend, divisor, remainder); + + return res; +} + +#endif diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 6acfdbba734b..99905e180532 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -1,21 +1,12 @@ #ifndef _LINUX_JIFFIES_H #define _LINUX_JIFFIES_H +#include #include #include #include #include #include /* for HZ */ -#include - -#ifndef div_long_long_rem -#define div_long_long_rem(dividend,divisor,remainder) \ -({ \ - u64 result = dividend; \ - *remainder = do_div(result,divisor); \ - result; \ -}) -#endif /* * The following defines establish the engineering parameters of the PLL @@ -373,8 +364,11 @@ jiffies_to_timeval(const unsigned long jiffies, struct timeval *value) * one divide. */ u64 nsec = (u64)jiffies * TICK_NSEC; - value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_usec); - value->tv_usec /= NSEC_PER_USEC; + long tv_usec; + + value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &tv_usec); + tv_usec /= NSEC_PER_USEC; + value->tv_usec = tv_usec; } /* -- cgit v1.2.3-71-gd317 From 753be6222728996974e9e12c185108fcabbb7c6e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2006 20:52:22 -0800 Subject: [PATCH] hrtimer: deinline mktime and set_normalized_timespec mktime() and set_normalized_timespec() are large inline functions used in many places: deinline them. 
From: George Anzinger, off-by-1 bugfix Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 52 +++++--------------------------------------- kernel/time.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 797ccd813bb0..9c444d9c4aa0 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -38,38 +38,9 @@ static __inline__ int timespec_equal(struct timespec *a, struct timespec *b) return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); } -/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. - * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 - * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. - * - * [For the Julian calendar (which was used in Russia before 1917, - * Britain & colonies before 1752, anywhere else before 1582, - * and is still in use by some communities) leave out the - * -year/100+year/400 terms, and add 10.] - * - * This algorithm was first published by Gauss (I think). - * - * WARNING: this function will overflow on 2106-02-07 06:28:16 on - * machines were long is 32-bit! 
(However, as time_t is signed, we - * will already get problems at other places on 2038-01-19 03:14:08) - */ -static inline unsigned long -mktime (unsigned int year, unsigned int mon, - unsigned int day, unsigned int hour, - unsigned int min, unsigned int sec) -{ - if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */ - mon += 12; /* Puts Feb last since it has leap day */ - year -= 1; - } - - return ((( - (unsigned long) (year/4 - year/100 + year/400 + 367*mon/12 + day) + - year*365 - 719499 - )*24 + hour /* now have hours */ - )*60 + min /* now have minutes */ - )*60 + sec; /* finally seconds */ -} +extern unsigned long mktime (unsigned int year, unsigned int mon, + unsigned int day, unsigned int hour, + unsigned int min, unsigned int sec); extern struct timespec xtime; extern struct timespec wall_to_monotonic; @@ -80,6 +51,8 @@ static inline unsigned long get_seconds(void) return xtime.tv_sec; } +extern void set_normalized_timespec (struct timespec *ts, time_t sec, long nsec); + struct timespec current_kernel_time(void); #define CURRENT_TIME (current_kernel_time()) @@ -99,21 +72,6 @@ extern void getnstimestamp(struct timespec *ts); extern struct timespec timespec_trunc(struct timespec t, unsigned gran); -static inline void -set_normalized_timespec (struct timespec *ts, time_t sec, long nsec) -{ - while (nsec >= NSEC_PER_SEC) { - nsec -= NSEC_PER_SEC; - ++sec; - } - while (nsec < 0) { - nsec += NSEC_PER_SEC; - --sec; - } - ts->tv_sec = sec; - ts->tv_nsec = nsec; -} - #endif /* __KERNEL__ */ #define NFDBITS __NFDBITS diff --git a/kernel/time.c b/kernel/time.c index b94bfa8c03e0..fa569885e22b 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -583,6 +583,67 @@ void getnstimestamp(struct timespec *ts) } EXPORT_SYMBOL_GPL(getnstimestamp); +/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. + * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 + * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. 
+ * + * [For the Julian calendar (which was used in Russia before 1917, + * Britain & colonies before 1752, anywhere else before 1582, + * and is still in use by some communities) leave out the + * -year/100+year/400 terms, and add 10.] + * + * This algorithm was first published by Gauss (I think). + * + * WARNING: this function will overflow on 2106-02-07 06:28:16 on + * machines were long is 32-bit! (However, as time_t is signed, we + * will already get problems at other places on 2038-01-19 03:14:08) + */ +unsigned long +mktime (unsigned int year, unsigned int mon, + unsigned int day, unsigned int hour, + unsigned int min, unsigned int sec) +{ + if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */ + mon += 12; /* Puts Feb last since it has leap day */ + year -= 1; + } + + return ((((unsigned long) + (year/4 - year/100 + year/400 + 367*mon/12 + day) + + year*365 - 719499 + )*24 + hour /* now have hours */ + )*60 + min /* now have minutes */ + )*60 + sec; /* finally seconds */ +} + +/** + * set_normalized_timespec - set timespec sec and nsec parts and normalize + * + * @ts: pointer to timespec variable to be set + * @sec: seconds to set + * @nsec: nanoseconds to set + * + * Set seconds and nanoseconds field of a timespec variable and + * normalize to the timespec storage format + * + * Note: The tv_nsec part is always in the range of + * 0 <= tv_nsec < NSEC_PER_SEC + * For negative values only the tv_sec field is negative ! 
+ */ +void set_normalized_timespec (struct timespec *ts, time_t sec, long nsec) +{ + while (nsec >= NSEC_PER_SEC) { + nsec -= NSEC_PER_SEC; + ++sec; + } + while (nsec < 0) { + nsec += NSEC_PER_SEC; + --sec; + } + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void) { -- cgit v1.2.3-71-gd317 From f4818900fa3ee1c56e96f6dede7cc4c05ed383d1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 9 Jan 2006 20:52:23 -0800 Subject: [PATCH] hrtimer: clean up mktime and make arguments const add 'const' to mktime arguments, and clean it up a bit Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 10 +++++----- kernel/time.c | 15 +++++++++------ 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 9c444d9c4aa0..773b83ddd8ef 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -38,9 +38,11 @@ static __inline__ int timespec_equal(struct timespec *a, struct timespec *b) return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); } -extern unsigned long mktime (unsigned int year, unsigned int mon, - unsigned int day, unsigned int hour, - unsigned int min, unsigned int sec); +extern unsigned long mktime(const unsigned int year, const unsigned int mon, + const unsigned int day, const unsigned int hour, + const unsigned int min, const unsigned int sec); + +extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec); extern struct timespec xtime; extern struct timespec wall_to_monotonic; @@ -51,8 +53,6 @@ static inline unsigned long get_seconds(void) return xtime.tv_sec; } -extern void set_normalized_timespec (struct timespec *ts, time_t sec, long nsec); - struct timespec current_kernel_time(void); #define CURRENT_TIME (current_kernel_time()) diff --git a/kernel/time.c b/kernel/time.c index fa569885e22b..a0502aef43ce 100644 --- 
a/kernel/time.c +++ b/kernel/time.c @@ -599,12 +599,15 @@ EXPORT_SYMBOL_GPL(getnstimestamp); * will already get problems at other places on 2038-01-19 03:14:08) */ unsigned long -mktime (unsigned int year, unsigned int mon, - unsigned int day, unsigned int hour, - unsigned int min, unsigned int sec) +mktime(const unsigned int year0, const unsigned int mon0, + const unsigned int day, const unsigned int hour, + const unsigned int min, const unsigned int sec) { - if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */ - mon += 12; /* Puts Feb last since it has leap day */ + unsigned int mon = mon0, year = year0; + + /* 1..12 -> 11,12,1..10 */ + if (0 >= (int) (mon -= 2)) { + mon += 12; /* Puts Feb last since it has leap day */ year -= 1; } @@ -630,7 +633,7 @@ mktime (unsigned int year, unsigned int mon, * 0 <= tv_nsec < NSEC_PER_SEC * For negative values only the tv_sec field is negative ! */ -void set_normalized_timespec (struct timespec *ts, time_t sec, long nsec) +void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec) { while (nsec >= NSEC_PER_SEC) { nsec -= NSEC_PER_SEC; -- cgit v1.2.3-71-gd317 From 0c4f6eeca98a805fd0c2536b55039383eb56d2ba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2006 20:52:25 -0800 Subject: [PATCH] hrtimer: remove unused clock constants remove unused CLOCK_ constants from time.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 773b83ddd8ef..ea64cde7c450 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -104,12 +104,10 @@ struct itimerval { /* * The IDs of the various system clocks (for POSIX.1b interval timers). 
*/ -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 #define CLOCK_PROCESS_CPUTIME_ID 2 #define CLOCK_THREAD_CPUTIME_ID 3 -#define CLOCK_REALTIME_HR 4 -#define CLOCK_MONOTONIC_HR 5 /* * The IDs of various hardware clocks @@ -118,9 +116,8 @@ struct itimerval { #define CLOCK_SGI_CYCLE 10 #define MAX_CLOCKS 16 -#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC | \ - CLOCK_REALTIME_HR | CLOCK_MONOTONIC_HR) -#define CLOCKS_MONO (CLOCK_MONOTONIC & CLOCK_MONOTONIC_HR) +#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) +#define CLOCKS_MONO (CLOCK_MONOTONIC) /* * The various flags for setting POSIX.1b interval timers. -- cgit v1.2.3-71-gd317 From 1ad106ca185e66dc312518e18e2ffaedf376a160 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 9 Jan 2006 20:52:25 -0800 Subject: [PATCH] hrtimer: coding style clean up of clock constants clean up the CLOCK_ portions of time.h Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index ea64cde7c450..aded44c48d42 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -100,30 +100,25 @@ struct itimerval { struct timeval it_value; /* current value */ }; - /* * The IDs of the various system clocks (for POSIX.1b interval timers). 
*/ -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 -#define CLOCK_PROCESS_CPUTIME_ID 2 -#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 /* * The IDs of various hardware clocks */ - - -#define CLOCK_SGI_CYCLE 10 -#define MAX_CLOCKS 16 -#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) -#define CLOCKS_MONO (CLOCK_MONOTONIC) +#define CLOCK_SGI_CYCLE 10 +#define MAX_CLOCKS 16 +#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) +#define CLOCKS_MONO CLOCK_MONOTONIC /* * The various flags for setting POSIX.1b interval timers. */ - -#define TIMER_ABSTIME 0x01 - +#define TIMER_ABSTIME 0x01 #endif -- cgit v1.2.3-71-gd317 From 57a558757bdbb877b54ed5ea15bd0892e02a707d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 9 Jan 2006 20:52:26 -0800 Subject: [PATCH] hrtimer: coding style and white space cleanup style and whitespace cleanup of the rest of time.h. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 61 ++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index aded44c48d42..4d49cabb9b47 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -4,7 +4,7 @@ #include #ifdef __KERNEL__ -#include +# include #endif #ifndef _STRUCT_TIMESPEC @@ -13,7 +13,7 @@ struct timespec { time_t tv_sec; /* seconds */ long tv_nsec; /* nanoseconds */ }; -#endif /* _STRUCT_TIMESPEC */ +#endif struct timeval { time_t tv_sec; /* seconds */ @@ -27,16 +27,16 @@ struct timezone { #ifdef __KERNEL__ -/* Parameters used to convert the timespec values */ -#define MSEC_PER_SEC (1000L) -#define USEC_PER_SEC (1000000L) -#define NSEC_PER_SEC (1000000000L) -#define NSEC_PER_USEC (1000L) +/* Parameters used to convert the timespec values: */ +#define 
MSEC_PER_SEC 1000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000L +#define NSEC_PER_USEC 1000L -static __inline__ int timespec_equal(struct timespec *a, struct timespec *b) -{ +static __inline__ int timespec_equal(struct timespec *a, struct timespec *b) +{ return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); -} +} extern unsigned long mktime(const unsigned int year, const unsigned int mon, const unsigned int day, const unsigned int hour, @@ -49,25 +49,26 @@ extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; static inline unsigned long get_seconds(void) -{ +{ return xtime.tv_sec; } struct timespec current_kernel_time(void); -#define CURRENT_TIME (current_kernel_time()) -#define CURRENT_TIME_SEC ((struct timespec) { xtime.tv_sec, 0 }) +#define CURRENT_TIME (current_kernel_time()) +#define CURRENT_TIME_SEC ((struct timespec) { xtime.tv_sec, 0 }) extern void do_gettimeofday(struct timeval *tv); extern int do_settimeofday(struct timespec *tv); extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); -extern void clock_was_set(void); // call when ever the clock is set +extern void clock_was_set(void); // call whenever the clock is set extern int do_posix_clock_monotonic_gettime(struct timespec *tp); -extern long do_utimes(char __user * filename, struct timeval * times); +extern long do_utimes(char __user *filename, struct timeval *times); struct itimerval; -extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); +extern int do_setitimer(int which, struct itimerval *value, + struct itimerval *ovalue); extern int do_getitimer(int which, struct itimerval *value); -extern void getnstimeofday (struct timespec *tv); +extern void getnstimeofday(struct timespec *tv); extern void getnstimestamp(struct timespec *ts); extern struct timespec timespec_trunc(struct timespec t, unsigned gran); @@ -84,24 +85,24 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran); /* * 
Names of the interval timers, and structure - * defining a timer setting. + * defining a timer setting: */ -#define ITIMER_REAL 0 -#define ITIMER_VIRTUAL 1 -#define ITIMER_PROF 2 +#define ITIMER_REAL 0 +#define ITIMER_VIRTUAL 1 +#define ITIMER_PROF 2 -struct itimerspec { - struct timespec it_interval; /* timer period */ - struct timespec it_value; /* timer expiration */ +struct itimerspec { + struct timespec it_interval; /* timer period */ + struct timespec it_value; /* timer expiration */ }; -struct itimerval { - struct timeval it_interval; /* timer interval */ - struct timeval it_value; /* current value */ +struct itimerval { + struct timeval it_interval; /* timer interval */ + struct timeval it_value; /* current value */ }; /* - * The IDs of the various system clocks (for POSIX.1b interval timers). + * The IDs of the various system clocks (for POSIX.1b interval timers): */ #define CLOCK_REALTIME 0 #define CLOCK_MONOTONIC 1 @@ -109,7 +110,7 @@ struct itimerval { #define CLOCK_THREAD_CPUTIME_ID 3 /* - * The IDs of various hardware clocks + * The IDs of various hardware clocks: */ #define CLOCK_SGI_CYCLE 10 #define MAX_CLOCKS 16 @@ -117,7 +118,7 @@ struct itimerval { #define CLOCKS_MONO CLOCK_MONOTONIC /* - * The various flags for setting POSIX.1b interval timers. 
+ * The various flags for setting POSIX.1b interval timers: */ #define TIMER_ABSTIME 0x01 -- cgit v1.2.3-71-gd317 From a924b04ddea9788e09f387fe19ccbede5f09ddd8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2006 20:52:27 -0800 Subject: [PATCH] hrtimer: make clockid_t arguments const add const arguments to the posix-timers.h API functions Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/posix-timers.h | 22 +++++++++++----------- kernel/posix-cpu-timers.c | 40 ++++++++++++++++++++++------------------ kernel/posix-timers.c | 38 +++++++++++++++++++++----------------- 3 files changed, 54 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index f942e2bad8e3..ecda38e07899 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -72,12 +72,12 @@ struct k_clock_abs { }; struct k_clock { int res; /* in nano seconds */ - int (*clock_getres) (clockid_t which_clock, struct timespec *tp); + int (*clock_getres) (const clockid_t which_clock, struct timespec *tp); struct k_clock_abs *abs_struct; - int (*clock_set) (clockid_t which_clock, struct timespec * tp); - int (*clock_get) (clockid_t which_clock, struct timespec * tp); + int (*clock_set) (const clockid_t which_clock, struct timespec * tp); + int (*clock_get) (const clockid_t which_clock, struct timespec * tp); int (*timer_create) (struct k_itimer *timer); - int (*nsleep) (clockid_t which_clock, int flags, struct timespec *); + int (*nsleep) (const clockid_t which_clock, int flags, struct timespec *); int (*timer_set) (struct k_itimer * timr, int flags, struct itimerspec * new_setting, struct itimerspec * old_setting); @@ -87,12 +87,12 @@ struct k_clock { struct itimerspec * cur_setting); }; -void register_posix_clock(clockid_t clock_id, struct k_clock *new_clock); +void register_posix_clock(const clockid_t clock_id, 
struct k_clock *new_clock); /* Error handlers for timer_create, nanosleep and settime */ int do_posix_clock_notimer_create(struct k_itimer *timer); -int do_posix_clock_nonanosleep(clockid_t, int flags, struct timespec *); -int do_posix_clock_nosettime(clockid_t, struct timespec *tp); +int do_posix_clock_nonanosleep(const clockid_t, int flags, struct timespec *); +int do_posix_clock_nosettime(const clockid_t, struct timespec *tp); /* function to call to trigger timer event */ int posix_timer_event(struct k_itimer *timr, int si_private); @@ -117,11 +117,11 @@ struct now_struct { } \ }while (0) -int posix_cpu_clock_getres(clockid_t which_clock, struct timespec *); -int posix_cpu_clock_get(clockid_t which_clock, struct timespec *); -int posix_cpu_clock_set(clockid_t which_clock, const struct timespec *tp); +int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *); +int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *); +int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp); int posix_cpu_timer_create(struct k_itimer *); -int posix_cpu_nsleep(clockid_t, int, struct timespec *); +int posix_cpu_nsleep(const clockid_t, int, struct timespec *); int posix_cpu_timer_set(struct k_itimer *, int, struct itimerspec *, struct itimerspec *); int posix_cpu_timer_del(struct k_itimer *); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 4c68edff900b..abf6990c6eb5 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -7,7 +7,7 @@ #include #include -static int check_clock(clockid_t which_clock) +static int check_clock(const clockid_t which_clock) { int error = 0; struct task_struct *p; @@ -31,7 +31,7 @@ static int check_clock(clockid_t which_clock) } static inline union cpu_time_count -timespec_to_sample(clockid_t which_clock, const struct timespec *tp) +timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) { union cpu_time_count ret; ret.sched = 0; /* high half 
always zero when .cpu used */ @@ -43,7 +43,7 @@ timespec_to_sample(clockid_t which_clock, const struct timespec *tp) return ret; } -static void sample_to_timespec(clockid_t which_clock, +static void sample_to_timespec(const clockid_t which_clock, union cpu_time_count cpu, struct timespec *tp) { @@ -55,7 +55,7 @@ static void sample_to_timespec(clockid_t which_clock, } } -static inline int cpu_time_before(clockid_t which_clock, +static inline int cpu_time_before(const clockid_t which_clock, union cpu_time_count now, union cpu_time_count then) { @@ -65,7 +65,7 @@ static inline int cpu_time_before(clockid_t which_clock, return cputime_lt(now.cpu, then.cpu); } } -static inline void cpu_time_add(clockid_t which_clock, +static inline void cpu_time_add(const clockid_t which_clock, union cpu_time_count *acc, union cpu_time_count val) { @@ -75,7 +75,7 @@ static inline void cpu_time_add(clockid_t which_clock, acc->cpu = cputime_add(acc->cpu, val.cpu); } } -static inline union cpu_time_count cpu_time_sub(clockid_t which_clock, +static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, union cpu_time_count a, union cpu_time_count b) { @@ -151,7 +151,7 @@ static inline unsigned long long sched_ns(struct task_struct *p) return (p == current) ? current_sched_time(p) : p->sched_time; } -int posix_cpu_clock_getres(clockid_t which_clock, struct timespec *tp) +int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) { int error = check_clock(which_clock); if (!error) { @@ -169,7 +169,7 @@ int posix_cpu_clock_getres(clockid_t which_clock, struct timespec *tp) return error; } -int posix_cpu_clock_set(clockid_t which_clock, const struct timespec *tp) +int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) { /* * You can never reset a CPU clock, but we check for other errors @@ -186,7 +186,7 @@ int posix_cpu_clock_set(clockid_t which_clock, const struct timespec *tp) /* * Sample a per-thread clock for the given task. 
*/ -static int cpu_clock_sample(clockid_t which_clock, struct task_struct *p, +static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, union cpu_time_count *cpu) { switch (CPUCLOCK_WHICH(which_clock)) { @@ -248,7 +248,7 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx, * Sample a process (thread group) clock for the given group_leader task. * Must be called with tasklist_lock held for reading. */ -static int cpu_clock_sample_group(clockid_t which_clock, +static int cpu_clock_sample_group(const clockid_t which_clock, struct task_struct *p, union cpu_time_count *cpu) { @@ -262,7 +262,7 @@ static int cpu_clock_sample_group(clockid_t which_clock, } -int posix_cpu_clock_get(clockid_t which_clock, struct timespec *tp) +int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) { const pid_t pid = CPUCLOCK_PID(which_clock); int error = -EINVAL; @@ -1399,7 +1399,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, static long posix_cpu_clock_nanosleep_restart(struct restart_block *); -int posix_cpu_nsleep(clockid_t which_clock, int flags, +int posix_cpu_nsleep(const clockid_t which_clock, int flags, struct timespec *rqtp) { struct restart_block *restart_block = @@ -1503,11 +1503,13 @@ posix_cpu_clock_nanosleep_restart(struct restart_block *restart_block) #define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED) #define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED) -static int process_cpu_clock_getres(clockid_t which_clock, struct timespec *tp) +static int process_cpu_clock_getres(const clockid_t which_clock, + struct timespec *tp) { return posix_cpu_clock_getres(PROCESS_CLOCK, tp); } -static int process_cpu_clock_get(clockid_t which_clock, struct timespec *tp) +static int process_cpu_clock_get(const clockid_t which_clock, + struct timespec *tp) { return posix_cpu_clock_get(PROCESS_CLOCK, tp); } @@ -1516,16 +1518,18 @@ static int process_cpu_timer_create(struct k_itimer *timer) 
timer->it_clock = PROCESS_CLOCK; return posix_cpu_timer_create(timer); } -static int process_cpu_nsleep(clockid_t which_clock, int flags, +static int process_cpu_nsleep(const clockid_t which_clock, int flags, struct timespec *rqtp) { return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp); } -static int thread_cpu_clock_getres(clockid_t which_clock, struct timespec *tp) +static int thread_cpu_clock_getres(const clockid_t which_clock, + struct timespec *tp) { return posix_cpu_clock_getres(THREAD_CLOCK, tp); } -static int thread_cpu_clock_get(clockid_t which_clock, struct timespec *tp) +static int thread_cpu_clock_get(const clockid_t which_clock, + struct timespec *tp) { return posix_cpu_clock_get(THREAD_CLOCK, tp); } @@ -1534,7 +1538,7 @@ static int thread_cpu_timer_create(struct k_itimer *timer) timer->it_clock = THREAD_CLOCK; return posix_cpu_timer_create(timer); } -static int thread_cpu_nsleep(clockid_t which_clock, int flags, +static int thread_cpu_nsleep(const clockid_t which_clock, int flags, struct timespec *rqtp) { return -EINVAL; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index fdb710777439..69d5a4b5395b 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -151,7 +151,7 @@ static void posix_timer_fn(unsigned long); static u64 do_posix_clock_monotonic_gettime_parts( struct timespec *tp, struct timespec *mo); int do_posix_clock_monotonic_gettime(struct timespec *tp); -static int do_posix_clock_monotonic_get(clockid_t, struct timespec *tp); +static int do_posix_clock_monotonic_get(const clockid_t, struct timespec *tp); static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); @@ -176,7 +176,7 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) * the function pointer CALL in struct k_clock. 
*/ -static inline int common_clock_getres(clockid_t which_clock, +static inline int common_clock_getres(const clockid_t which_clock, struct timespec *tp) { tp->tv_sec = 0; @@ -184,13 +184,15 @@ static inline int common_clock_getres(clockid_t which_clock, return 0; } -static inline int common_clock_get(clockid_t which_clock, struct timespec *tp) +static inline int common_clock_get(const clockid_t which_clock, + struct timespec *tp) { getnstimeofday(tp); return 0; } -static inline int common_clock_set(clockid_t which_clock, struct timespec *tp) +static inline int common_clock_set(const clockid_t which_clock, + struct timespec *tp) { return do_sys_settimeofday(tp, NULL); } @@ -207,7 +209,7 @@ static inline int common_timer_create(struct k_itimer *new_timer) /* * These ones are defined below. */ -static int common_nsleep(clockid_t, int flags, struct timespec *t); +static int common_nsleep(const clockid_t, int flags, struct timespec *t); static void common_timer_get(struct k_itimer *, struct itimerspec *); static int common_timer_set(struct k_itimer *, int, struct itimerspec *, struct itimerspec *); @@ -216,7 +218,7 @@ static int common_timer_del(struct k_itimer *timer); /* * Return nonzero iff we know a priori this clockid_t value is bogus. */ -static inline int invalid_clockid(clockid_t which_clock) +static inline int invalid_clockid(const clockid_t which_clock) { if (which_clock < 0) /* CPU clock, posix_cpu_* will check it */ return 0; @@ -522,7 +524,7 @@ static inline struct task_struct * good_sigevent(sigevent_t * event) return rtn; } -void register_posix_clock(clockid_t clock_id, struct k_clock *new_clock) +void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock) { if ((unsigned) clock_id >= MAX_CLOCKS) { printk("POSIX clock register failed for clock_id %d\n", @@ -568,7 +570,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) /* Create a POSIX.1b interval timer. 
*/ asmlinkage long -sys_timer_create(clockid_t which_clock, +sys_timer_create(const clockid_t which_clock, struct sigevent __user *timer_event_spec, timer_t __user * created_timer_id) { @@ -1195,7 +1197,8 @@ static u64 do_posix_clock_monotonic_gettime_parts( return jiff; } -static int do_posix_clock_monotonic_get(clockid_t clock, struct timespec *tp) +static int do_posix_clock_monotonic_get(const clockid_t clock, + struct timespec *tp) { struct timespec wall_to_mono; @@ -1212,7 +1215,7 @@ int do_posix_clock_monotonic_gettime(struct timespec *tp) return do_posix_clock_monotonic_get(CLOCK_MONOTONIC, tp); } -int do_posix_clock_nosettime(clockid_t clockid, struct timespec *tp) +int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp) { return -EINVAL; } @@ -1224,7 +1227,8 @@ int do_posix_clock_notimer_create(struct k_itimer *timer) } EXPORT_SYMBOL_GPL(do_posix_clock_notimer_create); -int do_posix_clock_nonanosleep(clockid_t clock, int flags, struct timespec *t) +int do_posix_clock_nonanosleep(const clockid_t clock, int flags, + struct timespec *t) { #ifndef ENOTSUP return -EOPNOTSUPP; /* aka ENOTSUP in userland for POSIX */ @@ -1234,8 +1238,8 @@ int do_posix_clock_nonanosleep(clockid_t clock, int flags, struct timespec *t) } EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep); -asmlinkage long -sys_clock_settime(clockid_t which_clock, const struct timespec __user *tp) +asmlinkage long sys_clock_settime(const clockid_t which_clock, + const struct timespec __user *tp) { struct timespec new_tp; @@ -1248,7 +1252,7 @@ sys_clock_settime(clockid_t which_clock, const struct timespec __user *tp) } asmlinkage long -sys_clock_gettime(clockid_t which_clock, struct timespec __user *tp) +sys_clock_gettime(const clockid_t which_clock, struct timespec __user *tp) { struct timespec kernel_tp; int error; @@ -1265,7 +1269,7 @@ sys_clock_gettime(clockid_t which_clock, struct timespec __user *tp) } asmlinkage long -sys_clock_getres(clockid_t which_clock, struct timespec __user 
*tp) +sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp) { struct timespec rtn_tp; int error; @@ -1387,7 +1391,7 @@ void clock_was_set(void) long clock_nanosleep_restart(struct restart_block *restart_block); asmlinkage long -sys_clock_nanosleep(clockid_t which_clock, int flags, +sys_clock_nanosleep(const clockid_t which_clock, int flags, const struct timespec __user *rqtp, struct timespec __user *rmtp) { @@ -1419,7 +1423,7 @@ sys_clock_nanosleep(clockid_t which_clock, int flags, } -static int common_nsleep(clockid_t which_clock, +static int common_nsleep(const clockid_t which_clock, int flags, struct timespec *tsave) { struct timespec t, dum; -- cgit v1.2.3-71-gd317 From 2a698971941bf5e6ebe96275f7d5318b2cf91ccf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2006 20:52:28 -0800 Subject: [PATCH] hrtimer: coding style and white space cleanup 2 style/whitespace/macro cleanups of posix-timers.h Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/posix-timers.h | 80 ++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index ecda38e07899..ae51473d3d48 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -42,7 +42,7 @@ struct k_itimer { timer_t it_id; /* timer id */ int it_overrun; /* overrun on pending signal */ int it_overrun_last; /* overrun on last delivered signal */ - int it_requeue_pending; /* waiting to requeue this timer */ + int it_requeue_pending; /* waiting to requeue this timer */ #define REQUEUE_PENDING 1 int it_sigev_notify; /* notify word of sigevent struct */ int it_sigev_signo; /* signo word of sigevent struct */ @@ -52,8 +52,10 @@ struct k_itimer { union { struct { struct timer_list timer; - struct list_head abs_timer_entry; /* clock abs_timer_list */ - struct 
timespec wall_to_prev; /* wall_to_monotonic used when set */ + /* clock abs_timer_list: */ + struct list_head abs_timer_entry; + /* wall_to_monotonic used when set: */ + struct timespec wall_to_prev; unsigned long incr; /* interval in jiffies */ } real; struct cpu_timer_list cpu; @@ -70,14 +72,16 @@ struct k_clock_abs { struct list_head list; spinlock_t lock; }; + struct k_clock { - int res; /* in nano seconds */ + int res; /* in nanoseconds */ int (*clock_getres) (const clockid_t which_clock, struct timespec *tp); struct k_clock_abs *abs_struct; int (*clock_set) (const clockid_t which_clock, struct timespec * tp); int (*clock_get) (const clockid_t which_clock, struct timespec * tp); int (*timer_create) (struct k_itimer *timer); - int (*nsleep) (const clockid_t which_clock, int flags, struct timespec *); + int (*nsleep) (const clockid_t which_clock, int flags, + struct timespec *); int (*timer_set) (struct k_itimer * timr, int flags, struct itimerspec * new_setting, struct itimerspec * old_setting); @@ -89,7 +93,7 @@ struct k_clock { void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock); -/* Error handlers for timer_create, nanosleep and settime */ +/* error handlers for timer_create, nanosleep and settime */ int do_posix_clock_notimer_create(struct k_itimer *timer); int do_posix_clock_nonanosleep(const clockid_t, int flags, struct timespec *); int do_posix_clock_nosettime(const clockid_t, struct timespec *tp); @@ -101,39 +105,43 @@ struct now_struct { unsigned long jiffies; }; -#define posix_get_now(now) (now)->jiffies = jiffies; +#define posix_get_now(now) \ + do { (now)->jiffies = jiffies; } while (0) + #define posix_time_before(timer, now) \ time_before((timer)->expires, (now)->jiffies) #define posix_bump_timer(timr, now) \ - do { \ - long delta, orun; \ - delta = now.jiffies - (timr)->it.real.timer.expires; \ - if (delta >= 0) { \ - orun = 1 + (delta / (timr)->it.real.incr); \ - (timr)->it.real.timer.expires += \ - orun * 
(timr)->it.real.incr; \ - (timr)->it_overrun += orun; \ - } \ - }while (0) - -int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *); -int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *); -int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp); -int posix_cpu_timer_create(struct k_itimer *); -int posix_cpu_nsleep(const clockid_t, int, struct timespec *); -int posix_cpu_timer_set(struct k_itimer *, int, - struct itimerspec *, struct itimerspec *); -int posix_cpu_timer_del(struct k_itimer *); -void posix_cpu_timer_get(struct k_itimer *, struct itimerspec *); - -void posix_cpu_timer_schedule(struct k_itimer *); - -void run_posix_cpu_timers(struct task_struct *); -void posix_cpu_timers_exit(struct task_struct *); -void posix_cpu_timers_exit_group(struct task_struct *); - -void set_process_cpu_timer(struct task_struct *, unsigned int, - cputime_t *, cputime_t *); + do { \ + long delta, orun; \ + \ + delta = (now).jiffies - (timr)->it.real.timer.expires; \ + if (delta >= 0) { \ + orun = 1 + (delta / (timr)->it.real.incr); \ + (timr)->it.real.timer.expires += \ + orun * (timr)->it.real.incr; \ + (timr)->it_overrun += orun; \ + } \ + } while (0) + +int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *ts); +int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *ts); +int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *ts); +int posix_cpu_timer_create(struct k_itimer *timer); +int posix_cpu_nsleep(const clockid_t which_clock, int flags, + struct timespec *ts); +int posix_cpu_timer_set(struct k_itimer *timer, int flags, + struct itimerspec *new, struct itimerspec *old); +int posix_cpu_timer_del(struct k_itimer *timer); +void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp); + +void posix_cpu_timer_schedule(struct k_itimer *timer); + +void run_posix_cpu_timers(struct task_struct *task); +void posix_cpu_timers_exit(struct task_struct 
*task); +void posix_cpu_timers_exit_group(struct task_struct *task); + +void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, + cputime_t *newval, cputime_t *oldval); #endif -- cgit v1.2.3-71-gd317 From 5f82b2b77e66d452c3037cc47f436d2d76fd5f06 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2006 20:52:29 -0800 Subject: [PATCH] hrtimer: create and use timespec_valid macro add timespec_valid(ts) [returns false if the timespec is denorm] Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 6 ++++++ kernel/posix-timers.c | 5 ++--- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 4d49cabb9b47..64e797464589 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -44,6 +44,12 @@ extern unsigned long mktime(const unsigned int year, const unsigned int mon, extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec); +/* + * Returns true if the timespec is norm, false if denorm: + */ +#define timespec_valid(ts) \ + (((ts)->tv_sec >= 0) && (((unsigned) (ts)->tv_nsec) < NSEC_PER_SEC)) + extern struct timespec xtime; extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 69d5a4b5395b..6b851a1bf4b0 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -712,8 +712,7 @@ out: */ static int good_timespec(const struct timespec *ts) { - if ((!ts) || (ts->tv_sec < 0) || - ((unsigned) ts->tv_nsec >= NSEC_PER_SEC)) + if ((!ts) || !timespec_valid(ts)) return 0; return 1; } @@ -1406,7 +1405,7 @@ sys_clock_nanosleep(const clockid_t which_clock, int flags, if (copy_from_user(&t, rqtp, sizeof (struct timespec))) return -EFAULT; - if ((unsigned) t.tv_nsec >= NSEC_PER_SEC || t.tv_sec < 0) + if (!timespec_valid(&t)) return -EINVAL; /* -- cgit v1.2.3-71-gd317 
From f8f46da3b4cbb03b43a102b1eb92b63419e10f90 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2006 20:52:30 -0800 Subject: [PATCH] hrtimer: introduce nsec_t type and conversion functions - introduce the nsec_t type - basic nsec conversion routines: timespec_to_ns(), timeval_to_ns(), ns_to_timespec(), ns_to_timeval(). Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ kernel/time.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 64e797464589..f639fde29253 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -50,6 +50,12 @@ extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec); #define timespec_valid(ts) \ (((ts)->tv_sec >= 0) && (((unsigned) (ts)->tv_nsec) < NSEC_PER_SEC)) +/* + * 64-bit nanosec type. Large enough to span 292+ years in nanosecond + * resolution. Ought to be enough for a while. + */ +typedef s64 nsec_t; + extern struct timespec xtime; extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; @@ -79,6 +85,47 @@ extern void getnstimestamp(struct timespec *ts); extern struct timespec timespec_trunc(struct timespec t, unsigned gran); +/** + * timespec_to_ns - Convert timespec to nanoseconds + * @ts: pointer to the timespec variable to be converted + * + * Returns the scalar nanosecond representation of the timespec + * parameter. + */ +static inline nsec_t timespec_to_ns(const struct timespec *ts) +{ + return ((nsec_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; +} + +/** + * timeval_to_ns - Convert timeval to nanoseconds + * @ts: pointer to the timeval variable to be converted + * + * Returns the scalar nanosecond representation of the timeval + * parameter. 
+ */ +static inline nsec_t timeval_to_ns(const struct timeval *tv) +{ + return ((nsec_t) tv->tv_sec * NSEC_PER_SEC) + + tv->tv_usec * NSEC_PER_USEC; +} + +/** + * ns_to_timespec - Convert nanoseconds to timespec + * @nsec: the nanoseconds value to be converted + * + * Returns the timespec representation of the nsec parameter. + */ +extern struct timespec ns_to_timespec(const nsec_t nsec); + +/** + * ns_to_timeval - Convert nanoseconds to timeval + * @nsec: the nanoseconds value to be converted + * + * Returns the timeval representation of the nsec parameter. + */ +extern struct timeval ns_to_timeval(const nsec_t nsec); + #endif /* __KERNEL__ */ #define NFDBITS __NFDBITS diff --git a/kernel/time.c b/kernel/time.c index c689b53297cf..cf5a4582a672 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -652,6 +652,42 @@ void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec) ts->tv_nsec = nsec; } +/** + * ns_to_timespec - Convert nanoseconds to timespec + * @nsec: the nanoseconds value to be converted + * + * Returns the timespec representation of the nsec parameter. + */ +inline struct timespec ns_to_timespec(const nsec_t nsec) +{ + struct timespec ts; + + if (nsec) + ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, + &ts.tv_nsec); + else + ts.tv_sec = ts.tv_nsec = 0; + + return ts; +} + +/** + * ns_to_timeval - Convert nanoseconds to timeval + * @nsec: the nanoseconds value to be converted + * + * Returns the timeval representation of the nsec parameter. 
+ */ +struct timeval ns_to_timeval(const nsec_t nsec) +{ + struct timespec ts = ns_to_timespec(nsec); + struct timeval tv; + + tv.tv_sec = ts.tv_sec; + tv.tv_usec = (suseconds_t) ts.tv_nsec / 1000; + + return tv; +} + #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void) { -- cgit v1.2.3-71-gd317 From 97fc79f97b1111c80010d34ee66312b88f531e41 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2006 20:52:31 -0800 Subject: [PATCH] hrtimer: introduce ktime_t time format - introduce ktime_t: nanosecond-resolution time format. - eliminate the plain s64 scalar type, and always use the union. This simplifies the arithmetics. Idea from Roman Zippel. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 4 + include/linux/ktime.h | 269 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 273 insertions(+) create mode 100644 include/linux/ktime.h (limited to 'include/linux') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 554ce3f344c9..815878ebd30f 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -1060,3 +1060,7 @@ config X86_TRAMPOLINE bool depends on X86_SMP || (X86_VOYAGER && SMP) default y + +config KTIME_SCALAR + bool + default y diff --git a/include/linux/ktime.h b/include/linux/ktime.h new file mode 100644 index 000000000000..5b9a9eb82baa --- /dev/null +++ b/include/linux/ktime.h @@ -0,0 +1,269 @@ +/* + * include/linux/ktime.h + * + * ktime_t - nanosecond-resolution time format. + * + * Copyright(C) 2005, Thomas Gleixner + * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar + * + * data type definitions, declarations, prototypes and macros. 
+ * + * Started by: Thomas Gleixner and Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ +#ifndef _LINUX_KTIME_H +#define _LINUX_KTIME_H + +#include +#include + +/* + * ktime_t: + * + * On 64-bit CPUs a single 64-bit variable is used to store the hrtimers + * internal representation of time values in scalar nanoseconds. The + * design plays out best on 64-bit CPUs, where most conversions are + * NOPs and most arithmetic ktime_t operations are plain arithmetic + * operations. + * + * On 32-bit CPUs an optimized representation of the timespec structure + * is used to avoid expensive conversions from and to timespecs. The + * endian-aware order of the tv struct members is chosen to allow + * mathematical operations on the tv64 member of the union too, which + * for certain operations produces better code. + * + * For architectures with efficient support for 64/32-bit conversions the + * plain scalar nanosecond based representation can be selected by the + * config switch CONFIG_KTIME_SCALAR. + */ +typedef union { + s64 tv64; +#if BITS_PER_LONG != 64 && !defined(CONFIG_KTIME_SCALAR) + struct { +# ifdef __BIG_ENDIAN + s32 sec, nsec; +# else + s32 nsec, sec; +# endif + } tv; +#endif +} ktime_t; + +#define KTIME_MAX (~((u64)1 << 63)) + +/* + * ktime_t definitions when using the 64-bit scalar representation: + */ + +#if (BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR) + +/* Define a ktime_t variable and initialize it to zero: */ +#define DEFINE_KTIME(kt) ktime_t kt = { .tv64 = 0 } + +/** + * ktime_set - Set a ktime_t variable from a seconds/nanoseconds value + * + * @secs: seconds to set + * @nsecs: nanoseconds to set + * + * Return the ktime_t representation of the value + */ +static inline ktime_t ktime_set(const long secs, const unsigned long nsecs) +{ + return (ktime_t) { .tv64 = (s64)secs * NSEC_PER_SEC + (s64)nsecs }; +} + +/* Subtract two ktime_t variables.
res = lhs - rhs: */ +#define ktime_sub(lhs, rhs) \ + ({ (ktime_t){ .tv64 = (lhs).tv64 - (rhs).tv64 }; }) + +/* Add two ktime_t variables. res = lhs + rhs: */ +#define ktime_add(lhs, rhs) \ + ({ (ktime_t){ .tv64 = (lhs).tv64 + (rhs).tv64 }; }) + +/* + * Add a ktime_t variable and a scalar nanosecond value. + * res = kt + nsval: + */ +#define ktime_add_ns(kt, nsval) \ + ({ (ktime_t){ .tv64 = (kt).tv64 + (nsval) }; }) + +/* convert a timespec to ktime_t format: */ +#define timespec_to_ktime(ts) ktime_set((ts).tv_sec, (ts).tv_nsec) + +/* convert a timeval to ktime_t format: */ +#define timeval_to_ktime(tv) ktime_set((tv).tv_sec, (tv).tv_usec * 1000) + +/* Map the ktime_t to timespec conversion to ns_to_timespec function */ +#define ktime_to_timespec(kt) ns_to_timespec((kt).tv64) + +/* Map the ktime_t to timeval conversion to ns_to_timeval function */ +#define ktime_to_timeval(kt) ns_to_timeval((kt).tv64) + +/* Map the ktime_t to clock_t conversion to the inline in jiffies.h: */ +#define ktime_to_clock_t(kt) nsec_to_clock_t((kt).tv64) + +/* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */ +#define ktime_to_ns(kt) ((kt).tv64) + +#else + +/* + * Helper macros/inlines to get the ktime_t math right in the timespec + * representation. The macros are sometimes ugly - their actual use is + * pretty okay-ish, given the circumstances. We do all this for + * performance reasons. The pure scalar nsec_t based code was nice and + * simple, but created too many 64-bit / 32-bit conversions and divisions. + * + * Be especially aware that negative values are represented in a way + * that the tv.sec field is negative and the tv.nsec field is greater + * or equal to zero but less than nanoseconds per second. This is the + * same representation which is used by timespecs.
+ * + * tv.sec < 0 and 0 <= tv.nsec < NSEC_PER_SEC + */ + +/* Define a ktime_t variable and initialize it to zero: */ +#define DEFINE_KTIME(kt) ktime_t kt = { .tv64 = 0 } + +/* Set a ktime_t variable to a value in sec/nsec representation: */ +static inline ktime_t ktime_set(const long secs, const unsigned long nsecs) +{ + return (ktime_t) { .tv = { .sec = secs, .nsec = nsecs } }; +} + +/** + * ktime_sub - subtract two ktime_t variables + * + * @lhs: minuend + * @rhs: subtrahend + * + * Returns the difference lhs - rhs + */ +static inline ktime_t ktime_sub(const ktime_t lhs, const ktime_t rhs) +{ + ktime_t res; + + res.tv64 = lhs.tv64 - rhs.tv64; + if (res.tv.nsec < 0) + res.tv.nsec += NSEC_PER_SEC; + + return res; +} + +/** + * ktime_add - add two ktime_t variables + * + * @add1: addend1 + * @add2: addend2 + * + * Returns the sum of addend1 and addend2 + */ +static inline ktime_t ktime_add(const ktime_t add1, const ktime_t add2) +{ + ktime_t res; + + res.tv64 = add1.tv64 + add2.tv64; + /* + * performance trick: the (u32) -NSEC_PER_SEC gives 0x00000000C4653600 + * so we subtract NSEC_PER_SEC and add 1 to the upper 32 bits.
+ * + * it's equivalent to: + * tv.nsec -= NSEC_PER_SEC + * tv.sec ++; + */ + if (res.tv.nsec >= NSEC_PER_SEC) + res.tv64 += (u32)-NSEC_PER_SEC; + + return res; +} + +/** + * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable + * + * @kt: addend + * @nsec: the scalar nsec value to add + * + * Returns the sum of kt and nsec in ktime_t format + */ +extern ktime_t ktime_add_ns(const ktime_t kt, u64 nsec); + +/** + * timespec_to_ktime - convert a timespec to ktime_t format + * + * @ts: the timespec variable to convert + * + * Returns a ktime_t variable with the converted timespec value + */ +static inline ktime_t timespec_to_ktime(const struct timespec ts) +{ + return (ktime_t) { .tv = { .sec = (s32)ts.tv_sec, + .nsec = (s32)ts.tv_nsec } }; +} + +/** + * timeval_to_ktime - convert a timeval to ktime_t format + * + * @tv: the timeval variable to convert + * + * Returns a ktime_t variable with the converted timeval value + */ +static inline ktime_t timeval_to_ktime(const struct timeval tv) +{ + return (ktime_t) { .tv = { .sec = (s32)tv.tv_sec, + .nsec = (s32)tv.tv_usec * 1000 } }; +} + +/** + * ktime_to_timespec - convert a ktime_t variable to timespec format + * + * @kt: the ktime_t variable to convert + * + * Returns the timespec representation of the ktime value + */ +static inline struct timespec ktime_to_timespec(const ktime_t kt) +{ + return (struct timespec) { .tv_sec = (time_t) kt.tv.sec, + .tv_nsec = (long) kt.tv.nsec }; +} + +/** + * ktime_to_timeval - convert a ktime_t variable to timeval format + * + * @kt: the ktime_t variable to convert + * + * Returns the timeval representation of the ktime value + */ +static inline struct timeval ktime_to_timeval(const ktime_t kt) +{ + return (struct timeval) { + .tv_sec = (time_t) kt.tv.sec, + .tv_usec = (suseconds_t) (kt.tv.nsec / NSEC_PER_USEC) }; +} + +/** + * ktime_to_clock_t - convert a ktime_t variable to clock_t format + * @kt: the ktime_t variable to convert + * + * Returns a clock_t variable 
with the converted value + */ +static inline clock_t ktime_to_clock_t(const ktime_t kt) +{ + return nsec_to_clock_t( (u64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec); +} + +/** + * ktime_to_ns - convert a ktime_t variable to scalar nanoseconds + * @kt: the ktime_t variable to convert + * + * Returns the scalar nanoseconds representation of kt + */ +static inline u64 ktime_to_ns(const ktime_t kt) +{ + return (u64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec; +} + +#endif + +#endif -- cgit v1.2.3-71-gd317 From c0a3132963db68f1fbbd0e316b73de100fee3f08 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2006 20:52:32 -0800 Subject: [PATCH] hrtimer: hrtimer core code hrtimer subsystem core. It is initialized at bootup and expired by the timer interrupt, but is otherwise not utilized by any other subsystem yet. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 130 +++++++++ include/linux/ktime.h | 15 ++ init/main.c | 1 + kernel/Makefile | 3 +- kernel/hrtimer.c | 679 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/timer.c | 1 + 6 files changed, 828 insertions(+), 1 deletion(-) create mode 100644 include/linux/hrtimer.h create mode 100644 kernel/hrtimer.c (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h new file mode 100644 index 000000000000..64f8d554fbb8 --- /dev/null +++ b/include/linux/hrtimer.h @@ -0,0 +1,130 @@ +/* + * include/linux/hrtimer.h + * + * hrtimers - High-resolution kernel timers + * + * Copyright(C) 2005, Thomas Gleixner + * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar + * + * data type definitions, declarations, prototypes + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ +#ifndef _LINUX_HRTIMER_H +#define _LINUX_HRTIMER_H + +#include +#include +#include +#include +#include + +/* + * Mode arguments of xxx_hrtimer functions: + */ +enum 
hrtimer_mode { + HRTIMER_ABS, /* Time value is absolute */ + HRTIMER_REL, /* Time value is relative to now */ +}; + +enum hrtimer_restart { + HRTIMER_NORESTART, + HRTIMER_RESTART, +}; + +/* + * Timer states: + */ +enum hrtimer_state { + HRTIMER_INACTIVE, /* Timer is inactive */ + HRTIMER_EXPIRED, /* Timer is expired */ + HRTIMER_PENDING, /* Timer is pending */ +}; + +struct hrtimer_base; + +/** + * struct hrtimer - the basic hrtimer structure + * + * @node: red black tree node for time ordered insertion + * @list: list head for easier access to the time ordered list, + * without walking the red black tree. + * @expires: the absolute expiry time in the hrtimers internal + * representation. The time is related to the clock on + * which the timer is based. + * @state: state of the timer + * @function: timer expiry callback function + * @data: argument for the callback function + * @base: pointer to the timer base (per cpu and per clock) + * + * The hrtimer structure must be initialized by init_hrtimer_#CLOCKTYPE() + */ +struct hrtimer { + struct rb_node node; + struct list_head list; + ktime_t expires; + enum hrtimer_state state; + int (*function)(void *); + void *data; + struct hrtimer_base *base; +}; + +/** + * struct hrtimer_base - the timer base for a specific clock + * + * @index: clock type index for per_cpu support when moving a timer + * to a base on another cpu. 
+ * @lock: lock protecting the base and associated timers + * @active: red black tree root node for the active timers + * @pending: list of pending timers for simple time ordered access + * @resolution: the resolution of the clock, in nanoseconds + * @get_time: function to retrieve the current time of the clock + * @curr_timer: the timer which is executing a callback right now + */ +struct hrtimer_base { + clockid_t index; + spinlock_t lock; + struct rb_root active; + struct list_head pending; + unsigned long resolution; + ktime_t (*get_time)(void); + struct hrtimer *curr_timer; +}; + +/* Exported timer functions: */ + +/* Initialize timers: */ +extern void hrtimer_init(struct hrtimer *timer, const clockid_t which_clock); +extern void hrtimer_rebase(struct hrtimer *timer, const clockid_t which_clock); + + +/* Basic timer operations: */ +extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, + const enum hrtimer_mode mode); +extern int hrtimer_cancel(struct hrtimer *timer); +extern int hrtimer_try_to_cancel(struct hrtimer *timer); + +#define hrtimer_restart(timer) hrtimer_start((timer), (timer)->expires, HRTIMER_ABS) + +/* Query timers: */ +extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); +extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp); + +static inline int hrtimer_active(const struct hrtimer *timer) +{ + return timer->state == HRTIMER_PENDING; +} + +/* Forward a hrtimer so it expires after now: */ +extern unsigned long hrtimer_forward(struct hrtimer *timer, + const ktime_t interval); + +/* Soft interrupt function to run the hrtimer queues: */ +extern void hrtimer_run_queues(void); + +/* Bootup initialization: */ +extern void __init hrtimers_init(void); + +#endif diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 5b9a9eb82baa..222a047cc145 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -266,4 +266,19 @@ static inline u64 ktime_to_ns(const ktime_t kt) #endif +/* + * The resolution 
of the clocks. The resolution value is returned in + * the clock_getres() system call to give application programmers an + * idea of the (in)accuracy of timers. Timer values are rounded up to + * this resolution values. + */ +#define KTIME_REALTIME_RES (NSEC_PER_SEC/HZ) +#define KTIME_MONOTONIC_RES (NSEC_PER_SEC/HZ) + +/* Get the monotonic time in timespec format: */ +extern void ktime_get_ts(struct timespec *ts); + +/* Get the real (wall-) time in timespec format: */ +#define ktime_get_real_ts(ts) getnstimeofday(ts) + #endif diff --git a/init/main.c b/init/main.c index 8342c2890b16..e092b1979a90 100644 --- a/init/main.c +++ b/init/main.c @@ -485,6 +485,7 @@ asmlinkage void __init start_kernel(void) init_IRQ(); pidhash_init(); init_timers(); + hrtimers_init(); softirq_init(); time_init(); diff --git a/kernel/Makefile b/kernel/Makefile index 1e039700c0ad..355126606d1b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -7,7 +7,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ sysctl.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o intermodule.o extable.o params.o posix-timers.o \ - kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o + kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ + hrtimer.o obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o obj-$(CONFIG_FUTEX) += futex.o diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c new file mode 100644 index 000000000000..690efd9d9adf --- /dev/null +++ b/kernel/hrtimer.c @@ -0,0 +1,679 @@ +/* + * linux/kernel/hrtimer.c + * + * Copyright(C) 2005, Thomas Gleixner + * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar + * + * High-resolution kernel timers + * + * In contrast to the low-resolution timeout API implemented in + * kernel/timer.c, hrtimers provide finer resolution and accuracy + * depending on system configuration and capabilities. 
+ * + * These timers are currently used for: + * - itimers + * - POSIX timers + * - nanosleep + * - precise in-kernel timing + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * Credits: + * based on kernel/timer.c + * + * For licencing details see kernel-base/COPYING + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +/** + * ktime_get - get the monotonic time in ktime_t format + * + * returns the time in ktime_t format + */ +static ktime_t ktime_get(void) +{ + struct timespec now; + + ktime_get_ts(&now); + + return timespec_to_ktime(now); +} + +/** + * ktime_get_real - get the real (wall-) time in ktime_t format + * + * returns the time in ktime_t format + */ +static ktime_t ktime_get_real(void) +{ + struct timespec now; + + getnstimeofday(&now); + + return timespec_to_ktime(now); +} + +EXPORT_SYMBOL_GPL(ktime_get_real); + +/* + * The timer bases: + */ + +#define MAX_HRTIMER_BASES 2 + +static DEFINE_PER_CPU(struct hrtimer_base, hrtimer_bases[MAX_HRTIMER_BASES]) = +{ + { + .index = CLOCK_REALTIME, + .get_time = &ktime_get_real, + .resolution = KTIME_REALTIME_RES, + }, + { + .index = CLOCK_MONOTONIC, + .get_time = &ktime_get, + .resolution = KTIME_MONOTONIC_RES, + }, +}; + +/** + * ktime_get_ts - get the monotonic clock in timespec format + * + * @ts: pointer to timespec variable + * + * The function calculates the monotonic clock from the realtime + * clock and the wall_to_monotonic offset and stores the result + * in normalized timespec format in the variable pointed to by ts. 
+ */ +void ktime_get_ts(struct timespec *ts) +{ + struct timespec tomono; + unsigned long seq; + + do { + seq = read_seqbegin(&xtime_lock); + getnstimeofday(ts); + tomono = wall_to_monotonic; + + } while (read_seqretry(&xtime_lock, seq)); + + set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, + ts->tv_nsec + tomono.tv_nsec); +} + +/* + * Functions and macros which are different for UP/SMP systems are kept in a + * single place + */ +#ifdef CONFIG_SMP + +#define set_curr_timer(b, t) do { (b)->curr_timer = (t); } while (0) + +/* + * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock + * means that all timers which are tied to this base via timer->base are + * locked, and the base itself is locked too. + * + * So __run_timers/migrate_timers can safely modify all timers which could + * be found on the lists/queues. + * + * When the timer's base is locked, and the timer removed from list, it is + * possible to set timer->base = NULL and drop the lock: the timer remains + * locked. + */ +static struct hrtimer_base *lock_hrtimer_base(const struct hrtimer *timer, + unsigned long *flags) +{ + struct hrtimer_base *base; + + for (;;) { + base = timer->base; + if (likely(base != NULL)) { + spin_lock_irqsave(&base->lock, *flags); + if (likely(base == timer->base)) + return base; + /* The timer has migrated to another CPU: */ + spin_unlock_irqrestore(&base->lock, *flags); + } + cpu_relax(); + } +} + +/* + * Switch the timer base to the current CPU when possible. + */ +static inline struct hrtimer_base * +switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_base *base) +{ + struct hrtimer_base *new_base; + + new_base = &__get_cpu_var(hrtimer_bases[base->index]); + + if (base != new_base) { + /* + * We are trying to schedule the timer on the local CPU. + * However we can't change timer's base while it is running, + * so we keep it on the same CPU. No hassle vs. reprogramming + * the event source in the high resolution case. 
The softirq + * code will take care of this when the timer function has + * completed. There is no conflict as we hold the lock until + * the timer is enqueued. + */ + if (unlikely(base->curr_timer == timer)) + return base; + + /* See the comment in lock_timer_base() */ + timer->base = NULL; + spin_unlock(&base->lock); + spin_lock(&new_base->lock); + timer->base = new_base; + } + return new_base; +} + +#else /* CONFIG_SMP */ + +#define set_curr_timer(b, t) do { } while (0) + +static inline struct hrtimer_base * +lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) +{ + struct hrtimer_base *base = timer->base; + + spin_lock_irqsave(&base->lock, *flags); + + return base; +} + +#define switch_hrtimer_base(t, b) (b) + +#endif /* !CONFIG_SMP */ + +/* + * Functions for the union type storage format of ktime_t which are + * too large for inlining: + */ +#if BITS_PER_LONG < 64 +# ifndef CONFIG_KTIME_SCALAR +/** + * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable + * + * @kt: addend + * @nsec: the scalar nsec value to add + * + * Returns the sum of kt and nsec in ktime_t format + */ +ktime_t ktime_add_ns(const ktime_t kt, u64 nsec) +{ + ktime_t tmp; + + if (likely(nsec < NSEC_PER_SEC)) { + tmp.tv64 = nsec; + } else { + unsigned long rem = do_div(nsec, NSEC_PER_SEC); + + tmp = ktime_set((long)nsec, rem); + } + + return ktime_add(kt, tmp); +} + +#else /* CONFIG_KTIME_SCALAR */ + +# endif /* !CONFIG_KTIME_SCALAR */ + +/* + * Divide a ktime value by a nanosecond value + */ +static unsigned long ktime_divns(const ktime_t kt, nsec_t div) +{ + u64 dclc, inc, dns; + int sft = 0; + + dclc = dns = ktime_to_ns(kt); + inc = div; + /* Make sure the divisor is less than 2^32: */ + while (div >> 32) { + sft++; + div >>= 1; + } + dclc >>= sft; + do_div(dclc, (unsigned long) div); + + return (unsigned long) dclc; +} + +#else /* BITS_PER_LONG < 64 */ +# define ktime_divns(kt, div) (unsigned long)((kt).tv64 / (div)) +#endif /* BITS_PER_LONG >= 64 */ + +/* + 
* Counterpart to lock_timer_base above: + */ +static inline +void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) +{ + spin_unlock_irqrestore(&timer->base->lock, *flags); +} + +/** + * hrtimer_forward - forward the timer expiry + * + * @timer: hrtimer to forward + * @interval: the interval to forward + * + * Forward the timer expiry so it will expire in the future. + * The number of overruns is added to the overrun field. + */ +unsigned long +hrtimer_forward(struct hrtimer *timer, const ktime_t interval) +{ + unsigned long orun = 1; + ktime_t delta, now; + + now = timer->base->get_time(); + + delta = ktime_sub(now, timer->expires); + + if (delta.tv64 < 0) + return 0; + + if (unlikely(delta.tv64 >= interval.tv64)) { + nsec_t incr = ktime_to_ns(interval); + + orun = ktime_divns(delta, incr); + timer->expires = ktime_add_ns(timer->expires, incr * orun); + if (timer->expires.tv64 > now.tv64) + return orun; + /* + * This (and the ktime_add() below) is the + * correction for exact: + */ + orun++; + } + timer->expires = ktime_add(timer->expires, interval); + + return orun; +} + +/* + * enqueue_hrtimer - internal function to (re)start a timer + * + * The timer is inserted in expiry order. Insertion into the + * red black tree is O(log(n)). Must hold the base lock. + */ +static void enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) +{ + struct rb_node **link = &base->active.rb_node; + struct list_head *prev = &base->pending; + struct rb_node *parent = NULL; + struct hrtimer *entry; + + /* + * Find the right place in the rbtree: + */ + while (*link) { + parent = *link; + entry = rb_entry(parent, struct hrtimer, node); + /* + * We dont care about collisions. Nodes with + * the same expiry time stay together. 
+ */ + if (timer->expires.tv64 < entry->expires.tv64) + link = &(*link)->rb_left; + else { + link = &(*link)->rb_right; + prev = &entry->list; + } + } + + /* + * Insert the timer to the rbtree and to the sorted list: + */ + rb_link_node(&timer->node, parent, link); + rb_insert_color(&timer->node, &base->active); + list_add(&timer->list, prev); + + timer->state = HRTIMER_PENDING; +} + + +/* + * __remove_hrtimer - internal function to remove a timer + * + * Caller must hold the base lock. + */ +static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) +{ + /* + * Remove the timer from the sorted list and from the rbtree: + */ + list_del(&timer->list); + rb_erase(&timer->node, &base->active); +} + +/* + * remove hrtimer, called with base lock held + */ +static inline int +remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base) +{ + if (hrtimer_active(timer)) { + __remove_hrtimer(timer, base); + timer->state = HRTIMER_INACTIVE; + return 1; + } + return 0; +} + +/** + * hrtimer_start - (re)start an relative timer on the current CPU + * + * @timer: the timer to be added + * @tim: expiry time + * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) + * + * Returns: + * 0 on success + * 1 when the timer was active + */ +int +hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) +{ + struct hrtimer_base *base, *new_base; + unsigned long flags; + int ret; + + base = lock_hrtimer_base(timer, &flags); + + /* Remove an active timer from the queue: */ + ret = remove_hrtimer(timer, base); + + /* Switch the timer base, if necessary: */ + new_base = switch_hrtimer_base(timer, base); + + if (mode == HRTIMER_REL) + tim = ktime_add(tim, new_base->get_time()); + timer->expires = tim; + + enqueue_hrtimer(timer, new_base); + + unlock_hrtimer_base(timer, &flags); + + return ret; +} + +/** + * hrtimer_try_to_cancel - try to deactivate a timer + * + * @timer: hrtimer to stop + * + * Returns: + * 0 when the timer was not 
active + * 1 when the timer was active + * -1 when the timer is currently executing the callback function and + * can not be stopped + */ +int hrtimer_try_to_cancel(struct hrtimer *timer) +{ + struct hrtimer_base *base; + unsigned long flags; + int ret = -1; + + base = lock_hrtimer_base(timer, &flags); + + if (base->curr_timer != timer) + ret = remove_hrtimer(timer, base); + + unlock_hrtimer_base(timer, &flags); + + return ret; + +} + +/** + * hrtimer_cancel - cancel a timer and wait for the handler to finish. + * + * @timer: the timer to be cancelled + * + * Returns: + * 0 when the timer was not active + * 1 when the timer was active + */ +int hrtimer_cancel(struct hrtimer *timer) +{ + for (;;) { + int ret = hrtimer_try_to_cancel(timer); + + if (ret >= 0) + return ret; + } +} + +/** + * hrtimer_get_remaining - get remaining time for the timer + * + * @timer: the timer to read + */ +ktime_t hrtimer_get_remaining(const struct hrtimer *timer) +{ + struct hrtimer_base *base; + unsigned long flags; + ktime_t rem; + + base = lock_hrtimer_base(timer, &flags); + rem = ktime_sub(timer->expires, timer->base->get_time()); + unlock_hrtimer_base(timer, &flags); + + return rem; +} + +/** + * hrtimer_rebase - rebase an initialized hrtimer to a different base + * + * @timer: the timer to be rebased + * @clock_id: the clock to be used + */ +void hrtimer_rebase(struct hrtimer *timer, const clockid_t clock_id) +{ + struct hrtimer_base *bases; + + bases = per_cpu(hrtimer_bases, raw_smp_processor_id()); + timer->base = &bases[clock_id]; +} + +/** + * hrtimer_init - initialize a timer to the given clock + * + * @timer: the timer to be initialized + * @clock_id: the clock to be used + */ +void hrtimer_init(struct hrtimer *timer, const clockid_t clock_id) +{ + memset(timer, 0, sizeof(struct hrtimer)); + hrtimer_rebase(timer, clock_id); +} + +/** + * hrtimer_get_res - get the timer resolution for a clock + * + * @which_clock: which clock to query + * @tp: pointer to timespec variable to
store the resolution + * + * Store the resolution of the clock selected by which_clock in the + * variable pointed to by tp. + */ +int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) +{ + struct hrtimer_base *bases; + + tp->tv_sec = 0; + bases = per_cpu(hrtimer_bases, raw_smp_processor_id()); + tp->tv_nsec = bases[which_clock].resolution; + + return 0; +} + +/* + * Expire the per base hrtimer-queue: + */ +static inline void run_hrtimer_queue(struct hrtimer_base *base) +{ + ktime_t now = base->get_time(); + + spin_lock_irq(&base->lock); + + while (!list_empty(&base->pending)) { + struct hrtimer *timer; + int (*fn)(void *); + int restart; + void *data; + + timer = list_entry(base->pending.next, struct hrtimer, list); + if (now.tv64 <= timer->expires.tv64) + break; + + fn = timer->function; + data = timer->data; + set_curr_timer(base, timer); + __remove_hrtimer(timer, base); + spin_unlock_irq(&base->lock); + + /* + * fn == NULL is special case for the simplest timer + * variant - wake up process and do not restart: + */ + if (!fn) { + wake_up_process(data); + restart = HRTIMER_NORESTART; + } else + restart = fn(data); + + spin_lock_irq(&base->lock); + + if (restart == HRTIMER_RESTART) + enqueue_hrtimer(timer, base); + else + timer->state = HRTIMER_EXPIRED; + } + set_curr_timer(base, NULL); + spin_unlock_irq(&base->lock); +} + +/* + * Called from timer softirq every jiffy, expire hrtimers: + */ +void hrtimer_run_queues(void) +{ + struct hrtimer_base *base = __get_cpu_var(hrtimer_bases); + int i; + + for (i = 0; i < MAX_HRTIMER_BASES; i++) + run_hrtimer_queue(&base[i]); +} + +/* + * Functions related to boot-time initialization: + */ +static void __devinit init_hrtimers_cpu(int cpu) +{ + struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu); + int i; + + for (i = 0; i < MAX_HRTIMER_BASES; i++) { + spin_lock_init(&base->lock); + INIT_LIST_HEAD(&base->pending); + base++; + } +} + +#ifdef CONFIG_HOTPLUG_CPU + +static void migrate_hrtimer_list(struct 
hrtimer_base *old_base, + struct hrtimer_base *new_base) +{ + struct hrtimer *timer; + struct rb_node *node; + + while ((node = rb_first(&old_base->active))) { + timer = rb_entry(node, struct hrtimer, node); + __remove_hrtimer(timer, old_base); + timer->base = new_base; + enqueue_hrtimer(timer, new_base); + } +} + +static void migrate_hrtimers(int cpu) +{ + struct hrtimer_base *old_base, *new_base; + int i; + + BUG_ON(cpu_online(cpu)); + old_base = per_cpu(hrtimer_bases, cpu); + new_base = get_cpu_var(hrtimer_bases); + + local_irq_disable(); + + for (i = 0; i < MAX_HRTIMER_BASES; i++) { + + spin_lock(&new_base->lock); + spin_lock(&old_base->lock); + + BUG_ON(old_base->curr_timer); + + migrate_hrtimer_list(old_base, new_base); + + spin_unlock(&old_base->lock); + spin_unlock(&new_base->lock); + old_base++; + new_base++; + } + + local_irq_enable(); + put_cpu_var(hrtimer_bases); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +static int __devinit hrtimer_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + long cpu = (long)hcpu; + + switch (action) { + + case CPU_UP_PREPARE: + init_hrtimers_cpu(cpu); + break; + +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + migrate_hrtimers(cpu); + break; +#endif + + default: + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block __devinitdata hrtimers_nb = { + .notifier_call = hrtimer_cpu_notify, +}; + +void __init hrtimers_init(void) +{ + hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, + (void *)(long)smp_processor_id()); + register_cpu_notifier(&hrtimers_nb); +} + diff --git a/kernel/timer.c b/kernel/timer.c index 074b4bd5cfd8..80bf2acf6b08 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -858,6 +858,7 @@ static void run_timer_softirq(struct softirq_action *h) { tvec_base_t *base = &__get_cpu_var(tvec_bases); + hrtimer_run_queues(); if (time_after_eq(jiffies, base->timer_jiffies)) __run_timers(base); } -- cgit v1.2.3-71-gd317 From 2ff678b8da6478d861c1b0ecb3ac14575760e906 Mon Sep 17 
00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2006 20:52:34 -0800 Subject: [PATCH] hrtimer: switch itimers to hrtimer switch itimers to a hrtimers-based implementation Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 6 +-- fs/proc/array.c | 6 +-- include/linux/sched.h | 5 ++- include/linux/timer.h | 2 +- kernel/exit.c | 2 +- kernel/fork.c | 6 +-- kernel/itimer.c | 106 ++++++++++++++++++++++++-------------------------- 7 files changed, 65 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index fd02ea4a81e9..b5bcf1aae0ab 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -632,10 +632,10 @@ static inline int de_thread(struct task_struct *tsk) * synchronize with any firing (by calling del_timer_sync) * before we can safely let the old group leader die. */ - sig->real_timer.data = (unsigned long)current; + sig->real_timer.data = current; spin_unlock_irq(lock); - if (del_timer_sync(&sig->real_timer)) - add_timer(&sig->real_timer); + if (hrtimer_cancel(&sig->real_timer)) + hrtimer_restart(&sig->real_timer); spin_lock_irq(lock); } while (atomic_read(&sig->count) > count) { diff --git a/fs/proc/array.c b/fs/proc/array.c index 5e9251f65317..7eb1bd7f800c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -330,7 +330,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) unsigned long min_flt = 0, maj_flt = 0; cputime_t cutime, cstime, utime, stime; unsigned long rsslim = 0; - unsigned long it_real_value = 0; + DEFINE_KTIME(it_real_value); struct task_struct *t; char tcomm[sizeof(task->comm)]; @@ -386,7 +386,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) utime = cputime_add(utime, task->signal->utime); stime = cputime_add(stime, task->signal->stime); } - it_real_value = task->signal->it_real_value; + it_real_value = task->signal->real_timer.expires; } ppid = pid_alive(task) ? 
task->group_leader->real_parent->tgid : 0; read_unlock(&tasklist_lock); @@ -435,7 +435,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) priority, nice, num_threads, - jiffies_to_clock_t(it_real_value), + (long) ktime_to_clock_t(it_real_value), start_time, vsize, mm ? get_mm_rss(mm) : 0, diff --git a/include/linux/sched.h b/include/linux/sched.h index 85b53f87c703..ee4677ad204e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -105,6 +105,7 @@ extern unsigned long nr_iowait(void); #include #include #include +#include #include @@ -398,8 +399,8 @@ struct signal_struct { struct list_head posix_timers; /* ITIMER_REAL timer for the process */ - struct timer_list real_timer; - unsigned long it_real_value, it_real_incr; + struct hrtimer real_timer; + ktime_t it_real_incr; /* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */ cputime_t it_prof_expires, it_virt_expires; diff --git a/include/linux/timer.h b/include/linux/timer.h index 72f3a7781106..9b9877fd2505 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -96,6 +96,6 @@ static inline void add_timer(struct timer_list *timer) extern void init_timers(void); extern void run_local_timers(void); -extern void it_real_fn(unsigned long); +extern int it_real_fn(void *); #endif diff --git a/kernel/exit.c b/kernel/exit.c index 309a46fa16f8..e75a51f33768 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -842,7 +842,7 @@ fastcall NORET_TYPE void do_exit(long code) } group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { - del_timer_sync(&tsk->signal->real_timer); + hrtimer_cancel(&tsk->signal->real_timer); exit_itimers(tsk->signal); acct_process(code); } diff --git a/kernel/fork.c b/kernel/fork.c index b18d64554feb..3bdcab49998d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -801,10 +801,10 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts init_sigpending(&sig->shared_pending); INIT_LIST_HEAD(&sig->posix_timers); 
- sig->it_real_value = sig->it_real_incr = 0; + hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC); + sig->it_real_incr.tv64 = 0; sig->real_timer.function = it_real_fn; - sig->real_timer.data = (unsigned long) tsk; - init_timer(&sig->real_timer); + sig->real_timer.data = tsk; sig->it_virt_expires = cputime_zero; sig->it_virt_incr = cputime_zero; diff --git a/kernel/itimer.c b/kernel/itimer.c index 7c1b25e25e47..c2c05c4ff28d 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -12,36 +12,46 @@ #include #include #include +#include #include -static unsigned long it_real_value(struct signal_struct *sig) +/** + * itimer_get_remtime - get remaining time for the timer + * + * @timer: the timer to read + * + * Returns the delta between the expiry time and now, which can be + * less than zero or 1usec for an pending expired timer + */ +static struct timeval itimer_get_remtime(struct hrtimer *timer) { - unsigned long val = 0; - if (timer_pending(&sig->real_timer)) { - val = sig->real_timer.expires - jiffies; + ktime_t rem = hrtimer_get_remaining(timer); - /* look out for negative/zero itimer.. */ - if ((long) val <= 0) - val = 1; - } - return val; + /* + * Racy but safe: if the itimer expires after the above + * hrtimer_get_remtime() call but before this condition + * then we return 0 - which is correct. 
+ */ + if (hrtimer_active(timer)) { + if (rem.tv64 <= 0) + rem.tv64 = NSEC_PER_USEC; + } else + rem.tv64 = 0; + + return ktime_to_timeval(rem); } int do_getitimer(int which, struct itimerval *value) { struct task_struct *tsk = current; - unsigned long interval, val; cputime_t cinterval, cval; switch (which) { case ITIMER_REAL: - spin_lock_irq(&tsk->sighand->siglock); - interval = tsk->signal->it_real_incr; - val = it_real_value(tsk->signal); - spin_unlock_irq(&tsk->sighand->siglock); - jiffies_to_timeval(val, &value->it_value); - jiffies_to_timeval(interval, &value->it_interval); + value->it_value = itimer_get_remtime(&tsk->signal->real_timer); + value->it_interval = + ktime_to_timeval(tsk->signal->it_real_incr); break; case ITIMER_VIRTUAL: read_lock(&tasklist_lock); @@ -113,59 +123,45 @@ asmlinkage long sys_getitimer(int which, struct itimerval __user *value) } -void it_real_fn(unsigned long __data) +/* + * The timer is automagically restarted, when interval != 0 + */ +int it_real_fn(void *data) { - struct task_struct * p = (struct task_struct *) __data; - unsigned long inc = p->signal->it_real_incr; + struct task_struct *tsk = (struct task_struct *) data; - send_group_sig_info(SIGALRM, SEND_SIG_PRIV, p); + send_group_sig_info(SIGALRM, SEND_SIG_PRIV, tsk); - /* - * Now restart the timer if necessary. We don't need any locking - * here because do_setitimer makes sure we have finished running - * before it touches anything. - * Note, we KNOW we are (or should be) at a jiffie edge here so - * we don't need the +1 stuff. Also, we want to use the prior - * expire value so as to not "slip" a jiffie if we are late. - * Deal with requesting a time prior to "now" here rather than - * in add_timer. 
- */ - if (!inc) - return; - while (time_before_eq(p->signal->real_timer.expires, jiffies)) - p->signal->real_timer.expires += inc; - add_timer(&p->signal->real_timer); + if (tsk->signal->it_real_incr.tv64 != 0) { + hrtimer_forward(&tsk->signal->real_timer, + tsk->signal->it_real_incr); + + return HRTIMER_RESTART; + } + return HRTIMER_NORESTART; } int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) { struct task_struct *tsk = current; - unsigned long val, interval, expires; + struct hrtimer *timer; + ktime_t expires; cputime_t cval, cinterval, nval, ninterval; switch (which) { case ITIMER_REAL: -again: - spin_lock_irq(&tsk->sighand->siglock); - interval = tsk->signal->it_real_incr; - val = it_real_value(tsk->signal); - /* We are sharing ->siglock with it_real_fn() */ - if (try_to_del_timer_sync(&tsk->signal->real_timer) < 0) { - spin_unlock_irq(&tsk->sighand->siglock); - goto again; - } - tsk->signal->it_real_incr = - timeval_to_jiffies(&value->it_interval); - expires = timeval_to_jiffies(&value->it_value); - if (expires) - mod_timer(&tsk->signal->real_timer, - jiffies + 1 + expires); - spin_unlock_irq(&tsk->sighand->siglock); + timer = &tsk->signal->real_timer; + hrtimer_cancel(timer); if (ovalue) { - jiffies_to_timeval(val, &ovalue->it_value); - jiffies_to_timeval(interval, - &ovalue->it_interval); + ovalue->it_value = itimer_get_remtime(timer); + ovalue->it_interval + = ktime_to_timeval(tsk->signal->it_real_incr); } + tsk->signal->it_real_incr = + timeval_to_ktime(value->it_interval); + expires = timeval_to_ktime(value->it_value); + if (expires.tv64 != 0) + hrtimer_start(timer, expires, HRTIMER_REL); break; case ITIMER_VIRTUAL: nval = timeval_to_cputime(&value->it_value); -- cgit v1.2.3-71-gd317 From 10c94ec16dd187f8d8dfdbb088e98330c05bf03c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2006 20:52:35 -0800 Subject: [PATCH] hrtimer: create hrtimer nanosleep API introduce the hrtimer_nanosleep() and 
hrtimer_nanosleep_real() APIs. Not yet used by any code. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 6 +++ kernel/hrtimer.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 64f8d554fbb8..2ac20b48b2f3 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -121,6 +121,12 @@ static inline int hrtimer_active(const struct hrtimer *timer) extern unsigned long hrtimer_forward(struct hrtimer *timer, const ktime_t interval); +/* Precise sleep: */ +extern long hrtimer_nanosleep(struct timespec *rqtp, + struct timespec __user *rmtp, + const enum hrtimer_mode mode, + const clockid_t clockid); + /* Soft interrupt function to run the hrtimer queues: */ extern void hrtimer_run_queues(void); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 690efd9d9adf..64d37a3c5948 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -580,6 +580,133 @@ void hrtimer_run_queues(void) run_hrtimer_queue(&base[i]); } +/* + * Sleep related functions: + */ + +/** + * schedule_hrtimer - sleep until timeout + * + * @timer: hrtimer variable initialized with the correct clock base + * @mode: timeout value is abs/rel + * + * Make the current task sleep until @timeout is + * elapsed. + * + * You can set the task state as follows - + * + * %TASK_UNINTERRUPTIBLE - at least @timeout is guaranteed to + * pass before the routine returns. The routine will return 0 + * + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is + * delivered to the current task. In this case the remaining time + * will be returned + * + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. 
+ */ +static ktime_t __sched +schedule_hrtimer(struct hrtimer *timer, const enum hrtimer_mode mode) +{ + /* fn stays NULL, meaning single-shot wakeup: */ + timer->data = current; + + hrtimer_start(timer, timer->expires, mode); + + schedule(); + hrtimer_cancel(timer); + + /* Return the remaining time: */ + if (timer->state != HRTIMER_EXPIRED) + return ktime_sub(timer->expires, timer->base->get_time()); + else + return (ktime_t) {.tv64 = 0 }; +} + +static inline ktime_t __sched +schedule_hrtimer_interruptible(struct hrtimer *timer, + const enum hrtimer_mode mode) +{ + set_current_state(TASK_INTERRUPTIBLE); + + return schedule_hrtimer(timer, mode); +} + +static long __sched +nanosleep_restart(struct restart_block *restart, clockid_t clockid) +{ + struct timespec __user *rmtp, tu; + void *rfn_save = restart->fn; + struct hrtimer timer; + ktime_t rem; + + restart->fn = do_no_restart_syscall; + + hrtimer_init(&timer, clockid); + + timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0; + + rem = schedule_hrtimer_interruptible(&timer, HRTIMER_ABS); + + if (rem.tv64 <= 0) + return 0; + + rmtp = (struct timespec __user *) restart->arg2; + tu = ktime_to_timespec(rem); + if (rmtp && copy_to_user(rmtp, &tu, sizeof(tu))) + return -EFAULT; + + restart->fn = rfn_save; + + /* The other values in restart are already filled in */ + return -ERESTART_RESTARTBLOCK; +} + +static long __sched nanosleep_restart_mono(struct restart_block *restart) +{ + return nanosleep_restart(restart, CLOCK_MONOTONIC); +} + +static long __sched nanosleep_restart_real(struct restart_block *restart) +{ + return nanosleep_restart(restart, CLOCK_REALTIME); +} + +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, + const enum hrtimer_mode mode, const clockid_t clockid) +{ + struct restart_block *restart; + struct hrtimer timer; + struct timespec tu; + ktime_t rem; + + hrtimer_init(&timer, clockid); + + timer.expires = timespec_to_ktime(*rqtp); + + rem = 
schedule_hrtimer_interruptible(&timer, mode); + if (rem.tv64 <= 0) + return 0; + + /* Absolute timers do not update the rmtp value: */ + if (mode == HRTIMER_ABS) + return -ERESTARTNOHAND; + + tu = ktime_to_timespec(rem); + + if (rmtp && copy_to_user(rmtp, &tu, sizeof(tu))) + return -EFAULT; + + restart = &current_thread_info()->restart_block; + restart->fn = (clockid == CLOCK_MONOTONIC) ? + nanosleep_restart_mono : nanosleep_restart_real; + restart->arg0 = timer.expires.tv64 & 0xFFFFFFFF; + restart->arg1 = timer.expires.tv64 >> 32; + restart->arg2 = (unsigned long) rmtp; + + return -ERESTART_RESTARTBLOCK; +} + /* * Functions related to boot-time initialization: */ -- cgit v1.2.3-71-gd317 From 97735f25d2ba898ec5e13746451525580631c834 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2006 20:52:37 -0800 Subject: [PATCH] hrtimer: switch clock_nanosleep to hrtimer nanosleep API Switch clock_nanosleep to use the new nanosleep functions in hrtimer.c Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/posix-timers.h | 7 +- kernel/posix-cpu-timers.c | 23 ++++--- kernel/posix-timers.c | 151 ++++++++----------------------------------- 3 files changed, 45 insertions(+), 136 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index ae51473d3d48..3c0a5beb7f0d 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -81,7 +81,7 @@ struct k_clock { int (*clock_get) (const clockid_t which_clock, struct timespec * tp); int (*timer_create) (struct k_itimer *timer); int (*nsleep) (const clockid_t which_clock, int flags, - struct timespec *); + struct timespec *, struct timespec __user *); int (*timer_set) (struct k_itimer * timr, int flags, struct itimerspec * new_setting, struct itimerspec * old_setting); @@ -95,7 +95,8 @@ void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock); /* 
error handlers for timer_create, nanosleep and settime */ int do_posix_clock_notimer_create(struct k_itimer *timer); -int do_posix_clock_nonanosleep(const clockid_t, int flags, struct timespec *); +int do_posix_clock_nonanosleep(const clockid_t, int flags, struct timespec *, + struct timespec __user *); int do_posix_clock_nosettime(const clockid_t, struct timespec *tp); /* function to call to trigger timer event */ @@ -129,7 +130,7 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *ts); int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *ts); int posix_cpu_timer_create(struct k_itimer *timer); int posix_cpu_nsleep(const clockid_t which_clock, int flags, - struct timespec *ts); + struct timespec *rqtp, struct timespec __user *rmtp); int posix_cpu_timer_set(struct k_itimer *timer, int flags, struct itimerspec *new, struct itimerspec *old); int posix_cpu_timer_del(struct k_itimer *timer); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index abf6990c6eb5..520f6c59948d 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1400,7 +1400,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, static long posix_cpu_clock_nanosleep_restart(struct restart_block *); int posix_cpu_nsleep(const clockid_t which_clock, int flags, - struct timespec *rqtp) + struct timespec *rqtp, struct timespec __user *rmtp) { struct restart_block *restart_block = &current_thread_info()->restart_block; @@ -1425,7 +1425,6 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags, error = posix_cpu_timer_create(&timer); timer.it_process = current; if (!error) { - struct timespec __user *rmtp; static struct itimerspec zero_it; struct itimerspec it = { .it_value = *rqtp, .it_interval = {} }; @@ -1472,7 +1471,6 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags, /* * Report back to the user the time still remaining. 
*/ - rmtp = (struct timespec __user *) restart_block->arg1; if (rmtp != NULL && !(flags & TIMER_ABSTIME) && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) return -EFAULT; @@ -1480,6 +1478,7 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags, restart_block->fn = posix_cpu_clock_nanosleep_restart; /* Caller already set restart_block->arg1 */ restart_block->arg0 = which_clock; + restart_block->arg1 = (unsigned long) rmtp; restart_block->arg2 = rqtp->tv_sec; restart_block->arg3 = rqtp->tv_nsec; @@ -1493,10 +1492,15 @@ static long posix_cpu_clock_nanosleep_restart(struct restart_block *restart_block) { clockid_t which_clock = restart_block->arg0; - struct timespec t = { .tv_sec = restart_block->arg2, - .tv_nsec = restart_block->arg3 }; + struct timespec __user *rmtp; + struct timespec t; + + rmtp = (struct timespec __user *) restart_block->arg1; + t.tv_sec = restart_block->arg2; + t.tv_nsec = restart_block->arg3; + restart_block->fn = do_no_restart_syscall; - return posix_cpu_nsleep(which_clock, TIMER_ABSTIME, &t); + return posix_cpu_nsleep(which_clock, TIMER_ABSTIME, &t, rmtp); } @@ -1519,9 +1523,10 @@ static int process_cpu_timer_create(struct k_itimer *timer) return posix_cpu_timer_create(timer); } static int process_cpu_nsleep(const clockid_t which_clock, int flags, - struct timespec *rqtp) + struct timespec *rqtp, + struct timespec __user *rmtp) { - return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp); + return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp); } static int thread_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) @@ -1539,7 +1544,7 @@ static int thread_cpu_timer_create(struct k_itimer *timer) return posix_cpu_timer_create(timer); } static int thread_cpu_nsleep(const clockid_t which_clock, int flags, - struct timespec *rqtp) + struct timespec *rqtp, struct timespec __user *rmtp) { return -EINVAL; } diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 6b851a1bf4b0..ba900587b815 100644 --- 
a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -209,7 +209,8 @@ static inline int common_timer_create(struct k_itimer *new_timer) /* * These ones are defined below. */ -static int common_nsleep(const clockid_t, int flags, struct timespec *t); +static int common_nsleep(const clockid_t, int flags, struct timespec *t, + struct timespec __user *rmtp); static void common_timer_get(struct k_itimer *, struct itimerspec *); static int common_timer_set(struct k_itimer *, int, struct itimerspec *, struct itimerspec *); @@ -1227,7 +1228,7 @@ int do_posix_clock_notimer_create(struct k_itimer *timer) EXPORT_SYMBOL_GPL(do_posix_clock_notimer_create); int do_posix_clock_nonanosleep(const clockid_t clock, int flags, - struct timespec *t) + struct timespec *t, struct timespec __user *r) { #ifndef ENOTSUP return -EOPNOTSUPP; /* aka ENOTSUP in userland for POSIX */ @@ -1387,7 +1388,28 @@ void clock_was_set(void) up(&clock_was_set_lock); } -long clock_nanosleep_restart(struct restart_block *restart_block); +/* + * nanosleep for monotonic and realtime clocks + */ +static int common_nsleep(const clockid_t which_clock, int flags, + struct timespec *tsave, struct timespec __user *rmtp) +{ + int mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL; + int clockid = which_clock; + + switch (which_clock) { + case CLOCK_REALTIME: + /* Posix madness. Only absolute timers on clock realtime + are affected by clock set. 
*/ + if (mode == HRTIMER_ABS) + clockid = CLOCK_MONOTONIC; + case CLOCK_MONOTONIC: + break; + default: + return -EINVAL; + } + return hrtimer_nanosleep(tsave, rmtp, mode, clockid); +} asmlinkage long sys_clock_nanosleep(const clockid_t which_clock, int flags, @@ -1395,9 +1417,6 @@ sys_clock_nanosleep(const clockid_t which_clock, int flags, struct timespec __user *rmtp) { struct timespec t; - struct restart_block *restart_block = - &(current_thread_info()->restart_block); - int ret; if (invalid_clockid(which_clock)) return -EINVAL; @@ -1408,122 +1427,6 @@ sys_clock_nanosleep(const clockid_t which_clock, int flags, if (!timespec_valid(&t)) return -EINVAL; - /* - * Do this here as nsleep function does not have the real address. - */ - restart_block->arg1 = (unsigned long)rmtp; - - ret = CLOCK_DISPATCH(which_clock, nsleep, (which_clock, flags, &t)); - - if ((ret == -ERESTART_RESTARTBLOCK) && rmtp && - copy_to_user(rmtp, &t, sizeof (t))) - return -EFAULT; - return ret; -} - - -static int common_nsleep(const clockid_t which_clock, - int flags, struct timespec *tsave) -{ - struct timespec t, dum; - DECLARE_WAITQUEUE(abs_wqueue, current); - u64 rq_time = (u64)0; - s64 left; - int abs; - struct restart_block *restart_block = - &current_thread_info()->restart_block; - - abs_wqueue.flags = 0; - abs = flags & TIMER_ABSTIME; - - if (restart_block->fn == clock_nanosleep_restart) { - /* - * Interrupted by a non-delivered signal, pick up remaining - * time and continue. Remaining time is in arg2 & 3. 
- */ - restart_block->fn = do_no_restart_syscall; - - rq_time = restart_block->arg3; - rq_time = (rq_time << 32) + restart_block->arg2; - if (!rq_time) - return -EINTR; - left = rq_time - get_jiffies_64(); - if (left <= (s64)0) - return 0; /* Already passed */ - } - - if (abs && (posix_clocks[which_clock].clock_get != - posix_clocks[CLOCK_MONOTONIC].clock_get)) - add_wait_queue(&nanosleep_abs_wqueue, &abs_wqueue); - - do { - t = *tsave; - if (abs || !rq_time) { - adjust_abs_time(&posix_clocks[which_clock], &t, abs, - &rq_time, &dum); - } - - left = rq_time - get_jiffies_64(); - if (left >= (s64)MAX_JIFFY_OFFSET) - left = (s64)MAX_JIFFY_OFFSET; - if (left < (s64)0) - break; - - schedule_timeout_interruptible(left); - - left = rq_time - get_jiffies_64(); - } while (left > (s64)0 && !test_thread_flag(TIF_SIGPENDING)); - - if (abs_wqueue.task_list.next) - finish_wait(&nanosleep_abs_wqueue, &abs_wqueue); - - if (left > (s64)0) { - - /* - * Always restart abs calls from scratch to pick up any - * clock shifting that happened while we are away. - */ - if (abs) - return -ERESTARTNOHAND; - - left *= TICK_NSEC; - tsave->tv_sec = div_long_long_rem(left, - NSEC_PER_SEC, - &tsave->tv_nsec); - /* - * Restart works by saving the time remaing in - * arg2 & 3 (it is 64-bits of jiffies). The other - * info we need is the clock_id (saved in arg0). - * The sys_call interface needs the users - * timespec return address which _it_ saves in arg1. - * Since we have cast the nanosleep call to a clock_nanosleep - * both can be restarted with the same code. - */ - restart_block->fn = clock_nanosleep_restart; - restart_block->arg0 = which_clock; - /* - * Caller sets arg1 - */ - restart_block->arg2 = rq_time & 0xffffffffLL; - restart_block->arg3 = rq_time >> 32; - - return -ERESTART_RESTARTBLOCK; - } - - return 0; -} -/* - * This will restart clock_nanosleep. 
- */ -long -clock_nanosleep_restart(struct restart_block *restart_block) -{ - struct timespec t; - int ret = common_nsleep(restart_block->arg0, 0, &t); - - if ((ret == -ERESTART_RESTARTBLOCK) && restart_block->arg1 && - copy_to_user((struct timespec __user *)(restart_block->arg1), &t, - sizeof (t))) - return -EFAULT; - return ret; + return CLOCK_DISPATCH(which_clock, nsleep, + (which_clock, flags, &t, rmtp)); } -- cgit v1.2.3-71-gd317 From becf8b5d00f4b47e847f98322cdaf8cd16243861 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2006 20:52:38 -0800 Subject: [PATCH] hrtimer: convert posix timers completely - convert posix-timers.c to use hrtimers - remove the now obsolete abslist code Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/time.c | 7 +- include/linux/hrtimer.h | 7 + include/linux/posix-timers.h | 37 +-- include/linux/time.h | 3 +- kernel/posix-timers.c | 717 ++++++++----------------------------------- 5 files changed, 149 insertions(+), 622 deletions(-) (limited to 'include/linux') diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 11f518a7e156..8fa2ae7f3026 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -99,7 +99,8 @@ void uml_idle_timer(void) set_interval(ITIMER_REAL); } -extern int do_posix_clock_monotonic_gettime(struct timespec *tp); +extern void ktime_get_ts(struct timespec *ts); +#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) void time_init(void) { @@ -114,8 +115,8 @@ void time_init(void) wall_to_monotonic.tv_nsec = -now.tv_nsec; } -/* Declared in linux/time.h, which can't be included here */ -extern void clock_was_set(void); +/* Defined in linux/ktimer.h, which can't be included here */ +#define clock_was_set() do { } while (0) void do_gettimeofday(struct timeval *tv) { diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2ac20b48b2f3..cf5cfdf8d613 100644 
--- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -93,6 +93,13 @@ struct hrtimer_base { struct hrtimer *curr_timer; }; +/* + * clock_was_set() is a NOP for non- high-resolution systems. The + * time-sorted order guarantees that a timer does not expire early and + * is expired in the next softirq when the clock was advanced. + */ +#define clock_was_set() do { } while (0) + /* Exported timer functions: */ /* Initialize timers: */ diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 3c0a5beb7f0d..54faf5236da0 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -51,12 +51,8 @@ struct k_itimer { struct sigqueue *sigq; /* signal queue entry. */ union { struct { - struct timer_list timer; - /* clock abs_timer_list: */ - struct list_head abs_timer_entry; - /* wall_to_monotonic used when set: */ - struct timespec wall_to_prev; - unsigned long incr; /* interval in jiffies */ + struct hrtimer timer; + ktime_t interval; } real; struct cpu_timer_list cpu; struct { @@ -68,15 +64,9 @@ struct k_itimer { } it; }; -struct k_clock_abs { - struct list_head list; - spinlock_t lock; -}; - struct k_clock { int res; /* in nanoseconds */ int (*clock_getres) (const clockid_t which_clock, struct timespec *tp); - struct k_clock_abs *abs_struct; int (*clock_set) (const clockid_t which_clock, struct timespec * tp); int (*clock_get) (const clockid_t which_clock, struct timespec * tp); int (*timer_create) (struct k_itimer *timer); @@ -102,29 +92,6 @@ int do_posix_clock_nosettime(const clockid_t, struct timespec *tp); /* function to call to trigger timer event */ int posix_timer_event(struct k_itimer *timr, int si_private); -struct now_struct { - unsigned long jiffies; -}; - -#define posix_get_now(now) \ - do { (now)->jiffies = jiffies; } while (0) - -#define posix_time_before(timer, now) \ - time_before((timer)->expires, (now)->jiffies) - -#define posix_bump_timer(timr, now) \ - do { \ - long delta, orun; \ - \ - delta = 
(now).jiffies - (timr)->it.real.timer.expires; \ - if (delta >= 0) { \ - orun = 1 + (delta / (timr)->it.real.incr); \ - (timr)->it.real.timer.expires += \ - orun * (timr)->it.real.incr; \ - (timr)->it_overrun += orun; \ - } \ - } while (0) - int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *ts); int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *ts); int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *ts); diff --git a/include/linux/time.h b/include/linux/time.h index f639fde29253..1201155b2202 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -73,8 +73,7 @@ struct timespec current_kernel_time(void); extern void do_gettimeofday(struct timeval *tv); extern int do_settimeofday(struct timespec *tv); extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); -extern void clock_was_set(void); // call whenever the clock is set -extern int do_posix_clock_monotonic_gettime(struct timespec *tp); +#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) extern long do_utimes(char __user *filename, struct timeval *times); struct itimerval; extern int do_setitimer(int which, struct itimerval *value, diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index ba900587b815..9e66e614862a 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include @@ -49,12 +48,6 @@ #include #include -#define CLOCK_REALTIME_RES TICK_NSEC /* In nano seconds. */ - -static inline u64 mpy_l_X_l_ll(unsigned long mpy1,unsigned long mpy2) -{ - return (u64)mpy1 * mpy2; -} /* * Management arrays for POSIX timers. 
Timers are kept in slab memory * Timer ids are allocated by an external routine that keeps track of the @@ -140,18 +133,18 @@ static DEFINE_SPINLOCK(idr_lock); */ static struct k_clock posix_clocks[MAX_CLOCKS]; + /* - * We only have one real clock that can be set so we need only one abs list, - * even if we should want to have several clocks with differing resolutions. + * These ones are defined below. */ -static struct k_clock_abs abs_list = {.list = LIST_HEAD_INIT(abs_list.list), - .lock = SPIN_LOCK_UNLOCKED}; +static int common_nsleep(const clockid_t, int flags, struct timespec *t, + struct timespec __user *rmtp); +static void common_timer_get(struct k_itimer *, struct itimerspec *); +static int common_timer_set(struct k_itimer *, int, + struct itimerspec *, struct itimerspec *); +static int common_timer_del(struct k_itimer *timer); -static void posix_timer_fn(unsigned long); -static u64 do_posix_clock_monotonic_gettime_parts( - struct timespec *tp, struct timespec *mo); -int do_posix_clock_monotonic_gettime(struct timespec *tp); -static int do_posix_clock_monotonic_get(const clockid_t, struct timespec *tp); +static int posix_timer_fn(void *data); static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); @@ -184,10 +177,12 @@ static inline int common_clock_getres(const clockid_t which_clock, return 0; } -static inline int common_clock_get(const clockid_t which_clock, - struct timespec *tp) +/* + * Get real time for posix timers + */ +static int common_clock_get(clockid_t which_clock, struct timespec *tp) { - getnstimeofday(tp); + ktime_get_real_ts(tp); return 0; } @@ -199,25 +194,14 @@ static inline int common_clock_set(const clockid_t which_clock, static inline int common_timer_create(struct k_itimer *new_timer) { - INIT_LIST_HEAD(&new_timer->it.real.abs_timer_entry); - init_timer(&new_timer->it.real.timer); - new_timer->it.real.timer.data = (unsigned long) new_timer; + hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock); + 
new_timer->it.real.timer.data = new_timer; new_timer->it.real.timer.function = posix_timer_fn; return 0; } /* - * These ones are defined below. - */ -static int common_nsleep(const clockid_t, int flags, struct timespec *t, - struct timespec __user *rmtp); -static void common_timer_get(struct k_itimer *, struct itimerspec *); -static int common_timer_set(struct k_itimer *, int, - struct itimerspec *, struct itimerspec *); -static int common_timer_del(struct k_itimer *timer); - -/* - * Return nonzero iff we know a priori this clockid_t value is bogus. + * Return nonzero if we know a priori this clockid_t value is bogus. */ static inline int invalid_clockid(const clockid_t which_clock) { @@ -227,26 +211,32 @@ static inline int invalid_clockid(const clockid_t which_clock) return 1; if (posix_clocks[which_clock].clock_getres != NULL) return 0; -#ifndef CLOCK_DISPATCH_DIRECT if (posix_clocks[which_clock].res != 0) return 0; -#endif return 1; } +/* + * Get monotonic time for posix timers + */ +static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp) +{ + ktime_get_ts(tp); + return 0; +} /* * Initialize everything, well, just everything in Posix clocks/timers ;) */ static __init int init_posix_timers(void) { - struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES, - .abs_struct = &abs_list + struct k_clock clock_realtime = { + .clock_getres = hrtimer_get_res, }; - struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES, - .abs_struct = NULL, - .clock_get = do_posix_clock_monotonic_get, - .clock_set = do_posix_clock_nosettime + struct k_clock clock_monotonic = { + .clock_getres = hrtimer_get_res, + .clock_get = posix_ktime_get_ts, + .clock_set = do_posix_clock_nosettime, }; register_posix_clock(CLOCK_REALTIME, &clock_realtime); @@ -260,117 +250,17 @@ static __init int init_posix_timers(void) __initcall(init_posix_timers); -static void tstojiffie(struct timespec *tp, int res, u64 *jiff) -{ - long sec = tp->tv_sec; - long nsec = tp->tv_nsec + res - 
1; - - if (nsec >= NSEC_PER_SEC) { - sec++; - nsec -= NSEC_PER_SEC; - } - - /* - * The scaling constants are defined in - * The difference between there and here is that we do the - * res rounding and compute a 64-bit result (well so does that - * but it then throws away the high bits). - */ - *jiff = (mpy_l_X_l_ll(sec, SEC_CONVERSION) + - (mpy_l_X_l_ll(nsec, NSEC_CONVERSION) >> - (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; -} - -/* - * This function adjusts the timer as needed as a result of the clock - * being set. It should only be called for absolute timers, and then - * under the abs_list lock. It computes the time difference and sets - * the new jiffies value in the timer. It also updates the timers - * reference wall_to_monotonic value. It is complicated by the fact - * that tstojiffies() only handles positive times and it needs to work - * with both positive and negative times. Also, for negative offsets, - * we need to defeat the res round up. - * - * Return is true if there is a new time, else false. - */ -static long add_clockset_delta(struct k_itimer *timr, - struct timespec *new_wall_to) -{ - struct timespec delta; - int sign = 0; - u64 exp; - - set_normalized_timespec(&delta, - new_wall_to->tv_sec - - timr->it.real.wall_to_prev.tv_sec, - new_wall_to->tv_nsec - - timr->it.real.wall_to_prev.tv_nsec); - if (likely(!(delta.tv_sec | delta.tv_nsec))) - return 0; - if (delta.tv_sec < 0) { - set_normalized_timespec(&delta, - -delta.tv_sec, - 1 - delta.tv_nsec - - posix_clocks[timr->it_clock].res); - sign++; - } - tstojiffie(&delta, posix_clocks[timr->it_clock].res, &exp); - timr->it.real.wall_to_prev = *new_wall_to; - timr->it.real.timer.expires += (sign ? 
-exp : exp); - return 1; -} - -static void remove_from_abslist(struct k_itimer *timr) -{ - if (!list_empty(&timr->it.real.abs_timer_entry)) { - spin_lock(&abs_list.lock); - list_del_init(&timr->it.real.abs_timer_entry); - spin_unlock(&abs_list.lock); - } -} - static void schedule_next_timer(struct k_itimer *timr) { - struct timespec new_wall_to; - struct now_struct now; - unsigned long seq; - - /* - * Set up the timer for the next interval (if there is one). - * Note: this code uses the abs_timer_lock to protect - * it.real.wall_to_prev and must hold it until exp is set, not exactly - * obvious... - - * This function is used for CLOCK_REALTIME* and - * CLOCK_MONOTONIC* timers. If we ever want to handle other - * CLOCKs, the calling code (do_schedule_next_timer) would need - * to pull the "clock" info from the timer and dispatch the - * "other" CLOCKs "next timer" code (which, I suppose should - * also be added to the k_clock structure). - */ - if (!timr->it.real.incr) + if (timr->it.real.interval.tv64 == 0) return; - do { - seq = read_seqbegin(&xtime_lock); - new_wall_to = wall_to_monotonic; - posix_get_now(&now); - } while (read_seqretry(&xtime_lock, seq)); - - if (!list_empty(&timr->it.real.abs_timer_entry)) { - spin_lock(&abs_list.lock); - add_clockset_delta(timr, &new_wall_to); - - posix_bump_timer(timr, now); - - spin_unlock(&abs_list.lock); - } else { - posix_bump_timer(timr, now); - } + timr->it_overrun += hrtimer_forward(&timr->it.real.timer, + timr->it.real.interval); timr->it_overrun_last = timr->it_overrun; timr->it_overrun = -1; ++timr->it_requeue_pending; - add_timer(&timr->it.real.timer); + hrtimer_restart(&timr->it.real.timer); } /* @@ -391,31 +281,23 @@ void do_schedule_next_timer(struct siginfo *info) timr = lock_timer(info->si_tid, &flags); - if (!timr || timr->it_requeue_pending != info->si_sys_private) - goto exit; + if (timr && timr->it_requeue_pending == info->si_sys_private) { + if (timr->it_clock < 0) + posix_cpu_timer_schedule(timr); + else 
+ schedule_next_timer(timr); - if (timr->it_clock < 0) /* CPU clock */ - posix_cpu_timer_schedule(timr); - else - schedule_next_timer(timr); - info->si_overrun = timr->it_overrun_last; -exit: - if (timr) - unlock_timer(timr, flags); + info->si_overrun = timr->it_overrun_last; + } + + unlock_timer(timr, flags); } int posix_timer_event(struct k_itimer *timr,int si_private) { memset(&timr->sigq->info, 0, sizeof(siginfo_t)); timr->sigq->info.si_sys_private = si_private; - /* - * Send signal to the process that owns this timer. - - * This code assumes that all the possible abs_lists share the - * same lock (there is only one list at this time). If this is - * not the case, the CLOCK info would need to be used to find - * the proper abs list lock. - */ + /* Send signal to the process that owns this timer.*/ timr->sigq->info.si_signo = timr->it_sigev_signo; timr->sigq->info.si_errno = 0; @@ -449,64 +331,35 @@ EXPORT_SYMBOL_GPL(posix_timer_event); * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers. 
*/ -static void posix_timer_fn(unsigned long __data) +static int posix_timer_fn(void *data) { - struct k_itimer *timr = (struct k_itimer *) __data; + struct k_itimer *timr = data; unsigned long flags; - unsigned long seq; - struct timespec delta, new_wall_to; - u64 exp = 0; - int do_notify = 1; + int si_private = 0; + int ret = HRTIMER_NORESTART; spin_lock_irqsave(&timr->it_lock, flags); - if (!list_empty(&timr->it.real.abs_timer_entry)) { - spin_lock(&abs_list.lock); - do { - seq = read_seqbegin(&xtime_lock); - new_wall_to = wall_to_monotonic; - } while (read_seqretry(&xtime_lock, seq)); - set_normalized_timespec(&delta, - new_wall_to.tv_sec - - timr->it.real.wall_to_prev.tv_sec, - new_wall_to.tv_nsec - - timr->it.real.wall_to_prev.tv_nsec); - if (likely((delta.tv_sec | delta.tv_nsec ) == 0)) { - /* do nothing, timer is on time */ - } else if (delta.tv_sec < 0) { - /* do nothing, timer is already late */ - } else { - /* timer is early due to a clock set */ - tstojiffie(&delta, - posix_clocks[timr->it_clock].res, - &exp); - timr->it.real.wall_to_prev = new_wall_to; - timr->it.real.timer.expires += exp; - add_timer(&timr->it.real.timer); - do_notify = 0; - } - spin_unlock(&abs_list.lock); - } - if (do_notify) { - int si_private=0; + if (timr->it.real.interval.tv64 != 0) + si_private = ++timr->it_requeue_pending; - if (timr->it.real.incr) - si_private = ++timr->it_requeue_pending; - else { - remove_from_abslist(timr); + if (posix_timer_event(timr, si_private)) { + /* + * signal was not sent because of sig_ignor + * we will not get a call back to restart it AND + * it should be restarted. + */ + if (timr->it.real.interval.tv64 != 0) { + timr->it_overrun += + hrtimer_forward(&timr->it.real.timer, + timr->it.real.interval); + ret = HRTIMER_RESTART; } - - if (posix_timer_event(timr, si_private)) - /* - * signal was not sent because of sig_ignor - * we will not get a call back to restart it AND - * it should be restarted. 
- */ - schedule_next_timer(timr); } - unlock_timer(timr, flags); /* hold thru abs lock to keep irq off */ -} + unlock_timer(timr, flags); + return ret; +} static inline struct task_struct * good_sigevent(sigevent_t * event) { @@ -597,8 +450,7 @@ sys_timer_create(const clockid_t which_clock, goto out; } spin_lock_irq(&idr_lock); - error = idr_get_new(&posix_timers_id, - (void *) new_timer, + error = idr_get_new(&posix_timers_id, (void *) new_timer, &new_timer_id); spin_unlock_irq(&idr_lock); if (error == -EAGAIN) @@ -698,26 +550,6 @@ out: return error; } -/* - * good_timespec - * - * This function checks the elements of a timespec structure. - * - * Arguments: - * ts : Pointer to the timespec structure to check - * - * Return value: - * If a NULL pointer was passed in, or the tv_nsec field was less than 0 - * or greater than NSEC_PER_SEC, or the tv_sec field was less than 0, - * this function returns 0. Otherwise it returns 1. - */ -static int good_timespec(const struct timespec *ts) -{ - if ((!ts) || !timespec_valid(ts)) - return 0; - return 1; -} - /* * Locking issues: We need to protect the result of the id look up until * we get the timer locked down so it is not deleted under us. 
The @@ -770,39 +602,39 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags) static void common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) { - unsigned long expires; - struct now_struct now; - - do - expires = timr->it.real.timer.expires; - while ((volatile long) (timr->it.real.timer.expires) != expires); - - posix_get_now(&now); - - if (expires && - ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) && - !timr->it.real.incr && - posix_time_before(&timr->it.real.timer, &now)) - timr->it.real.timer.expires = expires = 0; - if (expires) { - if (timr->it_requeue_pending & REQUEUE_PENDING || - (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { - posix_bump_timer(timr, now); - expires = timr->it.real.timer.expires; - } - else - if (!timer_pending(&timr->it.real.timer)) - expires = 0; - if (expires) - expires -= now.jiffies; - } - jiffies_to_timespec(expires, &cur_setting->it_value); - jiffies_to_timespec(timr->it.real.incr, &cur_setting->it_interval); + ktime_t remaining; + struct hrtimer *timer = &timr->it.real.timer; - if (cur_setting->it_value.tv_sec < 0) { - cur_setting->it_value.tv_nsec = 1; - cur_setting->it_value.tv_sec = 0; + memset(cur_setting, 0, sizeof(struct itimerspec)); + remaining = hrtimer_get_remaining(timer); + + /* Time left ? or timer pending */ + if (remaining.tv64 > 0 || hrtimer_active(timer)) + goto calci; + /* interval timer ? */ + if (timr->it.real.interval.tv64 == 0) + return; + /* + * When a requeue is pending or this is a SIGEV_NONE timer + * move the expiry time forward by intervals, so expiry is > + * now. + */ + if (timr->it_requeue_pending & REQUEUE_PENDING || + (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { + timr->it_overrun += + hrtimer_forward(timer, timr->it.real.interval); + remaining = hrtimer_get_remaining(timer); } + calci: + /* interval timer ? 
*/ + if (timr->it.real.interval.tv64 != 0) + cur_setting->it_interval = + ktime_to_timespec(timr->it.real.interval); + /* Return 0 only, when the timer is expired and not pending */ + if (remaining.tv64 <= 0) + cur_setting->it_value.tv_nsec = 1; + else + cur_setting->it_value = ktime_to_timespec(remaining); } /* Get the time remaining on a POSIX.1b interval timer. */ @@ -826,6 +658,7 @@ sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting) return 0; } + /* * Get the number of overruns of a POSIX.1b interval timer. This is to * be the overrun of the timer last delivered. At the same time we are @@ -835,7 +668,6 @@ sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting) * the call back to do_schedule_next_timer(). So all we need to do is * to pick up the frozen overrun. */ - asmlinkage long sys_timer_getoverrun(timer_t timer_id) { @@ -852,84 +684,6 @@ sys_timer_getoverrun(timer_t timer_id) return overrun; } -/* - * Adjust for absolute time - * - * If absolute time is given and it is not CLOCK_MONOTONIC, we need to - * adjust for the offset between the timer clock (CLOCK_MONOTONIC) and - * what ever clock he is using. - * - * If it is relative time, we need to add the current (CLOCK_MONOTONIC) - * time to it to get the proper time for the timer. 
- */ -static int adjust_abs_time(struct k_clock *clock, struct timespec *tp, - int abs, u64 *exp, struct timespec *wall_to) -{ - struct timespec now; - struct timespec oc = *tp; - u64 jiffies_64_f; - int rtn =0; - - if (abs) { - /* - * The mask pick up the 4 basic clocks - */ - if (!((clock - &posix_clocks[0]) & ~CLOCKS_MASK)) { - jiffies_64_f = do_posix_clock_monotonic_gettime_parts( - &now, wall_to); - /* - * If we are doing a MONOTONIC clock - */ - if((clock - &posix_clocks[0]) & CLOCKS_MONO){ - now.tv_sec += wall_to->tv_sec; - now.tv_nsec += wall_to->tv_nsec; - } - } else { - /* - * Not one of the basic clocks - */ - clock->clock_get(clock - posix_clocks, &now); - jiffies_64_f = get_jiffies_64(); - } - /* - * Take away now to get delta and normalize - */ - set_normalized_timespec(&oc, oc.tv_sec - now.tv_sec, - oc.tv_nsec - now.tv_nsec); - }else{ - jiffies_64_f = get_jiffies_64(); - } - /* - * Check if the requested time is prior to now (if so set now) - */ - if (oc.tv_sec < 0) - oc.tv_sec = oc.tv_nsec = 0; - - if (oc.tv_sec | oc.tv_nsec) - set_normalized_timespec(&oc, oc.tv_sec, - oc.tv_nsec + clock->res); - tstojiffie(&oc, clock->res, exp); - - /* - * Check if the requested time is more than the timer code - * can handle (if so we error out but return the value too). - */ - if (*exp > ((u64)MAX_JIFFY_OFFSET)) - /* - * This is a considered response, not exactly in - * line with the standard (in fact it is silent on - * possible overflows). We assume such a large - * value is ALMOST always a programming error and - * try not to compound it by setting a really dumb - * value. - */ - rtn = -EINVAL; - /* - * return the actual jiffies expire time, full 64 bits - */ - *exp += jiffies_64_f; - return rtn; -} /* Set a POSIX.1b interval timer. */ /* timr->it_lock is taken. 
*/ @@ -937,68 +691,48 @@ static inline int common_timer_set(struct k_itimer *timr, int flags, struct itimerspec *new_setting, struct itimerspec *old_setting) { - struct k_clock *clock = &posix_clocks[timr->it_clock]; - u64 expire_64; + struct hrtimer *timer = &timr->it.real.timer; if (old_setting) common_timer_get(timr, old_setting); /* disable the timer */ - timr->it.real.incr = 0; + timr->it.real.interval.tv64 = 0; /* * careful here. If smp we could be in the "fire" routine which will * be spinning as we hold the lock. But this is ONLY an SMP issue. */ - if (try_to_del_timer_sync(&timr->it.real.timer) < 0) { -#ifdef CONFIG_SMP - /* - * It can only be active if on an other cpu. Since - * we have cleared the interval stuff above, it should - * clear once we release the spin lock. Of course once - * we do that anything could happen, including the - * complete melt down of the timer. So return with - * a "retry" exit status. - */ + if (hrtimer_try_to_cancel(timer) < 0) return TIMER_RETRY; -#endif - } - - remove_from_abslist(timr); timr->it_requeue_pending = (timr->it_requeue_pending + 2) & ~REQUEUE_PENDING; timr->it_overrun_last = 0; - timr->it_overrun = -1; - /* - *switch off the timer when it_value is zero - */ - if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) { - timr->it.real.timer.expires = 0; - return 0; - } - if (adjust_abs_time(clock, - &new_setting->it_value, flags & TIMER_ABSTIME, - &expire_64, &(timr->it.real.wall_to_prev))) { - return -EINVAL; - } - timr->it.real.timer.expires = (unsigned long)expire_64; - tstojiffie(&new_setting->it_interval, clock->res, &expire_64); - timr->it.real.incr = (unsigned long)expire_64; + /* switch off the timer when it_value is zero */ + if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) + return 0; - /* - * We do not even queue SIGEV_NONE timers! But we do put them - * in the abs list so we can do that right. + /* Posix madness. 
Only absolute CLOCK_REALTIME timers + * are affected by clock sets. So we must reiniatilize + * the timer. */ - if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)) - add_timer(&timr->it.real.timer); - - if (flags & TIMER_ABSTIME && clock->abs_struct) { - spin_lock(&clock->abs_struct->lock); - list_add_tail(&(timr->it.real.abs_timer_entry), - &(clock->abs_struct->list)); - spin_unlock(&clock->abs_struct->lock); - } + if (timr->it_clock == CLOCK_REALTIME && (flags & TIMER_ABSTIME)) + hrtimer_rebase(timer, CLOCK_REALTIME); + else + hrtimer_rebase(timer, CLOCK_MONOTONIC); + + timer->expires = timespec_to_ktime(new_setting->it_value); + + /* Convert interval */ + timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); + + /* SIGEV_NONE timers are not queued ! See common_timer_get */ + if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) + return 0; + + hrtimer_start(timer, timer->expires, (flags & TIMER_ABSTIME) ? + HRTIMER_ABS : HRTIMER_REL); return 0; } @@ -1020,8 +754,8 @@ sys_timer_settime(timer_t timer_id, int flags, if (copy_from_user(&new_spec, new_setting, sizeof (new_spec))) return -EFAULT; - if ((!good_timespec(&new_spec.it_interval)) || - (!good_timespec(&new_spec.it_value))) + if (!timespec_valid(&new_spec.it_interval) || + !timespec_valid(&new_spec.it_value)) return -EINVAL; retry: timr = lock_timer(timer_id, &flag); @@ -1037,8 +771,8 @@ retry: goto retry; } - if (old_setting && !error && copy_to_user(old_setting, - &old_spec, sizeof (old_spec))) + if (old_setting && !error && + copy_to_user(old_setting, &old_spec, sizeof (old_spec))) error = -EFAULT; return error; @@ -1046,24 +780,10 @@ retry: static inline int common_timer_del(struct k_itimer *timer) { - timer->it.real.incr = 0; + timer->it.real.interval.tv64 = 0; - if (try_to_del_timer_sync(&timer->it.real.timer) < 0) { -#ifdef CONFIG_SMP - /* - * It can only be active if on an other cpu. 
Since - * we have cleared the interval stuff above, it should - * clear once we release the spin lock. Of course once - * we do that anything could happen, including the - * complete melt down of the timer. So return with - * a "retry" exit status. - */ + if (hrtimer_try_to_cancel(&timer->it.real.timer) < 0) return TIMER_RETRY; -#endif - } - - remove_from_abslist(timer); - return 0; } @@ -1079,24 +799,16 @@ sys_timer_delete(timer_t timer_id) struct k_itimer *timer; long flags; -#ifdef CONFIG_SMP - int error; retry_delete: -#endif timer = lock_timer(timer_id, &flags); if (!timer) return -EINVAL; -#ifdef CONFIG_SMP - error = timer_delete_hook(timer); - - if (error == TIMER_RETRY) { + if (timer_delete_hook(timer) == TIMER_RETRY) { unlock_timer(timer, flags); goto retry_delete; } -#else - timer_delete_hook(timer); -#endif + spin_lock(&current->sighand->siglock); list_del(&timer->list); spin_unlock(&current->sighand->siglock); @@ -1113,6 +825,7 @@ retry_delete: release_posix_timer(timer, IT_ID_SET); return 0; } + /* * return timer owned by the process, used by exit_itimers */ @@ -1120,22 +833,13 @@ static inline void itimer_delete(struct k_itimer *timer) { unsigned long flags; -#ifdef CONFIG_SMP - int error; retry_delete: -#endif spin_lock_irqsave(&timer->it_lock, flags); -#ifdef CONFIG_SMP - error = timer_delete_hook(timer); - - if (error == TIMER_RETRY) { + if (timer_delete_hook(timer) == TIMER_RETRY) { unlock_timer(timer, flags); goto retry_delete; } -#else - timer_delete_hook(timer); -#endif list_del(&timer->list); /* * This keeps any tasks waiting on the spin lock from thinking @@ -1164,57 +868,7 @@ void exit_itimers(struct signal_struct *sig) } } -/* - * And now for the "clock" calls - * - * These functions are called both from timer functions (with the timer - * spin_lock_irq() held and from clock calls with no locking. They must - * use the save flags versions of locks. - */ - -/* - * We do ticks here to avoid the irq lock ( they take sooo long).
- * The seqlock is great here. Since we a reader, we don't really care - * if we are interrupted since we don't take lock that will stall us or - * any other cpu. Voila, no irq lock is needed. - * - */ - -static u64 do_posix_clock_monotonic_gettime_parts( - struct timespec *tp, struct timespec *mo) -{ - u64 jiff; - unsigned int seq; - - do { - seq = read_seqbegin(&xtime_lock); - getnstimeofday(tp); - *mo = wall_to_monotonic; - jiff = jiffies_64; - - } while(read_seqretry(&xtime_lock, seq)); - - return jiff; -} - -static int do_posix_clock_monotonic_get(const clockid_t clock, - struct timespec *tp) -{ - struct timespec wall_to_mono; - - do_posix_clock_monotonic_gettime_parts(tp, &wall_to_mono); - - set_normalized_timespec(tp, tp->tv_sec + wall_to_mono.tv_sec, - tp->tv_nsec + wall_to_mono.tv_nsec); - - return 0; -} - -int do_posix_clock_monotonic_gettime(struct timespec *tp) -{ - return do_posix_clock_monotonic_get(CLOCK_MONOTONIC, tp); -} - +/* Not available / possible... functions */ int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp) { return -EINVAL; @@ -1287,107 +941,6 @@ sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp) return error; } -/* - * The standard says that an absolute nanosleep call MUST wake up at - * the requested time in spite of clock settings. Here is what we do: - * For each nanosleep call that needs it (only absolute and not on - * CLOCK_MONOTONIC* (as it can not be set)) we thread a little structure - * into the "nanosleep_abs_list". All we need is the task_struct pointer. - * When ever the clock is set we just wake up all those tasks. The rest - * is done by the while loop in clock_nanosleep(). - * - * On locking, clock_was_set() is called from update_wall_clock which - * holds (or has held for it) a write_lock_irq( xtime_lock) and is - * called from the timer bh code. Thus we need the irq save locks. - * - * Also, on the call from update_wall_clock, that is done as part of a - * softirq thing. 
We don't want to delay the system that much (possibly - * long list of timers to fix), so we defer that work to keventd. - */ - -static DECLARE_WAIT_QUEUE_HEAD(nanosleep_abs_wqueue); -static DECLARE_WORK(clock_was_set_work, (void(*)(void*))clock_was_set, NULL); - -static DECLARE_MUTEX(clock_was_set_lock); - -void clock_was_set(void) -{ - struct k_itimer *timr; - struct timespec new_wall_to; - LIST_HEAD(cws_list); - unsigned long seq; - - - if (unlikely(in_interrupt())) { - schedule_work(&clock_was_set_work); - return; - } - wake_up_all(&nanosleep_abs_wqueue); - - /* - * Check if there exist TIMER_ABSTIME timers to correct. - * - * Notes on locking: This code is run in task context with irq - * on. We CAN be interrupted! All other usage of the abs list - * lock is under the timer lock which holds the irq lock as - * well. We REALLY don't want to scan the whole list with the - * interrupt system off, AND we would like a sequence lock on - * this code as well. Since we assume that the clock will not - * be set often, it seems ok to take and release the irq lock - * for each timer. In fact add_timer will do this, so this is - * not an issue. So we know when we are done, we will move the - * whole list to a new location. Then as we process each entry, - * we will move it to the actual list again. This way, when our - * copy is empty, we are done. We are not all that concerned - * about preemption so we will use a semaphore lock to protect - * aginst reentry. This way we will not stall another - * processor. It is possible that this may delay some timers - * that should have expired, given the new clock, but even this - * will be minimal as we will always update to the current time, - * even if it was set by a task that is waiting for entry to - * this code. Timers that expire too early will be caught by - * the expire code and restarted. - - * Absolute timers that repeat are left in the abs list while - * waiting for the task to pick up the signal. 
This means we - * may find timers that are not in the "add_timer" list, but are - * in the abs list. We do the same thing for these, save - * putting them back in the "add_timer" list. (Note, these are - * left in the abs list mainly to indicate that they are - * ABSOLUTE timers, a fact that is used by the re-arm code, and - * for which we have no other flag.) - - */ - - down(&clock_was_set_lock); - spin_lock_irq(&abs_list.lock); - list_splice_init(&abs_list.list, &cws_list); - spin_unlock_irq(&abs_list.lock); - do { - do { - seq = read_seqbegin(&xtime_lock); - new_wall_to = wall_to_monotonic; - } while (read_seqretry(&xtime_lock, seq)); - - spin_lock_irq(&abs_list.lock); - if (list_empty(&cws_list)) { - spin_unlock_irq(&abs_list.lock); - break; - } - timr = list_entry(cws_list.next, struct k_itimer, - it.real.abs_timer_entry); - - list_del_init(&timr->it.real.abs_timer_entry); - if (add_clockset_delta(timr, &new_wall_to) && - del_timer(&timr->it.real.timer)) /* timer run yet? */ - add_timer(&timr->it.real.timer); - list_add(&timr->it.real.abs_timer_entry, &abs_list.list); - spin_unlock_irq(&abs_list.lock); - } while (1); - - up(&clock_was_set_lock); -} - /* * nanosleep for monotonic and realtime clocks */ @@ -1401,7 +954,7 @@ static int common_nsleep(const clockid_t which_clock, int flags, case CLOCK_REALTIME: /* Posix madness. Only absolute timers on clock realtime are affected by clock set. 
*/ - if (mode == HRTIMER_ABS) + if (mode != HRTIMER_ABS) clockid = CLOCK_MONOTONIC; case CLOCK_MONOTONIC: break; -- cgit v1.2.3-71-gd317 From d1c0b8f835aeba85aa428aaec6d521ef4639c7fa Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Mon, 9 Jan 2006 20:52:40 -0800 Subject: [PATCH] Remove getnstimestamp() Remove getnstimestamp() in favor of ktime.h's ktime_get_ts() Signed-off-by: Matt Helsley Cc: john stultz Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 1 - kernel/time.c | 22 ---------------------- 2 files changed, 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 1201155b2202..f2aca7ec6325 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -80,7 +80,6 @@ extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); extern int do_getitimer(int which, struct itimerval *value); extern void getnstimeofday(struct timespec *tv); -extern void getnstimestamp(struct timespec *ts); extern struct timespec timespec_trunc(struct timespec t, unsigned gran); diff --git a/kernel/time.c b/kernel/time.c index cf5a4582a672..169e8329e0b6 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -564,28 +564,6 @@ void getnstimeofday(struct timespec *tv) EXPORT_SYMBOL_GPL(getnstimeofday); #endif -void getnstimestamp(struct timespec *ts) -{ - unsigned int seq; - struct timespec wall2mono; - - /* synchronize with settimeofday() changes */ - do { - seq = read_seqbegin(&xtime_lock); - getnstimeofday(ts); - wall2mono = wall_to_monotonic; - } while(unlikely(read_seqretry(&xtime_lock, seq))); - - /* adjust to monotonicaly-increasing values */ - ts->tv_sec += wall2mono.tv_sec; - ts->tv_nsec += wall2mono.tv_nsec; - while (unlikely(ts->tv_nsec >= NSEC_PER_SEC)) { - ts->tv_nsec -= NSEC_PER_SEC; - ts->tv_sec++; - } -} -EXPORT_SYMBOL_GPL(getnstimestamp); - /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. 
* Assumes input in normal date format, i.e. 1980-12-31 23:59:59 * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. -- cgit v1.2.3-71-gd317 From 49a2a1b83ba6fa40c41968d6a28ba16e7ed0c3f7 Mon Sep 17 00:00:00 2001 From: Anil S Keshavamurthy Date: Mon, 9 Jan 2006 20:52:43 -0800 Subject: [PATCH] kprobes: changed from using spinlock to mutex Since Kprobes runtime exception handlers is now lock free as this code path is now using RCU to walk through the list, there is no need for the register/unregister{_kprobe} to use spin_{lock/unlock}_isr{save/restore}. The serialization during registration/unregistration is now possible using just a mutex. In the above process, this patch also fixes a minor memory leak for x86_64 and powerpc. Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 6 +-- arch/powerpc/kernel/kprobes.c | 14 +++---- arch/sparc64/kernel/kprobes.c | 6 +-- arch/x86_64/kernel/kprobes.c | 7 +--- include/asm-ia64/kprobes.h | 5 --- include/linux/kprobes.h | 1 - kernel/kprobes.c | 91 ++++++++++++++++++++----------------------- 7 files changed, 53 insertions(+), 77 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 19edcd526ba4..68fe10250486 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -57,14 +57,10 @@ static inline int is_IF_modifier(kprobe_opcode_t opcode) } int __kprobes arch_prepare_kprobe(struct kprobe *p) -{ - return 0; -} - -void __kprobes arch_copy_kprobe(struct kprobe *p) { memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); p->opcode = *p->addr; + return 0; } void __kprobes arch_arm_kprobe(struct kprobe *p) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 5368f9c2e6bf..331e169e8629 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -60,13 +60,13 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) 
if (!p->ainsn.insn) ret = -ENOMEM; } - return ret; -} -void __kprobes arch_copy_kprobe(struct kprobe *p) -{ - memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - p->opcode = *p->addr; + if (!ret) { + memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; + } + + return ret; } void __kprobes arch_arm_kprobe(struct kprobe *p) @@ -85,9 +85,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - down(&kprobe_mutex); free_insn_slot(p->ainsn.insn); - up(&kprobe_mutex); } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c index a97b0f0727ab..bbd5aa6818ea 100644 --- a/arch/sparc64/kernel/kprobes.c +++ b/arch/sparc64/kernel/kprobes.c @@ -42,15 +42,11 @@ DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); int __kprobes arch_prepare_kprobe(struct kprobe *p) -{ - return 0; -} - -void __kprobes arch_copy_kprobe(struct kprobe *p) { p->ainsn.insn[0] = *p->addr; p->ainsn.insn[1] = BREAKPOINT_INSTRUCTION_2; p->opcode = *p->addr; + return 0; } void __kprobes arch_arm_kprobe(struct kprobe *p) diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index afe11f4fbd1d..8b8943bfb89e 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -42,8 +42,8 @@ #include #include -static DECLARE_MUTEX(kprobe_mutex); void jprobe_return_end(void); +void __kprobes arch_copy_kprobe(struct kprobe *p); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -69,12 +69,11 @@ static inline int is_IF_modifier(kprobe_opcode_t *insn) int __kprobes arch_prepare_kprobe(struct kprobe *p) { /* insn: must be on special executable page on x86_64. 
*/ - down(&kprobe_mutex); p->ainsn.insn = get_insn_slot(); - up(&kprobe_mutex); if (!p->ainsn.insn) { return -ENOMEM; } + arch_copy_kprobe(p); return 0; } @@ -223,9 +222,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - down(&kprobe_mutex); free_insn_slot(p->ainsn.insn); - up(&kprobe_mutex); } static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h index 5b26462674a7..12a0b93020da 100644 --- a/include/asm-ia64/kprobes.h +++ b/include/asm-ia64/kprobes.h @@ -110,11 +110,6 @@ struct arch_specific_insn { unsigned short target_br_reg; }; -/* ia64 does not need this */ -static inline void arch_copy_kprobe(struct kprobe *p) -{ -} - extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index c03f2dc933de..ad6e4fe970fd 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -150,7 +150,6 @@ struct kretprobe_instance { extern spinlock_t kretprobe_lock; extern int arch_prepare_kprobe(struct kprobe *p); -extern void arch_copy_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); extern void arch_remove_kprobe(struct kprobe *p); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 3897630d2335..f14ccd35e9b6 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -48,7 +48,7 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; -static DEFINE_SPINLOCK(kprobe_lock); /* Protects kprobe_table */ +static DECLARE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; @@ -167,7 +167,7 @@ static inline void reset_kprobe_instance(void) /* * This routine is called 
either: - * - under the kprobe_lock spinlock - during kprobe_[un]register() + * - under the kprobe_mutex - during kprobe_[un]register() * OR * - with preemption disabled - from arch/xxx/kernel/kprobes.c */ @@ -420,7 +420,6 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) /* * This is the second or subsequent kprobe at the address - handle * the intricacies - * TODO: Move kcalloc outside the spin_lock */ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) @@ -442,25 +441,6 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, return ret; } -/* kprobe removal house-keeping routines */ -static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) -{ - arch_disarm_kprobe(p); - hlist_del_rcu(&p->hlist); - spin_unlock_irqrestore(&kprobe_lock, flags); - arch_remove_kprobe(p); -} - -static inline void cleanup_aggr_kprobe(struct kprobe *old_p, - struct kprobe *p, unsigned long flags) -{ - list_del_rcu(&p->list); - if (list_empty(&old_p->list)) - cleanup_kprobe(old_p, flags); - else - spin_unlock_irqrestore(&kprobe_lock, flags); -} - static int __kprobes in_kprobes_functions(unsigned long addr) { if (addr >= (unsigned long)__kprobes_text_start @@ -472,7 +452,6 @@ static int __kprobes in_kprobes_functions(unsigned long addr) int __kprobes register_kprobe(struct kprobe *p) { int ret = 0; - unsigned long flags = 0; struct kprobe *old_p; struct module *mod; @@ -484,18 +463,17 @@ int __kprobes register_kprobe(struct kprobe *p) (unlikely(!try_module_get(mod)))) return -EINVAL; - if ((ret = arch_prepare_kprobe(p)) != 0) - goto rm_kprobe; - p->nmissed = 0; - spin_lock_irqsave(&kprobe_lock, flags); + down(&kprobe_mutex); old_p = get_kprobe(p->addr); if (old_p) { ret = register_aggr_kprobe(old_p, p); goto out; } - arch_copy_kprobe(p); + if ((ret = arch_prepare_kprobe(p)) != 0) + goto out; + INIT_HLIST_NODE(&p->hlist); hlist_add_head_rcu(&p->hlist, &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); @@ 
-503,10 +481,8 @@ int __kprobes register_kprobe(struct kprobe *p) arch_arm_kprobe(p); out: - spin_unlock_irqrestore(&kprobe_lock, flags); -rm_kprobe: - if (ret == -EEXIST) - arch_remove_kprobe(p); + up(&kprobe_mutex); + if (ret && mod) module_put(mod); return ret; @@ -514,29 +490,48 @@ rm_kprobe: void __kprobes unregister_kprobe(struct kprobe *p) { - unsigned long flags; - struct kprobe *old_p; struct module *mod; + struct kprobe *old_p, *cleanup_p; - spin_lock_irqsave(&kprobe_lock, flags); + down(&kprobe_mutex); old_p = get_kprobe(p->addr); - if (old_p) { - /* cleanup_*_kprobe() does the spin_unlock_irqrestore */ - if (old_p->pre_handler == aggr_pre_handler) - cleanup_aggr_kprobe(old_p, p, flags); - else - cleanup_kprobe(p, flags); + if (unlikely(!old_p)) { + up(&kprobe_mutex); + return; + } - synchronize_sched(); + if ((old_p->pre_handler == aggr_pre_handler) && + (p->list.next == &old_p->list) && + (p->list.prev == &old_p->list)) { + /* Only one element in the aggregate list */ + arch_disarm_kprobe(p); + hlist_del_rcu(&old_p->hlist); + cleanup_p = old_p; + } else if (old_p == p) { + /* Only one kprobe element in the hash list */ + arch_disarm_kprobe(p); + hlist_del_rcu(&p->hlist); + cleanup_p = p; + } else { + list_del_rcu(&p->list); + cleanup_p = NULL; + } - if ((mod = module_text_address((unsigned long)p->addr))) - module_put(mod); + up(&kprobe_mutex); - if (old_p->pre_handler == aggr_pre_handler && - list_empty(&old_p->list)) + synchronize_sched(); + if ((mod = module_text_address((unsigned long)p->addr))) + module_put(mod); + + if (cleanup_p) { + if (cleanup_p->pre_handler == aggr_pre_handler) { + list_del_rcu(&p->list); kfree(old_p); - } else - spin_unlock_irqrestore(&kprobe_lock, flags); + } + down(&kprobe_mutex); + arch_remove_kprobe(p); + up(&kprobe_mutex); + } } static struct notifier_block kprobe_exceptions_nb = { -- cgit v1.2.3-71-gd317 From e597c2984c64609c6e1e1ac803f00f7550705860 Mon Sep 17 00:00:00 2001 From: Anil S Keshavamurthy Date: Mon, 9 Jan 
2006 20:52:45 -0800 Subject: [PATCH] kprobes: arch_remove_kprobe Currently arch_remove_kprobe() is only implemented/required for x86_64 and powerpc. All other architectures like IA64, i386 and sparc64 implement a dummy function which is being called from arch independent kprobes.c file. This patch removes the dummy functions and replaces them with #define arch_remove_kprobe(p, s) do { } while(0) Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 4 ---- arch/ia64/kernel/kprobes.c | 4 ---- arch/powerpc/kernel/kprobes.c | 4 +++- arch/sparc64/kernel/kprobes.c | 4 ---- arch/x86_64/kernel/kprobes.c | 4 +++- include/asm-i386/kprobes.h | 1 + include/asm-ia64/kprobes.h | 1 + include/asm-powerpc/kprobes.h | 1 + include/asm-sparc64/kprobes.h | 1 + include/asm-x86_64/kprobes.h | 1 + include/linux/kprobes.h | 1 - kernel/kprobes.c | 4 +--- 12 files changed, 12 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 68fe10250486..2f372dbd34fd 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -77,10 +77,6 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void __kprobes arch_remove_kprobe(struct kprobe *p) -{ -} - static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) { kcb->prev_kprobe.kp = kprobe_running(); diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 89a70400c4f6..4de7f6759093 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -467,10 +467,6 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); } -void __kprobes arch_remove_kprobe(struct kprobe *p) -{ -} - /* * We are resuming execution after a single step fault, so the pt_regs * structure reflects the register state after we executed the instruction diff --git
a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 2cd32dd6898b..93444e32fccd 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -80,9 +80,11 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void __kprobes arch_remove_kprobe(struct kprobe *p) +void __kprobes arch_remove_kprobe(struct kprobe *p, struct semaphore *s) { + down(s); free_insn_slot(p->ainsn.insn); + up(s); } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c index bbd5aa6818ea..ff5e9d5cad50 100644 --- a/arch/sparc64/kernel/kprobes.c +++ b/arch/sparc64/kernel/kprobes.c @@ -61,10 +61,6 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) flushi(p->addr); } -void __kprobes arch_remove_kprobe(struct kprobe *p) -{ -} - static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) { kcb->prev_kprobe.kp = kprobe_running(); diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 128e18190f99..61a6a9369cbd 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -220,9 +220,11 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void __kprobes arch_remove_kprobe(struct kprobe *p) +void __kprobes arch_remove_kprobe(struct kprobe *p, struct semaphore *s) { + down(s); free_insn_slot(p->ainsn.insn); + up(s); } static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) diff --git a/include/asm-i386/kprobes.h b/include/asm-i386/kprobes.h index f9e150fa1d44..dc559267ce3e 100644 --- a/include/asm-i386/kprobes.h +++ b/include/asm-i386/kprobes.h @@ -40,6 +40,7 @@ typedef u8 kprobe_opcode_t; #define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry #define ARCH_SUPPORTS_KRETPROBES +#define arch_remove_kprobe(p, s) do { } while(0) void kretprobe_trampoline(void); diff --git a/include/asm-ia64/kprobes.h 
b/include/asm-ia64/kprobes.h index 12a0b93020da..698508f4a0cf 100644 --- a/include/asm-ia64/kprobes.h +++ b/include/asm-ia64/kprobes.h @@ -89,6 +89,7 @@ struct kprobe_ctlblk { #define IP_RELATIVE_PREDICT_OPCODE (7) #define LONG_BRANCH_OPCODE (0xC) #define LONG_CALL_OPCODE (0xD) +#define arch_remove_kprobe(p, s) do { } while(0) typedef struct kprobe_opcode { bundle_t bundle; diff --git a/include/asm-powerpc/kprobes.h b/include/asm-powerpc/kprobes.h index 42ece411435a..89dee13c2a4c 100644 --- a/include/asm-powerpc/kprobes.h +++ b/include/asm-powerpc/kprobes.h @@ -50,6 +50,7 @@ typedef unsigned int kprobe_opcode_t; #define ARCH_SUPPORTS_KRETPROBES void kretprobe_trampoline(void); +extern void arch_remove_kprobe(struct kprobe *p, struct semaphore *s); /* Architecture specific copy of original instruction */ struct arch_specific_insn { diff --git a/include/asm-sparc64/kprobes.h b/include/asm-sparc64/kprobes.h index 1b47e0d2ee93..27fbdcba724b 100644 --- a/include/asm-sparc64/kprobes.h +++ b/include/asm-sparc64/kprobes.h @@ -12,6 +12,7 @@ typedef u32 kprobe_opcode_t; #define MAX_INSN_SIZE 2 #define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry +#define arch_remove_kprobe(p, s) do { } while(0) /* Architecture specific copy of original instruction*/ struct arch_specific_insn { diff --git a/include/asm-x86_64/kprobes.h b/include/asm-x86_64/kprobes.h index 9e2532adf42c..3a19ad179220 100644 --- a/include/asm-x86_64/kprobes.h +++ b/include/asm-x86_64/kprobes.h @@ -78,6 +78,7 @@ static inline void restore_interrupts(struct pt_regs *regs) local_irq_enable(); } +extern void arch_remove_kprobe(struct kprobe *p, struct semaphore *s); extern int post_kprobe_handler(struct pt_regs *regs); extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_handler(struct pt_regs *regs); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index ad6e4fe970fd..59bf240cdb9d 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -152,7 +152,6 @@ 
extern spinlock_t kretprobe_lock; extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); -extern void arch_remove_kprobe(struct kprobe *p); extern int arch_init_kprobes(void); extern void show_registers(struct pt_regs *regs); extern kprobe_opcode_t *get_insn_slot(void); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index f1c0e61a2cb4..19c42cbf91a0 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -532,9 +532,7 @@ valid_p: list_del_rcu(&p->list); kfree(old_p); } - down(&kprobe_mutex); - arch_remove_kprobe(p); - up(&kprobe_mutex); + arch_remove_kprobe(p, &kprobe_mutex); } } -- cgit v1.2.3-71-gd317 From 0498b63504f818e5ab39c818cd6f7b41319a1187 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Mon, 9 Jan 2006 20:52:46 -0800 Subject: [PATCH] kprobes: fix build breakage The following patch (against 2.6.15-rc5-mm3) fixes a kprobes build break due to changes introduced in the kprobe locking in 2.6.15-rc5-mm3. In addition, the patch reverts back the open-coding of kprobe_mutex. 
Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/kprobes.c | 6 +++--- arch/x86_64/kernel/kprobes.c | 6 +++--- include/asm-i386/kprobes.h | 2 +- include/asm-ia64/kprobes.h | 2 +- include/asm-powerpc/kprobes.h | 3 ++- include/asm-sparc64/kprobes.h | 2 +- include/asm-x86_64/kprobes.h | 3 ++- include/linux/kprobes.h | 1 + kernel/kprobes.c | 4 ++-- 9 files changed, 16 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 93444e32fccd..27b0c40601fb 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -80,11 +80,11 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void __kprobes arch_remove_kprobe(struct kprobe *p, struct semaphore *s) +void __kprobes arch_remove_kprobe(struct kprobe *p) { - down(s); + down(&kprobe_mutex); free_insn_slot(p->ainsn.insn); - up(s); + up(&kprobe_mutex); } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 61a6a9369cbd..b7dc1f816d13 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -220,11 +220,11 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void __kprobes arch_remove_kprobe(struct kprobe *p, struct semaphore *s) +void __kprobes arch_remove_kprobe(struct kprobe *p) { - down(s); + down(&kprobe_mutex); free_insn_slot(p->ainsn.insn); - up(s); + up(&kprobe_mutex); } static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) diff --git a/include/asm-i386/kprobes.h b/include/asm-i386/kprobes.h index dc559267ce3e..27cac050a60e 100644 --- a/include/asm-i386/kprobes.h +++ b/include/asm-i386/kprobes.h @@ -40,7 +40,7 @@ typedef u8 kprobe_opcode_t; #define 
JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry #define ARCH_SUPPORTS_KRETPROBES -#define arch_remove_kprobe(p, s) do { } while(0) +#define arch_remove_kprobe(p) do {} while (0) void kretprobe_trampoline(void); diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h index 698508f4a0cf..a74b68104559 100644 --- a/include/asm-ia64/kprobes.h +++ b/include/asm-ia64/kprobes.h @@ -89,7 +89,7 @@ struct kprobe_ctlblk { #define IP_RELATIVE_PREDICT_OPCODE (7) #define LONG_BRANCH_OPCODE (0xC) #define LONG_CALL_OPCODE (0xD) -#define arch_remove_kprobe(p, s) do { } while(0) +#define arch_remove_kprobe(p) do {} while (0) typedef struct kprobe_opcode { bundle_t bundle; diff --git a/include/asm-powerpc/kprobes.h b/include/asm-powerpc/kprobes.h index 89dee13c2a4c..f466bc804f41 100644 --- a/include/asm-powerpc/kprobes.h +++ b/include/asm-powerpc/kprobes.h @@ -33,6 +33,7 @@ #define __ARCH_WANT_KPROBES_INSN_SLOT struct pt_regs; +struct kprobe; typedef unsigned int kprobe_opcode_t; #define BREAKPOINT_INSTRUCTION 0x7fe00008 /* trap */ @@ -50,7 +51,7 @@ typedef unsigned int kprobe_opcode_t; #define ARCH_SUPPORTS_KRETPROBES void kretprobe_trampoline(void); -extern void arch_remove_kprobe(struct kprobe *p, struct semaphore *s); +extern void arch_remove_kprobe(struct kprobe *p); /* Architecture specific copy of original instruction */ struct arch_specific_insn { diff --git a/include/asm-sparc64/kprobes.h b/include/asm-sparc64/kprobes.h index 27fbdcba724b..e4efe652b54b 100644 --- a/include/asm-sparc64/kprobes.h +++ b/include/asm-sparc64/kprobes.h @@ -12,7 +12,7 @@ typedef u32 kprobe_opcode_t; #define MAX_INSN_SIZE 2 #define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry -#define arch_remove_kprobe(p, s) do { } while(0) +#define arch_remove_kprobe(p) do {} while (0) /* Architecture specific copy of original instruction*/ struct arch_specific_insn { diff --git a/include/asm-x86_64/kprobes.h b/include/asm-x86_64/kprobes.h index 3a19ad179220..98a1e95ddb98 100644 --- 
a/include/asm-x86_64/kprobes.h +++ b/include/asm-x86_64/kprobes.h @@ -30,6 +30,7 @@ #define __ARCH_WANT_KPROBES_INSN_SLOT struct pt_regs; +struct kprobe; typedef u8 kprobe_opcode_t; #define BREAKPOINT_INSTRUCTION 0xcc @@ -44,6 +45,7 @@ typedef u8 kprobe_opcode_t; #define ARCH_SUPPORTS_KRETPROBES void kretprobe_trampoline(void); +extern void arch_remove_kprobe(struct kprobe *p); /* Architecture specific copy of original instruction*/ struct arch_specific_insn { @@ -78,7 +80,6 @@ static inline void restore_interrupts(struct pt_regs *regs) local_irq_enable(); } -extern void arch_remove_kprobe(struct kprobe *p, struct semaphore *s); extern int post_kprobe_handler(struct pt_regs *regs); extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_handler(struct pt_regs *regs); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 59bf240cdb9d..10005bc92a31 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -149,6 +149,7 @@ struct kretprobe_instance { }; extern spinlock_t kretprobe_lock; +extern struct semaphore kprobe_mutex; extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 19c42cbf91a0..f24cbab558f1 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -48,7 +48,7 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; -static DECLARE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ +DECLARE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; @@ -532,7 +532,7 @@ valid_p: list_del_rcu(&p->list); kfree(old_p); } - arch_remove_kprobe(p, &kprobe_mutex); + arch_remove_kprobe(p); } } -- cgit v1.2.3-71-gd317 From 2b4f2f4b0132afa9f441171285cca354377bf5d0 Mon Sep 17 
00:00:00 2001 From: "Antonino A. Daplas" Date: Mon, 9 Jan 2006 20:52:54 -0800 Subject: [PATCH] vesafb: Drop blank hook From: Bugzilla Bug 5351 "After resuming from S3 (suspended while in X), the LCD panel stays black . However, the laptop is up again, and I can SSH into it from another machine. I can get the panel working again, when I first direct video output to the CRT output of the laptop, and then back to LCD (done by repeatedly hitting Fn+F5 buttons on the Toshiba, which directs output to either LCD, CRT or TV) None of this ever happened with older kernels." This bug is due to the recently added vesafb_blank() method in vesafb. It works with CRT displays, but has a high incidence of problems in laptop users. Since CRT users don't really get that much benefit from hardware blanking, drop support for this. Signed-off-by: Antonino Daplas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/boot/video.S | 5 ----- drivers/video/vesafb.c | 37 ------------------------------------- include/linux/screen_info.h | 3 +-- 3 files changed, 1 insertion(+), 44 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S index 92f669470142..2ac40c8244c4 100644 --- a/arch/i386/boot/video.S +++ b/arch/i386/boot/video.S @@ -97,7 +97,6 @@ #define PARAM_VESAPM_OFF 0x30 #define PARAM_LFB_PAGES 0x32 #define PARAM_VESA_ATTRIB 0x34 -#define PARAM_CAPABILITIES 0x36 /* Define DO_STORE according to CONFIG_VIDEO_RETAIN */ #ifdef CONFIG_VIDEO_RETAIN @@ -234,10 +233,6 @@ mopar_gr: movw 18(%di), %ax movl %eax, %fs:(PARAM_LFB_SIZE) -# store mode capabilities - movl 10(%di), %eax - movl %eax, %fs:(PARAM_CAPABILITIES) - # switching the DAC to 8-bit is for <= 8 bpp only movw %fs:(PARAM_LFB_DEPTH), %ax cmpw $8, %ax diff --git a/drivers/video/vesafb.c b/drivers/video/vesafb.c index 3e58ddc2bc38..55e28ba57b43 100644 --- a/drivers/video/vesafb.c +++ b/drivers/video/vesafb.c @@ -57,7 +57,6 @@ static unsigned short *pmi_base = NULL; static 
void (*pmi_start)(void); static void (*pmi_pal)(void); static int depth; -static int vga_compat; /* --------------------------------------------------------------------- */ @@ -90,37 +89,6 @@ static int vesafb_pan_display(struct fb_var_screeninfo *var, return 0; } -static int vesafb_blank(int blank, struct fb_info *info) -{ - int err = 1; - - if (vga_compat) { - int loop = 10000; - u8 seq = 0, crtc17 = 0; - - if (blank == FB_BLANK_POWERDOWN) { - seq = 0x20; - crtc17 = 0x00; - err = 0; - } else { - seq = 0x00; - crtc17 = 0x80; - err = (blank == FB_BLANK_UNBLANK) ? 0 : -EINVAL; - } - - vga_wseq(NULL, 0x00, 0x01); - seq |= vga_rseq(NULL, 0x01) & ~0x20; - vga_wseq(NULL, 0x00, seq); - - crtc17 |= vga_rcrt(NULL, 0x17) & ~0x80; - while (loop--); - vga_wcrt(NULL, 0x17, crtc17); - vga_wseq(NULL, 0x00, 0x03); - } - - return err; -} - static void vesa_setpalette(int regno, unsigned red, unsigned green, unsigned blue) { @@ -205,7 +173,6 @@ static struct fb_ops vesafb_ops = { .owner = THIS_MODULE, .fb_setcolreg = vesafb_setcolreg, .fb_pan_display = vesafb_pan_display, - .fb_blank = vesafb_blank, .fb_fillrect = cfb_fillrect, .fb_copyarea = cfb_copyarea, .fb_imageblit = cfb_imageblit, @@ -459,10 +426,6 @@ static int __init vesafb_probe(struct platform_device *dev) info->flags = FBINFO_FLAG_DEFAULT | (ypan) ? FBINFO_HWACCEL_YPAN : 0; - vga_compat = (screen_info.capabilities & 2) ? 0 : 1; - printk("vesafb: Mode is %sVGA compatible\n", - (vga_compat) ? 
"" : "not "); - if (fb_alloc_cmap(&info->cmap, 256, 0) < 0) { err = -ENOMEM; goto err; diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index 76850b75b3f6..6336987dae62 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -41,8 +41,7 @@ struct screen_info { u16 vesapm_off; /* 0x30 */ u16 pages; /* 0x32 */ u16 vesa_attributes; /* 0x34 */ - u32 capabilities; /* 0x36 */ - /* 0x3a -- 0x3f reserved for future expansion */ + /* 0x36 -- 0x3f reserved for future expansion */ }; extern struct screen_info screen_info; -- cgit v1.2.3-71-gd317 From c549dc6422e4b720fed6702d70fddd8cee0f5c9a Mon Sep 17 00:00:00 2001 From: "Antonino A. Daplas" Date: Mon, 9 Jan 2006 20:53:33 -0800 Subject: [PATCH] nvidiafb: Add support for some pci-e chipsets Chipsets with PCI device ids & 0xf0 == 0x00f0 has their actual chipset type in offset 0x1800 of the mmio space. Add support for this. Signed-off-by: Antonino Daplas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/nvidia/nvidia.c | 75 +++++++++++++++++++++++++++++++------------ include/linux/pci_ids.h | 5 +++ 2 files changed, 59 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c index dcf2e7a7d215..099d64af37be 100644 --- a/drivers/video/nvidia/nvidia.c +++ b/drivers/video/nvidia/nvidia.c @@ -284,6 +284,16 @@ static struct pci_device_id nvidiafb_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, {PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_GEFORCE_6200, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {PCI_VENDOR_ID_NVIDIA, PCIE_DEVICE_ID_NVIDIA_GEFORCE_6800_ALT1, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {PCI_VENDOR_ID_NVIDIA, PCIE_DEVICE_ID_NVIDIA_GEFORCE_6600_ALT1, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {PCI_VENDOR_ID_NVIDIA, PCIE_DEVICE_ID_NVIDIA_GEFORCE_6600_ALT2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {PCI_VENDOR_ID_NVIDIA, PCIE_DEVICE_ID_NVIDIA_GEFORCE_6200_ALT1, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + 
{PCI_VENDOR_ID_NVIDIA, PCIE_DEVICE_ID_NVIDIA_GEFORCE_6800_GT, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, {PCI_VENDOR_ID_NVIDIA, 0x0252, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, {PCI_VENDOR_ID_NVIDIA, 0x0313, @@ -1448,11 +1458,34 @@ static int __devinit nvidia_set_fbinfo(struct fb_info *info) return nvidiafb_check_var(&info->var, info); } -static u32 __devinit nvidia_get_arch(struct pci_dev *pd) +static u32 __devinit nvidia_get_chipset(struct fb_info *info) { + struct nvidia_par *par = info->par; + u32 id = (par->pci_dev->vendor << 16) | par->pci_dev->device; + + printk("nvidiafb: PCI id - %x\n", id); + if ((id & 0xfff0) == 0x00f0) { + /* pci-e */ + printk("nvidiafb: PCI-E card\n"); + id = NV_RD32(par->REGS, 0x1800); + + if ((id & 0x0000ffff) == 0x000010DE) + id = 0x10DE0000 | (id >> 16); + else if ((id & 0xffff0000) == 0xDE100000) /* wrong endian */ + id = 0x10DE0000 | ((id << 8) & 0x0000ff00) | + ((id >> 8) & 0x000000ff); + } + + printk("nvidiafb: Actual id - %x\n", id); + return id; +} + +static u32 __devinit nvidia_get_arch(struct fb_info *info) +{ + struct nvidia_par *par = info->par; u32 arch = 0; - switch (pd->device & 0x0ff0) { + switch (par->Chipset & 0x0ff0) { case 0x0100: /* GeForce 256 */ case 0x0110: /* GeForce2 MX */ case 0x0150: /* GeForce2 */ @@ -1535,18 +1568,6 @@ static int __devinit nvidiafb_probe(struct pci_dev *pd, goto err_out_request; } - par->Architecture = nvidia_get_arch(pd); - - par->Chipset = (pd->vendor << 16) | pd->device; - printk(KERN_INFO PFX "nVidia device/chipset %X\n", par->Chipset); - - if (par->Architecture == 0) { - printk(KERN_ERR PFX "unknown NV_ARCH\n"); - goto err_out_free_base0; - } - - sprintf(nvidiafb_fix.id, "NV%x", (pd->device & 0x0ff0) >> 4); - par->FlatPanel = flatpanel; if (flatpanel == 1) printk(KERN_INFO PFX "flatpanel support enabled\n"); @@ -1572,6 +1593,17 @@ static int __devinit nvidiafb_probe(struct pci_dev *pd, goto err_out_free_base0; } + par->Chipset = nvidia_get_chipset(info); + printk(KERN_INFO PFX "nVidia 
device/chipset %X\n", par->Chipset); + par->Architecture = nvidia_get_arch(info); + + if (par->Architecture == 0) { + printk(KERN_ERR PFX "unknown NV_ARCH\n"); + goto err_out_arch; + } + + sprintf(nvidiafb_fix.id, "NV%x", (pd->device & 0x0ff0) >> 4); + NVCommonSetup(info); par->FbAddress = nvidiafb_fix.smem_start; @@ -1647,21 +1679,22 @@ static int __devinit nvidiafb_probe(struct pci_dev *pd, NVTRACE_LEAVE(); return 0; - err_out_iounmap_fb: +err_out_iounmap_fb: iounmap(info->screen_base); - err_out_free_base1: +err_out_free_base1: fb_destroy_modedb(info->monspecs.modedb); nvidia_delete_i2c_busses(par); +err_out_arch: iounmap(par->REGS); - err_out_free_base0: +err_out_free_base0: pci_release_regions(pd); - err_out_request: +err_out_request: pci_disable_device(pd); - err_out_enable: +err_out_enable: kfree(info->pixmap.addr); - err_out_kfree: +err_out_kfree: framebuffer_release(info); - err_out: +err_out: return -ENODEV; } diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 100eba0f4771..f55c98a68aa9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1050,6 +1050,11 @@ #define PCI_DEVICE_ID_NVIDIA_NVENET_6 0x00e6 #define PCI_DEVICE_ID_NVIDIA_CK8S_AUDIO 0x00ea #define PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA2 0x00ee +#define PCIE_DEVICE_ID_NVIDIA_GEFORCE_6800_ALT1 0x00f0 +#define PCIE_DEVICE_ID_NVIDIA_GEFORCE_6600_ALT1 0x00f1 +#define PCIE_DEVICE_ID_NVIDIA_GEFORCE_6600_ALT2 0x00f2 +#define PCIE_DEVICE_ID_NVIDIA_GEFORCE_6200_ALT1 0x00f3 +#define PCIE_DEVICE_ID_NVIDIA_GEFORCE_6800_GT 0x00f9 #define PCI_DEVICE_ID_NVIDIA_GEFORCE_SDR 0x0100 #define PCI_DEVICE_ID_NVIDIA_GEFORCE_DDR 0x0101 #define PCI_DEVICE_ID_NVIDIA_QUADRO 0x0103 -- cgit v1.2.3-71-gd317 From 0863afb32b77fc89c7110b3d10fb048cb56bb1b5 Mon Sep 17 00:00:00 2001 From: Martin Waitz Date: Mon, 9 Jan 2006 20:53:55 -0800 Subject: [PATCH] DocBook: fix kernel-doc comments Fix typos in comments to remove kernel-doc warnings. 
Signed-off-by: Martin Waitz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/core.c | 4 ++-- include/linux/rio_drv.h | 4 ++-- include/sound/core.h | 3 +-- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index fd8059920dbf..6b355bd7816d 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -161,8 +161,8 @@ static ssize_t store_uevent(struct device *dev, struct device_attribute *attr, return count; } -/** - * device_subsys - structure to be registered with kobject core. +/* + * devices_subsys - structure to be registered with kobject core. */ decl_subsys(devices, &ktype_device, &device_uevent_ops); diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 157d7e3236b5..f54772d0e7f8 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -337,8 +337,8 @@ static inline void rio_init_dbell_res(struct resource *res, u16 start, u16 end) /** * RIO_DEVICE - macro used to describe a specific RIO device - * @vid: the 16 bit RIO vendor ID - * @did: the 16 bit RIO device ID + * @dev: the 16 bit RIO device ID + * @ven: the 16 bit RIO vendor ID * * This macro is used to create a struct rio_device_id that matches a * specific device. The assembly vendor and assembly device fields diff --git a/include/sound/core.h b/include/sound/core.h index 90ac6132ea3b..3093e3ddcf36 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -317,7 +317,7 @@ void snd_verbose_printd(const char *file, int line, const char *format, ...) #ifdef CONFIG_SND_VERBOSE_PRINTK /** * snd_printd - debug printk - * @format: format string + * @fmt: format string * * Compiled only when Works like snd_printk() for debugging purpose. * Ignored when CONFIG_SND_DEBUG is not set. @@ -331,7 +331,6 @@ void snd_verbose_printd(const char *file, int line, const char *format, ...) 
/** * snd_assert - run-time assertion macro * @expr: expression - * @args...: the action * * This macro checks the expression in run-time and invokes the commands * given in the rest arguments if the assertion is failed. -- cgit v1.2.3-71-gd317 From 87c2ce3b9305b9b723faeedf6e32ef703ec9b33a Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 9 Jan 2006 20:54:07 -0800 Subject: [PATCH] lib/zlib*: cleanups This patch contains the following possible cleanups: - #if 0 the following unused functions: - zlib_deflate/deflate.c: zlib_deflateSetDictionary - zlib_deflate/deflate.c: zlib_deflateParams - zlib_deflate/deflate.c: zlib_deflateCopy - zlib_inflate/infblock.c: zlib_inflate_set_dictionary - zlib_inflate/infblock.c: zlib_inflate_blocks_sync_point - zlib_inflate/inflate_sync.c: zlib_inflateSync - zlib_inflate/inflate_sync.c: zlib_inflateSyncPoint - remove the following unneeded EXPORT_SYMBOL's: - zlib_deflate/deflate_syms.c: zlib_deflateCopy - zlib_deflate/deflate_syms.c: zlib_deflateParams - zlib_inflate/inflate_syms.c: zlib_inflateSync - zlib_inflate/inflate_syms.c: zlib_inflateSyncPoint Signed-off-by: Adrian Bunk Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zlib.h | 11 +++++++++++ lib/zlib_deflate/deflate.c | 6 ++++++ lib/zlib_deflate/deflate_syms.c | 2 -- lib/zlib_inflate/infblock.c | 4 ++++ lib/zlib_inflate/infblock.h | 4 ++++ lib/zlib_inflate/inflate_syms.c | 2 -- lib/zlib_inflate/inflate_sync.c | 4 ++++ 7 files changed, 29 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zlib.h b/include/linux/zlib.h index 74f7b78c22d2..4fa32f0d4df8 100644 --- a/include/linux/zlib.h +++ b/include/linux/zlib.h @@ -442,9 +442,11 @@ extern int deflateInit2 (z_streamp strm, not perform any compression: this will be done by deflate(). 
*/ +#if 0 extern int zlib_deflateSetDictionary (z_streamp strm, const Byte *dictionary, uInt dictLength); +#endif /* Initializes the compression dictionary from the given byte sequence without producing any compressed output. This function must be called @@ -478,7 +480,10 @@ extern int zlib_deflateSetDictionary (z_streamp strm, perform any compression: this will be done by deflate(). */ +#if 0 extern int zlib_deflateCopy (z_streamp dest, z_streamp source); +#endif + /* Sets the destination stream as a complete copy of the source stream. @@ -511,7 +516,9 @@ static inline unsigned long deflateBound(unsigned long s) return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11; } +#if 0 extern int zlib_deflateParams (z_streamp strm, int level, int strategy); +#endif /* Dynamically update the compression level and compression strategy. The interpretation of level and strategy is as in deflateInit2. This can be @@ -571,7 +578,9 @@ extern int zlib_inflateSetDictionary (z_streamp strm, inflate(). */ +#if 0 extern int zlib_inflateSync (z_streamp strm); +#endif /* Skips invalid compressed data until a full flush point (see above the description of deflate with Z_FULL_FLUSH) can be found, or until all @@ -636,7 +645,9 @@ extern int zlib_inflateInit2_ (z_streamp strm, int windowBits, #endif extern const char * zlib_zError (int err); +#if 0 extern int zlib_inflateSyncPoint (z_streamp z); +#endif extern const uLong * zlib_get_crc_table (void); #endif /* _ZLIB_H */ diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c index ad9a1bf4fc63..1653dd9bb01a 100644 --- a/lib/zlib_deflate/deflate.c +++ b/lib/zlib_deflate/deflate.c @@ -255,6 +255,7 @@ int zlib_deflateInit2_( } /* ========================================================================= */ +#if 0 int zlib_deflateSetDictionary( z_streamp strm, const Byte *dictionary, @@ -297,6 +298,7 @@ int zlib_deflateSetDictionary( if (hash_head) hash_head = 0; /* to make compiler happy */ return Z_OK; } +#endif /* 0 */ /* 
========================================================================= */ int zlib_deflateReset( @@ -330,6 +332,7 @@ int zlib_deflateReset( } /* ========================================================================= */ +#if 0 int zlib_deflateParams( z_streamp strm, int level, @@ -365,6 +368,7 @@ int zlib_deflateParams( s->strategy = strategy; return err; } +#endif /* 0 */ /* ========================================================================= * Put a short in the pending buffer. The 16-bit value is put in MSB order. @@ -572,6 +576,7 @@ int zlib_deflateEnd( /* ========================================================================= * Copy the source state to the destination state. */ +#if 0 int zlib_deflateCopy ( z_streamp dest, z_streamp source @@ -624,6 +629,7 @@ int zlib_deflateCopy ( return Z_OK; #endif } +#endif /* 0 */ /* =========================================================================== * Read a new buffer from the current input stream, update the adler32 diff --git a/lib/zlib_deflate/deflate_syms.c b/lib/zlib_deflate/deflate_syms.c index 5985b28c8e30..767b573d1ef6 100644 --- a/lib/zlib_deflate/deflate_syms.c +++ b/lib/zlib_deflate/deflate_syms.c @@ -16,6 +16,4 @@ EXPORT_SYMBOL(zlib_deflateInit_); EXPORT_SYMBOL(zlib_deflateInit2_); EXPORT_SYMBOL(zlib_deflateEnd); EXPORT_SYMBOL(zlib_deflateReset); -EXPORT_SYMBOL(zlib_deflateCopy); -EXPORT_SYMBOL(zlib_deflateParams); MODULE_LICENSE("GPL"); diff --git a/lib/zlib_inflate/infblock.c b/lib/zlib_inflate/infblock.c index 50f21ca4ef7f..c16cdeff51aa 100644 --- a/lib/zlib_inflate/infblock.c +++ b/lib/zlib_inflate/infblock.c @@ -338,6 +338,7 @@ int zlib_inflate_blocks_free( } +#if 0 void zlib_inflate_set_dictionary( inflate_blocks_statef *s, const Byte *d, @@ -347,15 +348,18 @@ void zlib_inflate_set_dictionary( memcpy(s->window, d, n); s->read = s->write = s->window + n; } +#endif /* 0 */ /* Returns true if inflate is currently at the end of a block generated * by Z_SYNC_FLUSH or Z_FULL_FLUSH. 
* IN assertion: s != NULL */ +#if 0 int zlib_inflate_blocks_sync_point( inflate_blocks_statef *s ) { return s->mode == LENS; } +#endif /* 0 */ diff --git a/lib/zlib_inflate/infblock.h b/lib/zlib_inflate/infblock.h index f5221ddf6054..ceee60b5107c 100644 --- a/lib/zlib_inflate/infblock.h +++ b/lib/zlib_inflate/infblock.h @@ -33,12 +33,16 @@ extern int zlib_inflate_blocks_free ( inflate_blocks_statef *, z_streamp); +#if 0 extern void zlib_inflate_set_dictionary ( inflate_blocks_statef *s, const Byte *d, /* dictionary */ uInt n); /* dictionary length */ +#endif /* 0 */ +#if 0 extern int zlib_inflate_blocks_sync_point ( inflate_blocks_statef *s); +#endif /* 0 */ #endif /* _INFBLOCK_H */ diff --git a/lib/zlib_inflate/inflate_syms.c b/lib/zlib_inflate/inflate_syms.c index aa1b08189121..ef49738f57ec 100644 --- a/lib/zlib_inflate/inflate_syms.c +++ b/lib/zlib_inflate/inflate_syms.c @@ -15,8 +15,6 @@ EXPORT_SYMBOL(zlib_inflate); EXPORT_SYMBOL(zlib_inflateInit_); EXPORT_SYMBOL(zlib_inflateInit2_); EXPORT_SYMBOL(zlib_inflateEnd); -EXPORT_SYMBOL(zlib_inflateSync); EXPORT_SYMBOL(zlib_inflateReset); -EXPORT_SYMBOL(zlib_inflateSyncPoint); EXPORT_SYMBOL(zlib_inflateIncomp); MODULE_LICENSE("GPL"); diff --git a/lib/zlib_inflate/inflate_sync.c b/lib/zlib_inflate/inflate_sync.c index e07bdb21f55c..61411ff89d61 100644 --- a/lib/zlib_inflate/inflate_sync.c +++ b/lib/zlib_inflate/inflate_sync.c @@ -7,6 +7,7 @@ #include "infblock.h" #include "infutil.h" +#if 0 int zlib_inflateSync( z_streamp z ) @@ -57,6 +58,7 @@ int zlib_inflateSync( z->state->mode = BLOCKS; return Z_OK; } +#endif /* 0 */ /* Returns true if inflate is currently at the end of a block generated @@ -66,6 +68,7 @@ int zlib_inflateSync( * decompressing, PPP checks that at the end of input packet, inflate is * waiting for these length bytes. 
*/ +#if 0 int zlib_inflateSyncPoint( z_streamp z ) @@ -74,6 +77,7 @@ int zlib_inflateSyncPoint( return Z_STREAM_ERROR; return zlib_inflate_blocks_sync_point(z->state->blocks); } +#endif /* 0 */ /* * This subroutine adds the data at next_in/avail_in to the output history -- cgit v1.2.3-71-gd317 From 33f0f88f1c51ae5c2d593d26960c760ea154c2e2 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 9 Jan 2006 20:54:13 -0800 Subject: [PATCH] TTY layer buffering revamp The API and code have been through various bits of initial review by serial driver people but they definitely need to live somewhere for a while so the unconverted drivers can get knocked into shape, existing drivers that have been updated can be better tuned and bugs whacked out. This replaces the tty flip buffers with kmalloc objects in rings. In the normal situation for an IRQ driven serial port at typical speeds the behaviour is pretty much the same, two buffers end up allocated and the kernel cycles between them as before. When there are delays or at high speed we now behave far better as the buffer pool can grow a bit rather than lose characters. This also means that we can operate at higher speeds reliably. For drivers that receive characters in blocks (DMA based, USB and especially virtualisation) the layer allows a lot of driver specific code that works around the tty layer with private secondary queues to be removed. The IBM folks need this sort of layer, the smart serial port people do, the virtualisers do (because a virtualised tty typically operates at infinite speed rather than emulating 9600 baud). Finally many drivers had invalid and unsafe attempts to avoid buffer overflows by directly invoking tty methods extracted out of the innards of work queue structs. These are no longer needed and all go away. That fixes various random hangs with serial ports on overflow. The other change in here is to optimise the receive_room path that is used by some callers. 
It turns out that only one ldisc uses receive room except as a constant and it updates it far far less than the value is read. We thus make it a variable not a function call. I expect the code to contain bugs due to the size alone but I'll be watching and squashing them and feeding out new patches as it goes. Because the buffers now dynamically expand you should only run out of buffering when the kernel runs out of memory for real. That means a lot of the horrible hacks high performance drivers used to do just aren't needed any more. Description: tty_insert_flip_char is an old API and continues to work as before, as does tty_flip_buffer_push() [this is why many drivers don't need modification]. It does now also return the number of chars inserted. There are also tty_buffer_request_room(tty, len) which asks for a buffer block of the length requested and returns the space found. This improves efficiency with hardware that knows how much to transfer. and tty_insert_flip_string_flags(tty, str, flags, len) to insert a string of characters and flags. For a smart interface the usual code is len = tty_buffer_request_room(tty, amount_hardware_says); tty_insert_flip_string(tty, buffer_from_card, len); More description! At the moment tty buffers are attached directly to the tty. This is causing a lot of the problems related to tty layer locking, also problems at high speed and also with bursty data (such as occurs in virtualised environments). I'm working on ripping out the flip buffers and replacing them with a pool of dynamically allocated buffers. This allows both for old style "byte I/O" devices and also helps virtualisation and smart devices where large blocks of data suddenly materialise and need storing. So far so good. Lots of drivers reference tty->flip.*. Several of them also call directly and unsafely into function pointers it provides. This will all break. Most drivers can use tty_insert_flip_char which can be kept as an API but others need more.
At the moment I've added the following interfaces, if people think more will be needed now is a good time to say int tty_buffer_request_room(tty, size) Try and ensure at least size bytes are available, returns actual room (may be zero). At the moment it just uses the flipbuf space but that will change. Repeated calls without characters being added are not cumulative (i.e. if you call it with 1, 1, 1, and then 4 you'll have four characters of space). The other functions will also try and grow buffers in future but this will be a more efficient way when you know block sizes. int tty_insert_flip_char(tty, ch, flag) As before insert a character if there is room. Now returns 1 for success, 0 for failure. int tty_insert_flip_string(tty, str, len) Insert a block of non-error characters. Returns the number inserted. int tty_prepare_flip_string(tty, strptr, len) Adjust the buffer to allow len characters to be added. Returns a buffer pointer in strptr and the length available. This allows for hardware that needs to use functions like insl or memcpy_fromio.
Signed-off-by: Alan Cox Cc: Paul Fulghum Signed-off-by: Hirokazu Takata Signed-off-by: Serge Hallyn Signed-off-by: Jeff Dike Signed-off-by: John Hawkes Signed-off-by: Martin Schwidefsky Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/drivers/chan_kern.c | 6 +- drivers/bluetooth/hci_ldisc.c | 16 +- drivers/char/Kconfig | 6 +- drivers/char/amiserial.c | 33 ++--- drivers/char/cyclades.c | 89 ++++-------- drivers/char/epca.c | 17 +-- drivers/char/esp.c | 67 ++++----- drivers/char/hvc_console.c | 6 +- drivers/char/hvcs.c | 10 +- drivers/char/isicom.c | 24 ++- drivers/char/istallion.c | 26 ++-- drivers/char/moxa.c | 73 ++++------ drivers/char/mxser.c | 2 +- drivers/char/n_hdlc.c | 18 +-- drivers/char/n_r3964.c | 10 +- drivers/char/n_tty.c | 66 +++++---- drivers/char/pcmcia/synclink_cs.c | 28 ++-- drivers/char/pty.c | 4 +- drivers/char/rio/riointr.c | 13 +- drivers/char/riscom8.c | 39 ++--- drivers/char/rocket.c | 19 +-- drivers/char/selection.c | 5 +- drivers/char/ser_a2232.c | 8 +- drivers/char/serial167.c | 35 ++--- drivers/char/specialix.c | 38 ++--- drivers/char/stallion.c | 50 ++----- drivers/char/sx.c | 13 +- drivers/char/synclink.c | 43 ++---- drivers/char/synclink_gt.c | 35 +++-- drivers/char/synclinkmp.c | 34 ++--- drivers/char/tty_io.c | 266 ++++++++++++++++++++++++++++++---- drivers/char/viocons.c | 3 +- drivers/char/vme_scc.c | 16 +- drivers/input/serio/serport.c | 13 +- drivers/isdn/capi/capi.c | 3 +- drivers/isdn/i4l/isdn_common.c | 112 ++++++++++++++ drivers/isdn/i4l/isdn_common.h | 1 + drivers/isdn/i4l/isdn_tty.c | 75 +++++----- drivers/net/hamradio/6pack.c | 7 +- drivers/net/hamradio/mkiss.c | 7 +- drivers/net/irda/irtty-sir.c | 18 +-- drivers/net/ppp_async.c | 9 +- drivers/net/ppp_synctty.c | 9 +- drivers/net/slip.c | 11 +- drivers/net/wan/pc300_tty.c | 2 +- drivers/net/wan/x25_asy.c | 7 +- drivers/net/wireless/strip.c | 10 +- drivers/s390/char/con3215.c | 25 +--- drivers/s390/char/sclp_tty.c | 21 +-- 
drivers/s390/char/sclp_vt220.c | 12 +- drivers/s390/net/ctctty.c | 28 +--- drivers/serial/21285.c | 9 -- drivers/serial/68328serial.c | 18 +-- drivers/serial/68360serial.c | 54 ++----- drivers/serial/8250.c | 13 -- drivers/serial/amba-pl010.c | 9 -- drivers/serial/amba-pl011.c | 9 -- drivers/serial/au1x00_uart.c | 31 ++-- drivers/serial/clps711x.c | 2 - drivers/serial/dz.c | 2 - drivers/serial/icom.c | 57 +++----- drivers/serial/imx.c | 3 - drivers/serial/ioc4_serial.c | 10 +- drivers/serial/ip22zilog.c | 34 ++--- drivers/serial/m32r_sio.c | 31 ++-- drivers/serial/mcfserial.c | 23 +-- drivers/serial/mpc52xx_uart.c | 46 ++---- drivers/serial/mpsc.c | 6 +- drivers/serial/mux.c | 9 +- drivers/serial/pmac_zilog.c | 39 +---- drivers/serial/pxa.c | 8 - drivers/serial/s3c2410.c | 10 -- drivers/serial/sa1100.c | 2 - drivers/serial/serial_lh7a40x.c | 9 -- drivers/serial/serial_txx9.c | 11 -- drivers/serial/sh-sci.c | 81 ++++------- drivers/serial/sn_console.c | 6 +- drivers/serial/sunsab.c | 38 ++--- drivers/serial/sunsu.c | 32 ++-- drivers/serial/sunzilog.c | 34 +---- drivers/serial/vr41xx_siu.c | 5 - drivers/usb/class/cdc-acm.c | 11 +- drivers/usb/gadget/serial.c | 19 +-- drivers/usb/serial/Kconfig | 2 +- drivers/usb/serial/cyberjack.c | 11 +- drivers/usb/serial/cypress_m8.c | 4 +- drivers/usb/serial/digi_acceleport.c | 28 ++-- drivers/usb/serial/empeg.c | 16 +- drivers/usb/serial/ftdi_sio.c | 15 +- drivers/usb/serial/garmin_gps.c | 13 +- drivers/usb/serial/generic.c | 11 +- drivers/usb/serial/io_edgeport.c | 20 +-- drivers/usb/serial/io_ti.c | 20 +-- drivers/usb/serial/ipaq.c | 12 +- drivers/usb/serial/ipw.c | 11 +- drivers/usb/serial/kl5kusb105.c | 13 +- drivers/usb/serial/kobil_sct.c | 11 +- drivers/usb/serial/option.c | 9 +- drivers/usb/serial/pl2303.c | 8 +- drivers/usb/serial/ti_usb_3410_5052.c | 20 +-- drivers/usb/serial/visor.c | 11 +- drivers/usb/serial/whiteheat.c | 11 +- include/linux/kbd_kern.h | 2 +- include/linux/tty.h | 25 ++-- include/linux/tty_flip.h | 20 
++- include/linux/tty_ldisc.h | 9 -- net/bluetooth/rfcomm/tty.c | 9 +- 107 files changed, 1005 insertions(+), 1465 deletions(-) (limited to 'include/linux') diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index cd13b91b9ff6..ab0d0b170816 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -186,9 +186,6 @@ static void tty_receive_char(struct tty_struct *tty, char ch) } } - if((tty->flip.flag_buf_ptr == NULL) || - (tty->flip.char_buf_ptr == NULL)) - return; tty_insert_flip_char(tty, ch, TTY_NORMAL); } @@ -653,8 +650,7 @@ void chan_interrupt(struct list_head *chans, struct work_struct *task, chan = list_entry(ele, struct chan, list); if(!chan->input || (chan->ops->read == NULL)) continue; do { - if((tty != NULL) && - (tty->flip.count >= TTY_FLIPBUF_SIZE)){ + if (tty && !tty_buffer_request_room(tty, 1)) { schedule_delayed_work(task, 1); goto out; } diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 573ff6c1be5f..613673b12fa6 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -279,6 +279,7 @@ static int hci_uart_tty_open(struct tty_struct *tty) tty->disc_data = hu; hu->tty = tty; + tty->receive_room = 65536; spin_lock_init(&hu->rx_lock); @@ -348,20 +349,6 @@ static void hci_uart_tty_wakeup(struct tty_struct *tty) hci_uart_tx_wakeup(hu); } -/* hci_uart_tty_room() - * - * Callback function from tty driver. Return the amount of - * space left in the receiver's buffer to decide if remote - * transmitter is to be throttled. 
- * - * Arguments: tty pointer to associated tty instance data - * Return Value: number of bytes left in receive buffer - */ -static int hci_uart_tty_room (struct tty_struct *tty) -{ - return 65536; -} - /* hci_uart_tty_receive() * * Called by tty low level driver when receive data is @@ -544,7 +531,6 @@ static int __init hci_uart_init(void) hci_uart_ldisc.write = hci_uart_tty_write; hci_uart_ldisc.ioctl = hci_uart_tty_ioctl; hci_uart_ldisc.poll = hci_uart_tty_poll; - hci_uart_ldisc.receive_room = hci_uart_tty_room; hci_uart_ldisc.receive_buf = hci_uart_tty_receive; hci_uart_ldisc.write_wakeup = hci_uart_tty_wakeup; hci_uart_ldisc.owner = THIS_MODULE; diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 977a74e16efb..d6fcd0a36f9f 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -80,7 +80,7 @@ config SERIAL_NONSTANDARD config COMPUTONE tristate "Computone IntelliPort Plus serial support" - depends on SERIAL_NONSTANDARD && BROKEN_ON_SMP + depends on SERIAL_NONSTANDARD ---help--- This driver supports the entire family of Intelliport II/Plus controllers with the exception of the MicroChannel controllers and @@ -153,7 +153,7 @@ config DIGIEPCA config ESPSERIAL tristate "Hayes ESP serial port support" - depends on SERIAL_NONSTANDARD && ISA && BROKEN_ON_SMP && ISA_DMA_API + depends on SERIAL_NONSTANDARD && ISA && ISA_DMA_API help This is a driver which supports Hayes ESP serial ports. Both single port cards and multiport cards are supported. Make sure to read @@ -166,7 +166,7 @@ config ESPSERIAL config MOXA_INTELLIO tristate "Moxa Intellio support" - depends on SERIAL_NONSTANDARD && BROKEN_ON_SMP + depends on SERIAL_NONSTANDARD help Say Y here if you have a Moxa Intellio multiport serial card. 
diff --git a/drivers/char/amiserial.c b/drivers/char/amiserial.c index 10c81ecdace8..869518e4035f 100644 --- a/drivers/char/amiserial.c +++ b/drivers/char/amiserial.c @@ -265,8 +265,9 @@ static _INLINE_ void receive_chars(struct async_struct *info) int status; int serdatr; struct tty_struct *tty = info->tty; - unsigned char ch; + unsigned char ch, flag; struct async_icount *icount; + int oe = 0; icount = &info->state->icount; @@ -282,15 +283,12 @@ static _INLINE_ void receive_chars(struct async_struct *info) status |= UART_LSR_OE; ch = serdatr & 0xff; - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - goto ignore_char; - *tty->flip.char_buf_ptr = ch; icount->rx++; #ifdef SERIAL_DEBUG_INTR printk("DR%02x:%02x...", ch, status); #endif - *tty->flip.flag_buf_ptr = 0; + flag = TTY_NORMAL; /* * We don't handle parity or frame errors - but I have left @@ -319,7 +317,7 @@ static _INLINE_ void receive_chars(struct async_struct *info) * should be ignored. */ if (status & info->ignore_status_mask) - goto ignore_char; + goto out; status &= info->read_status_mask; @@ -327,33 +325,28 @@ static _INLINE_ void receive_chars(struct async_struct *info) #ifdef SERIAL_DEBUG_INTR printk("handling break...."); #endif - *tty->flip.flag_buf_ptr = TTY_BREAK; + flag = TTY_BREAK; if (info->flags & ASYNC_SAK) do_SAK(tty); } else if (status & UART_LSR_PE) - *tty->flip.flag_buf_ptr = TTY_PARITY; + flag = TTY_PARITY; else if (status & UART_LSR_FE) - *tty->flip.flag_buf_ptr = TTY_FRAME; + flag = TTY_FRAME; if (status & UART_LSR_OE) { /* * Overrun is special, since it's * reported immediately, and doesn't * affect the current character */ - if (tty->flip.count < TTY_FLIPBUF_SIZE) { - tty->flip.count++; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - *tty->flip.flag_buf_ptr = TTY_OVERRUN; - } + oe = 1; } } - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; - ignore_char: - + tty_insert_flip_char(tty, ch, flag); + if (oe == 1) + tty_insert_flip_char(tty, 0, TTY_OVERRUN); 
tty_flip_buffer_push(tty); +out: + return; } static _INLINE_ void transmit_chars(struct async_struct *info) diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c index c8e7daecad72..39c61a71176e 100644 --- a/drivers/char/cyclades.c +++ b/drivers/char/cyclades.c @@ -641,6 +641,7 @@ static char rcsid[] = #include #include #include +#include #include #include #include @@ -1086,7 +1087,7 @@ cyy_interrupt(int irq, void *dev_id, struct pt_regs *regs) int had_work; int mdm_change; int mdm_status; - + int len; if((cinfo = (struct cyclades_card *)dev_id) == 0){ #ifdef CY_DEBUG_INTERRUPTS printk("cyy_interrupt: spurious interrupt %d\n\r", irq); @@ -1163,63 +1164,43 @@ cyy_interrupt(int irq, void *dev_id, struct pt_regs *regs) info->icount.rx++; continue; } - if (tty->flip.count < TTY_FLIPBUF_SIZE){ - tty->flip.count++; + if (tty_buffer_request_room(tty, 1)) { if (data & info->read_status_mask){ if(data & CyBREAK){ - *tty->flip.flag_buf_ptr++ = - TTY_BREAK; - *tty->flip.char_buf_ptr++ = - cy_readb(base_addr+(CyRDSR<icount.rx++; if (info->flags & ASYNC_SAK){ do_SAK(tty); } }else if(data & CyFRAME){ - *tty->flip.flag_buf_ptr++ = - TTY_FRAME; - *tty->flip.char_buf_ptr++ = - cy_readb(base_addr+(CyRDSR<icount.rx++; info->idle_stats.frame_errs++; }else if(data & CyPARITY){ - *tty->flip.flag_buf_ptr++ = - TTY_PARITY; - *tty->flip.char_buf_ptr++ = - cy_readb(base_addr+(CyRDSR<icount.rx++; info->idle_stats.parity_errs++; }else if(data & CyOVERRUN){ - *tty->flip.flag_buf_ptr++ = - TTY_OVERRUN; - *tty->flip.char_buf_ptr++ = 0; + tty_insert_flip_char(tty, 0, TTY_OVERRUN); info->icount.rx++; /* If the flip buffer itself is overflowing, we still lose the next incoming character. 
*/ - if(tty->flip.count - < TTY_FLIPBUF_SIZE){ - tty->flip.count++; - *tty->flip.flag_buf_ptr++ = - TTY_NORMAL; - *tty->flip.char_buf_ptr++ = - cy_readb(base_addr+(CyRDSR<icount.rx++; - } + tty_insert_flip_char(tty, cy_readb(base_addr+(CyRDSR<icount.rx++; info->idle_stats.overruns++; /* These two conditions may imply */ /* a normal read should be done. */ /* }else if(data & CyTIMEOUT){ */ /* }else if(data & CySPECHAR){ */ - }else{ - *tty->flip.flag_buf_ptr++ = 0; - *tty->flip.char_buf_ptr++ = 0; - info->icount.rx++; + }else { + tty_insert_flip_char(tty, 0, TTY_NORMAL); + info->icount.rx++; } }else{ - *tty->flip.flag_buf_ptr++ = 0; - *tty->flip.char_buf_ptr++ = 0; + tty_insert_flip_char(tty, 0, TTY_NORMAL); info->icount.rx++; } }else{ @@ -1240,14 +1221,10 @@ cyy_interrupt(int irq, void *dev_id, struct pt_regs *regs) info->mon.char_max = char_count; info->mon.char_last = char_count; #endif - while(char_count--){ - if (tty->flip.count >= TTY_FLIPBUF_SIZE){ - break; - } - tty->flip.count++; + len = tty_buffer_request_room(tty, char_count); + while(len--){ data = cy_readb(base_addr+(CyRDSR<flip.flag_buf_ptr++ = TTY_NORMAL; - *tty->flip.char_buf_ptr++ = data; + tty_insert_flip_char(tty, data, TTY_NORMAL); info->idle_stats.recv_bytes++; info->icount.rx++; #ifdef CY_16Y_HACK @@ -1256,7 +1233,7 @@ cyy_interrupt(int irq, void *dev_id, struct pt_regs *regs) } info->idle_stats.recv_idle = jiffies; } - schedule_delayed_work(&tty->flip.work, 1); + schedule_delayed_work(&tty->buf.work, 1); } /* end of service */ cy_writeb(base_addr+(CyRIR<card]; struct tty_struct *tty = info->tty; volatile int char_count; + int len; #ifdef BLOCKMOVE int small_count; #else @@ -1606,18 +1584,11 @@ cyz_handle_rx(struct cyclades_port *info, tty->flip.count += small_count; } #else - while(char_count--){ - if (tty->flip.count >= N_TTY_BUF_SIZE - tty->read_cnt) - break; - - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - break; - + len = tty_buffer_request_room(tty, char_count); + while(len--){ data = 
cy_readb(cinfo->base_addr + rx_bufaddr + new_rx_get); new_rx_get = (new_rx_get + 1) & (rx_bufsize - 1); - tty->flip.count++; - *tty->flip.flag_buf_ptr++ = TTY_NORMAL; - *tty->flip.char_buf_ptr++ = data; + tty_insert_flip_char(tty, data, TTY_NORMAL); info->idle_stats.recv_bytes++; info->icount.rx++; } @@ -1635,7 +1606,7 @@ cyz_handle_rx(struct cyclades_port *info, } #endif info->idle_stats.recv_idle = jiffies; - schedule_delayed_work(&tty->flip.work, 1); + schedule_delayed_work(&tty->buf.work, 1); } /* Update rx_get */ cy_writel(&buf_ctrl->rx_get, new_rx_get); @@ -1763,23 +1734,17 @@ cyz_handle_cmd(struct cyclades_card *cinfo) switch(cmd) { case C_CM_PR_ERROR: - tty->flip.count++; - *tty->flip.flag_buf_ptr++ = TTY_PARITY; - *tty->flip.char_buf_ptr++ = 0; + tty_insert_flip_char(tty, 0, TTY_PARITY); info->icount.rx++; special_count++; break; case C_CM_FR_ERROR: - tty->flip.count++; - *tty->flip.flag_buf_ptr++ = TTY_FRAME; - *tty->flip.char_buf_ptr++ = 0; + tty_insert_flip_char(tty, 0, TTY_FRAME); info->icount.rx++; special_count++; break; case C_CM_RXBRK: - tty->flip.count++; - *tty->flip.flag_buf_ptr++ = TTY_BREAK; - *tty->flip.char_buf_ptr++ = 0; + tty_insert_flip_char(tty, 0, TTY_BREAK); info->icount.rx++; special_count++; break; @@ -1844,7 +1809,7 @@ cyz_handle_cmd(struct cyclades_card *cinfo) if(delta_count) cy_sched_event(info, Cy_EVENT_DELTA_WAKEUP); if(special_count) - schedule_delayed_work(&tty->flip.work, 1); + schedule_delayed_work(&tty->buf.work, 1); } } diff --git a/drivers/char/epca.c b/drivers/char/epca.c index 407708a001e4..765c5c108bf4 100644 --- a/drivers/char/epca.c +++ b/drivers/char/epca.c @@ -1786,9 +1786,7 @@ static void doevent(int crd) if (tty) { /* Begin if valid tty */ if (event & BREAK_IND) { /* Begin if BREAK_IND */ /* A break has been indicated */ - tty->flip.count++; - *tty->flip.flag_buf_ptr++ = TTY_BREAK; - *tty->flip.char_buf_ptr++ = 0; + tty_insert_flip_char(tty, 0, TTY_BREAK); tty_schedule_flip(tty); } else if (event & LOWTX_IND) { 
/* Begin LOWTX_IND */ if (ch->statusflags & LOWWAIT) @@ -2124,7 +2122,6 @@ static void receive_data(struct channel *ch) int dataToRead, wrapgap, bytesAvailable; unsigned int tail, head; unsigned int wrapmask; - int rc; /* --------------------------------------------------------------- This routine is called by doint when a receive data event @@ -2162,16 +2159,15 @@ static void receive_data(struct channel *ch) return; } - if (tty->flip.count == TTY_FLIPBUF_SIZE) + if (tty_buffer_request_room(tty, bytesAvailable + 1) == 0) return; if (readb(&bc->orun)) { writeb(0, &bc->orun); printk(KERN_WARNING "epca; overrun! DigiBoard device %s\n",tty->name); + tty_insert_flip_char(tty, 0, TTY_OVERRUN); } rxwinon(ch); - rptr = tty->flip.char_buf_ptr; - rc = tty->flip.count; while (bytesAvailable > 0) { /* Begin while there is data on the card */ wrapgap = (head >= tail) ? head - tail : ch->rxbufsize - tail; /* --------------------------------------------------------------- @@ -2183,8 +2179,7 @@ static void receive_data(struct channel *ch) /* -------------------------------------------------------------- Make sure we don't overflow the buffer ----------------------------------------------------------------- */ - if ((rc + dataToRead) > TTY_FLIPBUF_SIZE) - dataToRead = TTY_FLIPBUF_SIZE - rc; + dataToRead = tty_prepare_flip_string(tty, &rptr, dataToRead); if (dataToRead == 0) break; /* --------------------------------------------------------------- @@ -2192,13 +2187,9 @@ static void receive_data(struct channel *ch) for translation if necessary. 
------------------------------------------------------------------ */ memcpy_fromio(rptr, ch->rxptr + tail, dataToRead); - rc += dataToRead; - rptr += dataToRead; tail = (tail + dataToRead) & wrapmask; bytesAvailable -= dataToRead; } /* End while there is data on the card */ - tty->flip.count = rc; - tty->flip.char_buf_ptr = rptr; globalwinon(ch); writew(tail, &bc->rout); /* Must be called with global data */ diff --git a/drivers/char/esp.c b/drivers/char/esp.c index 9f53d2fcc360..e469f641c728 100644 --- a/drivers/char/esp.c +++ b/drivers/char/esp.c @@ -345,26 +345,22 @@ static inline void receive_chars_pio(struct esp_struct *info, int num_bytes) for (i = 0; i < num_bytes; i++) { if (!(err_buf->data[i] & status_mask)) { - *(tty->flip.char_buf_ptr++) = pio_buf->data[i]; + int flag = 0; if (err_buf->data[i] & 0x04) { - *(tty->flip.flag_buf_ptr++) = TTY_BREAK; - + flag = TTY_BREAK; if (info->flags & ASYNC_SAK) do_SAK(tty); } else if (err_buf->data[i] & 0x02) - *(tty->flip.flag_buf_ptr++) = TTY_FRAME; + flag = TTY_FRAME; else if (err_buf->data[i] & 0x01) - *(tty->flip.flag_buf_ptr++) = TTY_PARITY; - else - *(tty->flip.flag_buf_ptr++) = 0; - - tty->flip.count++; + flag = TTY_PARITY; + tty_insert_flip_char(tty, pio_buf->data[i], flag); } } - schedule_delayed_work(&tty->flip.work, 1); + schedule_delayed_work(&tty->buf.work, 1); info->stat_flags &= ~ESP_STAT_RX_TIMEOUT; release_pio_buffer(pio_buf); @@ -397,7 +393,6 @@ static inline void receive_chars_dma_done(struct esp_struct *info, int num_bytes; unsigned long flags; - flags=claim_dma_lock(); disable_dma(dma); clear_dma_ff(dma); @@ -408,38 +403,31 @@ static inline void receive_chars_dma_done(struct esp_struct *info, info->icount.rx += num_bytes; - memcpy(tty->flip.char_buf_ptr, dma_buffer, num_bytes); - tty->flip.char_buf_ptr += num_bytes; - tty->flip.count += num_bytes; - memset(tty->flip.flag_buf_ptr, 0, num_bytes); - tty->flip.flag_buf_ptr += num_bytes; - if (num_bytes > 0) { - tty->flip.flag_buf_ptr--; + 
tty_insert_flip_string(tty, dma_buffer, num_bytes - 1); status &= (0x1c & info->read_status_mask); + + /* Is the status significant or do we throw the last byte ? */ + if (!(status & info->ignore_status_mask)) { + int statflag = 0; - if (status & info->ignore_status_mask) { - tty->flip.count--; - tty->flip.char_buf_ptr--; - tty->flip.flag_buf_ptr--; - } else if (status & 0x10) { - *tty->flip.flag_buf_ptr = TTY_BREAK; - (info->icount.brk)++; - if (info->flags & ASYNC_SAK) - do_SAK(tty); - } else if (status & 0x08) { - *tty->flip.flag_buf_ptr = TTY_FRAME; - (info->icount.frame)++; - } - else if (status & 0x04) { - *tty->flip.flag_buf_ptr = TTY_PARITY; - (info->icount.parity)++; + if (status & 0x10) { + statflag = TTY_BREAK; + (info->icount.brk)++; + if (info->flags & ASYNC_SAK) + do_SAK(tty); + } else if (status & 0x08) { + statflag = TTY_FRAME; + (info->icount.frame)++; + } + else if (status & 0x04) { + statflag = TTY_PARITY; + (info->icount.parity)++; + } + tty_insert_flip_char(tty, dma_buffer[num_bytes - 1], statflag); } - - tty->flip.flag_buf_ptr++; - - schedule_delayed_work(&tty->flip.work, 1); + schedule_delayed_work(&tty->buf.work, 1); } if (dma_bytes != num_bytes) { @@ -693,8 +681,7 @@ static irqreturn_t rs_interrupt_single(int irq, void *dev_id, num_bytes = serial_in(info, UART_ESI_STAT1) << 8; num_bytes |= serial_in(info, UART_ESI_STAT2); - if (num_bytes > (TTY_FLIPBUF_SIZE - info->tty->flip.count)) - num_bytes = TTY_FLIPBUF_SIZE - info->tty->flip.count; + num_bytes = tty_buffer_request_room(info->tty, num_bytes); if (num_bytes) { if (dma_bytes || diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index f92177634677..1994a92d4733 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -597,9 +597,7 @@ static int hvc_poll(struct hvc_struct *hp) /* Read data if any */ for (;;) { - int count = N_INBUF; - if (count > (TTY_FLIPBUF_SIZE - tty->flip.count)) - count = TTY_FLIPBUF_SIZE - tty->flip.count; + int count = 
tty_buffer_request_room(tty, N_INBUF); /* If flip is full, just reschedule a later read */ if (count == 0) { @@ -635,7 +633,7 @@ static int hvc_poll(struct hvc_struct *hp) tty_insert_flip_char(tty, buf[i], 0); } - if (tty->flip.count) + if (count) tty_schedule_flip(tty); /* diff --git a/drivers/char/hvcs.c b/drivers/char/hvcs.c index 53dc77c760fc..831eb4e8d9d3 100644 --- a/drivers/char/hvcs.c +++ b/drivers/char/hvcs.c @@ -456,12 +456,11 @@ static int hvcs_io(struct hvcs_struct *hvcsd) /* remove the read masks */ hvcsd->todo_mask &= ~(HVCS_READ_MASK); - if ((tty->flip.count + HVCS_BUFF_LEN) < TTY_FLIPBUF_SIZE) { + if (tty_buffer_request_room(tty, HVCS_BUFF_LEN) >= HVCS_BUFF_LEN) { got = hvc_get_chars(unit_address, &buf[0], HVCS_BUFF_LEN); - for (i=0;got && itodo_mask |= HVCS_QUICK_READ; spin_unlock_irqrestore(&hvcsd->lock, flags); - if (tty->flip.count) { - /* This is synch because tty->low_latency == 1 */ + /* This is synch because tty->low_latency == 1 */ + if(got) tty_flip_buffer_push(tty); - } if (!got) { /* Do this _after_ the flip_buffer_push */ diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c index 1bbf507adda5..86033bed5d6c 100644 --- a/drivers/char/isicom.c +++ b/drivers/char/isicom.c @@ -115,6 +115,7 @@ #include #include #include +#include #include #include #include @@ -773,6 +774,7 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id, unsigned short base, header, word_count, count; unsigned char channel; short byte_count; + unsigned char *rp; card = (struct isi_board *) dev_id; @@ -903,14 +905,10 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id, break; case 1: /* Received Break !!! 
*/ - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - break; - *tty->flip.flag_buf_ptr++ = TTY_BREAK; - *tty->flip.char_buf_ptr++ = 0; - tty->flip.count++; + tty_insert_flip_char(tty, 0, TTY_BREAK); if (port->flags & ASYNC_SAK) do_SAK(tty); - schedule_delayed_work(&tty->flip.work, 1); + tty_flip_buffer_push(tty); break; case 2: /* Statistics */ @@ -923,23 +921,19 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id, } } else { /* Data Packet */ - count = min_t(unsigned short, byte_count, (TTY_FLIPBUF_SIZE - tty->flip.count)); + + count = tty_prepare_flip_string(tty, &rp, byte_count & ~1); #ifdef ISICOM_DEBUG printk(KERN_DEBUG "ISICOM: Intr: Can rx %d of %d bytes.\n", count, byte_count); #endif word_count = count >> 1; - insw(base, tty->flip.char_buf_ptr, word_count); - tty->flip.char_buf_ptr += (word_count << 1); + insw(base, rp, word_count); byte_count -= (word_count << 1); if (count & 0x0001) { - *tty->flip.char_buf_ptr++ = (char)(inw(base) & 0xff); + tty_insert_flip_char(tty, inw(base) & 0xff, TTY_NORMAL); byte_count -= 2; } - memset(tty->flip.flag_buf_ptr, 0, count); - tty->flip.flag_buf_ptr += count; - tty->flip.count += count; - if (byte_count > 0) { printk(KERN_DEBUG "ISICOM: Intr(0x%x:%d): Flip buffer overflow! dropping bytes...\n", base, channel+1); @@ -948,7 +942,7 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id, byte_count -= 2; } } - schedule_delayed_work(&tty->flip.work, 1); + tty_flip_buffer_push(tty); } if (card->isa == YES) ClearInterrupt(base); diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index 24435f8daa68..28c5a3193b81 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -2711,17 +2711,13 @@ static void stli_read(stlibrd_t *brdp, stliport_t *portp) stlen = size - tail; } - len = MIN(len, (TTY_FLIPBUF_SIZE - tty->flip.count)); + len = tty_buffer_request_room(tty, len); + /* FIXME : iomap ? 
*/ shbuf = (volatile char *) EBRDGETMEMPTR(brdp, portp->rxoffset); while (len > 0) { stlen = MIN(len, stlen); - memcpy(tty->flip.char_buf_ptr, (char *) (shbuf + tail), stlen); - memset(tty->flip.flag_buf_ptr, 0, stlen); - tty->flip.char_buf_ptr += stlen; - tty->flip.flag_buf_ptr += stlen; - tty->flip.count += stlen; - + tty_insert_flip_string(tty, (char *)(shbuf + tail), stlen); len -= stlen; tail += stlen; if (tail >= size) { @@ -2906,16 +2902,12 @@ static int stli_hostcmd(stlibrd_t *brdp, stliport_t *portp) if ((nt.data & DT_RXBREAK) && (portp->rxmarkmsk & BRKINT)) { if (tty != (struct tty_struct *) NULL) { - if (tty->flip.count < TTY_FLIPBUF_SIZE) { - tty->flip.count++; - *tty->flip.flag_buf_ptr++ = TTY_BREAK; - *tty->flip.char_buf_ptr++ = 0; - if (portp->flags & ASYNC_SAK) { - do_SAK(tty); - EBRDENABLE(brdp); - } - tty_schedule_flip(tty); + tty_insert_flip_char(tty, 0, TTY_BREAK); + if (portp->flags & ASYNC_SAK) { + do_SAK(tty); + EBRDENABLE(brdp); } + tty_schedule_flip(tty); } } @@ -4940,7 +4932,7 @@ static int stli_portcmdstats(stliport_t *portp) if (portp->tty != (struct tty_struct *) NULL) { if (portp->tty->driver_data == portp) { stli_comstats.ttystate = portp->tty->flags; - stli_comstats.rxbuffered = portp->tty->flip.count; + stli_comstats.rxbuffered = -1 /*portp->tty->flip.count*/; if (portp->tty->termios != (struct termios *) NULL) { stli_comstats.cflags = portp->tty->termios->c_cflag; stli_comstats.iflags = portp->tty->termios->c_iflag; diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c index 46a3a8ccd65f..5e3ef5522194 100644 --- a/drivers/char/moxa.c +++ b/drivers/char/moxa.c @@ -269,7 +269,7 @@ static int MoxaPortDCDChange(int); static int MoxaPortDCDON(int); static void MoxaPortFlushData(int, int); static int MoxaPortWriteData(int, unsigned char *, int); -static int MoxaPortReadData(int, unsigned char *, int); +static int MoxaPortReadData(int, struct tty_struct *tty); static int MoxaPortTxQueue(int); static int MoxaPortRxQueue(int); static int 
MoxaPortTxFree(int); @@ -301,6 +301,8 @@ static struct tty_operations moxa_ops = { .tiocmset = moxa_tiocmset, }; +static spinlock_t moxa_lock = SPIN_LOCK_UNLOCKED; + #ifdef CONFIG_PCI static int moxa_get_PCI_conf(struct pci_dev *p, int board_type, moxa_board_conf * board) { @@ -645,10 +647,10 @@ static int moxa_write(struct tty_struct *tty, if (ch == NULL) return (0); port = ch->port; - save_flags(flags); - cli(); + + spin_lock_irqsave(&moxa_lock, flags); len = MoxaPortWriteData(port, (unsigned char *) buf, count); - restore_flags(flags); + spin_unlock_irqrestore(&moxa_lock, flags); /********************************************* if ( !(ch->statusflags & LOWWAIT) && @@ -723,11 +725,10 @@ static void moxa_put_char(struct tty_struct *tty, unsigned char c) if (ch == NULL) return; port = ch->port; - save_flags(flags); - cli(); + spin_lock_irqsave(&moxa_lock, flags); moxaXmitBuff[0] = c; MoxaPortWriteData(port, moxaXmitBuff, 1); - restore_flags(flags); + spin_unlock_irqrestore(&moxa_lock, flags); /************************************************ if ( !(ch->statusflags & LOWWAIT) && (MoxaPortTxFree(port) <= 100) ) *************************************************/ @@ -1030,12 +1031,12 @@ static int block_till_ready(struct tty_struct *tty, struct file *filp, printk("block_til_ready before block: ttys%d, count = %d\n", ch->line, ch->count); #endif - save_flags(flags); - cli(); + spin_lock_irqsave(&moxa_lock, flags); if (!tty_hung_up_p(filp)) ch->count--; - restore_flags(flags); ch->blocked_open++; + spin_unlock_irqrestore(&moxa_lock, flags); + while (1) { set_current_state(TASK_INTERRUPTIBLE); if (tty_hung_up_p(filp) || @@ -1062,17 +1063,21 @@ static int block_till_ready(struct tty_struct *tty, struct file *filp, } set_current_state(TASK_RUNNING); remove_wait_queue(&ch->open_wait, &wait); + + spin_lock_irqsave(&moxa_lock, flags); if (!tty_hung_up_p(filp)) ch->count++; ch->blocked_open--; + spin_unlock_irqrestore(&moxa_lock, flags); #ifdef SERIAL_DEBUG_OPEN 
printk("block_til_ready after blocking: ttys%d, count = %d\n", ch->line, ch->count); #endif if (retval) return (retval); + /* FIXME: review to see if we need to use set_bit on these */ ch->asyncflags |= ASYNC_NORMAL_ACTIVE; - return (0); + return 0; } static void setup_empty_event(struct tty_struct *tty) @@ -1080,15 +1085,14 @@ static void setup_empty_event(struct tty_struct *tty) struct moxa_str *ch = tty->driver_data; unsigned long flags; - save_flags(flags); - cli(); + spin_lock_irqsave(&moxa_lock, flags); ch->statusflags |= EMPTYWAIT; moxaEmptyTimer_on[ch->port] = 0; del_timer(&moxaEmptyTimer[ch->port]); moxaEmptyTimer[ch->port].expires = jiffies + HZ; moxaEmptyTimer_on[ch->port] = 1; add_timer(&moxaEmptyTimer[ch->port]); - restore_flags(flags); + spin_unlock_irqrestore(&moxa_lock, flags); } static void check_xmit_empty(unsigned long data) @@ -1135,8 +1139,6 @@ static void receive_data(struct moxa_str *ch) { struct tty_struct *tp; struct termios *ts; - int i, count, rc, space; - unsigned char *charptr, *flagptr; unsigned long flags; ts = NULL; @@ -1150,24 +1152,10 @@ static void receive_data(struct moxa_str *ch) MoxaPortFlushData(ch->port, 0); return; } - space = TTY_FLIPBUF_SIZE - tp->flip.count; - if (space <= 0) - return; - charptr = tp->flip.char_buf_ptr; - flagptr = tp->flip.flag_buf_ptr; - rc = tp->flip.count; - save_flags(flags); - cli(); - count = MoxaPortReadData(ch->port, charptr, space); - restore_flags(flags); - for (i = 0; i < count; i++) - *flagptr++ = 0; - charptr += count; - rc += count; - tp->flip.count = rc; - tp->flip.char_buf_ptr = charptr; - tp->flip.flag_buf_ptr = flagptr; - tty_schedule_flip(ch->tty); + spin_lock_irqsave(&moxa_lock, flags); + MoxaPortReadData(ch->port, tp); + spin_unlock_irqrestore(&moxa_lock, flags); + tty_schedule_flip(tp); } #define Magic_code 0x404 @@ -1774,7 +1762,7 @@ int MoxaPortsOfCard(int cardno) * 14. MoxaPortDCDON(int port); * * 15. MoxaPortFlushData(int port, int mode); * * 16. 
MoxaPortWriteData(int port, unsigned char * buffer, int length); * - * 17. MoxaPortReadData(int port, unsigned char * buffer, int length); * + * 17. MoxaPortReadData(int port, struct tty_struct *tty); * * 18. MoxaPortTxBufSize(int port); * * 19. MoxaPortRxBufSize(int port); * * 20. MoxaPortTxQueue(int port); * @@ -2003,10 +1991,9 @@ int MoxaPortsOfCard(int cardno) * * Function 21: Read data. * Syntax: - * int MoxaPortReadData(int port, unsigned char * buffer, int length); + * int MoxaPortReadData(int port, struct tty_struct *tty); * int port : port number (0 - 127) - * unsigned char * buffer : pointer to read data buffer. - * int length : read data buffer length + * struct tty_struct *tty : tty for data * * return: 0 - length : real read data length * @@ -2504,7 +2491,7 @@ int MoxaPortWriteData(int port, unsigned char * buffer, int len) return (total); } -int MoxaPortReadData(int port, unsigned char * buffer, int space) +int MoxaPortReadData(int port, struct tty_struct *tty) { register ushort head, pageofs; int i, count, cnt, len, total, remain; @@ -2522,9 +2509,9 @@ int MoxaPortReadData(int port, unsigned char * buffer, int space) count = (tail >= head) ? (tail - head) : (tail - head + rx_mask + 1); if (count == 0) - return (0); + return 0; - total = (space > count) ? count : space; + total = count; remain = count - total; moxaLog.rxcnt[port] += total; count = total; @@ -2539,7 +2526,7 @@ int MoxaPortReadData(int port, unsigned char * buffer, int space) len = (count > len) ? 
len : count; ofs = baseAddr + DynPage_addr + bufhead + head; for (i = 0; i < len; i++) - *buffer++ = readb(ofs + i); + tty_insert_flip_char(tty, readb(ofs + i), TTY_NORMAL); head = (head + len) & rx_mask; count -= len; } @@ -2556,7 +2543,7 @@ int MoxaPortReadData(int port, unsigned char * buffer, int space) writew(pageno, baseAddr + Control_reg); ofs = baseAddr + DynPage_addr + pageofs; for (i = 0; i < cnt; i++) - *buffer++ = readb(ofs + i); + tty_insert_flip_char(tty, readb(ofs + i), TTY_NORMAL); if (count == 0) { writew((head + len) & rx_mask, ofsAddr + RXrptr); break; diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c index 51bb2a3cf8b3..ea725a9964e2 100644 --- a/drivers/char/mxser.c +++ b/drivers/char/mxser.c @@ -1982,7 +1982,7 @@ static void mxser_receive_chars(struct mxser_struct *info, int *status) spin_lock_irqsave(&info->slock, flags); - recv_room = tty->ldisc.receive_room(tty); + recv_room = tty->receive_room; if ((recv_room == 0) && (!info->ldisc_stop_rx)) { //mxser_throttle(tty); mxser_stoprx(tty); diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c index a133a62f3d55..70f487dd7b8d 100644 --- a/drivers/char/n_hdlc.c +++ b/drivers/char/n_hdlc.c @@ -212,7 +212,6 @@ static struct tty_ldisc n_hdlc_ldisc = { .ioctl = n_hdlc_tty_ioctl, .poll = n_hdlc_tty_poll, .receive_buf = n_hdlc_tty_receive, - .receive_room = n_hdlc_tty_room, .write_wakeup = n_hdlc_tty_wakeup, }; @@ -337,6 +336,7 @@ static int n_hdlc_tty_open (struct tty_struct *tty) tty->disc_data = n_hdlc; n_hdlc->tty = tty; + tty->receive_room = 65536; #if defined(TTY_NO_WRITE_SPLIT) /* change tty_io write() to not split large writes into 8K chunks */ @@ -477,22 +477,6 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty) } /* end of n_hdlc_tty_wakeup() */ -/** - * n_hdlc_tty_room - Return the amount of space left in the receiver's buffer - * @tty - pointer to associated tty instance data - * - * Callback function from tty driver. 
Return the amount of space left in the - * receiver's buffer to decide if remote transmitter is to be throttled. - */ -static int n_hdlc_tty_room(struct tty_struct *tty) -{ - if (debuglevel >= DEBUG_LEVEL_INFO) - printk("%s(%d)n_hdlc_tty_room() called\n",__FILE__,__LINE__); - /* always return a larger number to prevent */ - /* throttling of remote transmitter. */ - return 65536; -} /* end of n_hdlc_tty_root() */ - /** * n_hdlc_tty_receive - Called by tty driver when receive data is available * @tty - pointer to tty instance data diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c index 853c98cee64f..c48de09d68f0 100644 --- a/drivers/char/n_r3964.c +++ b/drivers/char/n_r3964.c @@ -147,7 +147,6 @@ static unsigned int r3964_poll(struct tty_struct * tty, struct file * file, struct poll_table_struct *wait); static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count); -static int r3964_receive_room(struct tty_struct *tty); static struct tty_ldisc tty_ldisc_N_R3964 = { .owner = THIS_MODULE, @@ -161,7 +160,6 @@ static struct tty_ldisc tty_ldisc_N_R3964 = { .set_termios = r3964_set_termios, .poll = r3964_poll, .receive_buf = r3964_receive_buf, - .receive_room = r3964_receive_room, }; @@ -1119,6 +1117,7 @@ static int r3964_open(struct tty_struct *tty) pInfo->nRetry = 0; tty->disc_data = pInfo; + tty->receive_room = 65536; init_timer(&pInfo->tmr); pInfo->tmr.data = (unsigned long)pInfo; @@ -1405,12 +1404,5 @@ static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, } } -static int r3964_receive_room(struct tty_struct *tty) -{ - TRACE_L("receive_room"); - return -1; -} - - MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_R3964); diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c index c556f4d3ccd7..ccad7ae94541 100644 --- a/drivers/char/n_tty.c +++ b/drivers/char/n_tty.c @@ -78,7 +78,32 @@ static inline void free_buf(unsigned char *buf) free_page((unsigned long) buf); } -static inline void 
put_tty_queue_nolock(unsigned char c, struct tty_struct *tty) +/** + * n_tty_set_room - receive space + * @tty: terminal + * + * Called by the driver to find out how much data it is + * permitted to feed to the line discipline without any being lost + * and thus to manage flow control. Not serialized. Answers for the + * "instant". + */ + +static void n_tty_set_room(struct tty_struct *tty) +{ + int left = N_TTY_BUF_SIZE - tty->read_cnt - 1; + + /* + * If we are doing input canonicalization, and there are no + * pending newlines, let characters through without limit, so + * that erase characters will be handled. Other excess + * characters will be beeped. + */ + if (left <= 0) + left = tty->icanon && !tty->canon_data; + tty->receive_room = left; +} + +static void put_tty_queue_nolock(unsigned char c, struct tty_struct *tty) { if (tty->read_cnt < N_TTY_BUF_SIZE) { tty->read_buf[tty->read_head] = c; @@ -87,7 +112,7 @@ static inline void put_tty_queue_nolock(unsigned char c, struct tty_struct *tty) } } -static inline void put_tty_queue(unsigned char c, struct tty_struct *tty) +static void put_tty_queue(unsigned char c, struct tty_struct *tty) { unsigned long flags; /* @@ -136,6 +161,7 @@ static void reset_buffer_flags(struct tty_struct *tty) spin_unlock_irqrestore(&tty->read_lock, flags); tty->canon_head = tty->canon_data = tty->erasing = 0; memset(&tty->read_flags, 0, sizeof tty->read_flags); + n_tty_set_room(tty); check_unthrottle(tty); } @@ -838,30 +864,6 @@ send_signal: put_tty_queue(c, tty); } -/** - * n_tty_receive_room - receive space - * @tty: terminal - * - * Called by the driver to find out how much data it is - * permitted to feed to the line discipline without any being lost - * and thus to manage flow control. Not serialized. Answers for the - * "instant".
- */ - -static int n_tty_receive_room(struct tty_struct *tty) -{ - int left = N_TTY_BUF_SIZE - tty->read_cnt - 1; - - /* - * If we are doing input canonicalization, and there are no - * pending newlines, let characters through without limit, so - * that erase characters will be handled. Other excess - * characters will be beeped. - */ - if (left <= 0) - left = tty->icanon && !tty->canon_data; - return left; -} /** * n_tty_write_wakeup - asynchronous I/O notifier @@ -953,6 +955,8 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, tty->driver->flush_chars(tty); } + n_tty_set_room(tty); + if (!tty->icanon && (tty->read_cnt >= tty->minimum_to_wake)) { kill_fasync(&tty->fasync, SIGIO, POLL_IN); if (waitqueue_active(&tty->read_wait)) @@ -964,7 +968,7 @@ static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, * mode. We don't want to throttle the driver if we're in * canonical mode and don't have a newline yet! */ - if (n_tty_receive_room(tty) < TTY_THRESHOLD_THROTTLE) { + if (tty->receive_room < TTY_THRESHOLD_THROTTLE) { /* check TTY_THROTTLED first so it indicates our state */ if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) && tty->driver->throttle) @@ -999,6 +1003,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct termios * old) if (test_bit(TTY_HW_COOK_IN, &tty->flags)) { tty->raw = 1; tty->real_raw = 1; + n_tty_set_room(tty); return; } if (I_ISTRIP(tty) || I_IUCLC(tty) || I_IGNCR(tty) || @@ -1051,6 +1056,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct termios * old) else tty->real_raw = 0; } + n_tty_set_room(tty); } /** @@ -1130,7 +1136,7 @@ static inline int input_available_p(struct tty_struct *tty, int amt) * */ -static inline int copy_from_read_buf(struct tty_struct *tty, +static int copy_from_read_buf(struct tty_struct *tty, unsigned char __user **b, size_t *nr) @@ -1308,6 +1314,7 @@ do_it_again: retval = -ERESTARTSYS; break; } + n_tty_set_room(tty); clear_bit(TTY_DONT_FLIP, 
&tty->flags); timeout = schedule_timeout(timeout); set_bit(TTY_DONT_FLIP, &tty->flags); @@ -1401,6 +1408,8 @@ do_it_again: } else if (test_and_clear_bit(TTY_PUSH, &tty->flags)) goto do_it_again; + n_tty_set_room(tty); + return retval; } @@ -1553,7 +1562,6 @@ struct tty_ldisc tty_ldisc_N_TTY = { normal_poll, /* poll */ NULL, /* hangup */ n_tty_receive_buf, /* receive_buf */ - n_tty_receive_room, /* receive_room */ n_tty_write_wakeup /* write_wakeup */ }; diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 9fb10c9fec88..8a8ca32822ba 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -1007,8 +1007,9 @@ static void rx_ready_hdlc(MGSLPC_INFO *info, int eom) static void rx_ready_async(MGSLPC_INFO *info, int tcd) { - unsigned char data, status; + unsigned char data, status, flag; int fifo_count; + int work = 0; struct tty_struct *tty = info->tty; struct mgsl_icount *icount = &info->icount; @@ -1023,20 +1024,16 @@ static void rx_ready_async(MGSLPC_INFO *info, int tcd) fifo_count = 32; } else fifo_count = 32; - + + tty_buffer_request_room(tty, fifo_count); /* Flush received async data to receive data buffer. 
*/ while (fifo_count) { data = read_reg(info, CHA + RXFIFO); status = read_reg(info, CHA + RXFIFO); fifo_count -= 2; - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - break; - - *tty->flip.char_buf_ptr = data; icount->rx++; - - *tty->flip.flag_buf_ptr = 0; + flag = TTY_NORMAL; // if no frameing/crc error then save data // BIT7:parity error @@ -1055,26 +1052,23 @@ static void rx_ready_async(MGSLPC_INFO *info, int tcd) status &= info->read_status_mask; if (status & BIT7) - *tty->flip.flag_buf_ptr = TTY_PARITY; + flag = TTY_PARITY; else if (status & BIT6) - *tty->flip.flag_buf_ptr = TTY_FRAME; + flag = TTY_FRAME; } - - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; + work += tty_insert_flip_char(tty, data, flag); } issue_command(info, CHA, CMD_RXFIFO); if (debug_level >= DEBUG_LEVEL_ISR) { - printk("%s(%d):rx_ready_async count=%d\n", - __FILE__,__LINE__,tty->flip.count); + printk("%s(%d):rx_ready_async", + __FILE__,__LINE__); printk("%s(%d):rx=%d brk=%d parity=%d frame=%d overrun=%d\n", __FILE__,__LINE__,icount->rx,icount->brk, icount->parity,icount->frame,icount->overrun); } - if (tty->flip.count) + if (work) tty_flip_buffer_push(tty); } diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 49f3997fd251..9b5a2c0e7008 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -111,7 +111,7 @@ static int pty_write(struct tty_struct * tty, const unsigned char *buf, int coun if (!to || tty->stopped) return 0; - c = to->ldisc.receive_room(to); + c = to->receive_room; if (c > count) c = count; to->ldisc.receive_buf(to, buf, NULL, c); @@ -126,7 +126,7 @@ static int pty_write_room(struct tty_struct *tty) if (!to || tty->stopped) return 0; - return to->ldisc.receive_room(to); + return to->receive_room; } /* diff --git a/drivers/char/rio/riointr.c b/drivers/char/rio/riointr.c index e42e7b50bf6b..ddda9c14e059 100644 --- a/drivers/char/rio/riointr.c +++ b/drivers/char/rio/riointr.c @@ -38,6 +38,7 @@ static char *_riointr_c_sccs_ = "@(#)riointr.c 
1.2"; #include #include #include +#include #include #include #include @@ -560,6 +561,7 @@ struct Port * PortP; struct PKT *PacketP; register uint DataCnt; uchar * ptr; + unsigned char *buf; int copied =0; static int intCount, RxIntCnt; @@ -657,8 +659,7 @@ struct Port * PortP; ** and available space. */ - transCount = min_t(unsigned int, PacketP->len & PKT_LEN_MASK, - TTY_FLIPBUF_SIZE - TtyP->flip.count); + transCount = tty_buffer_request_room(TtyP, PacketP->len & PKT_LEN_MASK); rio_dprintk (RIO_DEBUG_REC, "port %d: Copy %d bytes\n", PortP->PortNum, transCount); /* @@ -678,9 +679,8 @@ struct Port * PortP; #endif ptr = (uchar *) PacketP->data + PortP->RxDataStart; - rio_memcpy_fromio (TtyP->flip.char_buf_ptr, ptr, transCount); - memset(TtyP->flip.flag_buf_ptr, TTY_NORMAL, transCount); - + tty_prepare_flip_string(TtyP, &buf, transCount); + rio_memcpy_fromio (buf, ptr, transCount); #ifdef STATS /* ** keep a count for statistical purposes @@ -690,9 +690,6 @@ struct Port * PortP; PortP->RxDataStart += transCount; PacketP->len -= transCount; copied += transCount; - TtyP->flip.count += transCount; - TtyP->flip.char_buf_ptr += transCount; - TtyP->flip.flag_buf_ptr += transCount; #ifdef ___DEBUG_IT___ diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c index 5dae32521620..050e70ee5920 100644 --- a/drivers/char/riscom8.c +++ b/drivers/char/riscom8.c @@ -46,6 +46,7 @@ #include #include #include +#include #include @@ -354,28 +355,17 @@ static inline void rc_receive_exc(struct riscom_board const * bp) struct riscom_port *port; struct tty_struct *tty; unsigned char status; - unsigned char ch; + unsigned char ch, flag; if (!(port = rc_get_port(bp, "Receive"))) return; tty = port->tty; - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { - printk(KERN_WARNING "rc%d: port %d: Working around flip " - "buffer overflow.\n", - board_No(bp), port_No(port)); - return; - } #ifdef RC_REPORT_OVERRUN status = rc_in(bp, CD180_RCSR); - if (status & RCSR_OE) { + if (status & RCSR_OE) 
port->overrun++; -#if 0 - printk(KERN_ERR "rc%d: port %d: Overrun. Total %ld overruns\n", - board_No(bp), port_No(port), port->overrun); -#endif - } status &= port->mark_mask; #else status = rc_in(bp, CD180_RCSR) & port->mark_mask; @@ -393,25 +383,24 @@ static inline void rc_receive_exc(struct riscom_board const * bp) } else if (status & RCSR_BREAK) { printk(KERN_INFO "rc%d: port %d: Handling break...\n", board_No(bp), port_No(port)); - *tty->flip.flag_buf_ptr++ = TTY_BREAK; + flag = TTY_BREAK; if (port->flags & ASYNC_SAK) do_SAK(tty); } else if (status & RCSR_PE) - *tty->flip.flag_buf_ptr++ = TTY_PARITY; + flag = TTY_PARITY; else if (status & RCSR_FE) - *tty->flip.flag_buf_ptr++ = TTY_FRAME; + flag = TTY_FRAME; else if (status & RCSR_OE) - *tty->flip.flag_buf_ptr++ = TTY_OVERRUN; + flag = TTY_OVERRUN; else - *tty->flip.flag_buf_ptr++ = 0; + flag = TTY_NORMAL; - *tty->flip.char_buf_ptr++ = ch; - tty->flip.count++; - schedule_delayed_work(&tty->flip.work, 1); + tty_insert_flip_char(tty, ch, flag); + tty_flip_buffer_push(tty); } static inline void rc_receive(struct riscom_board const * bp) @@ -432,17 +421,15 @@ static inline void rc_receive(struct riscom_board const * bp) #endif while (count--) { - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { + if (tty_buffer_request_room(tty, 1) == 0) { printk(KERN_WARNING "rc%d: port %d: Working around " "flip buffer overflow.\n", board_No(bp), port_No(port)); break; } - *tty->flip.char_buf_ptr++ = rc_in(bp, CD180_RDR); - *tty->flip.flag_buf_ptr++ = 0; - tty->flip.count++; + tty_insert_flip_char(tty, rc_in(bp, CD180_RDR), TTY_NORMAL); } - schedule_delayed_work(&tty->flip.work, 1); + tty_flip_buffer_push(tty); } static inline void rc_transmit(struct riscom_board const * bp) diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c index d3bc731fbb27..0949dcef0697 100644 --- a/drivers/char/rocket.c +++ b/drivers/char/rocket.c @@ -325,19 +325,16 @@ static void rp_do_receive(struct r_port *info, { unsigned int CharNStat; int ToRecv, 
wRecv, space = 0, count; - unsigned char *cbuf; - char *fbuf; + unsigned char *cbuf, *chead; + char *fbuf, *fhead; struct tty_ldisc *ld; ld = tty_ldisc_ref(tty); ToRecv = sGetRxCnt(cp); - if (ld) - space = ld->receive_room(tty); + space = tty->receive_room; if (space > 2 * TTY_FLIPBUF_SIZE) space = 2 * TTY_FLIPBUF_SIZE; - cbuf = tty->flip.char_buf; - fbuf = tty->flip.flag_buf; count = 0; #ifdef ROCKET_DEBUG_INTR printk(KERN_INFO "rp_do_receive(%d, %d)...", ToRecv, space); @@ -350,9 +347,13 @@ static void rp_do_receive(struct r_port *info, if (ToRecv > space) ToRecv = space; + ToRecv = tty_prepare_flip_string_flags(tty, &chead, &fhead, ToRecv); if (ToRecv <= 0) goto done; + cbuf = chead; + fbuf = fhead; + /* * if status indicates there are errored characters in the * FIFO, then enter status mode (a word in FIFO holds @@ -399,7 +400,7 @@ static void rp_do_receive(struct r_port *info, else if (CharNStat & STMRCVROVRH) *fbuf++ = TTY_OVERRUN; else - *fbuf++ = 0; + *fbuf++ = TTY_NORMAL; *cbuf++ = CharNStat & 0xff; count++; ToRecv--; @@ -426,13 +427,13 @@ static void rp_do_receive(struct r_port *info, sInStrW(sGetTxRxDataIO(cp), (unsigned short *) cbuf, wRecv); if (ToRecv & 1) cbuf[ToRecv - 1] = sInB(sGetTxRxDataIO(cp)); - memset(fbuf, 0, ToRecv); + memset(fbuf, TTY_NORMAL, ToRecv); cbuf += ToRecv; fbuf += ToRecv; count += ToRecv; } /* Push the data up to the tty layer */ - ld->receive_buf(tty, tty->flip.char_buf, tty->flip.flag_buf, count); + ld->receive_buf(tty, cbuf, fbuf, count); done: tty_ldisc_deref(ld); } diff --git a/drivers/char/selection.c b/drivers/char/selection.c index 5b187c895c18..71093a9fc462 100644 --- a/drivers/char/selection.c +++ b/drivers/char/selection.c @@ -275,7 +275,8 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t int paste_selection(struct tty_struct *tty) { struct vc_data *vc = (struct vc_data *)tty->driver_data; - int pasted = 0, count; + int pasted = 0; + unsigned int count; struct tty_ldisc *ld; 
DECLARE_WAITQUEUE(wait, current); @@ -293,7 +294,7 @@ int paste_selection(struct tty_struct *tty) continue; } count = sel_buffer_lth - pasted; - count = min(count, tty->ldisc.receive_room(tty)); + count = min(count, tty->receive_room); tty->ldisc.receive_buf(tty, sel_buffer + pasted, NULL, count); pasted += count; } diff --git a/drivers/char/ser_a2232.c b/drivers/char/ser_a2232.c index dda30e42ec79..80a5b840e22f 100644 --- a/drivers/char/ser_a2232.c +++ b/drivers/char/ser_a2232.c @@ -194,11 +194,6 @@ static inline void a2232_receive_char(struct a2232_port *port, int ch, int err) */ struct tty_struct *tty = port->gs.tty; - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - return; - - tty->flip.count++; - #if 0 switch(err) { case TTY_BREAK: @@ -212,8 +207,7 @@ static inline void a2232_receive_char(struct a2232_port *port, int ch, int err) } #endif - *tty->flip.flag_buf_ptr++ = err; - *tty->flip.char_buf_ptr++ = ch; + tty_insert_flip_char(tty, ch, err); tty_flip_buffer_push(tty); } diff --git a/drivers/char/serial167.c b/drivers/char/serial167.c index a580748b92a1..f36342ae8e7e 100644 --- a/drivers/char/serial167.c +++ b/drivers/char/serial167.c @@ -422,45 +422,35 @@ cd2401_rxerr_interrupt(int irq, void *dev_id, struct pt_regs *fp) base_addr[CyREOIR] = rfoc ? 
0 : CyNOTRANS; return IRQ_HANDLED; } - if (tty->flip.count < TTY_FLIPBUF_SIZE){ - tty->flip.count++; + if (tty_buffer_request_room(tty, 1) != 0){ if (err & info->read_status_mask){ if(err & CyBREAK){ - *tty->flip.flag_buf_ptr++ = TTY_BREAK; - *tty->flip.char_buf_ptr++ = data; + tty_insert_flip_char(tty, data, TTY_BREAK); if (info->flags & ASYNC_SAK){ do_SAK(tty); } }else if(err & CyFRAME){ - *tty->flip.flag_buf_ptr++ = TTY_FRAME; - *tty->flip.char_buf_ptr++ = data; + tty_insert_flip_char(tty, data, TTY_FRAME); }else if(err & CyPARITY){ - *tty->flip.flag_buf_ptr++ = TTY_PARITY; - *tty->flip.char_buf_ptr++ = data; + tty_insert_flip_char(tty, data, TTY_PARITY); }else if(err & CyOVERRUN){ - *tty->flip.flag_buf_ptr++ = TTY_OVERRUN; - *tty->flip.char_buf_ptr++ = 0; + tty_insert_flip_char(tty, 0, TTY_OVERRUN); /* If the flip buffer itself is overflowing, we still loose the next incoming character. */ - if(tty->flip.count < TTY_FLIPBUF_SIZE){ - tty->flip.count++; - *tty->flip.flag_buf_ptr++ = TTY_NORMAL; - *tty->flip.char_buf_ptr++ = data; - } + tty_insert_flip_char(tty, data, TTY_NORMAL); + } /* These two conditions may imply */ /* a normal read should be done. 
*/ /* else if(data & CyTIMEOUT) */ /* else if(data & CySPECHAR) */ }else{ - *tty->flip.flag_buf_ptr++ = 0; - *tty->flip.char_buf_ptr++ = 0; + tty_insert_flip_char(tty, 0, TTY_NORMAL); } }else{ - *tty->flip.flag_buf_ptr++ = 0; - *tty->flip.char_buf_ptr++ = 0; + tty_insert_flip_char(tty, data, TTY_NORMAL); } }else{ /* there was a software buffer overrun @@ -692,12 +682,7 @@ cd2401_rx_interrupt(int irq, void *dev_id, struct pt_regs *fp) #endif while(char_count--){ data = base_addr[CyRDR]; - if (tty->flip.count >= TTY_FLIPBUF_SIZE){ - continue; - } - tty->flip.count++; - *tty->flip.flag_buf_ptr++ = TTY_NORMAL; - *tty->flip.char_buf_ptr++ = data; + tty_insert_flip_char(tty, data, TTY_NORMAL); #ifdef CYCLOM_16Y_HACK udelay(10L); #endif diff --git a/drivers/char/specialix.c b/drivers/char/specialix.c index 0bbfce43031c..0a574bdbce36 100644 --- a/drivers/char/specialix.c +++ b/drivers/char/specialix.c @@ -85,6 +85,7 @@ #include #include #include +#include #include #include #include @@ -665,7 +666,7 @@ static inline void sx_receive_exc(struct specialix_board * bp) struct specialix_port *port; struct tty_struct *tty; unsigned char status; - unsigned char ch; + unsigned char ch, flag; func_enter(); @@ -676,8 +677,6 @@ static inline void sx_receive_exc(struct specialix_board * bp) return; } tty = port->tty; - dprintk (SX_DEBUG_RX, "port: %p count: %d BUFF_SIZE: %d\n", - port, tty->flip.count, TTY_FLIPBUF_SIZE); status = sx_in(bp, CD186x_RCSR); @@ -691,7 +690,7 @@ static inline void sx_receive_exc(struct specialix_board * bp) /* This flip buffer check needs to be below the reading of the status register to reset the chip's IRQ.... 
*/ - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { + if (tty_buffer_request_room(tty, 1) == 0) { dprintk(SX_DEBUG_FIFO, "sx%d: port %d: Working around flip buffer overflow.\n", board_No(bp), port_No(port)); func_exit(); @@ -712,26 +711,24 @@ static inline void sx_receive_exc(struct specialix_board * bp) } else if (status & RCSR_BREAK) { dprintk(SX_DEBUG_RX, "sx%d: port %d: Handling break...\n", board_No(bp), port_No(port)); - *tty->flip.flag_buf_ptr++ = TTY_BREAK; + flag = TTY_BREAK; if (port->flags & ASYNC_SAK) do_SAK(tty); } else if (status & RCSR_PE) - *tty->flip.flag_buf_ptr++ = TTY_PARITY; + flag = TTY_PARITY; else if (status & RCSR_FE) - *tty->flip.flag_buf_ptr++ = TTY_FRAME; + flag = TTY_FRAME; else if (status & RCSR_OE) - *tty->flip.flag_buf_ptr++ = TTY_OVERRUN; + flag = TTY_OVERRUN; else - *tty->flip.flag_buf_ptr++ = 0; - - *tty->flip.char_buf_ptr++ = ch; - tty->flip.count++; - schedule_delayed_work(&tty->flip.work, 1); + flag = TTY_NORMAL; + if(tty_insert_flip_char(tty, ch, flag)) + tty_flip_buffer_push(tty); func_exit(); } @@ -755,18 +752,11 @@ static inline void sx_receive(struct specialix_board * bp) dprintk (SX_DEBUG_RX, "port: %p: count: %d\n", port, count); port->hits[count > 8 ? 
9 : count]++; - while (count--) { - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { - printk(KERN_INFO "sx%d: port %d: Working around flip buffer overflow.\n", - board_No(bp), port_No(port)); - break; - } - *tty->flip.char_buf_ptr++ = sx_in(bp, CD186x_RDR); - *tty->flip.flag_buf_ptr++ = 0; - tty->flip.count++; - } - schedule_delayed_work(&tty->flip.work, 1); + tty_buffer_request_room(tty, count); + while (count--) + tty_insert_flip_char(tty, sx_in(bp, CD186x_RDR), TTY_NORMAL); + tty_flip_buffer_push(tty); func_exit(); } diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c index acef2abf3f0d..0e20780d4a29 100644 --- a/drivers/char/stallion.c +++ b/drivers/char/stallion.c @@ -2901,7 +2901,8 @@ static int stl_getportstats(stlport_t *portp, comstats_t __user *cp) if (portp->tty != (struct tty_struct *) NULL) { if (portp->tty->driver_data == portp) { portp->stats.ttystate = portp->tty->flags; - portp->stats.rxbuffered = portp->tty->flip.count; + /* No longer available as a statistic */ + portp->stats.rxbuffered = 1; /*portp->tty->flip.count; */ if (portp->tty->termios != (struct termios *) NULL) { portp->stats.cflags = portp->tty->termios->c_cflag; portp->stats.iflags = portp->tty->termios->c_iflag; @@ -4045,9 +4046,7 @@ static void stl_cd1400rxisr(stlpanel_t *panelp, int ioaddr) if ((ioack & ACK_TYPMASK) == ACK_TYPRXGOOD) { outb((RDCR + portp->uartaddr), ioaddr); len = inb(ioaddr + EREG_DATA); - if ((tty == (struct tty_struct *) NULL) || - (tty->flip.char_buf_ptr == (char *) NULL) || - ((buflen = TTY_FLIPBUF_SIZE - tty->flip.count) == 0)) { + if (tty == NULL || (buflen = tty_buffer_request_room(tty, len)) == 0) { len = MIN(len, sizeof(stl_unwanted)); outb((RDSR + portp->uartaddr), ioaddr); insb((ioaddr + EREG_DATA), &stl_unwanted[0], len); @@ -4056,12 +4055,10 @@ static void stl_cd1400rxisr(stlpanel_t *panelp, int ioaddr) } else { len = MIN(len, buflen); if (len > 0) { + unsigned char *ptr; outb((RDSR + portp->uartaddr), ioaddr); - insb((ioaddr + EREG_DATA), 
tty->flip.char_buf_ptr, len); - memset(tty->flip.flag_buf_ptr, 0, len); - tty->flip.flag_buf_ptr += len; - tty->flip.char_buf_ptr += len; - tty->flip.count += len; + tty_prepare_flip_string(tty, &ptr, len); + insb((ioaddr + EREG_DATA), ptr, len); tty_schedule_flip(tty); portp->stats.rxtotal += len; } @@ -4085,8 +4082,7 @@ static void stl_cd1400rxisr(stlpanel_t *panelp, int ioaddr) portp->stats.txxoff++; goto stl_rxalldone; } - if ((tty != (struct tty_struct *) NULL) && - ((portp->rxignoremsk & status) == 0)) { + if (tty != NULL && (portp->rxignoremsk & status) == 0) { if (portp->rxmarkmsk & status) { if (status & ST_BREAK) { status = TTY_BREAK; @@ -4106,14 +4102,8 @@ static void stl_cd1400rxisr(stlpanel_t *panelp, int ioaddr) } else { status = 0; } - if (tty->flip.char_buf_ptr != (char *) NULL) { - if (tty->flip.count < TTY_FLIPBUF_SIZE) { - *tty->flip.flag_buf_ptr++ = status; - *tty->flip.char_buf_ptr++ = ch; - tty->flip.count++; - } - tty_schedule_flip(tty); - } + tty_insert_flip_char(tty, ch, status); + tty_schedule_flip(tty); } } else { printk("STALLION: bad RX interrupt ack value=%x\n", ioack); @@ -5012,9 +5002,7 @@ static void stl_sc26198rxisr(stlport_t *portp, unsigned int iack) len = inb(ioaddr + XP_DATA) + 1; if ((iack & IVR_TYPEMASK) == IVR_RXDATA) { - if ((tty == (struct tty_struct *) NULL) || - (tty->flip.char_buf_ptr == (char *) NULL) || - ((buflen = TTY_FLIPBUF_SIZE - tty->flip.count) == 0)) { + if (tty == NULL || (buflen = tty_buffer_request_room(tty, len)) == 0) { len = MIN(len, sizeof(stl_unwanted)); outb(GRXFIFO, (ioaddr + XP_ADDR)); insb((ioaddr + XP_DATA), &stl_unwanted[0], len); @@ -5023,12 +5011,10 @@ static void stl_sc26198rxisr(stlport_t *portp, unsigned int iack) } else { len = MIN(len, buflen); if (len > 0) { + unsigned char *ptr; outb(GRXFIFO, (ioaddr + XP_ADDR)); - insb((ioaddr + XP_DATA), tty->flip.char_buf_ptr, len); - memset(tty->flip.flag_buf_ptr, 0, len); - tty->flip.flag_buf_ptr += len; - tty->flip.char_buf_ptr += len; - 
tty->flip.count += len; + tty_prepare_flip_string(tty, &ptr, len); + insb((ioaddr + XP_DATA), ptr, len); tty_schedule_flip(tty); portp->stats.rxtotal += len; } @@ -5096,14 +5082,8 @@ static inline void stl_sc26198rxbadch(stlport_t *portp, unsigned char status, ch status = 0; } - if (tty->flip.char_buf_ptr != (char *) NULL) { - if (tty->flip.count < TTY_FLIPBUF_SIZE) { - *tty->flip.flag_buf_ptr++ = status; - *tty->flip.char_buf_ptr++ = ch; - tty->flip.count++; - } - tty_schedule_flip(tty); - } + tty_insert_flip_char(tty, ch, status); + tty_schedule_flip(tty); if (status == 0) portp->stats.rxtotal++; diff --git a/drivers/char/sx.c b/drivers/char/sx.c index 564f31778eb3..64bf89cb574f 100644 --- a/drivers/char/sx.c +++ b/drivers/char/sx.c @@ -1085,6 +1085,7 @@ static inline void sx_receive_chars (struct sx_port *port) int rx_op; struct tty_struct *tty; int copied=0; + unsigned char *rp; func_enter2 (); tty = port->gs.tty; @@ -1095,8 +1096,8 @@ static inline void sx_receive_chars (struct sx_port *port) sx_dprintk (SX_DEBUG_RECEIVE, "rxop=%d, c = %d.\n", rx_op, c); /* Don't copy more bytes than there is room for in the buffer */ - if (tty->flip.count + c > TTY_FLIPBUF_SIZE) - c = TTY_FLIPBUF_SIZE - tty->flip.count; + + c = tty_prepare_flip_string(tty, &rp, c); sx_dprintk (SX_DEBUG_RECEIVE, "c = %d.\n", c); @@ -1111,14 +1112,8 @@ static inline void sx_receive_chars (struct sx_port *port) sx_dprintk (SX_DEBUG_RECEIVE , "Copying over %d chars. First is %d at %lx\n", c, read_sx_byte (port->board, CHAN_OFFSET(port,hi_rxbuf) + rx_op), CHAN_OFFSET(port, hi_rxbuf)); - memcpy_fromio (tty->flip.char_buf_ptr, + memcpy_fromio (rp, port->board->base + CHAN_OFFSET(port,hi_rxbuf) + rx_op, c); - memset(tty->flip.flag_buf_ptr, TTY_NORMAL, c); - - /* Update the kernel buffer end */ - tty->flip.count += c; - tty->flip.char_buf_ptr += c; - tty->flip.flag_buf_ptr += c; /* This one last. ( Not essential.) It allows the card to start putting more data into the buffer! 
diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index 789572fc002b..9f1b466c4f84 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -1467,6 +1467,7 @@ static void mgsl_isr_receive_data( struct mgsl_struct *info ) { int Fifocount; u16 status; + int work = 0; unsigned char DataByte; struct tty_struct *tty = info->tty; struct mgsl_icount *icount = &info->icount; @@ -1487,6 +1488,8 @@ static void mgsl_isr_receive_data( struct mgsl_struct *info ) /* flush the receive FIFO */ while( (Fifocount = (usc_InReg(info,RICR) >> 8)) ) { + int flag; + /* read one byte from RxFIFO */ outw( (inw(info->io_base + CCAR) & 0x0780) | (RDR+LSBONLY), info->io_base + CCAR ); @@ -1498,13 +1501,9 @@ static void mgsl_isr_receive_data( struct mgsl_struct *info ) RXSTATUS_OVERRUN + RXSTATUS_BREAK_RECEIVED) ) usc_UnlatchRxstatusBits(info,RXSTATUS_ALL); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - continue; - - *tty->flip.char_buf_ptr = DataByte; icount->rx++; - *tty->flip.flag_buf_ptr = 0; + flag = 0; if ( status & (RXSTATUS_FRAMING_ERROR + RXSTATUS_PARITY_ERROR + RXSTATUS_OVERRUN + RXSTATUS_BREAK_RECEIVED) ) { printk("rxerr=%04X\n",status); @@ -1530,41 +1529,31 @@ static void mgsl_isr_receive_data( struct mgsl_struct *info ) status &= info->read_status_mask; if (status & RXSTATUS_BREAK_RECEIVED) { - *tty->flip.flag_buf_ptr = TTY_BREAK; + flag = TTY_BREAK; if (info->flags & ASYNC_SAK) do_SAK(tty); } else if (status & RXSTATUS_PARITY_ERROR) - *tty->flip.flag_buf_ptr = TTY_PARITY; + flag = TTY_PARITY; else if (status & RXSTATUS_FRAMING_ERROR) - *tty->flip.flag_buf_ptr = TTY_FRAME; - if (status & RXSTATUS_OVERRUN) { - /* Overrun is special, since it's - * reported immediately, and doesn't - * affect the current character - */ - if (tty->flip.count < TTY_FLIPBUF_SIZE) { - tty->flip.count++; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - *tty->flip.flag_buf_ptr = TTY_OVERRUN; - } - } + flag = TTY_FRAME; } /* end of if (error) */ - - 
tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; + tty_insert_flip_char(tty, DataByte, flag); + if (status & RXSTATUS_OVERRUN) { + /* Overrun is special, since it's + * reported immediately, and doesn't + * affect the current character + */ + work += tty_insert_flip_char(tty, 0, TTY_OVERRUN); + } } if ( debug_level >= DEBUG_LEVEL_ISR ) { - printk("%s(%d):mgsl_isr_receive_data flip count=%d\n", - __FILE__,__LINE__,tty->flip.count); printk("%s(%d):rx=%d brk=%d parity=%d frame=%d overrun=%d\n", __FILE__,__LINE__,icount->rx,icount->brk, icount->parity,icount->frame,icount->overrun); } - if ( tty->flip.count ) + if(work) tty_flip_buffer_push(tty); } diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 41759cd70a4f..79c81def4104 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -1749,6 +1749,9 @@ static void rx_async(struct slgt_info *info) unsigned char status; struct slgt_desc *bufs = info->rbufs; int i, count; + int chars = 0; + int stat; + unsigned char ch; start = end = info->rbuf_current; @@ -1760,16 +1763,15 @@ static void rx_async(struct slgt_info *info) DBGDATA(info, p, count, "rx"); for(i=0 ; i < count; i+=2, p+=2) { - if (tty) { - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - tty_flip_buffer_push(tty); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - break; - *tty->flip.char_buf_ptr = *p; - *tty->flip.flag_buf_ptr = 0; + if (tty && chars) { + tty_flip_buffer_push(tty); + chars = 0; } + ch = *p; icount->rx++; + stat = 0; + if ((status = *(p+1) & (BIT9 + BIT8))) { if (status & BIT9) icount->parity++; @@ -1778,17 +1780,14 @@ static void rx_async(struct slgt_info *info) /* discard char if tty control flags say so */ if (status & info->ignore_status_mask) continue; - if (tty) { - if (status & BIT9) - *tty->flip.flag_buf_ptr = TTY_PARITY; - else if (status & BIT8) - *tty->flip.flag_buf_ptr = TTY_FRAME; - } + if (status & BIT9) + stat = TTY_PARITY; + else if (status & BIT8) + stat = TTY_FRAME; } if 
(tty) { - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; + tty_insert_flip_char(tty, ch, stat); + chars++; } } @@ -1811,7 +1810,7 @@ static void rx_async(struct slgt_info *info) break; } - if (tty && tty->flip.count) + if (tty && chars) tty_flip_buffer_push(tty); } @@ -2029,7 +2028,7 @@ static void isr_serial(struct slgt_info *info) if (info->tty) { if (!(status & info->ignore_status_mask)) { if (info->read_status_mask & MASK_BREAK) { - *info->tty->flip.flag_buf_ptr = TTY_BREAK; + tty_insert_flip_char(info->tty, 0, TTY_BREAK); if (info->flags & ASYNC_SAK) do_SAK(info->tty); } diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index a9467e7d3747..960adb256fbb 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -2196,7 +2196,7 @@ void isr_rxint(SLMP_INFO * info) if ( tty ) { if (!(status & info->ignore_status_mask1)) { if (info->read_status_mask1 & BRKD) { - *tty->flip.flag_buf_ptr = TTY_BREAK; + tty_insert_flip_char(tty, 0, TTY_BREAK); if (info->flags & ASYNC_SAK) do_SAK(tty); } @@ -2240,16 +2240,10 @@ void isr_rxrdy(SLMP_INFO * info) while((status = read_reg(info,CST0)) & BIT0) { + int flag = 0; + int over = 0; DataByte = read_reg(info,TRB); - if ( tty ) { - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - continue; - - *tty->flip.char_buf_ptr = DataByte; - *tty->flip.flag_buf_ptr = 0; - } - icount->rx++; if ( status & (PE + FRME + OVRN) ) { @@ -2272,42 +2266,34 @@ void isr_rxrdy(SLMP_INFO * info) if ( tty ) { if (status & PE) - *tty->flip.flag_buf_ptr = TTY_PARITY; + flag = TTY_PARITY; else if (status & FRME) - *tty->flip.flag_buf_ptr = TTY_FRAME; + flag = TTY_FRAME; if (status & OVRN) { /* Overrun is special, since it's * reported immediately, and doesn't * affect the current character */ - if (tty->flip.count < TTY_FLIPBUF_SIZE) { - tty->flip.count++; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - *tty->flip.flag_buf_ptr = TTY_OVERRUN; - } + over = 1; } } } /* end of if (error) */ if ( tty ) 
{ - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; + tty_insert_flip_char(tty, DataByte, flag); + if (over) + tty_insert_flip_char(tty, 0, TTY_OVERRUN); } } if ( debug_level >= DEBUG_LEVEL_ISR ) { - printk("%s(%d):%s isr_rxrdy() flip count=%d\n", - __FILE__,__LINE__,info->device_name, - tty ? tty->flip.count : 0); printk("%s(%d):%s rx=%d brk=%d parity=%d frame=%d overrun=%d\n", __FILE__,__LINE__,info->device_name, icount->rx,icount->brk,icount->parity, icount->frame,icount->overrun); } - if ( tty && tty->flip.count ) + if ( tty ) tty_flip_buffer_push(tty); } diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 4b1eef51ec59..1eda82b31a61 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -166,9 +166,12 @@ static struct tty_struct *alloc_tty_struct(void) return tty; } +static void tty_buffer_free_all(struct tty_struct *); + static inline void free_tty_struct(struct tty_struct *tty) { kfree(tty->write_buf); + tty_buffer_free_all(tty); kfree(tty); } @@ -230,6 +233,201 @@ static int check_tty_count(struct tty_struct *tty, const char *routine) return 0; } +/* + * Tty buffer allocation management + */ + +static void tty_buffer_free_all(struct tty_struct *tty) +{ + struct tty_buffer *thead; + while((thead = tty->buf.head) != NULL) { + tty->buf.head = thead->next; + kfree(thead); + } + while((thead = tty->buf.free) != NULL) { + tty->buf.free = thead->next; + kfree(thead); + } + tty->buf.tail = NULL; +} + +static void tty_buffer_init(struct tty_struct *tty) +{ + tty->buf.head = NULL; + tty->buf.tail = NULL; + tty->buf.free = NULL; +} + +static struct tty_buffer *tty_buffer_alloc(size_t size) +{ + struct tty_buffer *p = kmalloc(sizeof(struct tty_buffer) + 2 * size, GFP_ATOMIC); + if(p == NULL) + return NULL; + p->used = 0; + p->size = size; + p->next = NULL; + p->char_buf_ptr = (char *)(p->data); + p->flag_buf_ptr = (unsigned char *)p->char_buf_ptr + size; +/* printk("Flip create %p\n", p); */ + return p; +} + +/* Must be 
called with the tty_read lock held. This needs to acquire strategy + code to decide if we should kfree or relink a given expired buffer */ + +static void tty_buffer_free(struct tty_struct *tty, struct tty_buffer *b) +{ + /* Dumb strategy for now - should keep some stats */ +/* printk("Flip dispose %p\n", b); */ + if(b->size >= 512) + kfree(b); + else { + b->next = tty->buf.free; + tty->buf.free = b; + } +} + +static struct tty_buffer *tty_buffer_find(struct tty_struct *tty, size_t size) +{ + struct tty_buffer **tbh = &tty->buf.free; + while((*tbh) != NULL) { + struct tty_buffer *t = *tbh; + if(t->size >= size) { + *tbh = t->next; + t->next = NULL; + t->used = 0; + /* DEBUG ONLY */ + memset(t->data, '*', size); +/* printk("Flip recycle %p\n", t); */ + return t; + } + tbh = &((*tbh)->next); + } + /* Round the buffer size out */ + size = (size + 0xFF) & ~ 0xFF; + return tty_buffer_alloc(size); + /* Should possibly check if this fails for the largest buffer we + have queued and recycle that ? */ +} + +int tty_buffer_request_room(struct tty_struct *tty, size_t size) +{ + struct tty_buffer *b = tty->buf.head, *n; + int left = 0; + + /* OPTIMISATION: We could keep a per tty "zero" sized buffer to + remove this conditional if its worth it. 
This would be invisible + to the callers */ + if(b != NULL) + left = b->size - b->used; + if(left >= size) + return size; + /* This is the slow path - looking for new buffers to use */ + n = tty_buffer_find(tty, size); + if(n == NULL) + return left; + n->next = b; + if(b != NULL) + b->next = n; + else + tty->buf.head = n; + tty->buf.tail = n; + return size; +} + +EXPORT_SYMBOL_GPL(tty_buffer_request_room); + +int tty_insert_flip_string(struct tty_struct *tty, unsigned char *chars, size_t size) +{ + int copied = 0; + do { + int space = tty_buffer_request_room(tty, size - copied); + struct tty_buffer *tb = tty->buf.tail; + /* If there is no space then tb may be NULL */ + if(unlikely(space == 0)) + break; + memcpy(tb->char_buf_ptr + tb->used, chars, space); + memset(tb->flag_buf_ptr + tb->used, TTY_NORMAL, space); + tb->used += space; + copied += space; + chars += space; +/* printk("Flip insert %d.\n", space); */ + } + /* There is a small chance that we need to split the data over + several buffers. If this is the case we must loop */ + while (unlikely(size > copied)); + return copied; +} + +EXPORT_SYMBOL_GPL(tty_insert_flip_string); + +int tty_insert_flip_string_flags(struct tty_struct *tty, unsigned char *chars, char *flags, size_t size) +{ + int copied = 0; + do { + int space = tty_buffer_request_room(tty, size - copied); + struct tty_buffer *tb = tty->buf.tail; + /* If there is no space then tb may be NULL */ + if(unlikely(space == 0)) + break; + memcpy(tb->char_buf_ptr + tb->used, chars, space); + memcpy(tb->flag_buf_ptr + tb->used, flags, space); + tb->used += space; + copied += space; + chars += space; + flags += space; + } + /* There is a small chance that we need to split the data over + several buffers. If this is the case we must loop */ + while (unlikely(size > copied)); + return copied; +} + +EXPORT_SYMBOL_GPL(tty_insert_flip_string_flags); + + +/* + * Prepare a block of space in the buffer for data. 
Returns the length + * available and buffer pointer to the space which is now allocated and + * accounted for as ready for normal characters. This is used for drivers + * that need their own block copy routines into the buffer. There is no + * guarantee the buffer is a DMA target! + */ + +int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars, size_t size) +{ + int space = tty_buffer_request_room(tty, size); + struct tty_buffer *tb = tty->buf.tail; + *chars = tb->char_buf_ptr + tb->used; + memset(tb->flag_buf_ptr + tb->used, TTY_NORMAL, space); + tb->used += space; + return space; +} + +EXPORT_SYMBOL_GPL(tty_prepare_flip_string); + +/* + * Prepare a block of space in the buffer for data. Returns the length + * available and buffer pointer to the space which is now allocated and + * accounted for as ready for characters. This is used for drivers + * that need their own block copy routines into the buffer. There is no + * guarantee the buffer is a DMA target! + */ + +int tty_prepare_flip_string_flags(struct tty_struct *tty, unsigned char **chars, char **flags, size_t size) +{ + int space = tty_buffer_request_room(tty, size); + struct tty_buffer *tb = tty->buf.tail; + *chars = tb->char_buf_ptr + tb->used; + *flags = tb->flag_buf_ptr + tb->used; + tb->used += space; + return space; +} + +EXPORT_SYMBOL_GPL(tty_prepare_flip_string_flags); + + + /* * This is probably overkill for real world processors but * they are not on hot paths so a little discipline won't do @@ -492,6 +690,17 @@ restart: if (ld == NULL) return -EINVAL; + /* + * No more input please, we are switching. The new ldisc + * will update this value in the ldisc open function + */ + + tty->receive_room = 0; + + /* + * Problem: What do we do if this blocks ? + */ + tty_wait_until_sent(tty, 0); if (tty->ldisc.num == ldisc) { @@ -560,9 +769,9 @@ restart: * we say so later on. 
*/ - work = cancel_delayed_work(&tty->flip.work); + work = cancel_delayed_work(&tty->buf.work); /* - * Wait for ->hangup_work and ->flip.work handlers to terminate + * Wait for ->hangup_work and ->buf.work handlers to terminate */ flush_scheduled_work(); @@ -616,7 +825,7 @@ restart: /* Restart it in case no characters kick it off. Safe if already running */ if (work) - schedule_delayed_work(&tty->flip.work, 1); + schedule_delayed_work(&tty->buf.work, 1); return retval; } @@ -1721,10 +1930,10 @@ static void release_dev(struct file * filp) */ clear_bit(TTY_LDISC, &tty->flags); clear_bit(TTY_DONT_FLIP, &tty->flags); - cancel_delayed_work(&tty->flip.work); + cancel_delayed_work(&tty->buf.work); /* - * Wait for ->hangup_work and ->flip.work handlers to terminate + * Wait for ->hangup_work and ->buf.work handlers to terminate */ flush_scheduled_work(); @@ -2518,17 +2727,15 @@ EXPORT_SYMBOL(do_SAK); /* * This routine is called out of the software interrupt to flush data - * from the flip buffer to the line discipline. + * from the buffer chain to the line discipline. 
*/ static void flush_to_ldisc(void *private_) { struct tty_struct *tty = (struct tty_struct *) private_; - unsigned char *cp; - char *fp; - int count; unsigned long flags; struct tty_ldisc *disc; + struct tty_buffer *tbuf; disc = tty_ldisc_ref(tty); if (disc == NULL) /* !TTY_LDISC */ @@ -2538,28 +2745,22 @@ static void flush_to_ldisc(void *private_) /* * Do it after the next timer tick: */ - schedule_delayed_work(&tty->flip.work, 1); + schedule_delayed_work(&tty->buf.work, 1); goto out; } spin_lock_irqsave(&tty->read_lock, flags); - if (tty->flip.buf_num) { - cp = tty->flip.char_buf + TTY_FLIPBUF_SIZE; - fp = tty->flip.flag_buf + TTY_FLIPBUF_SIZE; - tty->flip.buf_num = 0; - tty->flip.char_buf_ptr = tty->flip.char_buf; - tty->flip.flag_buf_ptr = tty->flip.flag_buf; - } else { - cp = tty->flip.char_buf; - fp = tty->flip.flag_buf; - tty->flip.buf_num = 1; - tty->flip.char_buf_ptr = tty->flip.char_buf + TTY_FLIPBUF_SIZE; - tty->flip.flag_buf_ptr = tty->flip.flag_buf + TTY_FLIPBUF_SIZE; - } - count = tty->flip.count; - tty->flip.count = 0; + while((tbuf = tty->buf.head) != NULL) { + tty->buf.head = tbuf->next; + spin_unlock_irqrestore(&tty->read_lock, flags); + /* printk("Process buffer %p for %d\n", tbuf, tbuf->used); */ + disc->receive_buf(tty, tbuf->char_buf_ptr, + tbuf->flag_buf_ptr, + tbuf->used); + spin_lock_irqsave(&tty->read_lock, flags); + tty_buffer_free(tty, tbuf); + } + tty->buf.tail = NULL; spin_unlock_irqrestore(&tty->read_lock, flags); - - disc->receive_buf(tty, cp, fp, count); out: tty_ldisc_deref(disc); } @@ -2654,11 +2855,12 @@ void tty_flip_buffer_push(struct tty_struct *tty) if (tty->low_latency) flush_to_ldisc((void *) tty); else - schedule_delayed_work(&tty->flip.work, 1); + schedule_delayed_work(&tty->buf.work, 1); } EXPORT_SYMBOL(tty_flip_buffer_push); + /* * This subroutine initializes a tty structure. 
*/ @@ -2669,10 +2871,10 @@ static void initialize_tty_struct(struct tty_struct *tty) tty_ldisc_assign(tty, tty_ldisc_get(N_TTY)); tty->pgrp = -1; tty->overrun_time = jiffies; - tty->flip.char_buf_ptr = tty->flip.char_buf; - tty->flip.flag_buf_ptr = tty->flip.flag_buf; - INIT_WORK(&tty->flip.work, flush_to_ldisc, tty); - init_MUTEX(&tty->flip.pty_sem); + tty->buf.head = tty->buf.tail = NULL; + tty_buffer_init(tty); + INIT_WORK(&tty->buf.work, flush_to_ldisc, tty); + init_MUTEX(&tty->buf.pty_sem); init_MUTEX(&tty->termios_sem); init_waitqueue_head(&tty->write_wait); init_waitqueue_head(&tty->read_wait); diff --git a/drivers/char/viocons.c b/drivers/char/viocons.c index 4d75c261f98a..cb82ebf4cb07 100644 --- a/drivers/char/viocons.c +++ b/drivers/char/viocons.c @@ -993,11 +993,10 @@ static void vioHandleData(struct HvLpEvent *event) * Don't attempt to copy more data into the buffer than we * have room for because it would fail without indication. */ - if ((tty->flip.count + 1) > TTY_FLIPBUF_SIZE) { + if(tty_insert_flip_char(tty, cevent->data[index], TTY_NORMAL) == 0) { printk(VIOCONS_KERN_WARN "input buffer overflow!\n"); break; } - tty_insert_flip_char(tty, cevent->data[index], TTY_NORMAL); } /* if cevent->len == 0 then no data was added to the buffer and flip.count == 0 */ diff --git a/drivers/char/vme_scc.c b/drivers/char/vme_scc.c index 33e71e23b212..d9325281e482 100644 --- a/drivers/char/vme_scc.c +++ b/drivers/char/vme_scc.c @@ -434,13 +434,7 @@ static irqreturn_t scc_rx_int(int irq, void *data, struct pt_regs *fp) SCCwrite_NB(COMMAND_REG, CR_HIGHEST_IUS_RESET); return IRQ_HANDLED; } - if (tty->flip.count < TTY_FLIPBUF_SIZE) { - *tty->flip.char_buf_ptr = ch; - *tty->flip.flag_buf_ptr = 0; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; - } + tty_insert_flip_char(tty, ch, 0); /* Check if another character is already ready; in that case, the * spcond_int() function must be used, because this character may have an @@ -487,13 +481,7 @@ 
static irqreturn_t scc_spcond_int(int irq, void *data, struct pt_regs *fp) else err = 0; - if (tty->flip.count < TTY_FLIPBUF_SIZE) { - *tty->flip.char_buf_ptr = ch; - *tty->flip.flag_buf_ptr = err; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; - } + tty_insert_flip_char(tty, ch, err); /* ++TeSche: *All* errors have to be cleared manually, * else the condition persists for the next chars diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c index 1bd88fca0542..54a680cc704d 100644 --- a/drivers/input/serio/serport.c +++ b/drivers/input/serio/serport.c @@ -96,6 +96,7 @@ static int serport_ldisc_open(struct tty_struct *tty) init_waitqueue_head(&serport->wait); tty->disc_data = serport; + tty->receive_room = 256; set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); return 0; @@ -139,17 +140,6 @@ out: spin_unlock_irqrestore(&serport->lock, flags); } -/* - * serport_ldisc_room() reports how much room we do have for receiving data. - * Although we in fact have infinite room, we need to specify some value - * here, and 256 seems to be reasonable. - */ - -static int serport_ldisc_room(struct tty_struct *tty) -{ - return 256; -} - /* * serport_ldisc_read() just waits indefinitely if everything goes well. 
* However, when the serio driver closes the serio port, it finishes, @@ -237,7 +227,6 @@ static struct tty_ldisc serport_ldisc = { .read = serport_ldisc_read, .ioctl = serport_ldisc_ioctl, .receive_buf = serport_ldisc_receive, - .receive_room = serport_ldisc_room, .write_wakeup = serport_ldisc_write_wakeup }; diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c index 11ae0fddea04..623adbb0d13a 100644 --- a/drivers/isdn/capi/capi.c +++ b/drivers/isdn/capi/capi.c @@ -463,8 +463,7 @@ static int handle_recv_skb(struct capiminor *mp, struct sk_buff *skb) #endif goto bad; } - if (ld->receive_room && - ld->receive_room(mp->tty) < datalen) { + if (mp->tty->receive_room < datalen) { #if defined(_DEBUG_DATAFLOW) || defined(_DEBUG_TTYFUNCS) printk(KERN_DEBUG "capi: no room in tty\n"); #endif diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c index 4643df097bfe..22759c01746a 100644 --- a/drivers/isdn/i4l/isdn_common.c +++ b/drivers/isdn/i4l/isdn_common.c @@ -857,6 +857,118 @@ isdn_readbchan(int di, int channel, u_char * buf, u_char * fp, int len, wait_que return count; } +/* + * isdn_readbchan_tty() tries to get data from the read-queue. + * It MUST be called with interrupts off. + * + * Be aware that this is not an atomic operation when sleep != 0, even though + * interrupts are turned off! Well, like that we are currently only called + * on behalf of a read system call on raw device files (which are documented + * to be dangerous and for for debugging purpose only). The inode semaphore + * takes care that this is not called for the same minor device number while + * we are sleeping, but access is not serialized against simultaneous read() + * from the corresponding ttyI device. Can other ugly events, like changes + * of the mapping (di,ch)<->minor, happen during the sleep? 
--he + */ +int +isdn_readbchan_tty(int di, int channel, struct tty_struct *tty, int cisco_hack) +{ + int count; + int count_pull; + int count_put; + int dflag; + struct sk_buff *skb; + char last = 0; + int len; + + if (!dev->drv[di]) + return 0; + if (skb_queue_empty(&dev->drv[di]->rpqueue[channel])) + return 0; + + len = tty_buffer_request_room(tty, dev->drv[di]->rcvcount[channel]); + if(len == 0) + return len; + + count = 0; + while (len) { + if (!(skb = skb_peek(&dev->drv[di]->rpqueue[channel]))) + break; +#ifdef CONFIG_ISDN_AUDIO + if (ISDN_AUDIO_SKB_LOCK(skb)) + break; + ISDN_AUDIO_SKB_LOCK(skb) = 1; + if ((ISDN_AUDIO_SKB_DLECOUNT(skb)) || (dev->drv[di]->DLEflag & (1 << channel))) { + char *p = skb->data; + unsigned long DLEmask = (1 << channel); + + dflag = 0; + count_pull = count_put = 0; + while ((count_pull < skb->len) && (len > 0)) { + len--; + if (dev->drv[di]->DLEflag & DLEmask) { + last = DLE; + dev->drv[di]->DLEflag &= ~DLEmask; + } else { + last = *p; + if (last == DLE) { + dev->drv[di]->DLEflag |= DLEmask; + (ISDN_AUDIO_SKB_DLECOUNT(skb))--; + } + p++; + count_pull++; + } + count_put++; + } + if (count_pull >= skb->len) + dflag = 1; + } else { +#endif + /* No DLE's in buff, so simply copy it */ + dflag = 1; + if ((count_pull = skb->len) > len) { + count_pull = len; + dflag = 0; + } + count_put = count_pull; + if(count_put > 1) + tty_insert_flip_string(tty, skb->data, count_put - 1); + last = skb->data[count_put] - 1; + len -= count_put; +#ifdef CONFIG_ISDN_AUDIO + } +#endif + count += count_put; + if (dflag) { + /* We got all the data in this buff. + * Now we can dequeue it. 
+ */ + if(cisco_hack) + tty_insert_flip_char(tty, last, 0xFF); + else + tty_insert_flip_char(tty, last, TTY_NORMAL); +#ifdef CONFIG_ISDN_AUDIO + ISDN_AUDIO_SKB_LOCK(skb) = 0; +#endif + skb = skb_dequeue(&dev->drv[di]->rpqueue[channel]); + dev_kfree_skb(skb); + } else { + tty_insert_flip_char(tty, last, TTY_NORMAL); + /* Not yet emptied this buff, so it + * must stay in the queue, for further calls + * but we pull off the data we got until now. + */ + skb_pull(skb, count_pull); +#ifdef CONFIG_ISDN_AUDIO + ISDN_AUDIO_SKB_LOCK(skb) = 0; +#endif + } + dev->drv[di]->rcvcount[channel] -= count_put; + } + return count; +} + + static __inline int isdn_minor2drv(int minor) { diff --git a/drivers/isdn/i4l/isdn_common.h b/drivers/isdn/i4l/isdn_common.h index e27e9c3a81ed..082735dbb412 100644 --- a/drivers/isdn/i4l/isdn_common.h +++ b/drivers/isdn/i4l/isdn_common.h @@ -37,6 +37,7 @@ extern void isdn_timer_ctrl(int tf, int onoff); extern void isdn_unexclusive_channel(int di, int ch); extern int isdn_getnum(char **); extern int isdn_readbchan(int, int, u_char *, u_char *, int, wait_queue_head_t *); +extern int isdn_readbchan_tty(int, int, struct tty_struct *, int); extern int isdn_get_free_channel(int, int, int, int, int, char *); extern int isdn_writebuf_skb_stub(int, int, int, struct sk_buff *); extern int register_isdn(isdn_if * i); diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c index 8c404b4e2482..f190a99604f0 100644 --- a/drivers/isdn/i4l/isdn_tty.c +++ b/drivers/isdn/i4l/isdn_tty.c @@ -64,37 +64,42 @@ isdn_tty_try_read(modem_info * info, struct sk_buff *skb) int c; int len; struct tty_struct *tty; + char last; if (info->online) { if ((tty = info->tty)) { if (info->mcr & UART_MCR_RTS) { - c = TTY_FLIPBUF_SIZE - tty->flip.count; len = skb->len #ifdef CONFIG_ISDN_AUDIO + ISDN_AUDIO_SKB_DLECOUNT(skb) #endif ; + + c = tty_buffer_request_room(tty, len); if (c >= len) { #ifdef CONFIG_ISDN_AUDIO - if (ISDN_AUDIO_SKB_DLECOUNT(skb)) - while (skb->len--) { + 
if (ISDN_AUDIO_SKB_DLECOUNT(skb)) { + int l = skb->len; + unsigned char *dp = skb->data; + while (--l) { if (*skb->data == DLE) tty_insert_flip_char(tty, DLE, 0); - tty_insert_flip_char(tty, *skb->data++, 0); + tty_insert_flip_char(tty, *dp++, 0); + } + last = *dp; } else { #endif - memcpy(tty->flip.char_buf_ptr, - skb->data, len); - tty->flip.count += len; - tty->flip.char_buf_ptr += len; - memset(tty->flip.flag_buf_ptr, 0, len); - tty->flip.flag_buf_ptr += len; + if(len > 1) + tty_insert_flip_string(tty, skb->data, len - 1); + last = skb->data[len - 1]; #ifdef CONFIG_ISDN_AUDIO } #endif if (info->emu.mdmreg[REG_CPPP] & BIT_CPPP) - tty->flip.flag_buf_ptr[len - 1] = 0xff; - schedule_delayed_work(&tty->flip.work, 1); + tty_insert_flip_char(tty, last, 0xFF); + else + tty_insert_flip_char(tty, last, TTY_NORMAL); + tty_flip_buffer_push(tty); kfree_skb(skb); return 1; } @@ -114,7 +119,6 @@ isdn_tty_readmodem(void) int resched = 0; int midx; int i; - int c; int r; struct tty_struct *tty; modem_info *info; @@ -131,20 +135,13 @@ isdn_tty_readmodem(void) #endif if ((tty = info->tty)) { if (info->mcr & UART_MCR_RTS) { - c = TTY_FLIPBUF_SIZE - tty->flip.count; - if (c > 0) { - r = isdn_readbchan(info->isdn_driver, info->isdn_channel, - tty->flip.char_buf_ptr, - tty->flip.flag_buf_ptr, c, NULL); - /* CISCO AsyncPPP Hack */ - if (!(info->emu.mdmreg[REG_CPPP] & BIT_CPPP)) - memset(tty->flip.flag_buf_ptr, 0, r); - tty->flip.count += r; - tty->flip.flag_buf_ptr += r; - tty->flip.char_buf_ptr += r; - if (r) - schedule_delayed_work(&tty->flip.work, 1); - } + /* CISCO AsyncPPP Hack */ + if (!(info->emu.mdmreg[REG_CPPP] & BIT_CPPP)) + r = isdn_readbchan_tty(info->isdn_driver, info->isdn_channel, tty, 0); + else + r = isdn_readbchan_tty(info->isdn_driver, info->isdn_channel, tty, 1); + if (r) + tty_flip_buffer_push(tty); } else r = 1; } else @@ -249,7 +246,7 @@ isdn_tty_rcv_skb(int i, int di, int channel, struct sk_buff *skb) } #endif #endif - /* Try to deliver directly via 
tty-flip-buf if queue is empty */ + /* Try to deliver directly via tty-buf if queue is empty */ spin_lock_irqsave(&info->readlock, flags); if (skb_queue_empty(&dev->drv[di]->rpqueue[channel])) if (isdn_tty_try_read(info, skb)) { @@ -534,7 +531,7 @@ isdn_tty_senddown(modem_info * info) /* The next routine is called once from within timer-interrupt * triggered within isdn_tty_modem_ncarrier(). It calls * isdn_tty_modem_result() to stuff a "NO CARRIER" Message - * into the tty's flip-buffer. + * into the tty's buffer. */ static void isdn_tty_modem_do_ncarrier(unsigned long data) @@ -2347,6 +2344,7 @@ isdn_tty_at_cout(char *msg, modem_info * info) u_long flags; struct sk_buff *skb = NULL; char *sp = NULL; + int l = strlen(msg); if (!msg) { printk(KERN_WARNING "isdn_tty: Null-Message in isdn_tty_at_cout\n"); @@ -2359,16 +2357,16 @@ isdn_tty_at_cout(char *msg, modem_info * info) return; } - /* use queue instead of direct flip, if online and */ - /* data is in queue or flip buffer is full */ - if ((info->online) && (((tty->flip.count + strlen(msg)) >= TTY_FLIPBUF_SIZE) || - (!skb_queue_empty(&dev->drv[info->isdn_driver]->rpqueue[info->isdn_channel])))) { - skb = alloc_skb(strlen(msg), GFP_ATOMIC); + /* use queue instead of direct, if online and */ + /* data is in queue or buffer is full */ + if ((info->online && tty_buffer_request_room(tty, l) < l) || + (!skb_queue_empty(&dev->drv[info->isdn_driver]->rpqueue[info->isdn_channel]))) { + skb = alloc_skb(l, GFP_ATOMIC); if (!skb) { spin_unlock_irqrestore(&info->readlock, flags); return; } - sp = skb_put(skb, strlen(msg)); + sp = skb_put(skb, l); #ifdef CONFIG_ISDN_AUDIO ISDN_AUDIO_SKB_DLECOUNT(skb) = 0; ISDN_AUDIO_SKB_LOCK(skb) = 0; @@ -2392,9 +2390,8 @@ isdn_tty_at_cout(char *msg, modem_info * info) if (skb) { *sp++ = c; } else { - if (tty->flip.count >= TTY_FLIPBUF_SIZE) + if(tty_insert_flip_char(tty, c, TTY_NORMAL) == 0) break; - tty_insert_flip_char(tty, c, 0); } } if (skb) { @@ -2402,12 +2399,12 @@ isdn_tty_at_cout(char 
*msg, modem_info * info) dev->drv[info->isdn_driver]->rcvcount[info->isdn_channel] += skb->len; spin_unlock_irqrestore(&info->readlock, flags); /* Schedule dequeuing */ - if ((dev->modempoll) && (info->rcvsched)) + if (dev->modempoll && info->rcvsched) isdn_timer_ctrl(ISDN_TIMER_MODEMREAD, 1); } else { spin_unlock_irqrestore(&info->readlock, flags); - schedule_delayed_work(&tty->flip.work, 1); + tty_flip_buffer_push(tty); } } diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 90999867a32c..102c1f0b90da 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -456,11 +456,6 @@ out: /* ----------------------------------------------------------------------- */ -static int sixpack_receive_room(struct tty_struct *tty) -{ - return 65536; /* We can handle an infinite amount of data. :-) */ -} - /* * Handle the 'receiver data ready' interrupt. * This function is called by the 'tty_io' module in the kernel when @@ -671,6 +666,7 @@ static int sixpack_open(struct tty_struct *tty) /* Done. We have linked the TTY line to a channel. */ tty->disc_data = sp; + tty->receive_room = 65536; /* Now we're ready to register. 
*/ if (register_netdev(dev)) @@ -802,7 +798,6 @@ static struct tty_ldisc sp_ldisc = { .close = sixpack_close, .ioctl = sixpack_ioctl, .receive_buf = sixpack_receive_buf, - .receive_room = sixpack_receive_room, .write_wakeup = sixpack_write_wakeup, }; diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index f4424cf886c5..dc5e9d59deed 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -753,6 +753,7 @@ static int mkiss_open(struct tty_struct *tty) ax->tty = tty; tty->disc_data = ax; + tty->receive_room = 65535; if (tty->driver->flush_buffer) tty->driver->flush_buffer(tty); @@ -940,11 +941,6 @@ static void mkiss_receive_buf(struct tty_struct *tty, const unsigned char *cp, tty->driver->unthrottle(tty); } -static int mkiss_receive_room(struct tty_struct *tty) -{ - return 65536; /* We can handle an infinite amount of data. :-) */ -} - /* * Called by the driver when there's room for more data. If we have * more packets to send, we send them here. @@ -983,7 +979,6 @@ static struct tty_ldisc ax_ldisc = { .close = mkiss_close, .ioctl = mkiss_ioctl, .receive_buf = mkiss_receive_buf, - .receive_room = mkiss_receive_room, .write_wakeup = mkiss_write_wakeup }; diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index b8d112348ba4..101750bf210f 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -288,22 +288,6 @@ static void irtty_receive_buf(struct tty_struct *tty, const unsigned char *cp, sirdev_receive(dev, cp, count); } -/* - * Function irtty_receive_room (tty) - * - * Used by the TTY to find out how much data we can receive at a time - * -*/ -static int irtty_receive_room(struct tty_struct *tty) -{ - struct sirtty_cb *priv = tty->disc_data; - - IRDA_ASSERT(priv != NULL, return 0;); - IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return 0;); - - return 65536; /* We can handle an infinite amount of data. 
:-) */ -} - /* * Function irtty_write_wakeup (tty) * @@ -534,6 +518,7 @@ static int irtty_open(struct tty_struct *tty) dev->priv = priv; tty->disc_data = priv; + tty->receive_room = 65536; up(&irtty_sem); @@ -605,7 +590,6 @@ static struct tty_ldisc irda_ldisc = { .ioctl = irtty_ioctl, .poll = NULL, .receive_buf = irtty_receive_buf, - .receive_room = irtty_receive_room, .write_wakeup = irtty_write_wakeup, .owner = THIS_MODULE, }; diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index 400f652282d7..aa6540b39466 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -189,7 +189,7 @@ ppp_asynctty_open(struct tty_struct *tty) goto out_free; tty->disc_data = ap; - + tty->receive_room = 65536; return 0; out_free: @@ -343,12 +343,6 @@ ppp_asynctty_poll(struct tty_struct *tty, struct file *file, poll_table *wait) return 0; } -static int -ppp_asynctty_room(struct tty_struct *tty) -{ - return 65535; -} - /* * This can now be called from hard interrupt level as well * as soft interrupt level or mainline. @@ -398,7 +392,6 @@ static struct tty_ldisc ppp_ldisc = { .write = ppp_asynctty_write, .ioctl = ppp_asynctty_ioctl, .poll = ppp_asynctty_poll, - .receive_room = ppp_asynctty_room, .receive_buf = ppp_asynctty_receive, .write_wakeup = ppp_asynctty_wakeup, }; diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c index 4d51c0c8023d..33cb8254e79d 100644 --- a/drivers/net/ppp_synctty.c +++ b/drivers/net/ppp_synctty.c @@ -237,7 +237,7 @@ ppp_sync_open(struct tty_struct *tty) goto out_free; tty->disc_data = ap; - + tty->receive_room = 65536; return 0; out_free: @@ -384,12 +384,6 @@ ppp_sync_poll(struct tty_struct *tty, struct file *file, poll_table *wait) return 0; } -static int -ppp_sync_room(struct tty_struct *tty) -{ - return 65535; -} - /* * This can now be called from hard interrupt level as well * as soft interrupt level or mainline. 
@@ -439,7 +433,6 @@ static struct tty_ldisc ppp_sync_ldisc = { .write = ppp_sync_write, .ioctl = ppp_synctty_ioctl, .poll = ppp_sync_poll, - .receive_room = ppp_sync_room, .receive_buf = ppp_sync_receive, .write_wakeup = ppp_sync_wakeup, }; diff --git a/drivers/net/slip.c b/drivers/net/slip.c index 404ea4297e32..b2e18d28850d 100644 --- a/drivers/net/slip.c +++ b/drivers/net/slip.c @@ -651,11 +651,6 @@ static void sl_setup(struct net_device *dev) ******************************************/ -static int slip_receive_room(struct tty_struct *tty) -{ - return 65536; /* We can handle an infinite amount of data. :-) */ -} - /* * Handle the 'receiver data ready' interrupt. * This function is called by the 'tty_io' module in the kernel when @@ -869,10 +864,6 @@ static int slip_open(struct tty_struct *tty) sl->line = tty_devnum(tty); sl->pid = current->pid; - /* FIXME: already done before we were called - seems this can go */ - if (tty->driver->flush_buffer) - tty->driver->flush_buffer(tty); - if (!test_bit(SLF_INUSE, &sl->flags)) { /* Perform the low-level SLIP initialization. */ if ((err = sl_alloc_bufs(sl, SL_MTU)) != 0) @@ -897,6 +888,7 @@ static int slip_open(struct tty_struct *tty) /* Done. We have linked the TTY line to a channel. 
*/ rtnl_unlock(); + tty->receive_room = 65536; /* We don't flow control */ return sl->dev->base_addr; err_free_bufs: @@ -1329,7 +1321,6 @@ static struct tty_ldisc sl_ldisc = { .close = slip_close, .ioctl = slip_ioctl, .receive_buf = slip_receive_buf, - .receive_room = slip_receive_room, .write_wakeup = slip_write_wakeup, }; diff --git a/drivers/net/wan/pc300_tty.c b/drivers/net/wan/pc300_tty.c index 52f26b9c69d2..931cbdf6d791 100644 --- a/drivers/net/wan/pc300_tty.c +++ b/drivers/net/wan/pc300_tty.c @@ -689,7 +689,7 @@ static void cpc_tty_rx_work(void * data) } } cpc_tty->buf_rx.first = cpc_tty->buf_rx.first->next; - kfree(buf); + kfree((void *)buf); buf = cpc_tty->buf_rx.first; flg_rx = 1; } diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index bdf672c48182..9c3ccc669143 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -515,11 +515,6 @@ static int x25_asy_close(struct net_device *dev) return 0; } -static int x25_asy_receive_room(struct tty_struct *tty) -{ - return 65536; /* We can handle an infinite amount of data. :-) */ -} - /* * Handle the 'receiver data ready' interrupt. 
* This function is called by the 'tty_io' module in the kernel when @@ -573,6 +568,7 @@ static int x25_asy_open_tty(struct tty_struct *tty) sl->tty = tty; tty->disc_data = sl; + tty->receive_room = 65536; if (tty->driver->flush_buffer) { tty->driver->flush_buffer(tty); } @@ -779,7 +775,6 @@ static struct tty_ldisc x25_ldisc = { .close = x25_asy_close_tty, .ioctl = x25_asy_ioctl, .receive_buf = x25_asy_receive_buf, - .receive_room = x25_asy_receive_room, .write_wakeup = x25_asy_write_wakeup, }; diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index d25264ba0c0e..18baacfc5a2c 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -1675,11 +1675,6 @@ static int strip_rebuild_header(struct sk_buff *skb) /************************************************************************/ /* Receiving routines */ -static int strip_receive_room(struct tty_struct *tty) -{ - return 0x10000; /* We can handle an infinite amount of data. :-) */ -} - /* * This function parses the response to the ATS300? command, * extracting the radio version and serial number. @@ -2424,7 +2419,7 @@ static struct net_device_stats *strip_get_stats(struct net_device *dev) /* * Here's the order things happen: * When the user runs "slattach -p strip ..." - * 1. The TTY module calls strip_open + * 1. The TTY module calls strip_open;; * 2. strip_open calls strip_alloc * 3. strip_alloc calls register_netdev * 4. 
register_netdev calls strip_dev_init @@ -2652,6 +2647,8 @@ static int strip_open(struct tty_struct *tty) strip_info->tty = tty; tty->disc_data = strip_info; + tty->receive_room = 65536; + if (tty->driver->flush_buffer) tty->driver->flush_buffer(tty); @@ -2762,7 +2759,6 @@ static struct tty_ldisc strip_ldisc = { .close = strip_close, .ioctl = strip_ioctl, .receive_buf = strip_receive_buf, - .receive_room = strip_receive_room, .write_wakeup = strip_write_some_more, }; diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 75419cf9d353..1f060914cfa4 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -432,8 +433,6 @@ raw3215_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) if (count > slen) count = slen; } else - if (count >= TTY_FLIPBUF_SIZE - tty->flip.count) - count = TTY_FLIPBUF_SIZE - tty->flip.count - 1; EBCASC(raw->inbuf, count); cchar = ctrlchar_handle(raw->inbuf, count, tty); switch (cchar & CTRLCHAR_MASK) { @@ -441,28 +440,20 @@ raw3215_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) break; case CTRLCHAR_CTRL: - tty->flip.count++; - *tty->flip.flag_buf_ptr++ = TTY_NORMAL; - *tty->flip.char_buf_ptr++ = cchar; + tty_insert_flip_char(tty, cchar, TTY_NORMAL); tty_flip_buffer_push(raw->tty); break; case CTRLCHAR_NONE: - memcpy(tty->flip.char_buf_ptr, - raw->inbuf, count); if (count < 2 || - (strncmp(raw->inbuf+count-2, "^n", 2) && - strncmp(raw->inbuf+count-2, "\252n", 2)) ) { - /* don't add the auto \n */ - tty->flip.char_buf_ptr[count] = '\n'; - memset(tty->flip.flag_buf_ptr, - TTY_NORMAL, count + 1); + (strncmp(raw->inbuf+count-2, "\252n", 2) && + strncmp(raw->inbuf+count-2, "^n", 2)) ) { + /* add the auto \n */ + raw->inbuf[count] = '\n'; count++; } else - count-=2; - tty->flip.char_buf_ptr += count; - tty->flip.flag_buf_ptr += count; - tty->flip.count += count; + count -= 2; + 
tty_insert_flip_string(tty, raw->inbuf, count); tty_flip_buffer_push(raw->tty); break; } diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c index a20d7c89341d..6cbf067f1a8f 100644 --- a/drivers/s390/char/sclp_tty.c +++ b/drivers/s390/char/sclp_tty.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -496,25 +497,19 @@ sclp_tty_input(unsigned char* buf, unsigned int count) case CTRLCHAR_SYSRQ: break; case CTRLCHAR_CTRL: - sclp_tty->flip.count++; - *sclp_tty->flip.flag_buf_ptr++ = TTY_NORMAL; - *sclp_tty->flip.char_buf_ptr++ = cchar; + tty_insert_flip_char(sclp_tty, cchar, TTY_NORMAL); tty_flip_buffer_push(sclp_tty); break; case CTRLCHAR_NONE: /* send (normal) input to line discipline */ - memcpy(sclp_tty->flip.char_buf_ptr, buf, count); if (count < 2 || - (strncmp ((const char *) buf + count - 2, "^n", 2) && - strncmp ((const char *) buf + count - 2, "\0252n", 2))) { - sclp_tty->flip.char_buf_ptr[count] = '\n'; - count++; + (strncmp((const char *) buf + count - 2, "^n", 2) && + strncmp((const char *) buf + count - 2, "\252n", 2))) { + /* add the auto \n */ + tty_insert_flip_string(sclp_tty, buf, count); + tty_insert_flip_char(sclp_tty, '\n', TTY_NORMAL); } else - count -= 2; - memset(sclp_tty->flip.flag_buf_ptr, TTY_NORMAL, count); - sclp_tty->flip.char_buf_ptr += count; - sclp_tty->flip.flag_buf_ptr += count; - sclp_tty->flip.count += count; + tty_insert_flip_string(sclp_tty, buf, count - 2); tty_flip_buffer_push(sclp_tty); break; } diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c index 06bd85824d7b..9e02625c82cf 100644 --- a/drivers/s390/char/sclp_vt220.c +++ b/drivers/s390/char/sclp_vt220.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -482,16 +483,7 @@ sclp_vt220_receiver_fn(struct evbuf_header *evbuf) /* Send input to line discipline */ buffer++; count--; - /* Prevent buffer overrun by discarding input. 
Note that - * because buffer_push works asynchronously, we cannot wait - * for the buffer to be emptied. */ - if (count + sclp_vt220_tty->flip.count > TTY_FLIPBUF_SIZE) - count = TTY_FLIPBUF_SIZE - sclp_vt220_tty->flip.count; - memcpy(sclp_vt220_tty->flip.char_buf_ptr, buffer, count); - memset(sclp_vt220_tty->flip.flag_buf_ptr, TTY_NORMAL, count); - sclp_vt220_tty->flip.char_buf_ptr += count; - sclp_vt220_tty->flip.flag_buf_ptr += count; - sclp_vt220_tty->flip.count += count; + tty_insert_flip_string(sclp_vt220_tty, buffer, count); tty_flip_buffer_push(sclp_vt220_tty); break; } diff --git a/drivers/s390/net/ctctty.c b/drivers/s390/net/ctctty.c index 968f2c113efe..93d1725eb79b 100644 --- a/drivers/s390/net/ctctty.c +++ b/drivers/s390/net/ctctty.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -101,25 +102,17 @@ static spinlock_t ctc_tty_lock; static int ctc_tty_try_read(ctc_tty_info * info, struct sk_buff *skb) { - int c; int len; struct tty_struct *tty; DBF_TEXT(trace, 5, __FUNCTION__); if ((tty = info->tty)) { if (info->mcr & UART_MCR_RTS) { - c = TTY_FLIPBUF_SIZE - tty->flip.count; len = skb->len; - if (c >= len) { - memcpy(tty->flip.char_buf_ptr, skb->data, len); - memset(tty->flip.flag_buf_ptr, 0, len); - tty->flip.count += len; - tty->flip.char_buf_ptr += len; - tty->flip.flag_buf_ptr += len; - tty_flip_buffer_push(tty); - kfree_skb(skb); - return 1; - } + tty_insert_flip_string(tty, skb->data, len); + tty_flip_buffer_push(tty); + kfree_skb(skb); + return 1; } } return 0; @@ -138,19 +131,12 @@ ctc_tty_readmodem(ctc_tty_info *info) DBF_TEXT(trace, 5, __FUNCTION__); if ((tty = info->tty)) { if (info->mcr & UART_MCR_RTS) { - int c = TTY_FLIPBUF_SIZE - tty->flip.count; struct sk_buff *skb; - if ((c > 0) && (skb = skb_dequeue(&info->rx_queue))) { + if ((skb = skb_dequeue(&info->rx_queue))) { int len = skb->len; - if (len > c) - len = c; - memcpy(tty->flip.char_buf_ptr, skb->data, len); + tty_insert_flip_string(tty, skb->data, 
len); skb_pull(skb, len); - memset(tty->flip.flag_buf_ptr, 0, len); - tty->flip.count += len; - tty->flip.char_buf_ptr += len; - tty->flip.flag_buf_ptr += len; tty_flip_buffer_push(tty); if (skb->len > 0) skb_queue_head(&info->rx_queue, skb); diff --git a/drivers/serial/21285.c b/drivers/serial/21285.c index b5cf39468d18..221999bcf8fe 100644 --- a/drivers/serial/21285.c +++ b/drivers/serial/21285.c @@ -94,15 +94,6 @@ static irqreturn_t serial21285_rx_chars(int irq, void *dev_id, struct pt_regs *r status = *CSR_UARTFLG; while (!(status & 0x10) && max_count--) { - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { - if (tty->low_latency) - tty_flip_buffer_push(tty); - /* - * If this failed then we will throw away the - * bytes but must do so to clear interrupts - */ - } - ch = *CSR_UARTDR; flag = TTY_NORMAL; port->icount.rx++; diff --git a/drivers/serial/68328serial.c b/drivers/serial/68328serial.c index 67e9afa000c1..4dd5c3f98167 100644 --- a/drivers/serial/68328serial.c +++ b/drivers/serial/68328serial.c @@ -294,7 +294,7 @@ static _INLINE_ void receive_chars(struct m68k_serial *info, struct pt_regs *reg { struct tty_struct *tty = info->tty; m68328_uart *uart = &uart_addr[info->line]; - unsigned char ch; + unsigned char ch, flag; /* * This do { } while() loop will get ALL chars out of Rx FIFO @@ -332,26 +332,24 @@ static _INLINE_ void receive_chars(struct m68k_serial *info, struct pt_regs *reg /* * Make sure that we do not overflow the buffer */ - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { + if (tty_request_buffer_room(tty, 1) == 0) { schedule_work(&tty->flip.work); return; } + flag = TTY_NORMAL; + if(rx & URX_PARITY_ERROR) { - *tty->flip.flag_buf_ptr++ = TTY_PARITY; + flag = TTY_PARITY; status_handle(info, rx); } else if(rx & URX_OVRUN) { - *tty->flip.flag_buf_ptr++ = TTY_OVERRUN; + flag = TTY_OVERRUN; status_handle(info, rx); } else if(rx & URX_FRAME_ERROR) { - *tty->flip.flag_buf_ptr++ = TTY_FRAME; + flag = TTY_FRAME; status_handle(info, rx); - } else { - 
*tty->flip.flag_buf_ptr++ = 0; /* XXX */ } - *tty->flip.char_buf_ptr++ = ch; - tty->flip.count++; - + tty_insert_flip_char(tty, ch, flag); #ifndef CONFIG_XCOPILOT_BUGS } while((rx = uart->urx.w) & URX_DATA_READY); #endif diff --git a/drivers/serial/68360serial.c b/drivers/serial/68360serial.c index 170c9d2a749c..60f5a5dc17f1 100644 --- a/drivers/serial/68360serial.c +++ b/drivers/serial/68360serial.c @@ -394,7 +394,7 @@ static void rs_360_start(struct tty_struct *tty) static _INLINE_ void receive_chars(ser_info_t *info) { struct tty_struct *tty = info->tty; - unsigned char ch, *cp; + unsigned char ch, flag, *cp; /*int ignored = 0;*/ int i; ushort status; @@ -438,24 +438,15 @@ static _INLINE_ void receive_chars(ser_info_t *info) cp = (char *)bdp->buf; status = bdp->status; - /* Check to see if there is room in the tty buffer for - * the characters in our BD buffer. If not, we exit - * now, leaving the BD with the characters. We'll pick - * them up again on the next receive interrupt (which could - * be a timeout). 
- */ - if ((tty->flip.count + i) >= TTY_FLIPBUF_SIZE) - break; - while (i-- > 0) { ch = *cp++; - *tty->flip.char_buf_ptr = ch; icount->rx++; #ifdef SERIAL_DEBUG_INTR printk("DR%02x:%02x...", ch, status); #endif - *tty->flip.flag_buf_ptr = 0; + flag = TTY_NORMAL; + if (status & (BD_SC_BR | BD_SC_FR | BD_SC_PR | BD_SC_OV)) { /* @@ -490,30 +481,18 @@ static _INLINE_ void receive_chars(ser_info_t *info) if (info->flags & ASYNC_SAK) do_SAK(tty); } else if (status & BD_SC_PR) - *tty->flip.flag_buf_ptr = TTY_PARITY; + flag = TTY_PARITY; else if (status & BD_SC_FR) - *tty->flip.flag_buf_ptr = TTY_FRAME; - if (status & BD_SC_OV) { - /* - * Overrun is special, since it's - * reported immediately, and doesn't - * affect the current character - */ - if (tty->flip.count < TTY_FLIPBUF_SIZE) { - tty->flip.count++; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - *tty->flip.flag_buf_ptr = - TTY_OVERRUN; - } - } + flag = TTY_FRAME; } - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - break; - - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; + tty_insert_flip_char(tty, ch, flag); + if (status & BD_SC_OV) + /* + * Overrun is special, since it's + * reported immediately, and doesn't + * affect the current character + */ + tty_insert_flip_char(tty, 0, TTY_OVERRUN); } /* This BD is ready to be used again. Clear status. @@ -541,12 +520,7 @@ static _INLINE_ void receive_break(ser_info_t *info) /* Check to see if there is room in the tty buffer for * the break. If not, we exit now, losing the break. 
FIXME */ - if ((tty->flip.count + 1) >= TTY_FLIPBUF_SIZE) - return; - *(tty->flip.flag_buf_ptr++) = TTY_BREAK; - *(tty->flip.char_buf_ptr++) = 0; - tty->flip.count++; - + tty_insert_flip_char(tty, 0, TTY_BREAK); schedule_work(&tty->flip.work); } diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index e8454611cb65..54e5cc0dd5f8 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -1142,19 +1142,6 @@ receive_chars(struct uart_8250_port *up, int *status, struct pt_regs *regs) char flag; do { - /* The following is not allowed by the tty layer and - unsafe. It should be fixed ASAP */ - if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { - if (tty->low_latency) { - spin_unlock(&up->port.lock); - tty_flip_buffer_push(tty); - spin_lock(&up->port.lock); - } - /* - * If this failed then we will throw away the - * bytes but must do so to clear interrupts - */ - } ch = serial_inp(up, UART_RX); flag = TTY_NORMAL; up->port.icount.rx++; diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c index 48f6e872314b..3490022e9fdc 100644 --- a/drivers/serial/amba-pl010.c +++ b/drivers/serial/amba-pl010.c @@ -154,15 +154,6 @@ pl010_rx_chars(struct uart_port *port) status = UART_GET_FR(port); while (UART_RX_DATA(status) && max_count--) { - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { - if (tty->low_latency) - tty_flip_buffer_push(tty); - /* - * If this failed then we will throw away the - * bytes but must do so to clear interrupts. 
- */ - } - ch = UART_GET_CHAR(port); flag = TTY_NORMAL; diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c index 129670556162..034a029e356e 100644 --- a/drivers/serial/amba-pl011.c +++ b/drivers/serial/amba-pl011.c @@ -120,15 +120,6 @@ pl011_rx_chars(struct uart_amba_port *uap) status = readw(uap->port.membase + UART01x_FR); while ((status & UART01x_FR_RXFE) == 0 && max_count--) { - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { - if (tty->low_latency) - tty_flip_buffer_push(tty); - /* - * If this failed then we will throw away the - * bytes but must do so to clear interrupts - */ - } - ch = readw(uap->port.membase + UART01x_DR) | UART_DUMMY_DR_RX; flag = TTY_NORMAL; uap->port.icount.rx++; diff --git a/drivers/serial/au1x00_uart.c b/drivers/serial/au1x00_uart.c index a274ebf256a1..ceb5d7f37bbd 100644 --- a/drivers/serial/au1x00_uart.c +++ b/drivers/serial/au1x00_uart.c @@ -241,18 +241,12 @@ static _INLINE_ void receive_chars(struct uart_8250_port *up, int *status, struct pt_regs *regs) { struct tty_struct *tty = up->port.info->tty; - unsigned char ch; + unsigned char ch, flag; int max_count = 256; do { - if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { - tty->flip.work.func((void *)tty); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - return; // if TTY_DONT_FLIP is set - } ch = serial_inp(up, UART_RX); - *tty->flip.char_buf_ptr = ch; - *tty->flip.flag_buf_ptr = TTY_NORMAL; + flag = TTY_NORMAL; up->port.icount.rx++; if (unlikely(*status & (UART_LSR_BI | UART_LSR_PE | @@ -292,30 +286,23 @@ receive_chars(struct uart_8250_port *up, int *status, struct pt_regs *regs) #endif if (*status & UART_LSR_BI) { DEBUG_INTR("handling break...."); - *tty->flip.flag_buf_ptr = TTY_BREAK; + flag = TTY_BREAK; } else if (*status & UART_LSR_PE) - *tty->flip.flag_buf_ptr = TTY_PARITY; + flag = TTY_PARITY; else if (*status & UART_LSR_FE) - *tty->flip.flag_buf_ptr = TTY_FRAME; + flag = TTY_FRAME; } if (uart_handle_sysrq_char(&up->port, ch, regs)) goto ignore_char; - if 
((*status & up->port.ignore_status_mask) == 0) { - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; - } - if ((*status & UART_LSR_OE) && - tty->flip.count < TTY_FLIPBUF_SIZE) { + if ((*status & up->port.ignore_status_mask) == 0) + tty_insert_flip_char(tty, ch, flag); + if (*status & UART_LSR_OE) /* * Overrun is special, since it's reported * immediately, and doesn't affect the current * character. */ - *tty->flip.flag_buf_ptr = TTY_OVERRUN; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; + tty_insert_flip_char(tty, 0, TTY_OVERRUN); } ignore_char: *status = serial_inp(up, UART_LSR); diff --git a/drivers/serial/clps711x.c b/drivers/serial/clps711x.c index 87ef368384fb..8ef999481f93 100644 --- a/drivers/serial/clps711x.c +++ b/drivers/serial/clps711x.c @@ -104,8 +104,6 @@ static irqreturn_t clps711xuart_int_rx(int irq, void *dev_id, struct pt_regs *re while (!(status & SYSFLG_URXFE)) { ch = clps_readl(UARTDR(port)); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - goto ignore_char; port->icount.rx++; flg = TTY_NORMAL; diff --git a/drivers/serial/dz.c b/drivers/serial/dz.c index 4d8516d1bb71..a64ba26a94e8 100644 --- a/drivers/serial/dz.c +++ b/drivers/serial/dz.c @@ -216,8 +216,6 @@ static inline void dz_receive_chars(struct dz_port *dport) if (!tty) break; - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - break; icount->rx++; diff --git a/drivers/serial/icom.c b/drivers/serial/icom.c index eb31125c6a30..144a7a352b28 100644 --- a/drivers/serial/icom.c +++ b/drivers/serial/icom.c @@ -729,19 +729,20 @@ static void recv_interrupt(u16 port_int_reg, struct icom_port *icom_port) unsigned short int status; struct uart_icount *icount; unsigned long offset; + unsigned char flag; trace(icom_port, "RCV_COMPLETE", 0); rcv_buff = icom_port->next_rcv; status = cpu_to_le16(icom_port->statStg->rcv[rcv_buff].flags); while (status & SA_FL_RCV_DONE) { + int first = -1; trace(icom_port, "FID_STATUS", status); count = 
cpu_to_le16(icom_port->statStg->rcv[rcv_buff].leLength); + count = tty_buffer_request_room(tty, count); trace(icom_port, "RCV_COUNT", count); - if (count > (TTY_FLIPBUF_SIZE - tty->flip.count)) - count = TTY_FLIPBUF_SIZE - tty->flip.count; trace(icom_port, "REAL_COUNT", count); @@ -749,15 +750,10 @@ static void recv_interrupt(u16 port_int_reg, struct icom_port *icom_port) cpu_to_le32(icom_port->statStg->rcv[rcv_buff].leBuffer) - icom_port->recv_buf_pci; - memcpy(tty->flip.char_buf_ptr,(unsigned char *) - ((unsigned long)icom_port->recv_buf + offset), count); - + /* Block copy all but the last byte as this may have status */ if (count > 0) { - tty->flip.count += count - 1; - tty->flip.char_buf_ptr += count - 1; - - memset(tty->flip.flag_buf_ptr, 0, count); - tty->flip.flag_buf_ptr += count - 1; + first = icom_port->recv_buf[offset]; + tty_insert_flip_string(tty, icom_port->recv_buf + offset, count - 1); } icount = &icom_port->uart_port.icount; @@ -765,12 +761,14 @@ static void recv_interrupt(u16 port_int_reg, struct icom_port *icom_port) /* Break detect logic */ if ((status & SA_FLAGS_FRAME_ERROR) - && (tty->flip.char_buf_ptr[0] == 0x00)) { + && first == 0) { status &= ~SA_FLAGS_FRAME_ERROR; status |= SA_FLAGS_BREAK_DET; trace(icom_port, "BREAK_DET", 0); } + flag = TTY_NORMAL; + if (status & (SA_FLAGS_BREAK_DET | SA_FLAGS_PARITY_ERROR | SA_FLAGS_FRAME_ERROR | SA_FLAGS_OVERRUN)) { @@ -797,33 +795,26 @@ static void recv_interrupt(u16 port_int_reg, struct icom_port *icom_port) status &= icom_port->read_status_mask; if (status & SA_FLAGS_BREAK_DET) { - *tty->flip.flag_buf_ptr = TTY_BREAK; + flag = TTY_BREAK; } else if (status & SA_FLAGS_PARITY_ERROR) { trace(icom_port, "PARITY_ERROR", 0); - *tty->flip.flag_buf_ptr = TTY_PARITY; + flag = TTY_PARITY; } else if (status & SA_FLAGS_FRAME_ERROR) - *tty->flip.flag_buf_ptr = TTY_FRAME; - - if (status & SA_FLAGS_OVERRUN) { - /* - * Overrun is special, since it's - * reported immediately, and doesn't - * affect the current 
character - */ - if (tty->flip.count < TTY_FLIPBUF_SIZE) { - tty->flip.count++; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - *tty->flip.flag_buf_ptr = TTY_OVERRUN; - } - } + flag = TTY_FRAME; + } - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; - ignore_char: - icom_port->statStg->rcv[rcv_buff].flags = 0; + tty_insert_flip_char(tty, *(icom_port->recv_buf + offset + count - 1), flag); + + if (status & SA_FLAGS_OVERRUN) + /* + * Overrun is special, since it's + * reported immediately, and doesn't + * affect the current character + */ + tty_insert_flip_char(tty, 0, TTY_OVERRUN); +ignore_char: + icom_port->statStg->rcv[rcv_buff].flags = 0; icom_port->statStg->rcv[rcv_buff].leLength = 0; icom_port->statStg->rcv[rcv_buff].WorkingLength = (unsigned short int) cpu_to_le16(RCV_BUFF_SZ); diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c index 83c4c1216587..5c098be9346b 100644 --- a/drivers/serial/imx.c +++ b/drivers/serial/imx.c @@ -256,9 +256,6 @@ static irqreturn_t imx_rxint(int irq, void *dev_id, struct pt_regs *regs) error_return: tty_insert_flip_char(tty, rx, flg); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - goto out; - ignore_char: rx = URXD0((u32)sport->port.membase); } while(rx & URXD_CHARRDY); diff --git a/drivers/serial/ioc4_serial.c b/drivers/serial/ioc4_serial.c index 771676abee60..1d85533d46d2 100644 --- a/drivers/serial/ioc4_serial.c +++ b/drivers/serial/ioc4_serial.c @@ -2327,19 +2327,13 @@ static void receive_chars(struct uart_port *the_port) spin_lock_irqsave(&the_port->lock, pflags); tty = info->tty; - if (request_count > TTY_FLIPBUF_SIZE - tty->flip.count) - request_count = TTY_FLIPBUF_SIZE - tty->flip.count; + request_count = tty_buffer_request_room(tty, IOC4_MAX_CHARS - 2); if (request_count > 0) { icount = &the_port->icount; read_count = do_read(the_port, ch, request_count); if (read_count > 0) { - flip = 1; - memcpy(tty->flip.char_buf_ptr, ch, read_count); - memset(tty->flip.flag_buf_ptr, TTY_NORMAL, 
read_count); - tty->flip.char_buf_ptr += read_count; - tty->flip.flag_buf_ptr += read_count; - tty->flip.count += read_count; + tty_insert_flip_string(tty, ch, read_count); icount->rx += read_count; } } diff --git a/drivers/serial/ip22zilog.c b/drivers/serial/ip22zilog.c index ef132349f310..66f117d15065 100644 --- a/drivers/serial/ip22zilog.c +++ b/drivers/serial/ip22zilog.c @@ -259,13 +259,7 @@ static void ip22zilog_receive_chars(struct uart_ip22zilog_port *up, struct tty_struct *tty = up->port.info->tty; /* XXX info==NULL? */ while (1) { - unsigned char ch, r1; - - if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { - tty->flip.work.func((void *)tty); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - return; /* XXX Ignores SysRq when we need it most. Fix. */ - } + unsigned char ch, r1, flag; r1 = read_zsreg(channel, R1); if (r1 & (PAR_ERR | Rx_OVR | CRC_ERR)) { @@ -303,8 +297,7 @@ static void ip22zilog_receive_chars(struct uart_ip22zilog_port *up, } /* A real serial line, record the character and status. 
*/ - *tty->flip.char_buf_ptr = ch; - *tty->flip.flag_buf_ptr = TTY_NORMAL; + flag = TTY_NORMAL; up->port.icount.rx++; if (r1 & (BRK_ABRT | PAR_ERR | Rx_OVR | CRC_ERR)) { if (r1 & BRK_ABRT) { @@ -321,28 +314,21 @@ static void ip22zilog_receive_chars(struct uart_ip22zilog_port *up, up->port.icount.overrun++; r1 &= up->port.read_status_mask; if (r1 & BRK_ABRT) - *tty->flip.flag_buf_ptr = TTY_BREAK; + flag = TTY_BREAK; else if (r1 & PAR_ERR) - *tty->flip.flag_buf_ptr = TTY_PARITY; + flag = TTY_PARITY; else if (r1 & CRC_ERR) - *tty->flip.flag_buf_ptr = TTY_FRAME; + flag = TTY_FRAME; } if (uart_handle_sysrq_char(&up->port, ch, regs)) goto next_char; if (up->port.ignore_status_mask == 0xff || - (r1 & up->port.ignore_status_mask) == 0) { - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; - } - if ((r1 & Rx_OVR) && - tty->flip.count < TTY_FLIPBUF_SIZE) { - *tty->flip.flag_buf_ptr = TTY_OVERRUN; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; - } + (r1 & up->port.ignore_status_mask) == 0) + tty_insert_flip_char(tty, ch, flag); + + if (r1 & Rx_OVR) + tty_insert_flip_char(tty, 0, TTY_OVERRUN); next_char: ch = readb(&channel->control); ZSDELAY(); diff --git a/drivers/serial/m32r_sio.c b/drivers/serial/m32r_sio.c index b0ecc7537ce5..b48066a64a7d 100644 --- a/drivers/serial/m32r_sio.c +++ b/drivers/serial/m32r_sio.c @@ -331,17 +331,12 @@ static _INLINE_ void receive_chars(struct uart_sio_port *up, int *status, { struct tty_struct *tty = up->port.info->tty; unsigned char ch; + unsigned char flag; int max_count = 256; do { - if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { - tty->flip.work.func((void *)tty); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - return; // if TTY_DONT_FLIP is set - } ch = sio_in(up, SIORXB); - *tty->flip.char_buf_ptr = ch; - *tty->flip.flag_buf_ptr = TTY_NORMAL; + flag = TTY_NORMAL; up->port.icount.rx++; if (unlikely(*status & (UART_LSR_BI | UART_LSR_PE | @@ -380,30 +375,24 @@ static _INLINE_ void 
receive_chars(struct uart_sio_port *up, int *status, if (*status & UART_LSR_BI) { DEBUG_INTR("handling break...."); - *tty->flip.flag_buf_ptr = TTY_BREAK; + flag = TTY_BREAK; } else if (*status & UART_LSR_PE) - *tty->flip.flag_buf_ptr = TTY_PARITY; + flag = TTY_PARITY; else if (*status & UART_LSR_FE) - *tty->flip.flag_buf_ptr = TTY_FRAME; + flag = TTY_FRAME; } if (uart_handle_sysrq_char(&up->port, ch, regs)) goto ignore_char; - if ((*status & up->port.ignore_status_mask) == 0) { - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; - } - if ((*status & UART_LSR_OE) && - tty->flip.count < TTY_FLIPBUF_SIZE) { + if ((*status & up->port.ignore_status_mask) == 0) + tty_insert_flip_char(tty, ch, flag); + + if (*status & UART_LSR_OE) { /* * Overrun is special, since it's reported * immediately, and doesn't affect the current * character. */ - *tty->flip.flag_buf_ptr = TTY_OVERRUN; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; + tty_insert_flip_char(tty, 0, TTY_OVERRUN); } ignore_char: *status = serial_in(up, UART_LSR); diff --git a/drivers/serial/mcfserial.c b/drivers/serial/mcfserial.c index 47f7404cb045..f2a51e61eec7 100644 --- a/drivers/serial/mcfserial.c +++ b/drivers/serial/mcfserial.c @@ -313,7 +313,7 @@ static inline void receive_chars(struct mcf_serial *info) { volatile unsigned char *uartp; struct tty_struct *tty = info->tty; - unsigned char status, ch; + unsigned char status, ch, flag; if (!tty) return; @@ -321,10 +321,6 @@ static inline void receive_chars(struct mcf_serial *info) uartp = info->addr; while ((status = uartp[MCFUART_USR]) & MCFUART_USR_RXREADY) { - - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - break; - ch = uartp[MCFUART_URB]; info->stats.rx++; @@ -335,29 +331,24 @@ static inline void receive_chars(struct mcf_serial *info) } #endif - tty->flip.count++; + flag = TTY_NORMAL; if (status & MCFUART_USR_RXERR) { uartp[MCFUART_UCR] = MCFUART_UCR_CMDRESETERR; if (status & MCFUART_USR_RXBREAK) { 
info->stats.rxbreak++; - *tty->flip.flag_buf_ptr++ = TTY_BREAK; + flag = TTY_BREAK; } else if (status & MCFUART_USR_RXPARITY) { info->stats.rxparity++; - *tty->flip.flag_buf_ptr++ = TTY_PARITY; + flag = TTY_PARITY; } else if (status & MCFUART_USR_RXOVERRUN) { info->stats.rxoverrun++; - *tty->flip.flag_buf_ptr++ = TTY_OVERRUN; + flag = TTY_OVERRUN; } else if (status & MCFUART_USR_RXFRAMING) { info->stats.rxframing++; - *tty->flip.flag_buf_ptr++ = TTY_FRAME; - } else { - /* This should never happen... */ - *tty->flip.flag_buf_ptr++ = 0; + flag = TTY_FRAME; } - } else { - *tty->flip.flag_buf_ptr++ = 0; } - *tty->flip.char_buf_ptr++ = ch; + tty_insert_flip_char(tty, ch, flag); } schedule_work(&tty->flip.work); diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c index 1288d6203e94..61dd17d7bace 100644 --- a/drivers/serial/mpc52xx_uart.c +++ b/drivers/serial/mpc52xx_uart.c @@ -405,17 +405,13 @@ static inline int mpc52xx_uart_int_rx_chars(struct uart_port *port, struct pt_regs *regs) { struct tty_struct *tty = port->info->tty; - unsigned char ch; + unsigned char ch, flag; unsigned short status; /* While we can read, do so ! 
*/ while ( (status = in_be16(&PSC(port)->mpc52xx_psc_status)) & MPC52xx_PSC_SR_RXRDY) { - /* If we are full, just stop reading */ - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - break; - /* Get the char */ ch = in_8(&PSC(port)->mpc52xx_psc_buffer_8); @@ -428,45 +424,35 @@ mpc52xx_uart_int_rx_chars(struct uart_port *port, struct pt_regs *regs) #endif /* Store it */ - *tty->flip.char_buf_ptr = ch; - *tty->flip.flag_buf_ptr = 0; + + flag = TTY_NORMAL; port->icount.rx++; if ( status & (MPC52xx_PSC_SR_PE | MPC52xx_PSC_SR_FE | - MPC52xx_PSC_SR_RB | - MPC52xx_PSC_SR_OE) ) { + MPC52xx_PSC_SR_RB) ) { if (status & MPC52xx_PSC_SR_RB) { - *tty->flip.flag_buf_ptr = TTY_BREAK; + flag = TTY_BREAK; uart_handle_break(port); } else if (status & MPC52xx_PSC_SR_PE) - *tty->flip.flag_buf_ptr = TTY_PARITY; + flag = TTY_PARITY; else if (status & MPC52xx_PSC_SR_FE) - *tty->flip.flag_buf_ptr = TTY_FRAME; - if (status & MPC52xx_PSC_SR_OE) { - /* - * Overrun is special, since it's - * reported immediately, and doesn't - * affect the current character - */ - if (tty->flip.count < (TTY_FLIPBUF_SIZE-1)) { - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; - } - *tty->flip.flag_buf_ptr = TTY_OVERRUN; - } + flag = TTY_FRAME; /* Clear error condition */ out_8(&PSC(port)->command,MPC52xx_PSC_RST_ERR_STAT); } - - tty->flip.char_buf_ptr++; - tty->flip.flag_buf_ptr++; - tty->flip.count++; - + tty_insert_flip_char(tty, ch, flag); + if (status & MPC52xx_PSC_SR_OE) { + /* + * Overrun is special, since it's + * reported immediately, and doesn't + * affect the current character + */ + tty_insert_flip_char(tty, 0, TTY_OVERRUN); + } } tty_flip_buffer_push(tty); diff --git a/drivers/serial/mpsc.c b/drivers/serial/mpsc.c index 8f83e4007ecd..0ca83ac31d07 100644 --- a/drivers/serial/mpsc.c +++ b/drivers/serial/mpsc.c @@ -769,12 +769,12 @@ mpsc_rx_intr(struct mpsc_port_info *pi, struct pt_regs *regs) bytes_in = be16_to_cpu(rxre->bytecnt); /* Following use of tty struct directly is 
deprecated */ - if (unlikely((tty->flip.count + bytes_in) >= TTY_FLIPBUF_SIZE)){ + if (unlikely(tty_buffer_request_room(tty, bytes_in) < bytes_in)) { if (tty->low_latency) tty_flip_buffer_push(tty); /* - * If this failed then we will throw awa the bytes - * but mst do so to clear interrupts. + * If this failed then we will throw away the bytes + * but must do so to clear interrupts. */ } diff --git a/drivers/serial/mux.c b/drivers/serial/mux.c index 7633132a10aa..4e49168c3176 100644 --- a/drivers/serial/mux.c +++ b/drivers/serial/mux.c @@ -223,11 +223,6 @@ static void mux_read(struct uart_port *port) if (MUX_EOFIFO(data)) break; - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - continue; - - *tty->flip.char_buf_ptr = data & 0xffu; - *tty->flip.flag_buf_ptr = TTY_NORMAL; port->icount.rx++; if (MUX_BREAK(data)) { @@ -239,9 +234,7 @@ static void mux_read(struct uart_port *port) if (uart_handle_sysrq_char(port, data & 0xffu, NULL)) continue; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; + tty_insert_flip_char(tty, data & 0xFF, TTY_NORMAL); } if (start_count != port->icount.rx) { diff --git a/drivers/serial/pmac_zilog.c b/drivers/serial/pmac_zilog.c index ea24129eb6b9..f330d6c0e0df 100644 --- a/drivers/serial/pmac_zilog.c +++ b/drivers/serial/pmac_zilog.c @@ -210,10 +210,9 @@ static struct tty_struct *pmz_receive_chars(struct uart_pmac_port *uap, struct pt_regs *regs) { struct tty_struct *tty = NULL; - unsigned char ch, r1, drop, error; + unsigned char ch, r1, drop, error, flag; int loops = 0; - retry: /* The interrupt can be enabled when the port isn't open, typically * that happens when using one port is open and the other closed (stale * interrupt) or when one port is used as a console. 
@@ -246,20 +245,6 @@ static struct tty_struct *pmz_receive_chars(struct uart_pmac_port *uap, error = 0; drop = 0; - if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { - /* Have to drop the lock here */ - pmz_debug("pmz: flip overflow\n"); - spin_unlock(&uap->port.lock); - tty->flip.work.func((void *)tty); - spin_lock(&uap->port.lock); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - drop = 1; - if (ZS_IS_ASLEEP(uap)) - return NULL; - if (!ZS_IS_OPEN(uap)) - goto retry; - } - r1 = read_zsreg(uap, R1); ch = read_zsdata(uap); @@ -295,8 +280,7 @@ static struct tty_struct *pmz_receive_chars(struct uart_pmac_port *uap, if (drop) goto next_char; - *tty->flip.char_buf_ptr = ch; - *tty->flip.flag_buf_ptr = TTY_NORMAL; + flag = TTY_NORMAL; uap->port.icount.rx++; if (r1 & (PAR_ERR | Rx_OVR | CRC_ERR | BRK_ABRT)) { @@ -316,26 +300,19 @@ static struct tty_struct *pmz_receive_chars(struct uart_pmac_port *uap, uap->port.icount.overrun++; r1 &= uap->port.read_status_mask; if (r1 & BRK_ABRT) - *tty->flip.flag_buf_ptr = TTY_BREAK; + flag = TTY_BREAK; else if (r1 & PAR_ERR) - *tty->flip.flag_buf_ptr = TTY_PARITY; + flag = TTY_PARITY; else if (r1 & CRC_ERR) - *tty->flip.flag_buf_ptr = TTY_FRAME; + flag = TTY_FRAME; } if (uap->port.ignore_status_mask == 0xff || (r1 & uap->port.ignore_status_mask) == 0) { - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; - } - if ((r1 & Rx_OVR) && - tty->flip.count < TTY_FLIPBUF_SIZE) { - *tty->flip.flag_buf_ptr = TTY_OVERRUN; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; + tty_insert_flip_char(tty, ch, flag); } + if (r1 & Rx_OVR) + tty_insert_flip_char(tty, 0, TTY_OVERRUN); next_char: /* We can get stuck in an infinite loop getting char 0 when the * line is in a wrong HW state, we break that here. 
diff --git a/drivers/serial/pxa.c b/drivers/serial/pxa.c index cc998b99a19f..10535f00301f 100644 --- a/drivers/serial/pxa.c +++ b/drivers/serial/pxa.c @@ -107,14 +107,6 @@ receive_chars(struct uart_pxa_port *up, int *status, struct pt_regs *regs) int max_count = 256; do { - if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { - if (tty->low_latency) - tty_flip_buffer_push(tty); - /* - * If this failed then we will throw away the - * bytes but must do so to clear interrupts - */ - } ch = serial_in(up, UART_RX); flag = TTY_NORMAL; up->port.icount.rx++; diff --git a/drivers/serial/s3c2410.c b/drivers/serial/s3c2410.c index fe83ce6fef52..eb4883efb7c6 100644 --- a/drivers/serial/s3c2410.c +++ b/drivers/serial/s3c2410.c @@ -323,16 +323,6 @@ s3c24xx_serial_rx_chars(int irq, void *dev_id, struct pt_regs *regs) if (s3c24xx_serial_rx_fifocnt(ourport, ufstat) == 0) break; - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { - if (tty->low_latency) - tty_flip_buffer_push(tty); - - /* - * If this failed then we will throw away the - * bytes but must do so to clear interrupts - */ - } - uerstat = rd_regl(port, S3C2410_UERSTAT); ch = rd_regb(port, S3C2410_URXH); diff --git a/drivers/serial/sa1100.c b/drivers/serial/sa1100.c index 25a086458ab9..1bd93168f504 100644 --- a/drivers/serial/sa1100.c +++ b/drivers/serial/sa1100.c @@ -201,8 +201,6 @@ sa1100_rx_chars(struct sa1100_port *sport, struct pt_regs *regs) while (status & UTSR1_TO_SM(UTSR1_RNE)) { ch = UART_GET_CHAR(sport); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - goto ignore_char; sport->port.icount.rx++; flg = TTY_NORMAL; diff --git a/drivers/serial/serial_lh7a40x.c b/drivers/serial/serial_lh7a40x.c index d01dbe5da3b9..d4a1f0e798c1 100644 --- a/drivers/serial/serial_lh7a40x.c +++ b/drivers/serial/serial_lh7a40x.c @@ -148,15 +148,6 @@ lh7a40xuart_rx_chars (struct uart_port* port) unsigned int data, flag;/* Received data and status */ while (!(UR (port, UART_R_STATUS) & nRxRdy) && --cbRxMax) { - if (tty->flip.count >= 
TTY_FLIPBUF_SIZE) { - if (tty->low_latency) - tty_flip_buffer_push(tty); - /* - * If this failed then we will throw away the - * bytes but must do so to clear interrupts - */ - } - data = UR (port, UART_R_DATA); flag = TTY_NORMAL; ++port->icount.rx; diff --git a/drivers/serial/serial_txx9.c b/drivers/serial/serial_txx9.c index 995d9dd9ddd5..fdd1f1915a42 100644 --- a/drivers/serial/serial_txx9.c +++ b/drivers/serial/serial_txx9.c @@ -303,17 +303,6 @@ receive_chars(struct uart_txx9_port *up, unsigned int *status, struct pt_regs *r char flag; do { - /* The following is not allowed by the tty layer and - unsafe. It should be fixed ASAP */ - if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { - if (tty->low_latency) { - spin_unlock(&up->port.lock); - tty_flip_buffer_push(tty); - spin_lock(&up->port.lock); - } - /* If this failed then we will throw away the - bytes but must do so to clear interrupts */ - } ch = sio_in(up, TXX9_SIRFIFO); flag = TTY_NORMAL; up->port.icount.rx++; diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c index 430754ebac8a..a9e070759628 100644 --- a/drivers/serial/sh-sci.c +++ b/drivers/serial/sh-sci.c @@ -482,6 +482,7 @@ static inline void sci_receive_chars(struct uart_port *port, struct tty_struct *tty = port->info->tty; int i, count, copied = 0; unsigned short status; + unsigned char flag; status = sci_in(port, SCxSR); if (!(status & SCxSR_RDxF(port))) @@ -499,8 +500,7 @@ static inline void sci_receive_chars(struct uart_port *port, #endif /* Don't copy more bytes than there is room for in the buffer */ - if (tty->flip.count + count > TTY_FLIPBUF_SIZE) - count = TTY_FLIPBUF_SIZE - tty->flip.count; + count = tty_buffer_request_room(tty, count); /* If for any reason we can't copy more data, we're done! 
*/ if (count == 0) @@ -512,8 +512,7 @@ static inline void sci_receive_chars(struct uart_port *port, || uart_handle_sysrq_char(port, c, regs)) { count = 0; } else { - tty->flip.char_buf_ptr[0] = c; - tty->flip.flag_buf_ptr[0] = TTY_NORMAL; + tty_insert_flip_char(tty, c, TTY_NORMAL); } } else { for (i=0; iflip.char_buf_ptr[i] = c; if (status&SCxSR_FER(port)) { - tty->flip.flag_buf_ptr[i] = TTY_FRAME; + flag = TTY_FRAME; pr_debug("sci: frame error\n"); } else if (status&SCxSR_PER(port)) { - tty->flip.flag_buf_ptr[i] = TTY_PARITY; + flag = TTY_PARITY; pr_debug("sci: parity error\n"); - } else { - tty->flip.flag_buf_ptr[i] = TTY_NORMAL; - } + } else + flag = TTY_NORMAL; + tty_insert_flip_char(tty, c, flag); } } sci_in(port, SCxSR); /* dummy read */ sci_out(port, SCxSR, SCxSR_RDxF_CLEAR(port)); - /* Update the kernel buffer end */ - tty->flip.count += count; - tty->flip.char_buf_ptr += count; - tty->flip.flag_buf_ptr += count; copied += count; port->icount.rx += count; } @@ -608,48 +602,45 @@ static inline int sci_handle_errors(struct uart_port *port) unsigned short status = sci_in(port, SCxSR); struct tty_struct *tty = port->info->tty; - if (status&SCxSR_ORER(port) && tty->flip.countflip.flag_buf_ptr++ = TTY_OVERRUN; + if(tty_insert_flip_char(tty, 0, TTY_OVERRUN)) + copied++; pr_debug("sci: overrun error\n"); } - if (status&SCxSR_FER(port) && tty->flip.countbreak_flag) { - sci_port->break_flag = 1; - sci_schedule_break_timer((struct sci_port *)port); + if(!sci_port->break_flag) { + sci_port->break_flag = 1; + sci_schedule_break_timer((struct sci_port *)port); /* Do sysrq handling. 
*/ - if(uart_handle_break(port)) { + if(uart_handle_break(port)) return 0; - } pr_debug("sci: BREAK detected\n"); - copied++; - *tty->flip.flag_buf_ptr++ = TTY_BREAK; + if(tty_insert_flip_char(tty, 0, TTY_BREAK)) + copied++; } } else { /* frame error */ - copied++; - *tty->flip.flag_buf_ptr++ = TTY_FRAME; + if(tty_insert_flip_char(tty, 0, TTY_FRAME)) + copied++; pr_debug("sci: frame error\n"); } } - if (status&SCxSR_PER(port) && tty->flip.countflip.flag_buf_ptr++ = TTY_PARITY; pr_debug("sci: parity error\n"); } - if (copied) { - tty->flip.count += copied; + if (copied) tty_flip_buffer_push(tty); - } return copied; } @@ -661,15 +652,14 @@ static inline int sci_handle_breaks(struct uart_port *port) struct tty_struct *tty = port->info->tty; struct sci_port *s = &sci_ports[port->line]; - if (!s->break_flag && status & SCxSR_BRK(port) && - tty->flip.count < TTY_FLIPBUF_SIZE) { + if (!s->break_flag && status & SCxSR_BRK(port)) #if defined(CONFIG_CPU_SH3) /* Debounce break */ s->break_flag = 1; #endif /* Notify of BREAK */ - copied++; - *tty->flip.flag_buf_ptr++ = TTY_BREAK; + if(tty_insert_flip_char(tty, 0, TTY_BREAK)) + copied++; pr_debug("sci: BREAK detected\n"); } @@ -677,19 +667,15 @@ static inline int sci_handle_breaks(struct uart_port *port) /* XXX: Handle SCIF overrun error */ if (port->type == PORT_SCIF && (sci_in(port, SCLSR) & SCIF_ORER) != 0) { sci_out(port, SCLSR, 0); - if(tty->flip.countflip.flag_buf_ptr++ = TTY_OVERRUN; pr_debug("sci: overrun error\n"); } } #endif - if (copied) { - tty->flip.count += copied; + if (copied) tty_flip_buffer_push(tty); - } - return copied; } @@ -732,12 +718,9 @@ static irqreturn_t sci_er_interrupt(int irq, void *ptr, struct pt_regs *regs) struct tty_struct *tty = port->info->tty; sci_out(port, SCLSR, 0); - if(tty->flip.countflip.flag_buf_ptr++ = TTY_OVERRUN; - tty->flip.count++; - tty_flip_buffer_push(tty); - pr_debug("scif: overrun error\n"); - } + tty_insert_flip_char(tty, 0, TTY_OVERRUN); + tty_flip_buffer_push(tty); + 
pr_debug("scif: overrun error\n"); } #endif sci_rx_interrupt(irq, ptr, regs); diff --git a/drivers/serial/sn_console.c b/drivers/serial/sn_console.c index 313f9df24a2d..5468e5a767e2 100644 --- a/drivers/serial/sn_console.c +++ b/drivers/serial/sn_console.c @@ -519,11 +519,7 @@ sn_receive_chars(struct sn_cons_port *port, struct pt_regs *regs, /* record the character to pass up to the tty layer */ if (tty) { - *tty->flip.char_buf_ptr = ch; - *tty->flip.flag_buf_ptr = TTY_NORMAL; - tty->flip.char_buf_ptr++; - tty->flip.count++; - if (tty->flip.count == TTY_FLIPBUF_SIZE) + if(tty_insert_flip_char(tty, ch, TTY_NORMAL) == 0) break; } port->sc_port.icount.rx++; diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c index ba9381fd3f2d..7e773ff76c61 100644 --- a/drivers/serial/sunsab.c +++ b/drivers/serial/sunsab.c @@ -159,21 +159,14 @@ receive_chars(struct uart_sunsab_port *up, saw_console_brk = 1; for (i = 0; i < count; i++) { - unsigned char ch = buf[i]; + unsigned char ch = buf[i], flag; if (tty == NULL) { uart_handle_sysrq_char(&up->port, ch, regs); continue; } - if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { - tty->flip.work.func((void *)tty); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - return tty; // if TTY_DONT_FLIP is set - } - - *tty->flip.char_buf_ptr = ch; - *tty->flip.flag_buf_ptr = TTY_NORMAL; + flag = TTY_NORMAL; up->port.icount.rx++; if (unlikely(stat->sreg.isr0 & (SAB82532_ISR0_PERR | @@ -209,34 +202,21 @@ receive_chars(struct uart_sunsab_port *up, stat->sreg.isr1 &= ((up->port.read_status_mask >> 8) & 0xff); if (stat->sreg.isr1 & SAB82532_ISR1_BRK) { - *tty->flip.flag_buf_ptr = TTY_BREAK; + flag = TTY_BREAK; } else if (stat->sreg.isr0 & SAB82532_ISR0_PERR) - *tty->flip.flag_buf_ptr = TTY_PARITY; + flag = TTY_PARITY; else if (stat->sreg.isr0 & SAB82532_ISR0_FERR) - *tty->flip.flag_buf_ptr = TTY_FRAME; + flag = TTY_FRAME; } if (uart_handle_sysrq_char(&up->port, ch, regs)) continue; if ((stat->sreg.isr0 & (up->port.ignore_status_mask & 0xff)) 
== 0 && - (stat->sreg.isr1 & ((up->port.ignore_status_mask >> 8) & 0xff)) == 0){ - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; - } - if ((stat->sreg.isr0 & SAB82532_ISR0_RFO) && - tty->flip.count < TTY_FLIPBUF_SIZE) { - /* - * Overrun is special, since it's reported - * immediately, and doesn't affect the current - * character. - */ - *tty->flip.flag_buf_ptr = TTY_OVERRUN; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; - } + (stat->sreg.isr1 & ((up->port.ignore_status_mask >> 8) & 0xff)) == 0) + tty_insert_flip_char(tty, ch, flag); + if (stat->sreg.isr0 & SAB82532_ISR0_RFO) + tty_insert_flip_char(tty, 0, TTY_OVERRUN); } if (saw_console_brk) diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c index f0738533f39a..9a3665b34d97 100644 --- a/drivers/serial/sunsu.c +++ b/drivers/serial/sunsu.c @@ -323,19 +323,13 @@ static _INLINE_ struct tty_struct * receive_chars(struct uart_sunsu_port *up, unsigned char *status, struct pt_regs *regs) { struct tty_struct *tty = up->port.info->tty; - unsigned char ch; + unsigned char ch, flag; int max_count = 256; int saw_console_brk = 0; do { - if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { - tty->flip.work.func((void *)tty); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - return tty; // if TTY_DONT_FLIP is set - } ch = serial_inp(up, UART_RX); - *tty->flip.char_buf_ptr = ch; - *tty->flip.flag_buf_ptr = TTY_NORMAL; + flag = TTY_NORMAL; up->port.icount.rx++; if (unlikely(*status & (UART_LSR_BI | UART_LSR_PE | @@ -377,31 +371,23 @@ receive_chars(struct uart_sunsu_port *up, unsigned char *status, struct pt_regs } if (*status & UART_LSR_BI) { - *tty->flip.flag_buf_ptr = TTY_BREAK; + flag = TTY_BREAK; } else if (*status & UART_LSR_PE) - *tty->flip.flag_buf_ptr = TTY_PARITY; + flag = TTY_PARITY; else if (*status & UART_LSR_FE) - *tty->flip.flag_buf_ptr = TTY_FRAME; + flag = TTY_FRAME; } if (uart_handle_sysrq_char(&up->port, ch, regs)) goto ignore_char; - if ((*status & 
up->port.ignore_status_mask) == 0) { - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; - } - if ((*status & UART_LSR_OE) && - tty->flip.count < TTY_FLIPBUF_SIZE) { + if ((*status & up->port.ignore_status_mask) == 0) + tty_insert_flip_char(tty, ch, flag); + if (*status & UART_LSR_OE) /* * Overrun is special, since it's reported * immediately, and doesn't affect the current * character. */ - *tty->flip.flag_buf_ptr = TTY_OVERRUN; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; - } + tty_insert_flip_char(tty, 0, TTY_OVERRUN); ignore_char: *status = serial_inp(up, UART_LSR); } while ((*status & UART_LSR_DR) && (max_count-- > 0)); diff --git a/drivers/serial/sunzilog.c b/drivers/serial/sunzilog.c index 7653d6cf05af..3c72484adea7 100644 --- a/drivers/serial/sunzilog.c +++ b/drivers/serial/sunzilog.c @@ -319,7 +319,7 @@ sunzilog_receive_chars(struct uart_sunzilog_port *up, struct pt_regs *regs) { struct tty_struct *tty; - unsigned char ch, r1; + unsigned char ch, r1, flag; tty = NULL; if (up->port.info != NULL && /* Unopened serial console */ @@ -362,19 +362,8 @@ sunzilog_receive_chars(struct uart_sunzilog_port *up, continue; } - if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { - tty->flip.work.func((void *)tty); - /* - * The 8250 bails out of the loop here, - * but we need to read everything, or die. - */ - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - continue; - } - /* A real serial line, record the character and status. 
*/ - *tty->flip.char_buf_ptr = ch; - *tty->flip.flag_buf_ptr = TTY_NORMAL; + flag = TTY_NORMAL; up->port.icount.rx++; if (r1 & (BRK_ABRT | PAR_ERR | Rx_OVR | CRC_ERR)) { if (r1 & BRK_ABRT) { @@ -391,28 +380,21 @@ sunzilog_receive_chars(struct uart_sunzilog_port *up, up->port.icount.overrun++; r1 &= up->port.read_status_mask; if (r1 & BRK_ABRT) - *tty->flip.flag_buf_ptr = TTY_BREAK; + flag = TTY_BREAK; else if (r1 & PAR_ERR) - *tty->flip.flag_buf_ptr = TTY_PARITY; + flag = TTY_PARITY; else if (r1 & CRC_ERR) - *tty->flip.flag_buf_ptr = TTY_FRAME; + flag = TTY_FRAME; } if (uart_handle_sysrq_char(&up->port, ch, regs)) continue; if (up->port.ignore_status_mask == 0xff || (r1 & up->port.ignore_status_mask) == 0) { - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; - } - if ((r1 & Rx_OVR) && - tty->flip.count < TTY_FLIPBUF_SIZE) { - *tty->flip.flag_buf_ptr = TTY_OVERRUN; - tty->flip.flag_buf_ptr++; - tty->flip.char_buf_ptr++; - tty->flip.count++; + tty_insert_flip_char(tty, ch, flag); } + if (r1 & Rx_OVR) + tty_insert_flip_char(tty, 0, TTY_OVERRUN); } return tty; diff --git a/drivers/serial/vr41xx_siu.c b/drivers/serial/vr41xx_siu.c index 865d4dea65df..0a28deeb098d 100644 --- a/drivers/serial/vr41xx_siu.c +++ b/drivers/serial/vr41xx_siu.c @@ -371,11 +371,6 @@ static inline void receive_chars(struct uart_port *port, uint8_t *status, lsr = *status; do { - if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { - if (tty->low_latency) - tty_flip_buffer_push(tty); - } - ch = siu_read(port, UART_RX); port->icount.rx++; flag = TTY_NORMAL; diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 248279e44c99..b9fd39fd1b5b 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -335,14 +335,9 @@ next_buffer: dbg("acm_rx_tasklet: procesing buf 0x%p, size = %d\n", buf, buf->size); - for (i = 0; i < buf->size && !acm->throttle; i++) { - /* if we insert more than TTY_FLIPBUF_SIZE characters, - we drop them. 
*/ - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - } - tty_insert_flip_char(tty, buf->base[i], 0); - } + tty_buffer_request_room(tty, buf->size); + if (!acm->throttle) + tty_insert_flip_string(tty, buf->base, buf->size); tty_flip_buffer_push(tty); spin_lock(&acm->throttle_lock); diff --git a/drivers/usb/gadget/serial.c b/drivers/usb/gadget/serial.c index 65e084a2c87e..2e6926b33455 100644 --- a/drivers/usb/gadget/serial.c +++ b/drivers/usb/gadget/serial.c @@ -1271,6 +1271,7 @@ static int gs_recv_packet(struct gs_dev *dev, char *packet, unsigned int size) unsigned int len; struct gs_port *port; int ret; + struct tty_struct *tty; /* TEMPORARY -- only port 0 is supported right now */ port = dev->dev_port[0]; @@ -1290,7 +1291,10 @@ static int gs_recv_packet(struct gs_dev *dev, char *packet, unsigned int size) goto exit; } - if (port->port_tty == NULL) { + + tty = port->port_tty; + + if (tty == NULL) { printk(KERN_ERR "gs_recv_packet: port=%d, NULL tty pointer\n", port->port_num); ret = -EIO; @@ -1304,20 +1308,13 @@ static int gs_recv_packet(struct gs_dev *dev, char *packet, unsigned int size) goto exit; } - len = (unsigned int)(TTY_FLIPBUF_SIZE - port->port_tty->flip.count); - if (len < size) - size = len; - - if (size > 0) { - memcpy(port->port_tty->flip.char_buf_ptr, packet, size); - port->port_tty->flip.char_buf_ptr += size; - port->port_tty->flip.count += size; + len = tty_buffer_request_room(tty, size); + if (len > 0) { + tty_insert_flip_string(tty, packet, len); tty_flip_buffer_push(port->port_tty); wake_up_interruptible(&port->port_tty->read_wait); } - ret = 0; - exit: spin_unlock(&port->port_lock); return ret; diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig index 14f55fd26a64..be5dc80836c3 100644 --- a/drivers/usb/serial/Kconfig +++ b/drivers/usb/serial/Kconfig @@ -84,7 +84,7 @@ config USB_SERIAL_BELKIN config USB_SERIAL_WHITEHEAT tristate "USB ConnectTech WhiteHEAT Serial Driver" - depends on USB_SERIAL && 
BROKEN_ON_SMP + depends on USB_SERIAL help Say Y here if you want to use a ConnectTech WhiteHEAT 4 port USB to serial converter device. diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c index 6d18d4eaba35..2357b1d102d7 100644 --- a/drivers/usb/serial/cyberjack.c +++ b/drivers/usb/serial/cyberjack.c @@ -364,7 +364,6 @@ static void cyberjack_read_bulk_callback (struct urb *urb, struct pt_regs *regs) struct tty_struct *tty; unsigned char *data = urb->transfer_buffer; short todo; - int i; int result; dbg("%s - port %d", __FUNCTION__, port->number); @@ -381,14 +380,8 @@ static void cyberjack_read_bulk_callback (struct urb *urb, struct pt_regs *regs) return; } if (urb->actual_length) { - for (i = 0; i < urb->actual_length ; ++i) { - /* if we insert more than TTY_FLIPBUF_SIZE characters, we drop them. */ - if(tty->flip.count >= TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - } - /* this doesn't actually push the data through unless tty->low_latency is set */ - tty_insert_flip_char(tty, data[i], 0); - } + tty_buffer_request_room(tty, urb->actual_length); + tty_insert_flip_string(tty, data, urb->actual_length); tty_flip_buffer_push(tty); } diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c index 4e9637eb6137..68067fe117a4 100644 --- a/drivers/usb/serial/cypress_m8.c +++ b/drivers/usb/serial/cypress_m8.c @@ -1263,12 +1263,10 @@ static void cypress_read_int_callback(struct urb *urb, struct pt_regs *regs) /* process read if there is data other than line status */ if (tty && (bytes > i)) { + bytes = tty_buffer_request_room(tty, bytes); for (; i < bytes ; ++i) { dbg("pushing byte number %d - %d - %c", i, data[i], data[i]); - if(tty->flip.count >= TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - } tty_insert_flip_char(tty, data[i], tty_flag); } tty_flip_buffer_push(port->tty); diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index 8fc414bd5b24..b3f776a90c93 100644 --- 
a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -946,13 +946,10 @@ dbg( "digi_rx_unthrottle: TOP: port=%d", priv->dp_port_num ); spin_lock_irqsave( &priv->dp_port_lock, flags ); /* send any buffered chars from throttle time on to tty subsystem */ - len = min(priv->dp_in_buf_len, TTY_FLIPBUF_SIZE - tty->flip.count ); + + len = tty_buffer_request_room(tty, priv->dp_in_buf_len); if( len > 0 ) { - memcpy( tty->flip.char_buf_ptr, priv->dp_in_buf, len ); - memcpy( tty->flip.flag_buf_ptr, priv->dp_in_flag_buf, len ); - tty->flip.char_buf_ptr += len; - tty->flip.flag_buf_ptr += len; - tty->flip.count += len; + tty_insert_flip_string_flags(tty, priv->dp_in_buf, priv->dp_in_flag_buf, len); tty_flip_buffer_push( tty ); } @@ -1827,6 +1824,7 @@ static int digi_read_inb_callback( struct urb *urb ) int status = ((unsigned char *)urb->transfer_buffer)[2]; unsigned char *data = ((unsigned char *)urb->transfer_buffer)+3; int flag,throttled; + int i; /* do not process callbacks on closed ports */ /* but do continue the read chain */ @@ -1885,20 +1883,18 @@ static int digi_read_inb_callback( struct urb *urb ) } } else { - - len = min( len, TTY_FLIPBUF_SIZE - tty->flip.count ); - + len = tty_buffer_request_room(tty, len); if( len > 0 ) { - memcpy( tty->flip.char_buf_ptr, data, len ); - memset( tty->flip.flag_buf_ptr, flag, len ); - tty->flip.char_buf_ptr += len; - tty->flip.flag_buf_ptr += len; - tty->flip.count += len; + /* Hot path */ + if(flag == TTY_NORMAL) + tty_insert_flip_string(tty, data, len); + else { + for(i = 0; i < len; i++) + tty_insert_flip_char(tty, data[i], flag); + } tty_flip_buffer_push( tty ); } - } - } spin_unlock( &priv->dp_port_lock ); diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c index 79a766e9ca23..63f7c78a1152 100644 --- a/drivers/usb/serial/empeg.c +++ b/drivers/usb/serial/empeg.c @@ -344,7 +344,6 @@ static void empeg_read_bulk_callback (struct urb *urb, struct pt_regs *regs) struct usb_serial_port 
*port = (struct usb_serial_port *)urb->context; struct tty_struct *tty; unsigned char *data = urb->transfer_buffer; - int i; int result; dbg("%s - port %d", __FUNCTION__, port->number); @@ -359,19 +358,8 @@ static void empeg_read_bulk_callback (struct urb *urb, struct pt_regs *regs) tty = port->tty; if (urb->actual_length) { - for (i = 0; i < urb->actual_length ; ++i) { - /* gb - 2000/11/13 - * If we insert too many characters we'll overflow the buffer. - * This means we'll lose bytes - Decidedly bad. - */ - if(tty->flip.count >= TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - } - tty_insert_flip_char(tty, data[i], 0); - } - /* gb - 2000/11/13 - * Goes straight through instead of scheduling - if tty->low_latency is set. - */ + tty_buffer_request_room(tty, urb->actual_length); + tty_insert_flip_string(tty, data, urb->actual_length); tty_flip_buffer_push(tty); bytes_in += urb->actual_length; } diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index eb863b3f2d79..10bc1bf23b35 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1610,24 +1610,11 @@ static void ftdi_process_read (void *param) length = 0; } - /* have to make sure we don't overflow the buffer - with tty_insert_flip_char's */ - if (tty->flip.count+length > TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - need_flip = 0; - - if (tty->flip.count != 0) { - /* flip didn't work, this happens when ftdi_process_read() is - * called from ftdi_unthrottle, because TTY_DONT_FLIP is set */ - dbg("%s - flip buffer push failed", __FUNCTION__); - break; - } - } if (priv->rx_flags & THROTTLED) { dbg("%s - throttled", __FUNCTION__); break; } - if (tty->ldisc.receive_room(tty)-tty->flip.count < length) { + if (tty_buffer_request_room(tty, length) < length) { /* break out & wait for throttling/unthrottling to happen */ dbg("%s - receive room low", __FUNCTION__); break; diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 
452efce72714..d6f55e9dccae 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -275,23 +275,14 @@ static void send_to_tty(struct usb_serial_port *port, char *data, unsigned int actual_length) { struct tty_struct *tty = port->tty; - int i; if (tty && actual_length) { usb_serial_debug_data(debug, &port->dev, __FUNCTION__, actual_length, data); - for (i = 0; i < actual_length ; ++i) { - /* if we insert more than TTY_FLIPBUF_SIZE characters, - we drop them. */ - if(tty->flip.count >= TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - } - /* this doesn't actually push the data through unless - tty->low_latency is set */ - tty_insert_flip_char(tty, data[i], 0); - } + tty_buffer_request_room(tty, actual_length); + tty_insert_flip_string(tty, data, actual_length); tty_flip_buffer_push(tty); } } diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 4ddac620fc0c..476cda107f4f 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -254,7 +254,6 @@ void usb_serial_generic_read_bulk_callback (struct urb *urb, struct pt_regs *reg struct usb_serial *serial = port->serial; struct tty_struct *tty; unsigned char *data = urb->transfer_buffer; - int i; int result; dbg("%s - port %d", __FUNCTION__, port->number); @@ -268,14 +267,8 @@ void usb_serial_generic_read_bulk_callback (struct urb *urb, struct pt_regs *reg tty = port->tty; if (tty && urb->actual_length) { - for (i = 0; i < urb->actual_length ; ++i) { - /* if we insert more than TTY_FLIPBUF_SIZE characters, we drop them. 
*/ - if(tty->flip.count >= TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - } - /* this doesn't actually push the data through unless tty->low_latency is set */ - tty_insert_flip_char(tty, data[i], 0); - } + tty_buffer_request_room(tty, urb->actual_length); + tty_insert_flip_string(tty, data, urb->actual_length); tty_flip_buffer_push(tty); } diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index faedbeb6ba49..3f29e6b0fd19 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -1965,20 +1965,14 @@ static void edge_tty_recv(struct device *dev, struct tty_struct *tty, unsigned c int cnt; do { - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { - dev_err(dev, "%s - dropping data, %d bytes lost\n", - __FUNCTION__, length); - return; - } + cnt = tty_buffer_request_room(tty, length); + if (cnt < length) { + dev_err(dev, "%s - dropping data, %d bytes lost\n", + __FUNCTION__, length - cnt); + if(cnt == 0) + break; } - cnt = min(length, TTY_FLIPBUF_SIZE - tty->flip.count); - memcpy(tty->flip.char_buf_ptr, data, cnt); - memset(tty->flip.flag_buf_ptr, 0, cnt); - tty->flip.char_buf_ptr += cnt; - tty->flip.flag_buf_ptr += cnt; - tty->flip.count += cnt; + tty_insert_flip_string(tty, data, cnt); data += cnt; length -= cnt; } while (length > 0); diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index 2edf9cabad20..afc0f34b3a46 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -1865,20 +1865,14 @@ static void edge_tty_recv(struct device *dev, struct tty_struct *tty, unsigned c int cnt; do { - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { - dev_err(dev, "%s - dropping data, %d bytes lost\n", - __FUNCTION__, length); - return; - } + cnt = tty_buffer_request_room(tty, length); + if (cnt < length) { + dev_err(dev, "%s - dropping data, %d 
bytes lost\n", + __FUNCTION__, length - cnt); + if(cnt == 0) + break; } - cnt = min(length, TTY_FLIPBUF_SIZE - tty->flip.count); - memcpy(tty->flip.char_buf_ptr, data, cnt); - memset(tty->flip.flag_buf_ptr, 0, cnt); - tty->flip.char_buf_ptr += cnt; - tty->flip.flag_buf_ptr += cnt; - tty->flip.count += cnt; + tty_insert_flip_string(tty, data, cnt); data += cnt; length -= cnt; } while (length > 0); diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c index 06d07cea0b70..9a5c97989562 100644 --- a/drivers/usb/serial/ipaq.c +++ b/drivers/usb/serial/ipaq.c @@ -711,7 +711,7 @@ static void ipaq_read_bulk_callback(struct urb *urb, struct pt_regs *regs) struct usb_serial_port *port = (struct usb_serial_port *)urb->context; struct tty_struct *tty; unsigned char *data = urb->transfer_buffer; - int i, result; + int result; dbg("%s - port %d", __FUNCTION__, port->number); @@ -724,14 +724,8 @@ static void ipaq_read_bulk_callback(struct urb *urb, struct pt_regs *regs) tty = port->tty; if (tty && urb->actual_length) { - for (i = 0; i < urb->actual_length ; ++i) { - /* if we insert more than TTY_FLIPBUF_SIZE characters, we drop them. 
*/ - if(tty->flip.count >= TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - } - /* this doesn't actually push the data through unless tty->low_latency is set */ - tty_insert_flip_char(tty, data[i], 0); - } + tty_buffer_request_room(tty, urb->actual_length); + tty_insert_flip_string(tty, data, urb->actual_length); tty_flip_buffer_push(tty); bytes_in += urb->actual_length; } diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c index 2dd191f5fe76..e760a70242c1 100644 --- a/drivers/usb/serial/ipw.c +++ b/drivers/usb/serial/ipw.c @@ -166,7 +166,6 @@ static void ipw_read_bulk_callback(struct urb *urb, struct pt_regs *regs) struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; struct tty_struct *tty; - int i; int result; dbg("%s - port %d", __FUNCTION__, port->number); @@ -180,14 +179,8 @@ static void ipw_read_bulk_callback(struct urb *urb, struct pt_regs *regs) tty = port->tty; if (tty && urb->actual_length) { - for (i = 0; i < urb->actual_length ; ++i) { - /* if we insert more than TTY_FLIPBUF_SIZE characters, we drop them. 
*/ - if(tty->flip.count >= TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - } - /* this doesn't actually push the data through unless tty->low_latency is set */ - tty_insert_flip_char(tty, data[i], 0); - } + tty_buffer_request_room(tty, urb->actual_length); + tty_insert_flip_string(tty, data, urb->actual_length); tty_flip_buffer_push(tty); } diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index 4e2f7dfb58b2..78335a5f7743 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -648,7 +648,6 @@ static void klsi_105_read_bulk_callback (struct urb *urb, struct pt_regs *regs) usb_serial_debug_data(debug, &port->dev, __FUNCTION__, urb->actual_length, data); } else { - int i; int bytes_sent = ((__u8 *) data)[0] + ((unsigned int) ((__u8 *) data)[1] << 8); tty = port->tty; @@ -669,16 +668,8 @@ static void klsi_105_read_bulk_callback (struct urb *urb, struct pt_regs *regs) bytes_sent = urb->actual_length - 2; } - for (i = 2; i < 2+bytes_sent; i++) { - /* if we insert more than TTY_FLIPBUF_SIZE characters, - * we drop them. 
*/ - if(tty->flip.count >= TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - } - /* this doesn't actually push the data through unless - * tty->low_latency is set */ - tty_insert_flip_char(tty, ((__u8*) data)[i], 0); - } + tty_buffer_request_room(tty, bytes_sent); + tty_insert_flip_string(tty, data + 2, bytes_sent); tty_flip_buffer_push(tty); /* again lockless, but debug info only */ diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c index d9c21e275130..b8b213185d0f 100644 --- a/drivers/usb/serial/kobil_sct.c +++ b/drivers/usb/serial/kobil_sct.c @@ -365,7 +365,6 @@ static void kobil_close (struct usb_serial_port *port, struct file *filp) static void kobil_read_int_callback( struct urb *purb, struct pt_regs *regs) { - int i; int result; struct usb_serial_port *port = (struct usb_serial_port *) purb->context; struct tty_struct *tty; @@ -397,14 +396,8 @@ static void kobil_read_int_callback( struct urb *purb, struct pt_regs *regs) */ // END DEBUG - for (i = 0; i < purb->actual_length; ++i) { - // if we insert more than TTY_FLIPBUF_SIZE characters, we drop them. 
- if(tty->flip.count >= TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - } - // this doesn't actually push the data through unless tty->low_latency is set - tty_insert_flip_char(tty, data[i], 0); - } + tty_buffer_request_room(tty, purb->actual_length); + tty_insert_flip_string(tty, data, purb->actual_length); tty_flip_buffer_push(tty); } diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 3fd2405304fd..52bdf6fe46f2 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -321,7 +321,7 @@ static int option_write(struct usb_serial_port *port, static void option_indat_callback(struct urb *urb, struct pt_regs *regs) { - int i, err; + int err; int endpoint; struct usb_serial_port *port; struct tty_struct *tty; @@ -338,11 +338,8 @@ static void option_indat_callback(struct urb *urb, struct pt_regs *regs) } else { tty = port->tty; if (urb->actual_length) { - for (i = 0; i < urb->actual_length ; ++i) { - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - tty_flip_buffer_push(tty); - tty_insert_flip_char(tty, data[i], 0); - } + tty_buffer_request_room(tty, urb->actual_length); + tty_insert_flip_string(tty, data, urb->actual_length); tty_flip_buffer_push(tty); } else { dbg("%s: empty read urb received", __FUNCTION__); diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index f03721056190..9ffff1938239 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -924,16 +924,12 @@ static void pl2303_read_bulk_callback (struct urb *urb, struct pt_regs *regs) tty = port->tty; if (tty && urb->actual_length) { + tty_buffer_request_room(tty, urb->actual_length + 1); /* overrun is special, not associated with a char */ if (status & UART_OVERRUN_ERROR) tty_insert_flip_char(tty, 0, TTY_OVERRUN); - - for (i = 0; i < urb->actual_length; ++i) { - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - } + for (i = 0; i < urb->actual_length; ++i) tty_insert_flip_char (tty, data[i], tty_flag); - } 
tty_flip_buffer_push (tty); } diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index abb830cb77bd..c18db3257073 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -1280,24 +1280,18 @@ static void ti_recv(struct device *dev, struct tty_struct *tty, int cnt; do { - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - if (tty->flip.count >= TTY_FLIPBUF_SIZE) { - dev_err(dev, "%s - dropping data, %d bytes lost\n", __FUNCTION__, length); - return; - } + cnt = tty_buffer_request_room(tty, length); + if (cnt < length) { + dev_err(dev, "%s - dropping data, %d bytes lost\n", __FUNCTION__, length - cnt); + if(cnt == 0) + break; } - cnt = min(length, TTY_FLIPBUF_SIZE - tty->flip.count); - memcpy(tty->flip.char_buf_ptr, data, cnt); - memset(tty->flip.flag_buf_ptr, 0, cnt); - tty->flip.char_buf_ptr += cnt; - tty->flip.flag_buf_ptr += cnt; - tty->flip.count += cnt; + tty_insert_flip_string(tty, data, cnt); + tty_flip_buffer_push(tty); data += cnt; length -= cnt; } while (length > 0); - tty_flip_buffer_push(tty); } diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c index 49b1fbe61f25..bce3d55affd8 100644 --- a/drivers/usb/serial/visor.c +++ b/drivers/usb/serial/visor.c @@ -488,7 +488,6 @@ static void visor_read_bulk_callback (struct urb *urb, struct pt_regs *regs) unsigned char *data = urb->transfer_buffer; struct tty_struct *tty; unsigned long flags; - int i; int throttled; int result; @@ -503,14 +502,8 @@ static void visor_read_bulk_callback (struct urb *urb, struct pt_regs *regs) tty = port->tty; if (tty && urb->actual_length) { - for (i = 0; i < urb->actual_length ; ++i) { - /* if we insert more than TTY_FLIPBUF_SIZE characters, we drop them. 
*/ - if(tty->flip.count >= TTY_FLIPBUF_SIZE) { - tty_flip_buffer_push(tty); - } - /* this doesn't actually push the data through unless tty->low_latency is set */ - tty_insert_flip_char(tty, data[i], 0); - } + tty_buffer_request_room(tty, urb->actual_length); + tty_insert_flip_string(tty, data, urb->actual_length); tty_flip_buffer_push(tty); } spin_lock_irqsave(&priv->lock, flags); diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index a7c3c4734d83..557411c6e7c7 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -1434,7 +1434,9 @@ static void rx_data_softint(void *private) urb = wrap->urb; if (tty && urb->actual_length) { - if (urb->actual_length > TTY_FLIPBUF_SIZE - tty->flip.count) { + int len = tty_buffer_request_room(tty, urb->actual_length); + /* This stuff can go away now I suspect */ + if (unlikely(len < urb->actual_length)) { spin_lock_irqsave(&info->lock, flags); list_add(tmp, &info->rx_urb_q); spin_unlock_irqrestore(&info->lock, flags); @@ -1442,11 +1444,8 @@ static void rx_data_softint(void *private) schedule_work(&info->rx_work); return; } - - memcpy(tty->flip.char_buf_ptr, urb->transfer_buffer, urb->actual_length); - tty->flip.char_buf_ptr += urb->actual_length; - tty->flip.count += urb->actual_length; - sent += urb->actual_length; + tty_insert_flip_string(tty, urb->transfer_buffer, len); + sent += len; } urb->dev = port->serial->dev; diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h index 7428198111eb..45f625d7d0b2 100644 --- a/include/linux/kbd_kern.h +++ b/include/linux/kbd_kern.h @@ -151,7 +151,7 @@ extern unsigned int keymap_count; static inline void con_schedule_flip(struct tty_struct *t) { - schedule_work(&t->flip.work); + schedule_work(&t->buf.work); } #endif diff --git a/include/linux/tty.h b/include/linux/tty.h index 57449704a47b..3787102e4b12 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -51,16 +51,22 @@ */ #define TTY_FLIPBUF_SIZE 512 -struct 
tty_flip_buffer { +struct tty_buffer { + struct tty_buffer *next; + char *char_buf_ptr; + unsigned char *flag_buf_ptr; + int used; + int size; + /* Data points here */ + unsigned long data[0]; +}; + +struct tty_bufhead { struct work_struct work; struct semaphore pty_sem; - char *char_buf_ptr; - unsigned char *flag_buf_ptr; - int count; - int buf_num; - unsigned char char_buf[2*TTY_FLIPBUF_SIZE]; - char flag_buf[2*TTY_FLIPBUF_SIZE]; - unsigned char slop[4]; /* N.B. bug overwrites buffer by 1 */ + struct tty_buffer *head; /* Queue head */ + struct tty_buffer *tail; /* Active buffer */ + struct tty_buffer *free; /* Free queue head */ }; /* * The pty uses char_buf and flag_buf as a contiguous buffer @@ -186,10 +192,11 @@ struct tty_struct { unsigned char stopped:1, hw_stopped:1, flow_stopped:1, packet:1; unsigned char low_latency:1, warned:1; unsigned char ctrl_status; + unsigned int receive_room; /* Bytes free for queue */ struct tty_struct *link; struct fasync_struct *fasync; - struct tty_flip_buffer flip; + struct tty_bufhead buf; int max_flip_cnt; int alt_speed; /* For magic substitution of 38400 bps */ wait_queue_head_t write_wait; diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index abe9bfcf226c..be1400e82482 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -1,25 +1,33 @@ #ifndef _LINUX_TTY_FLIP_H #define _LINUX_TTY_FLIP_H +extern int tty_buffer_request_room(struct tty_struct *tty, size_t size); +extern int tty_insert_flip_string(struct tty_struct *tty, unsigned char *chars, size_t size); +extern int tty_insert_flip_string_flags(struct tty_struct *tty, unsigned char *chars, char *flags, size_t size); +extern int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars, size_t size); +extern int tty_prepare_flip_string_flags(struct tty_struct *tty, unsigned char **chars, char **flags, size_t size); + #ifdef INCLUDE_INLINE_FUNCS #define _INLINE_ extern #else #define _INLINE_ static __inline__ #endif -_INLINE_ 
void tty_insert_flip_char(struct tty_struct *tty, +_INLINE_ int tty_insert_flip_char(struct tty_struct *tty, unsigned char ch, char flag) { - if (tty->flip.count < TTY_FLIPBUF_SIZE) { - tty->flip.count++; - *tty->flip.flag_buf_ptr++ = flag; - *tty->flip.char_buf_ptr++ = ch; + struct tty_buffer *tb = tty->buf.tail; + if (tb && tb->used < tb->size) { + tb->flag_buf_ptr[tb->used] = flag; + tb->char_buf_ptr[tb->used++] = ch; + return 1; } + return tty_insert_flip_string_flags(tty, &ch, &flag, 1); } _INLINE_ void tty_schedule_flip(struct tty_struct *tty) { - schedule_delayed_work(&tty->flip.work, 1); + schedule_delayed_work(&tty->buf.work, 1); } #undef _INLINE_ diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 6066afde5ce4..83c6e6c10ebb 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -81,14 +81,6 @@ * pointer of flag bytes which indicate whether a character was * received with a parity error, etc. * - * int (*receive_room)(struct tty_struct *); - * - * This function is called by the low-level tty driver to - * determine how many characters the line discpline can accept. - * The low-level driver must not send more characters than was - * indicated by receive_room, or the line discpline may drop - * those characters. 
- * void (*write_wakeup)(struct tty_struct *); * * This function is called by the low-level tty driver to signal @@ -136,7 +128,6 @@ struct tty_ldisc { */ void (*receive_buf)(struct tty_struct *, const unsigned char *cp, char *fp, int count); - int (*receive_room)(struct tty_struct *); void (*write_wakeup)(struct tty_struct *); struct module *owner; diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 158a9c46d863..f57cde78c3de 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -480,13 +480,8 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb) BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len); if (test_bit(TTY_DONT_FLIP, &tty->flags)) { - register int i; - for (i = 0; i < skb->len; i++) { - if (tty->flip.count >= TTY_FLIPBUF_SIZE) - tty_flip_buffer_push(tty); - - tty_insert_flip_char(tty, skb->data[i], 0); - } + tty_buffer_request_room(tty, skb->len); + tty_insert_flip_string(tty, skb->data, skb->len); tty_flip_buffer_push(tty); } else tty->ldisc.receive_buf(tty, skb->data, NULL, skb->len); -- cgit v1.2.3-71-gd317 From aaa246ea78c68cd205f505070650cda7c5a95d34 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 9 Jan 2006 20:54:23 -0800 Subject: [PATCH] char/isicom: Other little changes Move some code from one place to another. Get rid of ugly ifdefs in code in next patches, so here create functions and macros to enable it. Rename some functions and align some code to 80 chars. 
Signed-off-by: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/isicom.c | 163 ++++++++++++++++++++++++------------------------- include/linux/isicom.h | 3 - 2 files changed, 81 insertions(+), 85 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c index 9ede7c211cc1..49f88c9006cb 100644 --- a/drivers/char/isicom.c +++ b/drivers/char/isicom.c @@ -135,6 +135,17 @@ #include +#define InterruptTheCard(base) outw(0, (base) + 0xc) +#define ClearInterrupt(base) inw((base) + 0x0a) + +#ifdef DEBUG +#define pr_dbg(str...) printk(KERN_DEBUG "ISICOM: " str) +#define isicom_paranoia_check(a, b, c) __isicom_paranoia_check((a), (b), (c)) +#else +#define pr_dbg(str...) do { } while (0) +#define isicom_paranoia_check(a, b, c) 0 +#endif + static struct pci_device_id isicom_pci_tbl[] = { { VENDOR_ID, 0x2028, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, { VENDOR_ID, 0x2051, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, @@ -161,7 +172,6 @@ static void isicom_tx(unsigned long _data); static void isicom_start(struct tty_struct *tty); static unsigned char *tmp_buf; -static DECLARE_MUTEX(tmp_buf_sem); /* baud index mappings from linux defns to isi */ @@ -599,23 +609,20 @@ static int ISILoad_ioctl(struct inode *inode, struct file *filp, * */ -static inline int isicom_paranoia_check(struct isi_port const *port, char *name, - const char *routine) +static inline int __isicom_paranoia_check(struct isi_port const *port, + char *name, const char *routine) { -#ifdef ISICOM_DEBUG - static const char *badmagic = - KERN_WARNING "ISICOM: Warning: bad isicom magic for dev %s in %s.\n"; - static const char *badport = - KERN_WARNING "ISICOM: Warning: NULL isicom port for dev %s in %s.\n"; if (!port) { - printk(badport, name, routine); + printk(KERN_WARNING "ISICOM: Warning: bad isicom magic for " + "dev %s in %s.\n", name, routine); return 1; } if (port->magic != ISICOM_MAGIC) { - printk(badmagic, name, routine); + printk(KERN_WARNING 
"ISICOM: Warning: NULL isicom port for " + "dev %s in %s.\n", name, routine); return 1; } -#endif + return 0; } @@ -674,12 +681,10 @@ static void isicom_tx(unsigned long _data) unlock_card(&isi_card[card]); continue; } -#ifdef ISICOM_DEBUG - printk(KERN_DEBUG "ISICOM: txing %d bytes, port%d.\n", - txcount, port->channel+1); -#endif - outw((port->channel << isi_card[card].shift_count) | txcount - , base); + pr_dbg("txing %d bytes, port%d.\n", txcount, + port->channel + 1); + outw((port->channel << isi_card[card].shift_count) | txcount, + base); residue = NO; wrd = 0; while (1) { @@ -725,8 +730,11 @@ static void isicom_tx(unsigned long _data) /* schedule another tx for hopefully in about 10ms */ sched_again: - if (!re_schedule) - return; + if (!re_schedule) { + re_schedule = 2; + return; + } + init_timer(&tx); tx.expires = jiffies + HZ/100; tx.data = 0; @@ -830,9 +838,7 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id, struct pt_regs *regs) if (port->status & ISI_DCD) { if (!(header & ISI_DCD)) { /* Carrier has been lost */ -#ifdef ISICOM_DEBUG - printk(KERN_DEBUG "ISICOM: interrupt: DCD->low.\n"); -#endif + pr_dbg("interrupt: DCD->low.\n"); port->status &= ~ISI_DCD; schedule_work(&port->hangup_tq); } @@ -840,9 +846,7 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id, struct pt_regs *regs) else { if (header & ISI_DCD) { /* Carrier has been detected */ -#ifdef ISICOM_DEBUG - printk(KERN_DEBUG "ISICOM: interrupt: DCD->high.\n"); -#endif + pr_dbg("interrupt: DCD->high.\n"); port->status |= ISI_DCD; wake_up_interruptible(&port->open_wait); } @@ -899,21 +903,18 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id, struct pt_regs *regs) break; case 2: /* Statistics */ - printk(KERN_DEBUG "ISICOM: isicom_interrupt: stats!!!.\n"); + pr_dbg("isicom_interrupt: stats!!!.\n"); break; default: - printk(KERN_WARNING "ISICOM: Intr: Unknown code in status packet.\n"); + pr_dbg("Intr: Unknown code in status packet.\n"); break; } } else { /* Data Packet 
*/ count = tty_prepare_flip_string(tty, &rp, byte_count & ~1); -#ifdef ISICOM_DEBUG - printk(KERN_DEBUG "ISICOM: Intr: Can rx %d of %d bytes.\n", - count, byte_count); -#endif + pr_dbg("Intr: Can rx %d of %d bytes.\n", count, byte_count); word_count = count >> 1; insw(base, rp, word_count); byte_count -= (word_count << 1); @@ -922,8 +923,8 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id, struct pt_regs *regs) byte_count -= 2; } if (byte_count > 0) { - printk(KERN_DEBUG "ISICOM: Intr(0x%lx:%d): Flip buffer overflow! dropping bytes...\n", - base, channel+1); + pr_dbg("Intr(0x%lx:%d): Flip buffer overflow! dropping " + "bytes...\n", base, channel + 1); while(byte_count > 0) { /* drain out unread xtra data */ inw(base); byte_count -= 2; @@ -1116,9 +1117,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, struct isi /* block if port is in the process of being closed */ if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) { -#ifdef ISICOM_DEBUG - printk(KERN_DEBUG "ISICOM: block_til_ready: close in progress.\n"); -#endif + pr_dbg("block_til_ready: close in progress.\n"); interruptible_sleep_on(&port->close_wait); if (port->flags & ASYNC_HUP_NOTIFY) return -EAGAIN; @@ -1129,9 +1128,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, struct isi /* if non-blocking mode is set ... 
*/ if ((filp->f_flags & O_NONBLOCK) || (tty->flags & (1 << TTY_IO_ERROR))) { -#ifdef ISICOM_DEBUG - printk(KERN_DEBUG "ISICOM: block_til_ready: non-block mode.\n"); -#endif + pr_dbg("block_til_ready: non-block mode.\n"); port->flags |= ASYNC_NORMAL_ACTIVE; return 0; } @@ -1271,7 +1268,7 @@ static void isicom_shutdown_port(struct isi_port *port) set_bit(TTY_IO_ERROR, &tty->flags); if (--card->count < 0) { - printk(KERN_DEBUG "ISICOM: isicom_shutdown_port: bad board(0x%lx) count %d.\n", + pr_dbg("isicom_shutdown_port: bad board(0x%lx) count %d.\n", card->base, card->count); card->count = 0; } @@ -1294,9 +1291,7 @@ static void isicom_close(struct tty_struct *tty, struct file *filp) if (isicom_paranoia_check(port, tty->name, "isicom_close")) return; -#ifdef ISICOM_DEBUG - printk(KERN_DEBUG "ISICOM: Close start!!!.\n"); -#endif + pr_dbg("Close start!!!.\n"); spin_lock_irqsave(&card->card_lock, flags); if (tty_hung_up_p(filp)) { @@ -1347,9 +1342,7 @@ static void isicom_close(struct tty_struct *tty, struct file *filp) if (port->blocked_open) { spin_unlock_irqrestore(&card->card_lock, flags); if (port->close_delay) { -#ifdef ISICOM_DEBUG - printk(KERN_DEBUG "ISICOM: scheduling until time out.\n"); -#endif + pr_dbg("scheduling until time out.\n"); msleep_interruptible(jiffies_to_msecs(port->close_delay)); } spin_lock_irqsave(&card->card_lock, flags); @@ -1786,42 +1779,44 @@ static struct tty_operations isicom_ops = { .tiocmset = isicom_tiocmset, }; -static int __devinit register_drivers(void) +static int __devinit isicom_register_tty_driver(void) { - int error; + int error = -ENOMEM; /* tty driver structure initialization */ isicom_normal = alloc_tty_driver(PORT_COUNT); if (!isicom_normal) - return -ENOMEM; - - isicom_normal->owner = THIS_MODULE; - isicom_normal->name = "ttyM"; - isicom_normal->devfs_name = "isicom/"; - isicom_normal->major = ISICOM_NMAJOR; - isicom_normal->minor_start = 0; - isicom_normal->type = TTY_DRIVER_TYPE_SERIAL; - isicom_normal->subtype = 
SERIAL_TYPE_NORMAL; - isicom_normal->init_termios = tty_std_termios; - isicom_normal->init_termios.c_cflag = - B9600 | CS8 | CREAD | HUPCL |CLOCAL; - isicom_normal->flags = TTY_DRIVER_REAL_RAW; + goto end; + + isicom_normal->owner = THIS_MODULE; + isicom_normal->name = "ttyM"; + isicom_normal->devfs_name = "isicom/"; + isicom_normal->major = ISICOM_NMAJOR; + isicom_normal->minor_start = 0; + isicom_normal->type = TTY_DRIVER_TYPE_SERIAL; + isicom_normal->subtype = SERIAL_TYPE_NORMAL; + isicom_normal->init_termios = tty_std_termios; + isicom_normal->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | + CLOCAL; + isicom_normal->flags = TTY_DRIVER_REAL_RAW; tty_set_operations(isicom_normal, &isicom_ops); - if ((error=tty_register_driver(isicom_normal))!=0) { - printk(KERN_DEBUG "ISICOM: Couldn't register the dialin driver, error=%d\n", + if ((error = tty_register_driver(isicom_normal))) { + pr_dbg("Couldn't register the dialin driver, error=%d\n", error); put_tty_driver(isicom_normal); - return error; } - return 0; +end: + return error; } -static void unregister_drivers(void) +static void isicom_unregister_tty_driver(void) { - int error = tty_unregister_driver(isicom_normal); - if (error) - printk(KERN_DEBUG "ISICOM: couldn't unregister normal driver error=%d.\n",error); + int error; + + if ((error = tty_unregister_driver(isicom_normal))) + pr_dbg("couldn't unregister normal driver, error=%d.\n", error); + put_tty_driver(isicom_normal); } @@ -1891,7 +1886,7 @@ static int __devinit isicom_init(void) free_page((unsigned long)tmp_buf); return 0; } - if (register_drivers()) + if (isicom_register_tty_driver()) { unregister_ioregion(); free_page((unsigned long)tmp_buf); @@ -1899,7 +1894,7 @@ static int __devinit isicom_init(void) } if (!register_isr()) { - unregister_drivers(); + isicom_unregister_tty_driver(); /* ioports already uregistered in register_isr */ free_page((unsigned long)tmp_buf); return 0; @@ -1936,14 +1931,6 @@ static int __devinit isicom_init(void) static 
int io[4]; static int irq[4]; -MODULE_AUTHOR("MultiTech"); -MODULE_DESCRIPTION("Driver for the ISI series of cards by MultiTech"); -MODULE_LICENSE("GPL"); -module_param_array(io, int, NULL, 0); -MODULE_PARM_DESC(io, "I/O ports for the cards"); -module_param_array(irq, int, NULL, 0); -MODULE_PARM_DESC(irq, "Interrupts for the cards"); - static int __devinit isicom_setup(void) { struct pci_dev *dev = NULL; @@ -2047,11 +2034,15 @@ static int __devinit isicom_setup(void) static void __exit isicom_exit(void) { + unsigned int index = 0; + re_schedule = 0; - /* FIXME */ - msleep(1000); + + while (re_schedule != 2 && index++ < 100) + msleep(10); + unregister_isr(); - unregister_drivers(); + isicom_unregister_tty_driver(); unregister_ioregion(); if (tmp_buf) free_page((unsigned long)tmp_buf); @@ -2061,3 +2052,11 @@ static void __exit isicom_exit(void) module_init(isicom_setup); module_exit(isicom_exit); + +MODULE_AUTHOR("MultiTech"); +MODULE_DESCRIPTION("Driver for the ISI series of cards by MultiTech"); +MODULE_LICENSE("GPL"); +module_param_array(io, int, NULL, 0); +MODULE_PARM_DESC(io, "I/O ports for the cards"); +module_param_array(irq, int, NULL, 0); +MODULE_PARM_DESC(irq, "Interrupts for the cards"); diff --git a/include/linux/isicom.h b/include/linux/isicom.h index 7c6eae7f6ed7..06cb7baa6db8 100644 --- a/include/linux/isicom.h +++ b/include/linux/isicom.h @@ -98,9 +98,6 @@ typedef struct { #define ISICOM_INITIATE_XONXOFF 0x04 #define ISICOM_RESPOND_XONXOFF 0x08 -#define InterruptTheCard(base) (outw(0,(base)+0xc)) -#define ClearInterrupt(base) (inw((base)+0x0a)) - #define BOARD(line) (((line) >> 4) & 0x3) /* isi kill queue bitmap */ -- cgit v1.2.3-71-gd317 From e65c1db19fe8177fa2da53e3e0bddffe585b2d47 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 9 Jan 2006 20:54:25 -0800 Subject: [PATCH] char/isicom: Firmware loading Firmware loading via hotplug added. Cleanup firmware old-way fields in header file. 
Signed-off-by: Jiri Slaby Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/isicom.c | 410 +++++++++++++++++++++---------------------------- include/linux/isicom.h | 35 ----- 2 files changed, 174 insertions(+), 271 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c index 9ef8ab301768..55a47b33ff34 100644 --- a/drivers/char/isicom.c +++ b/drivers/char/isicom.c @@ -112,6 +112,7 @@ */ #include +#include #include #include #include @@ -120,7 +121,6 @@ #include #include #include -#include #include #include #include @@ -175,8 +175,6 @@ static struct tty_driver *isicom_normal; static struct timer_list tx; static char re_schedule = 1; -static int ISILoad_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); - static void isicom_tx(unsigned long _data); static void isicom_start(struct tty_struct *tty); @@ -384,233 +382,6 @@ static inline void kill_queue(struct isi_port *port, short queue) unlock_card(card); } - -/* - * Firmware loader driver specific routines. This needs to mostly die - * and be replaced with request_firmware. 
- */ - -static struct file_operations ISILoad_fops = { - .owner = THIS_MODULE, - .ioctl = ISILoad_ioctl, -}; - -static struct miscdevice isiloader_device = { - ISILOAD_MISC_MINOR, "isictl", &ISILoad_fops -}; - - -static inline int WaitTillCardIsFree(unsigned long base) -{ - unsigned long count=0; - while( (!(inw(base+0xe) & 0x1)) && (count++ < 6000000)); - if (inw(base+0xe)&0x1) - return 0; - else - return 1; -} - -static int ISILoad_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - unsigned int card, i, j, signature, status, portcount = 0; - unsigned long t, base; - u16 word_count; - bin_frame frame; - void __user *argp = (void __user *)arg; - /* exec_record exec_rec; */ - - if (get_user(card, (int __user *)argp)) - return -EFAULT; - - if (card < 0 || card >= BOARD_COUNT) - return -ENXIO; - - base=isi_card[card].base; - - if (base==0) - return -ENXIO; /* disabled or not used */ - - switch(cmd) { - case MIOCTL_RESET_CARD: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - printk(KERN_DEBUG "ISILoad:Resetting Card%d at 0x%lx ",card+1,base); - - inw(base+0x8); - - for (t=jiffies+HZ/100;time_before(jiffies, t);); - - outw(0,base+0x8); /* Reset */ - - for (j=1;j<=3;j++) { - for (t=jiffies+HZ;time_before(jiffies, t);); - printk("."); - } - signature=(inw(base+0x4)) & 0xff; - if (isi_card[card].isa) { - - if (!(inw(base+0xe) & 0x1) || (inw(base+0x2))) { -#ifdef ISICOM_DEBUG - printk("\nbase+0x2=0x%x , base+0xe=0x%x",inw(base+0x2),inw(base+0xe)); -#endif - printk("\nISILoad:ISA Card%d reset failure (Possible bad I/O Port Address 0x%lx).\n",card+1,base); - return -EIO; - } - } - else { - portcount = inw(base+0x2); - if (!(inw(base+0xe) & 0x1) || ((portcount!=0) && (portcount!=4) && (portcount!=8))) { -#ifdef ISICOM_DEBUG - printk("\nbase+0x2=0x%x , base+0xe=0x%x",inw(base+0x2),inw(base+0xe)); -#endif - printk("\nISILoad:PCI Card%d reset failure (Possible bad I/O Port Address 0x%lx).\n",card+1,base); - return -EIO; - } - } - 
switch(signature) { - case 0xa5: - case 0xbb: - case 0xdd: - if (isi_card[card].isa) - isi_card[card].port_count = 8; - else { - if (portcount == 4) - isi_card[card].port_count = 4; - else - isi_card[card].port_count = 8; - } - isi_card[card].shift_count = 12; - break; - - case 0xcc: isi_card[card].port_count = 16; - isi_card[card].shift_count = 11; - break; - - default: printk("ISILoad:Card%d reset failure (Possible bad I/O Port Address 0x%lx).\n",card+1,base); -#ifdef ISICOM_DEBUG - printk("Sig=0x%x\n",signature); -#endif - return -EIO; - } - printk("-Done\n"); - return put_user(signature,(unsigned __user *)argp); - - case MIOCTL_LOAD_FIRMWARE: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (copy_from_user(&frame, argp, sizeof(bin_frame))) - return -EFAULT; - - if (WaitTillCardIsFree(base)) - return -EIO; - - outw(0xf0,base); /* start upload sequence */ - outw(0x00,base); - outw((frame.addr), base); /* lsb of adderess */ - - word_count=(frame.count >> 1) + frame.count % 2; - outw(word_count, base); - InterruptTheCard(base); - - for (i=0;i<=0x2f;i++); /* a wee bit of delay */ - - if (WaitTillCardIsFree(base)) - return -EIO; - - if ((status=inw(base+0x4))!=0) { - printk(KERN_WARNING "ISILoad:Card%d rejected load header:\nAddress:0x%x \nCount:0x%x \nStatus:0x%x \n", - card+1, frame.addr, frame.count, status); - return -EIO; - } - outsw(base, (void *) frame.bin_data, word_count); - - InterruptTheCard(base); - - for (i=0;i<=0x0f;i++); /* another wee bit of delay */ - - if (WaitTillCardIsFree(base)) - return -EIO; - - if ((status=inw(base+0x4))!=0) { - printk(KERN_ERR "ISILoad:Card%d got out of sync.Card Status:0x%x\n",card+1, status); - return -EIO; - } - return 0; - - case MIOCTL_READ_FIRMWARE: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (copy_from_user(&frame, argp, sizeof(bin_header))) - return -EFAULT; - - if (WaitTillCardIsFree(base)) - return -EIO; - - outw(0xf1,base); /* start download sequence */ - outw(0x00,base); - outw((frame.addr), 
base); /* lsb of adderess */ - - word_count=(frame.count >> 1) + frame.count % 2; - outw(word_count+1, base); - InterruptTheCard(base); - - for (i=0;i<=0xf;i++); /* a wee bit of delay */ - - if (WaitTillCardIsFree(base)) - return -EIO; - - if ((status=inw(base+0x4))!=0) { - printk(KERN_WARNING "ISILoad:Card%d rejected verify header:\nAddress:0x%x \nCount:0x%x \nStatus:0x%x \n", - card+1, frame.addr, frame.count, status); - return -EIO; - } - - inw(base); - insw(base, frame.bin_data, word_count); - InterruptTheCard(base); - - for (i=0;i<=0x0f;i++); /* another wee bit of delay */ - - if (WaitTillCardIsFree(base)) - return -EIO; - - if ((status=inw(base+0x4))!=0) { - printk(KERN_ERR "ISILoad:Card%d verify got out of sync.Card Status:0x%x\n",card+1, status); - return -EIO; - } - - if (copy_to_user(argp, &frame, sizeof(bin_frame))) - return -EFAULT; - return 0; - - case MIOCTL_XFER_CTRL: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (WaitTillCardIsFree(base)) - return -EIO; - - outw(0xf2, base); - outw(0x800, base); - outw(0x0, base); - outw(0x0, base); - InterruptTheCard(base); - outw(0x0, base+0x4); /* for ISI4608 cards */ - - isi_card[card].status |= FIRMWARE_LOADED; - return 0; - - default: -#ifdef ISICOM_DEBUG - printk(KERN_DEBUG "ISILoad: Received Ioctl cmd 0x%x.\n", cmd); -#endif - return -ENOIOCTLCMD; - } -} - - /* * ISICOM Driver specific routines ... 
* @@ -1927,6 +1698,175 @@ end: return retval; } +static inline int WaitTillCardIsFree(u16 base) +{ + unsigned long count = 0; + + while (!(inw(base + 0xe) & 0x1) && count++ < 100) + msleep(5); + + return !(inw(base + 0xe) & 0x1); +} + +static int __devinit load_firmware(struct pci_dev *pdev, + const unsigned int index, const unsigned int signature) +{ + struct isi_board *board = pci_get_drvdata(pdev); + const struct firmware *fw; + unsigned long base = board->base; + unsigned int a; + u16 word_count, status; + int retval = -EIO; + char *name; + u8 *data; + + struct stframe { + u16 addr; + u16 count; + u8 data[0]; + } *frame; + + switch (signature) { + case 0xa5: + name = "isi608.bin"; + break; + case 0xbb: + name = "isi608em.bin"; + break; + case 0xcc: + name = "isi616em.bin"; + break; + case 0xdd: + name = "isi4608.bin"; + break; + case 0xee: + name = "isi4616.bin"; + break; + default: + dev_err(&pdev->dev, "Unknown signature.\n"); + goto end; + } + + retval = request_firmware(&fw, name, &pdev->dev); + if (retval) + goto end; + + for (frame = (struct stframe *)fw->data; + frame < (struct stframe *)(fw->data + fw->size); + frame++) { + if (WaitTillCardIsFree(base)) + goto errrelfw; + + outw(0xf0, base); /* start upload sequence */ + outw(0x00, base); + outw(frame->addr, base); /* lsb of address */ + + word_count = frame->count / 2 + frame->count % 2; + outw(word_count, base); + InterruptTheCard(base); + + udelay(100); /* 0x2f */ + + if (WaitTillCardIsFree(base)) + goto errrelfw; + + if ((status = inw(base + 0x4)) != 0) { + dev_warn(&pdev->dev, "Card%d rejected load header:\n" + "Address:0x%x\nCount:0x%x\nStatus:0x%x\n", + index + 1, frame->addr, frame->count, status); + goto errrelfw; + } + outsw(base, frame->data, word_count); + + InterruptTheCard(base); + + udelay(50); /* 0x0f */ + + if (WaitTillCardIsFree(base)) + goto errrelfw; + + if ((status = inw(base + 0x4)) != 0) { + dev_err(&pdev->dev, "Card%d got out of sync.Card " + "Status:0x%x\n", index + 1, status); 
+ goto errrelfw; + } + } + + retval = -EIO; + + if (WaitTillCardIsFree(base)) + goto errrelfw; + + outw(0xf2, base); + outw(0x800, base); + outw(0x0, base); + outw(0x0, base); + InterruptTheCard(base); + outw(0x0, base + 0x4); /* for ISI4608 cards */ + +/* XXX: should we test it by reading it back and comparing with original like + * in load firmware package? */ + for (frame = (struct stframe*)fw->data; + frame < (struct stframe*)(fw->data + fw->size); + frame++) { + if (WaitTillCardIsFree(base)) + goto errrelfw; + + outw(0xf1, base); /* start download sequence */ + outw(0x00, base); + outw(frame->addr, base); /* lsb of address */ + + word_count = (frame->count >> 1) + frame->count % 2; + outw(word_count + 1, base); + InterruptTheCard(base); + + udelay(50); /* 0xf */ + + if (WaitTillCardIsFree(base)) + goto errrelfw; + + if ((status = inw(base + 0x4)) != 0) { + dev_warn(&pdev->dev, "Card%d rejected verify header:\n" + "Address:0x%x\nCount:0x%x\nStatus: 0x%x\n", + index + 1, frame->addr, frame->count, status); + goto errrelfw; + } + + data = kmalloc(word_count * 2, GFP_KERNEL); + inw(base); + insw(base, data, word_count); + InterruptTheCard(base); + + for (a = 0; a < frame->count; a++) + if (data[a] != frame->data[a]) { + kfree(data); + dev_err(&pdev->dev, "Card%d, firmware upload " + "failed\n", index + 1); + goto errrelfw; + } + kfree(data); + + udelay(50); /* 0xf */ + + if (WaitTillCardIsFree(base)) + goto errrelfw; + + if ((status = inw(base + 0x4)) != 0) { + dev_err(&pdev->dev, "Card%d verify got out of sync. 
" + "Card Status:0x%x\n", index + 1, status); + goto errrelfw; + } + } + + board->status |= FIRMWARE_LOADED; + retval = 0; + +errrelfw: + release_firmware(fw); +end: + return retval; +} + /* * Insmod can set static symbols so keep these static */ @@ -1976,6 +1916,10 @@ static int __devinit isicom_probe(struct pci_dev *pdev, if (retval < 0) goto errunri; + retval = load_firmware(pdev, index, signature); + if (retval < 0) + goto errunri; + return 0; errunri: @@ -2048,10 +1992,6 @@ static int __devinit isicom_setup(void) goto errtty; } - retval = misc_register(&isiloader_device); - if (retval < 0) - goto errpci; - init_timer(&tx); tx.expires = jiffies + 1; tx.data = 0; @@ -2060,8 +2000,6 @@ static int __devinit isicom_setup(void) add_timer(&tx); return 0; -errpci: - pci_unregister_driver(&isicom_driver); errtty: isicom_unregister_tty_driver(); error: diff --git a/include/linux/isicom.h b/include/linux/isicom.h index 06cb7baa6db8..ba5291a9f993 100644 --- a/include/linux/isicom.h +++ b/include/linux/isicom.h @@ -4,46 +4,11 @@ /*#define ISICOM_DEBUG*/ /*#define ISICOM_DEBUG_DTR_RTS*/ - -/* - * Firmware Loader definitions ... - */ - -#define __MultiTech ('M'<<8) -#define MIOCTL_LOAD_FIRMWARE (__MultiTech | 0x01) -#define MIOCTL_READ_FIRMWARE (__MultiTech | 0x02) -#define MIOCTL_XFER_CTRL (__MultiTech | 0x03) -#define MIOCTL_RESET_CARD (__MultiTech | 0x04) - -#define DATA_SIZE 16 - -typedef struct { - unsigned short exec_segment; - unsigned short exec_addr; -} exec_record; - -typedef struct { - int board; /* Board to load */ - unsigned short addr; - unsigned short count; -} bin_header; - -typedef struct { - int board; /* Board to load */ - unsigned short addr; - unsigned short count; - unsigned short segment; - unsigned char bin_data[DATA_SIZE]; -} bin_frame; - #ifdef __KERNEL__ #define YES 1 #define NO 0 -#define ISILOAD_MISC_MINOR 155 /* /dev/isctl */ -#define ISILOAD_NAME "ISILoad" - /* * ISICOM Driver definitions ... 
* -- cgit v1.2.3-71-gd317 From a547dfe9563c49fd0f9743640e01d1d652119ec7 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 9 Jan 2006 20:54:26 -0800 Subject: [PATCH] char/isicom: More whitespaces and coding style Wrap all the code to 80 chars on a line. `}\nelse' changed to `} else'. Clean whitespaces in header file. Signed-off-by: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/isicom.c | 112 ++++++++++++++++++++++++++----------------------- include/linux/isicom.h | 21 +++++----- 2 files changed, 69 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c index 55a47b33ff34..e9ebabaf8cb0 100644 --- a/drivers/char/isicom.c +++ b/drivers/char/isicom.c @@ -189,7 +189,7 @@ struct isi_board { unsigned char irq; unsigned char port_count; unsigned short status; - unsigned short port_status; /* each bit represents a single port */ + unsigned short port_status; /* each bit for each port */ unsigned short shift_count; struct isi_port * ports; signed char count; @@ -242,7 +242,9 @@ static int lock_card(struct isi_board *card) udelay(1000); /* 1ms */ } } - printk(KERN_WARNING "ISICOM: Failed to lock Card (0x%lx)\n", card->base); + printk(KERN_WARNING "ISICOM: Failed to lock Card (0x%lx)\n", + card->base); + return 0; /* Failed to aquire the card! 
*/ } @@ -466,33 +468,36 @@ static void isicom_tx(unsigned long _data) residue = NO; wrd = 0; while (1) { - cnt = min_t(int, txcount, (SERIAL_XMIT_SIZE - port->xmit_tail)); + cnt = min_t(int, txcount, (SERIAL_XMIT_SIZE + - port->xmit_tail)); if (residue == YES) { residue = NO; if (cnt > 0) { - wrd |= (port->xmit_buf[port->xmit_tail] << 8); - port->xmit_tail = (port->xmit_tail + 1) & (SERIAL_XMIT_SIZE - 1); + wrd |= (port->xmit_buf[port->xmit_tail] + << 8); + port->xmit_tail = (port->xmit_tail + 1) + & (SERIAL_XMIT_SIZE - 1); port->xmit_cnt--; txcount--; cnt--; outw(wrd, base); - } - else { + } else { outw(wrd, base); break; } } if (cnt <= 0) break; word_count = cnt >> 1; - outsw(base, port->xmit_buf+port->xmit_tail, word_count); - port->xmit_tail = (port->xmit_tail + (word_count << 1)) & - (SERIAL_XMIT_SIZE - 1); + outsw(base, port->xmit_buf+port->xmit_tail,word_count); + port->xmit_tail = (port->xmit_tail + + (word_count << 1)) & (SERIAL_XMIT_SIZE - 1); txcount -= (word_count << 1); port->xmit_cnt -= (word_count << 1); if (cnt & 0x0001) { residue = YES; wrd = port->xmit_buf[port->xmit_tail]; - port->xmit_tail = (port->xmit_tail + 1) & (SERIAL_XMIT_SIZE - 1); + port->xmit_tail = (port->xmit_tail + 1) + & (SERIAL_XMIT_SIZE - 1); port->xmit_cnt--; txcount--; } @@ -572,8 +577,8 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id, struct pt_regs *regs) byte_count = header & 0xff; if (channel + 1 > card->port_count) { - printk(KERN_WARNING "ISICOM: isicom_interrupt(0x%lx): %d(channel) > port_count.\n", - base, channel+1); + printk(KERN_WARNING "ISICOM: isicom_interrupt(0x%lx): " + "%d(channel) > port_count.\n", base, channel+1); if (card->isa) ClearInterrupt(base); else @@ -611,26 +616,22 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id, struct pt_regs *regs) header = inw(base); switch(header & 0xff) { case 0: /* Change in EIA signals */ - if (port->flags & ASYNC_CHECK_CD) { if (port->status & ISI_DCD) { if (!(header & ISI_DCD)) { /* Carrier has been 
lost */ - pr_dbg("interrupt: DCD->low.\n"); + pr_dbg("interrupt: DCD->low.\n" + ); port->status &= ~ISI_DCD; schedule_work(&port->hangup_tq); } + } else if (header & ISI_DCD) { + /* Carrier has been detected */ + pr_dbg("interrupt: DCD->high.\n"); + port->status |= ISI_DCD; + wake_up_interruptible(&port->open_wait); } - else { - if (header & ISI_DCD) { - /* Carrier has been detected */ - pr_dbg("interrupt: DCD->high.\n"); - port->status |= ISI_DCD; - wake_up_interruptible(&port->open_wait); - } - } - } - else { + } else { if (header & ISI_DCD) port->status |= ISI_DCD; else @@ -642,19 +643,16 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id, struct pt_regs *regs) if (header & ISI_CTS) { port->tty->hw_stopped = 0; /* start tx ing */ - port->status |= (ISI_TXOK | ISI_CTS); + port->status |= (ISI_TXOK + | ISI_CTS); schedule_work(&port->bh_tqueue); } + } else if (!(header & ISI_CTS)) { + port->tty->hw_stopped = 1; + /* stop tx ing */ + port->status &= ~(ISI_TXOK | ISI_CTS); } - else { - if (!(header & ISI_CTS)) { - port->tty->hw_stopped = 1; - /* stop tx ing */ - port->status &= ~(ISI_TXOK | ISI_CTS); - } - } - } - else { + } else { if (header & ISI_CTS) port->status |= ISI_CTS; else @@ -673,7 +671,7 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id, struct pt_regs *regs) break; - case 1: /* Received Break !!! */ + case 1: /* Received Break !!! 
*/ tty_insert_flip_char(tty, 0, TTY_BREAK); if (port->flags & ASYNC_SAK) do_SAK(tty); @@ -688,8 +686,7 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id, struct pt_regs *regs) pr_dbg("Intr: Unknown code in status packet.\n"); break; } - } - else { /* Data Packet */ + } else { /* Data Packet */ count = tty_prepare_flip_string(tty, &rp, byte_count & ~1); pr_dbg("Intr: Can rx %d of %d bytes.\n", count, byte_count); @@ -697,7 +694,8 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id, struct pt_regs *regs) insw(base, rp, word_count); byte_count -= (word_count << 1); if (count & 0x0001) { - tty_insert_flip_char(tty, inw(base) & 0xff, TTY_NORMAL); + tty_insert_flip_char(tty, inw(base) & 0xff, + TTY_NORMAL); byte_count -= 2; } if (byte_count > 0) { @@ -714,6 +712,7 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id, struct pt_regs *regs) ClearInterrupt(base); else outw(0x0000, base+0x04); /* enable interrupts */ + return IRQ_HANDLED; } @@ -885,7 +884,8 @@ static int isicom_setup_port(struct isi_port *port) return 0; } -static int block_til_ready(struct tty_struct *tty, struct file *filp, struct isi_port *port) +static int block_til_ready(struct tty_struct *tty, struct file *filp, + struct isi_port *port) { struct isi_board *card = port->card; int do_clocal = 0, retval; @@ -905,7 +905,8 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, struct isi /* if non-blocking mode is set ... 
*/ - if ((filp->f_flags & O_NONBLOCK) || (tty->flags & (1 << TTY_IO_ERROR))) { + if ((filp->f_flags & O_NONBLOCK) || + (tty->flags & (1 << TTY_IO_ERROR))) { pr_dbg("block_til_ready: non-block mode.\n"); port->flags |= ASYNC_NORMAL_ACTIVE; return 0; @@ -1051,7 +1052,7 @@ static void isicom_shutdown_port(struct isi_port *port) card->count = 0; } - /* last port was closed , shutdown that boad too */ + /* last port was closed, shutdown that boad too */ if (C_HUPCL(tty)) { if (!card->count) isicom_shutdown_board(card); @@ -1078,14 +1079,14 @@ static void isicom_close(struct tty_struct *tty, struct file *filp) } if (tty->count == 1 && port->count != 1) { - printk(KERN_WARNING "ISICOM:(0x%lx) isicom_close: bad port count" - "tty->count = 1 port count = %d.\n", + printk(KERN_WARNING "ISICOM:(0x%lx) isicom_close: bad port " + "count tty->count = 1 port count = %d.\n", card->base, port->count); port->count = 1; } if (--port->count < 0) { - printk(KERN_WARNING "ISICOM:(0x%lx) isicom_close: bad port count for" - "channel%d = %d", card->base, port->channel, + printk(KERN_WARNING "ISICOM:(0x%lx) isicom_close: bad port " + "count for channel%d = %d", card->base, port->channel, port->count); port->count = 0; } @@ -1121,7 +1122,8 @@ static void isicom_close(struct tty_struct *tty, struct file *filp) spin_unlock_irqrestore(&card->card_lock, flags); if (port->close_delay) { pr_dbg("scheduling until time out.\n"); - msleep_interruptible(jiffies_to_msecs(port->close_delay)); + msleep_interruptible( + jiffies_to_msecs(port->close_delay)); } spin_lock_irqsave(&card->card_lock, flags); wake_up_interruptible(&port->open_wait); @@ -1149,13 +1151,14 @@ static int isicom_write(struct tty_struct *tty, const unsigned char *buf, spin_lock_irqsave(&card->card_lock, flags); while(1) { - cnt = min_t(int, count, min(SERIAL_XMIT_SIZE - port->xmit_cnt - 1, - SERIAL_XMIT_SIZE - port->xmit_head)); + cnt = min_t(int, count, min(SERIAL_XMIT_SIZE - port->xmit_cnt + - 1, SERIAL_XMIT_SIZE - 
port->xmit_head)); if (cnt <= 0) break; memcpy(port->xmit_buf + port->xmit_head, buf, cnt); - port->xmit_head = (port->xmit_head + cnt) & (SERIAL_XMIT_SIZE - 1); + port->xmit_head = (port->xmit_head + cnt) & (SERIAL_XMIT_SIZE + - 1); port->xmit_cnt += cnt; buf += cnt; count -= cnt; @@ -1200,7 +1203,8 @@ static void isicom_flush_chars(struct tty_struct *tty) if (isicom_paranoia_check(port, tty->name, "isicom_flush_chars")) return; - if (port->xmit_cnt <= 0 || tty->stopped || tty->hw_stopped || !port->xmit_buf) + if (port->xmit_cnt <= 0 || tty->stopped || tty->hw_stopped || + !port->xmit_buf) return; /* this tells the transmitter to consider this port for @@ -1233,7 +1237,8 @@ static int isicom_chars_in_buffer(struct tty_struct *tty) } /* ioctl et all */ -static inline void isicom_send_break(struct isi_port *port, unsigned long length) +static inline void isicom_send_break(struct isi_port *port, + unsigned long length) { struct isi_board *card = port->card; unsigned long base = card->base; @@ -1368,7 +1373,8 @@ static int isicom_ioctl(struct tty_struct *tty, struct file *filp, return 0; case TIOCGSOFTCAR: - return put_user(C_CLOCAL(tty) ? 1 : 0, (unsigned long __user *)argp); + return put_user(C_CLOCAL(tty) ? 1 : 0, + (unsigned long __user *)argp); case TIOCSSOFTCAR: if (get_user(arg, (unsigned long __user *) argp)) diff --git a/include/linux/isicom.h b/include/linux/isicom.h index ba5291a9f993..45b3d48f0978 100644 --- a/include/linux/isicom.h +++ b/include/linux/isicom.h @@ -9,7 +9,7 @@ #define YES 1 #define NO 0 -/* +/* * ISICOM Driver definitions ... 
* */ @@ -20,8 +20,8 @@ * PCI definitions */ - #define DEVID_COUNT 9 - #define VENDOR_ID 0x10b5 +#define DEVID_COUNT 9 +#define VENDOR_ID 0x10b5 /* * These are now officially allocated numbers @@ -31,9 +31,9 @@ #define ISICOM_CMAJOR 113 /* callout */ #define ISICOM_MAGIC (('M' << 8) | 'T') -#define WAKEUP_CHARS 256 /* hard coded for now */ -#define TX_SIZE 254 - +#define WAKEUP_CHARS 256 /* hard coded for now */ +#define TX_SIZE 254 + #define BOARD_COUNT 4 #define PORT_COUNT (BOARD_COUNT*16) @@ -66,12 +66,12 @@ #define BOARD(line) (((line) >> 4) & 0x3) /* isi kill queue bitmap */ - + #define ISICOM_KILLTX 0x01 #define ISICOM_KILLRX 0x02 /* isi_board status bitmap */ - + #define FIRMWARE_LOADED 0x0001 #define BOARD_ACTIVE 0x0002 @@ -85,9 +85,8 @@ #define ISI_RTS 0x0200 -#define ISI_TXOK 0x0001 - +#define ISI_TXOK 0x0001 + #endif /* __KERNEL__ */ #endif /* ISICOM_H */ - -- cgit v1.2.3-71-gd317 From 4c29c4c5f28616f2a87f0e6499aa9776d9be58ad Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 9 Jan 2006 20:54:50 -0800 Subject: [PATCH] include/linux/sched.h: no need to guard the normalize_rt_tasks() prototype There's no need to guard the normalize_rt_tasks() prototype with an #ifdef CONFIG_MAGIC_SYSRQ. 
Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ee4677ad204e..c4ee35dd18ae 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1390,12 +1390,8 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) extern long sched_setaffinity(pid_t pid, cpumask_t new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); -#ifdef CONFIG_MAGIC_SYSRQ - extern void normalize_rt_tasks(void); -#endif - #ifdef CONFIG_PM /* * Check if a process has been frozen -- cgit v1.2.3-71-gd317 From c8d52465f95c4187871f8e65666c07806ca06d41 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 10 Jan 2006 18:21:20 +1100 Subject: [PATCH] Work around ppc64 compiler bug In the process of optimising our per cpu data code, I found a ppc64 compiler bug that has been around forever. Basically the current RELOC_HIDE can end up trashing r30. Details of the bug can be found at http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25572 This bug is present in all compilers before 4.1. It is masked by the fact that our current per cpu data code is inefficient and causes other loads that end up marking r30 as used. A workaround identified by Alan Modra is to use the =r asm constraint instead of =g. 
Signed-off-by: Anton Blanchard [ Verified that this makes no real difference on x86[-64] */ Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 2e05e1e6b0e6..6e1c44a935d4 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -11,9 +11,15 @@ /* This macro obfuscates arithmetic on a variable address so that gcc shouldn't recognize the original var, and make assumptions about it */ +/* + * Versions of the ppc64 compiler before 4.1 had a bug where use of + * RELOC_HIDE could trash r30. The bug can be worked around by changing + * the inline assembly constraint from =g to =r, in this particular + * case either is valid. + */ #define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ - __asm__ ("" : "=g"(__ptr) : "0"(ptr)); \ + __asm__ ("" : "=r"(__ptr) : "0"(ptr)); \ (typeof(ptr)) (__ptr + (off)); }) -- cgit v1.2.3-71-gd317 From 69a0b3157983925f14fe0bdc49622d5389538d8d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 10 Jan 2006 16:48:02 +0300 Subject: [PATCH] rcu: join rcu_ctrlblk and rcu_state This patch moves rcu_state into the rcu_ctrlblk. I think there are no reasons why we should have 2 different variables to control rcu state. Every user of rcu_state has also "rcu_ctrlblk *rcp" in the parameter list. Signed-off-by: Oleg Nesterov Acked-by: Paul E. McKenney Signed-off-by: Linus Torvalds --- include/linux/rcupdate.h | 4 +++ kernel/rcupdate.c | 82 ++++++++++++++++++++++-------------------------- 2 files changed, 42 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a1d26cb28925..981f9aa43353 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -65,6 +65,10 @@ struct rcu_ctrlblk { long cur; /* Current batch number. 
*/ long completed; /* Number of the last completed batch */ int next_pending; /* Is the next batch already waiting? */ + + spinlock_t lock ____cacheline_internodealigned_in_smp; + cpumask_t cpumask; /* CPUs that need to switch in order */ + /* for current batch to proceed. */ } ____cacheline_internodealigned_in_smp; /* Is batch a before batch b ? */ diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 05ee48316f70..e18f9190eafa 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -49,22 +49,18 @@ #include /* Definition for rcupdate control block. */ -struct rcu_ctrlblk rcu_ctrlblk = - { .cur = -300, .completed = -300 }; -struct rcu_ctrlblk rcu_bh_ctrlblk = - { .cur = -300, .completed = -300 }; - -/* Bookkeeping of the progress of the grace period */ -struct rcu_state { - spinlock_t lock; /* Guard this struct and writes to rcu_ctrlblk */ - cpumask_t cpumask; /* CPUs that need to switch in order */ - /* for current batch to proceed. */ +struct rcu_ctrlblk rcu_ctrlblk = { + .cur = -300, + .completed = -300, + .lock = SPIN_LOCK_UNLOCKED, + .cpumask = CPU_MASK_NONE, +}; +struct rcu_ctrlblk rcu_bh_ctrlblk = { + .cur = -300, + .completed = -300, + .lock = SPIN_LOCK_UNLOCKED, + .cpumask = CPU_MASK_NONE, }; - -static struct rcu_state rcu_state ____cacheline_internodealigned_in_smp = - {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE }; -static struct rcu_state rcu_bh_state ____cacheline_internodealigned_in_smp = - {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE }; DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L }; DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; @@ -220,13 +216,13 @@ static void rcu_do_batch(struct rcu_data *rdp) * This is done by rcu_start_batch. The start is not broadcasted to * all cpus, they must pick this up by comparing rcp->cur with * rdp->quiescbatch. All cpus are recorded in the - * rcu_state.cpumask bitmap. + * rcu_ctrlblk.cpumask bitmap. * - All cpus must go through a quiescent state. 
* Since the start of the grace period is not broadcasted, at least two * calls to rcu_check_quiescent_state are required: * The first call just notices that a new grace period is running. The * following calls check if there was a quiescent state since the beginning - * of the grace period. If so, it updates rcu_state.cpumask. If + * of the grace period. If so, it updates rcu_ctrlblk.cpumask. If * the bitmap is empty, then the grace period is completed. * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace * period (if necessary). @@ -234,9 +230,9 @@ static void rcu_do_batch(struct rcu_data *rdp) /* * Register a new batch of callbacks, and start it up if there is currently no * active batch and the batch to be registered has not already occurred. - * Caller must hold rcu_state.lock. + * Caller must hold rcu_ctrlblk.lock. */ -static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp) +static void rcu_start_batch(struct rcu_ctrlblk *rcp) { if (rcp->next_pending && rcp->completed == rcp->cur) { @@ -251,11 +247,11 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp) /* * Accessing nohz_cpu_mask before incrementing rcp->cur needs a * Barrier Otherwise it can cause tickless idle CPUs to be - * included in rsp->cpumask, which will extend graceperiods + * included in rcp->cpumask, which will extend graceperiods * unnecessarily. */ smp_mb(); - cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask); + cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask); } } @@ -265,13 +261,13 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp) * Clear it from the cpu mask and complete the grace period if it was the last * cpu. 
Start another grace period if someone has further entries pending */ -static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp) +static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp) { - cpu_clear(cpu, rsp->cpumask); - if (cpus_empty(rsp->cpumask)) { + cpu_clear(cpu, rcp->cpumask); + if (cpus_empty(rcp->cpumask)) { /* batch completed ! */ rcp->completed = rcp->cur; - rcu_start_batch(rcp, rsp); + rcu_start_batch(rcp); } } @@ -281,7 +277,7 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp) * quiescent cycle, then indicate that it has done so. */ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, - struct rcu_state *rsp, struct rcu_data *rdp) + struct rcu_data *rdp) { if (rdp->quiescbatch != rcp->cur) { /* start new grace period: */ @@ -306,15 +302,15 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, return; rdp->qs_pending = 0; - spin_lock(&rsp->lock); + spin_lock(&rcp->lock); /* * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync * during cpu startup. Ignore the quiescent state. 
*/ if (likely(rdp->quiescbatch == rcp->cur)) - cpu_quiet(rdp->cpu, rcp, rsp); + cpu_quiet(rdp->cpu, rcp); - spin_unlock(&rsp->lock); + spin_unlock(&rcp->lock); } @@ -335,16 +331,16 @@ static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list, } static void __rcu_offline_cpu(struct rcu_data *this_rdp, - struct rcu_ctrlblk *rcp, struct rcu_state *rsp, struct rcu_data *rdp) + struct rcu_ctrlblk *rcp, struct rcu_data *rdp) { /* if the cpu going offline owns the grace period * we can block indefinitely waiting for it, so flush * it here */ - spin_lock_bh(&rsp->lock); + spin_lock_bh(&rcp->lock); if (rcp->cur != rcp->completed) - cpu_quiet(rdp->cpu, rcp, rsp); - spin_unlock_bh(&rsp->lock); + cpu_quiet(rdp->cpu, rcp); + spin_unlock_bh(&rcp->lock); rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail); rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail); @@ -354,9 +350,9 @@ static void rcu_offline_cpu(int cpu) struct rcu_data *this_rdp = &get_cpu_var(rcu_data); struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data); - __rcu_offline_cpu(this_rdp, &rcu_ctrlblk, &rcu_state, + __rcu_offline_cpu(this_rdp, &rcu_ctrlblk, &per_cpu(rcu_data, cpu)); - __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, &rcu_bh_state, + __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); put_cpu_var(rcu_data); put_cpu_var(rcu_bh_data); @@ -375,7 +371,7 @@ static void rcu_offline_cpu(int cpu) * This does the RCU processing work from tasklet context. 
*/ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, - struct rcu_state *rsp, struct rcu_data *rdp) + struct rcu_data *rdp) { if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) { *rdp->donetail = rdp->curlist; @@ -405,25 +401,23 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, if (!rcp->next_pending) { /* and start it/schedule start if it's a new batch */ - spin_lock(&rsp->lock); + spin_lock(&rcp->lock); rcp->next_pending = 1; - rcu_start_batch(rcp, rsp); - spin_unlock(&rsp->lock); + rcu_start_batch(rcp); + spin_unlock(&rcp->lock); } } else { local_irq_enable(); } - rcu_check_quiescent_state(rcp, rsp, rdp); + rcu_check_quiescent_state(rcp, rdp); if (rdp->donelist) rcu_do_batch(rdp); } static void rcu_process_callbacks(unsigned long unused) { - __rcu_process_callbacks(&rcu_ctrlblk, &rcu_state, - &__get_cpu_var(rcu_data)); - __rcu_process_callbacks(&rcu_bh_ctrlblk, &rcu_bh_state, - &__get_cpu_var(rcu_bh_data)); + __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data)); + __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); } static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) -- cgit v1.2.3-71-gd317 From 1e6c9c2878c9c1f301449c78551e0b7c5f3e3ae5 Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Tue, 10 Jan 2006 16:59:27 +0000 Subject: [ARM] 3242/2: AT91RM9200 support for 2.6 (Serial) Patch from Andrew Victor This patch adds support to the 2.6 kernel series for the Atmel AT91RM9200 processor. This patch is the Serial driver. This version uses the newly re-written GPL'ed hardware headers. 
Signed-off-by: Andrew Victor Signed-off-by: Russell King --- drivers/serial/Kconfig | 34 + drivers/serial/Makefile | 1 + drivers/serial/at91_serial.c | 894 +++++++++++++++++++++ include/asm-arm/arch-at91rm9200/at91rm9200_pdc.h | 36 + include/asm-arm/arch-at91rm9200/at91rm9200_usart.h | 123 +++ include/asm-arm/mach/serial_at91rm9200.h | 36 + include/linux/serial_core.h | 3 + 7 files changed, 1127 insertions(+) create mode 100644 drivers/serial/at91_serial.c create mode 100644 include/asm-arm/arch-at91rm9200/at91rm9200_pdc.h create mode 100644 include/asm-arm/arch-at91rm9200/at91rm9200_usart.h create mode 100644 include/asm-arm/mach/serial_at91rm9200.h (limited to 'include/linux') diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 1bae26a8a503..a256a020b15e 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -280,6 +280,40 @@ config SERIAL_AMBA_PL011_CONSOLE your boot loader (lilo or loadlin) about how to pass options to the kernel at boot time.) +config SERIAL_AT91 + bool "AT91RM9200 serial port support" + depends on ARM && ARCH_AT91RM9200 + select SERIAL_CORE + help + This enables the driver for the on-chip UARTs of the AT91RM9200 + processor. + +config SERIAL_AT91_CONSOLE + bool "Support for console on AT91RM9200 serial port" + depends on SERIAL_AT91=y + select SERIAL_CORE_CONSOLE + help + Say Y here if you wish to use a UART on the AT91RM9200 as the system + console (the system console is the device which receives all kernel + messages and warnings and which allows logins in single user mode). + +config SERIAL_AT91_TTYAT + bool "Install as device ttyAT0-4 instead of ttyS0-4" + depends on SERIAL_AT91=y + help + Say Y here if you wish to have the five internal AT91RM9200 UARTs + appear as /dev/ttyAT0-4 (major 240, minor 0-4) instead of the + normal /dev/ttyS0-4 (major 4, minor 64-68). This is necessary if + you also want other UARTs, such as external 8250/16C550 compatible + UARTs. 
+ The ttySn nodes are legally reserved for the 8250 serial driver + but are often misused by other serial drivers. + + To use this, you should create suitable ttyATn device nodes in + /dev/, and pass "console=ttyATn" to the kernel. + + Say Y if you have an external 8250/16C550 UART. If unsure, say N. + config SERIAL_CLPS711X tristate "CLPS711X serial port support" depends on ARM && ARCH_CLPS711X diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index 137148bba4fa..24a583e482bb 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -56,3 +56,4 @@ obj-$(CONFIG_SERIAL_JSM) += jsm/ obj-$(CONFIG_SERIAL_TXX9) += serial_txx9.o obj-$(CONFIG_SERIAL_VR41XX) += vr41xx_siu.o obj-$(CONFIG_SERIAL_SGI_IOC4) += ioc4_serial.o +obj-$(CONFIG_SERIAL_AT91) += at91_serial.o diff --git a/drivers/serial/at91_serial.c b/drivers/serial/at91_serial.c new file mode 100644 index 000000000000..0e206063d685 --- /dev/null +++ b/drivers/serial/at91_serial.c @@ -0,0 +1,894 @@ +/* + * linux/drivers/char/at91_serial.c + * + * Driver for Atmel AT91RM9200 Serial ports + * + * Copyright (C) 2003 Rick Bronson + * + * Based on drivers/char/serial_sa1100.c, by Deep Blue Solutions Ltd. + * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + + +#if defined(CONFIG_SERIAL_AT91_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) +#define SUPPORT_SYSRQ +#endif + +#include + +#ifdef CONFIG_SERIAL_AT91_TTYAT + +/* Use device name ttyAT, major 204 and minor 154-169. This is necessary if we + * should coexist with the 8250 driver, such as if we have an external 16C550 + * UART. */ +#define SERIAL_AT91_MAJOR 204 +#define MINOR_START 154 +#define AT91_DEVICENAME "ttyAT" + +#else + +/* Use device name ttyS, major 4, minor 64-68. This is the usual serial port + * name, but it is legally reserved for the 8250 driver. */ +#define SERIAL_AT91_MAJOR TTY_MAJOR +#define MINOR_START 64 +#define AT91_DEVICENAME "ttyS" + +#endif + +#define AT91_VA_BASE_DBGU ((unsigned long) AT91_VA_BASE_SYS + AT91_DBGU) +#define AT91_ISR_PASS_LIMIT 256 + +#define UART_PUT_CR(port,v) writel(v, (port)->membase + AT91_US_CR) +#define UART_GET_MR(port) readl((port)->membase + AT91_US_MR) +#define UART_PUT_MR(port,v) writel(v, (port)->membase + AT91_US_MR) +#define UART_PUT_IER(port,v) writel(v, (port)->membase + AT91_US_IER) +#define UART_PUT_IDR(port,v) writel(v, (port)->membase + AT91_US_IDR) +#define UART_GET_IMR(port) readl((port)->membase + AT91_US_IMR) +#define UART_GET_CSR(port) readl((port)->membase + AT91_US_CSR) +#define UART_GET_CHAR(port) readl((port)->membase + AT91_US_RHR) +#define UART_PUT_CHAR(port,v) writel(v, (port)->membase + AT91_US_THR) +#define UART_GET_BRGR(port) readl((port)->membase + AT91_US_BRGR) +#define UART_PUT_BRGR(port,v) writel(v, (port)->membase + AT91_US_BRGR) +#define UART_PUT_RTOR(port,v) writel(v, (port)->membase + AT91_US_RTOR) + +// #define 
UART_GET_CR(port) readl((port)->membase + AT91_US_CR) // is write-only + + /* PDC registers */ +#define UART_PUT_PTCR(port,v) writel(v, (port)->membase + AT91_PDC_PTCR) +#define UART_PUT_RPR(port,v) writel(v, (port)->membase + AT91_PDC_RPR) +#define UART_PUT_RCR(port,v) writel(v, (port)->membase + AT91_PDC_RCR) +#define UART_GET_RCR(port) readl((port)->membase + AT91_PDC_RCR) +#define UART_PUT_RNPR(port,v) writel(v, (port)->membase + AT91_PDC_RNPR) +#define UART_PUT_RNCR(port,v) writel(v, (port)->membase + AT91_PDC_RNCR) + + +static int (*at91_open)(struct uart_port *); +static void (*at91_close)(struct uart_port *); + +#ifdef SUPPORT_SYSRQ +static struct console at91_console; +#endif + +/* + * Return TIOCSER_TEMT when transmitter FIFO and Shift register is empty. + */ +static u_int at91_tx_empty(struct uart_port *port) +{ + return (UART_GET_CSR(port) & AT91_US_TXEMPTY) ? TIOCSER_TEMT : 0; +} + +/* + * Set state of the modem control output lines + */ +static void at91_set_mctrl(struct uart_port *port, u_int mctrl) +{ + unsigned int control = 0; + + /* + * Errata #39: RTS0 is not internally connected to PA21. We need to drive + * the pin manually. + */ + if (port->mapbase == AT91_VA_BASE_US0) { + if (mctrl & TIOCM_RTS) + at91_sys_write(AT91_PIOA + PIO_CODR, AT91_PA21_RTS0); + else + at91_sys_write(AT91_PIOA + PIO_SODR, AT91_PA21_RTS0); + } + + if (mctrl & TIOCM_RTS) + control |= AT91_US_RTSEN; + else + control |= AT91_US_RTSDIS; + + if (mctrl & TIOCM_DTR) + control |= AT91_US_DTREN; + else + control |= AT91_US_DTRDIS; + + UART_PUT_CR(port,control); +} + +/* + * Get state of the modem control input lines + */ +static u_int at91_get_mctrl(struct uart_port *port) +{ + unsigned int status, ret = 0; + + status = UART_GET_CSR(port); + + /* + * The control signals are active low. 
+ */ + if (!(status & AT91_US_DCD)) + ret |= TIOCM_CD; + if (!(status & AT91_US_CTS)) + ret |= TIOCM_CTS; + if (!(status & AT91_US_DSR)) + ret |= TIOCM_DSR; + if (!(status & AT91_US_RI)) + ret |= TIOCM_RI; + + return ret; +} + +/* + * Stop transmitting. + */ +static void at91_stop_tx(struct uart_port *port) +{ + UART_PUT_IDR(port, AT91_US_TXRDY); + port->read_status_mask &= ~AT91_US_TXRDY; +} + +/* + * Start transmitting. + */ +static void at91_start_tx(struct uart_port *port) +{ + port->read_status_mask |= AT91_US_TXRDY; + UART_PUT_IER(port, AT91_US_TXRDY); +} + +/* + * Stop receiving - port is in process of being closed. + */ +static void at91_stop_rx(struct uart_port *port) +{ + UART_PUT_IDR(port, AT91_US_RXRDY); +} + +/* + * Enable modem status interrupts + */ +static void at91_enable_ms(struct uart_port *port) +{ + port->read_status_mask |= (AT91_US_RIIC | AT91_US_DSRIC | AT91_US_DCDIC | AT91_US_CTSIC); + UART_PUT_IER(port, AT91_US_RIIC | AT91_US_DSRIC | AT91_US_DCDIC | AT91_US_CTSIC); +} + +/* + * Control the transmission of a break signal + */ +static void at91_break_ctl(struct uart_port *port, int break_state) +{ + if (break_state != 0) + UART_PUT_CR(port, AT91_US_STTBRK); /* start break */ + else + UART_PUT_CR(port, AT91_US_STPBRK); /* stop break */ +} + +/* + * Characters received (called from interrupt handler) + */ +static void at91_rx_chars(struct uart_port *port, struct pt_regs *regs) +{ + struct tty_struct *tty = port->info->tty; + unsigned int status, ch, flg; + + status = UART_GET_CSR(port) & port->read_status_mask; + while (status & (AT91_US_RXRDY)) { + ch = UART_GET_CHAR(port); + + if (tty->flip.count >= TTY_FLIPBUF_SIZE) + goto ignore_char; + port->icount.rx++; + + flg = TTY_NORMAL; + + /* + * note that the error handling code is + * out of the main execution path + */ + if (unlikely(status & (AT91_US_PARE | AT91_US_FRAME | AT91_US_OVRE))) { + UART_PUT_CR(port, AT91_US_RSTSTA); /* clear error */ + if (status & (AT91_US_PARE)) + 
port->icount.parity++; + if (status & (AT91_US_FRAME)) + port->icount.frame++; + if (status & (AT91_US_OVRE)) + port->icount.overrun++; + + if (status & AT91_US_PARE) + flg = TTY_PARITY; + else if (status & AT91_US_FRAME) + flg = TTY_FRAME; + if (status & AT91_US_OVRE) { + /* + * overrun does *not* affect the character + * we read from the FIFO + */ + tty_insert_flip_char(tty, ch, flg); + ch = 0; + flg = TTY_OVERRUN; + } +#ifdef SUPPORT_SYSRQ + port->sysrq = 0; +#endif + } + + if (uart_handle_sysrq_char(port, ch, regs)) + goto ignore_char; + + tty_insert_flip_char(tty, ch, flg); + + ignore_char: + status = UART_GET_CSR(port) & port->read_status_mask; + } + + tty_flip_buffer_push(tty); +} + +/* + * Transmit characters (called from interrupt handler) + */ +static void at91_tx_chars(struct uart_port *port) +{ + struct circ_buf *xmit = &port->info->xmit; + + if (port->x_char) { + UART_PUT_CHAR(port, port->x_char); + port->icount.tx++; + port->x_char = 0; + return; + } + if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { + at91_stop_tx(port); + return; + } + + while (UART_GET_CSR(port) & AT91_US_TXRDY) { + UART_PUT_CHAR(port, xmit->buf[xmit->tail]); + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + port->icount.tx++; + if (uart_circ_empty(xmit)) + break; + } + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + + if (uart_circ_empty(xmit)) + at91_stop_tx(port); +} + +/* + * Interrupt handler + */ +static irqreturn_t at91_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct uart_port *port = dev_id; + unsigned int status, pending, pass_counter = 0; + + status = UART_GET_CSR(port); + pending = status & port->read_status_mask; + if (pending) { + do { + if (pending & AT91_US_RXRDY) + at91_rx_chars(port, regs); + + /* Clear the relevent break bits */ + if (pending & AT91_US_RXBRK) { + UART_PUT_CR(port, AT91_US_RSTSTA); + port->icount.brk++; + uart_handle_break(port); + } + + // TODO: All reads to CSR will clear these 
interrupts! + if (pending & AT91_US_RIIC) port->icount.rng++; + if (pending & AT91_US_DSRIC) port->icount.dsr++; + if (pending & AT91_US_DCDIC) + uart_handle_dcd_change(port, !(status & AT91_US_DCD)); + if (pending & AT91_US_CTSIC) + uart_handle_cts_change(port, !(status & AT91_US_CTS)); + if (pending & (AT91_US_RIIC | AT91_US_DSRIC | AT91_US_DCDIC | AT91_US_CTSIC)) + wake_up_interruptible(&port->info->delta_msr_wait); + + if (pending & AT91_US_TXRDY) + at91_tx_chars(port); + if (pass_counter++ > AT91_ISR_PASS_LIMIT) + break; + + status = UART_GET_CSR(port); + pending = status & port->read_status_mask; + } while (pending); + } + return IRQ_HANDLED; +} + +/* + * Perform initialization and enable port for reception + */ +static int at91_startup(struct uart_port *port) +{ + int retval; + + /* + * Ensure that no interrupts are enabled otherwise when + * request_irq() is called we could get stuck trying to + * handle an unexpected interrupt + */ + UART_PUT_IDR(port, -1); + + /* + * Allocate the IRQ + */ + retval = request_irq(port->irq, at91_interrupt, SA_SHIRQ, "at91_serial", port); + if (retval) { + printk("at91_serial: at91_startup - Can't get irq\n"); + return retval; + } + + /* + * If there is a specific "open" function (to register + * control line interrupts) + */ + if (at91_open) { + retval = at91_open(port); + if (retval) { + free_irq(port->irq, port); + return retval; + } + } + + port->read_status_mask = AT91_US_RXRDY | AT91_US_TXRDY | AT91_US_OVRE + | AT91_US_FRAME | AT91_US_PARE | AT91_US_RXBRK; + /* + * Finally, enable the serial port + */ + UART_PUT_CR(port, AT91_US_RSTSTA | AT91_US_RSTRX); + UART_PUT_CR(port, AT91_US_TXEN | AT91_US_RXEN); /* enable xmit & rcvr */ + UART_PUT_IER(port, AT91_US_RXRDY); /* do receive only */ + return 0; +} + +/* + * Disable the port + */ +static void at91_shutdown(struct uart_port *port) +{ + /* + * Disable all interrupts, port and break condition. 
+ */ + UART_PUT_CR(port, AT91_US_RSTSTA); + UART_PUT_IDR(port, -1); + + /* + * Free the interrupt + */ + free_irq(port->irq, port); + + /* + * If there is a specific "close" function (to unregister + * control line interrupts) + */ + if (at91_close) + at91_close(port); +} + +/* + * Power / Clock management. + */ +static void at91_serial_pm(struct uart_port *port, unsigned int state, unsigned int oldstate) +{ + switch (state) { + case 0: + /* + * Enable the peripheral clock for this serial port. + * This is called on uart_open() or a resume event. + */ + at91_sys_write(AT91_PMC_PCER, 1 << port->irq); + break; + case 3: + /* + * Disable the peripheral clock for this serial port. + * This is called on uart_close() or a suspend event. + */ + if (port->irq != AT91_ID_SYS) /* is this a shared clock? */ + at91_sys_write(AT91_PMC_PCDR, 1 << port->irq); + break; + default: + printk(KERN_ERR "at91_serial: unknown pm %d\n", state); + } +} + +/* + * Change the port parameters + */ +static void at91_set_termios(struct uart_port *port, struct termios * termios, struct termios * old) +{ + unsigned long flags; + unsigned int mode, imr, quot, baud; + + baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16); + quot = uart_get_divisor(port, baud); + + /* Get current mode register */ + mode = UART_GET_MR(port) & ~(AT91_US_CHRL | AT91_US_NBSTOP | AT91_US_PAR); + + /* byte size */ + switch (termios->c_cflag & CSIZE) { + case CS5: + mode |= AT91_US_CHRL_5; + break; + case CS6: + mode |= AT91_US_CHRL_6; + break; + case CS7: + mode |= AT91_US_CHRL_7; + break; + default: + mode |= AT91_US_CHRL_8; + break; + } + + /* stop bits */ + if (termios->c_cflag & CSTOPB) + mode |= AT91_US_NBSTOP_2; + + /* parity */ + if (termios->c_cflag & PARENB) { + if (termios->c_cflag & CMSPAR) { /* Mark or Space parity */ + if (termios->c_cflag & PARODD) + mode |= AT91_US_PAR_MARK; + else + mode |= AT91_US_PAR_SPACE; + } + else if (termios->c_cflag & PARODD) + mode |= AT91_US_PAR_ODD; + else + mode 
|= AT91_US_PAR_EVEN; + } + else + mode |= AT91_US_PAR_NONE; + + spin_lock_irqsave(&port->lock, flags); + + port->read_status_mask |= AT91_US_OVRE; + if (termios->c_iflag & INPCK) + port->read_status_mask |= AT91_US_FRAME | AT91_US_PARE; + if (termios->c_iflag & (BRKINT | PARMRK)) + port->read_status_mask |= AT91_US_RXBRK; + + /* + * Characters to ignore + */ + port->ignore_status_mask = 0; + if (termios->c_iflag & IGNPAR) + port->ignore_status_mask |= (AT91_US_FRAME | AT91_US_PARE); + if (termios->c_iflag & IGNBRK) { + port->ignore_status_mask |= AT91_US_RXBRK; + /* + * If we're ignoring parity and break indicators, + * ignore overruns too (for real raw support). + */ + if (termios->c_iflag & IGNPAR) + port->ignore_status_mask |= AT91_US_OVRE; + } + + // TODO: Ignore all characters if CREAD is set. + + /* update the per-port timeout */ + uart_update_timeout(port, termios->c_cflag, baud); + + /* disable interrupts and drain transmitter */ + imr = UART_GET_IMR(port); /* get interrupt mask */ + UART_PUT_IDR(port, -1); /* disable all interrupts */ + while (!(UART_GET_CSR(port) & AT91_US_TXEMPTY)) { barrier(); } + + /* disable receiver and transmitter */ + UART_PUT_CR(port, AT91_US_TXDIS | AT91_US_RXDIS); + + /* set the parity, stop bits and data size */ + UART_PUT_MR(port, mode); + + /* set the baud rate */ + UART_PUT_BRGR(port, quot); + UART_PUT_CR(port, AT91_US_RSTSTA | AT91_US_RSTRX); + UART_PUT_CR(port, AT91_US_TXEN | AT91_US_RXEN); + + /* restore interrupts */ + UART_PUT_IER(port, imr); + + /* CTS flow-control and modem-status interrupts */ + if (UART_ENABLE_MS(port, termios->c_cflag)) + port->ops->enable_ms(port); + + spin_unlock_irqrestore(&port->lock, flags); +} + +/* + * Return string describing the specified port + */ +static const char *at91_type(struct uart_port *port) +{ + return (port->type == PORT_AT91RM9200) ? "AT91_SERIAL" : NULL; +} + +/* + * Release the memory region(s) being used by 'port'. 
+ */ +static void at91_release_port(struct uart_port *port) +{ + release_mem_region(port->mapbase, + (port->mapbase == AT91_VA_BASE_DBGU) ? 512 : SZ_16K); +} + +/* + * Request the memory region(s) being used by 'port'. + */ +static int at91_request_port(struct uart_port *port) +{ + return request_mem_region(port->mapbase, + (port->mapbase == AT91_VA_BASE_DBGU) ? 512 : SZ_16K, + "at91_serial") != NULL ? 0 : -EBUSY; + +} + +/* + * Configure/autoconfigure the port. + */ +static void at91_config_port(struct uart_port *port, int flags) +{ + if (flags & UART_CONFIG_TYPE) { + port->type = PORT_AT91RM9200; + at91_request_port(port); + } +} + +/* + * Verify the new serial_struct (for TIOCSSERIAL). + */ +static int at91_verify_port(struct uart_port *port, struct serial_struct *ser) +{ + int ret = 0; + if (ser->type != PORT_UNKNOWN && ser->type != PORT_AT91RM9200) + ret = -EINVAL; + if (port->irq != ser->irq) + ret = -EINVAL; + if (ser->io_type != SERIAL_IO_MEM) + ret = -EINVAL; + if (port->uartclk / 16 != ser->baud_base) + ret = -EINVAL; + if ((void *)port->mapbase != ser->iomem_base) + ret = -EINVAL; + if (port->iobase != ser->port) + ret = -EINVAL; + if (ser->hub6 != 0) + ret = -EINVAL; + return ret; +} + +static struct uart_ops at91_pops = { + .tx_empty = at91_tx_empty, + .set_mctrl = at91_set_mctrl, + .get_mctrl = at91_get_mctrl, + .stop_tx = at91_stop_tx, + .start_tx = at91_start_tx, + .stop_rx = at91_stop_rx, + .enable_ms = at91_enable_ms, + .break_ctl = at91_break_ctl, + .startup = at91_startup, + .shutdown = at91_shutdown, + .set_termios = at91_set_termios, + .type = at91_type, + .release_port = at91_release_port, + .request_port = at91_request_port, + .config_port = at91_config_port, + .verify_port = at91_verify_port, + .pm = at91_serial_pm, +}; + +static struct uart_port at91_ports[AT91_NR_UART]; + +void __init at91_init_ports(void) +{ + static int first = 1; + int i; + + if (!first) + return; + first = 0; + + for (i = 0; i < AT91_NR_UART; i++) { + 
at91_ports[i].iotype = UPIO_MEM; + at91_ports[i].flags = UPF_BOOT_AUTOCONF; + at91_ports[i].uartclk = at91_master_clock; + at91_ports[i].ops = &at91_pops; + at91_ports[i].fifosize = 1; + at91_ports[i].line = i; + } +} + +void __init at91_register_uart_fns(struct at91rm9200_port_fns *fns) +{ + if (fns->enable_ms) + at91_pops.enable_ms = fns->enable_ms; + if (fns->get_mctrl) + at91_pops.get_mctrl = fns->get_mctrl; + if (fns->set_mctrl) + at91_pops.set_mctrl = fns->set_mctrl; + at91_open = fns->open; + at91_close = fns->close; + at91_pops.pm = fns->pm; + at91_pops.set_wake = fns->set_wake; +} + +/* + * Setup ports. + */ +void __init at91_register_uart(int idx, int port) +{ + if ((idx < 0) || (idx >= AT91_NR_UART)) { + printk(KERN_ERR "%s: bad index number %d\n", __FUNCTION__, idx); + return; + } + + switch (port) { + case 0: + at91_ports[idx].membase = (void __iomem *) AT91_VA_BASE_US0; + at91_ports[idx].mapbase = AT91_VA_BASE_US0; + at91_ports[idx].irq = AT91_ID_US0; + AT91_CfgPIO_USART0(); + break; + case 1: + at91_ports[idx].membase = (void __iomem *) AT91_VA_BASE_US1; + at91_ports[idx].mapbase = AT91_VA_BASE_US1; + at91_ports[idx].irq = AT91_ID_US1; + AT91_CfgPIO_USART1(); + break; + case 2: + at91_ports[idx].membase = (void __iomem *) AT91_VA_BASE_US2; + at91_ports[idx].mapbase = AT91_VA_BASE_US2; + at91_ports[idx].irq = AT91_ID_US2; + AT91_CfgPIO_USART2(); + break; + case 3: + at91_ports[idx].membase = (void __iomem *) AT91_VA_BASE_US3; + at91_ports[idx].mapbase = AT91_VA_BASE_US3; + at91_ports[idx].irq = AT91_ID_US3; + AT91_CfgPIO_USART3(); + break; + case 4: + at91_ports[idx].membase = (void __iomem *) AT91_VA_BASE_DBGU; + at91_ports[idx].mapbase = AT91_VA_BASE_DBGU; + at91_ports[idx].irq = AT91_ID_SYS; + AT91_CfgPIO_DBGU(); + break; + default: + printk(KERN_ERR "%s : bad port number %d\n", __FUNCTION__, port); + } +} + +#ifdef CONFIG_SERIAL_AT91_CONSOLE + +/* + * Interrupts are disabled on entering + */ +static void at91_console_write(struct console *co, 
const char *s, u_int count) +{ + struct uart_port *port = at91_ports + co->index; + unsigned int status, i, imr; + + /* + * First, save IMR and then disable interrupts + */ + imr = UART_GET_IMR(port); /* get interrupt mask */ + UART_PUT_IDR(port, AT91_US_RXRDY | AT91_US_TXRDY); + + /* + * Now, do each character + */ + for (i = 0; i < count; i++) { + do { + status = UART_GET_CSR(port); + } while (!(status & AT91_US_TXRDY)); + UART_PUT_CHAR(port, s[i]); + if (s[i] == '\n') { + do { + status = UART_GET_CSR(port); + } while (!(status & AT91_US_TXRDY)); + UART_PUT_CHAR(port, '\r'); + } + } + + /* + * Finally, wait for transmitter to become empty + * and restore IMR + */ + do { + status = UART_GET_CSR(port); + } while (!(status & AT91_US_TXRDY)); + UART_PUT_IER(port, imr); /* set interrupts back the way they were */ +} + +/* + * If the port was already initialised (eg, by a boot loader), try to determine + * the current setup. + */ +static void __init at91_console_get_options(struct uart_port *port, int *baud, int *parity, int *bits) +{ + unsigned int mr, quot; + +// TODO: CR is a write-only register +// unsigned int cr; +// +// cr = UART_GET_CR(port) & (AT91_US_RXEN | AT91_US_TXEN); +// if (cr == (AT91_US_RXEN | AT91_US_TXEN)) { +// /* ok, the port was enabled */ +// } + + mr = UART_GET_MR(port) & AT91_US_CHRL; + if (mr == AT91_US_CHRL_8) + *bits = 8; + else + *bits = 7; + + mr = UART_GET_MR(port) & AT91_US_PAR; + if (mr == AT91_US_PAR_EVEN) + *parity = 'e'; + else if (mr == AT91_US_PAR_ODD) + *parity = 'o'; + + quot = UART_GET_BRGR(port); + *baud = port->uartclk / (16 * (quot)); +} + +static int __init at91_console_setup(struct console *co, char *options) +{ + struct uart_port *port; + int baud = 115200; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + + /* + * Check whether an invalid uart number has been specified, and + * if so, search for the first available port that does have + * console support. 
+ */ + port = uart_get_console(at91_ports, AT91_NR_UART, co); + + /* + * Enable the serial console, in-case bootloader did not do it. + */ + at91_sys_write(AT91_PMC_PCER, 1 << port->irq); /* enable clock */ + UART_PUT_IDR(port, -1); /* disable interrupts */ + UART_PUT_CR(port, AT91_US_RSTSTA | AT91_US_RSTRX); + UART_PUT_CR(port, AT91_US_TXEN | AT91_US_RXEN); + + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + else + at91_console_get_options(port, &baud, &parity, &bits); + + return uart_set_options(port, co, baud, parity, bits, flow); +} + +static struct uart_driver at91_uart; + +static struct console at91_console = { + .name = AT91_DEVICENAME, + .write = at91_console_write, + .device = uart_console_device, + .setup = at91_console_setup, + .flags = CON_PRINTBUFFER, + .index = -1, + .data = &at91_uart, +}; + +#define AT91_CONSOLE_DEVICE &at91_console + +static int __init at91_console_init(void) +{ + at91_init_ports(); + + at91_console.index = at91_console_port; + register_console(&at91_console); + return 0; +} +console_initcall(at91_console_init); + +#else +#define AT91_CONSOLE_DEVICE NULL +#endif + +static struct uart_driver at91_uart = { + .owner = THIS_MODULE, + .driver_name = AT91_DEVICENAME, + .dev_name = AT91_DEVICENAME, + .devfs_name = AT91_DEVICENAME, + .major = SERIAL_AT91_MAJOR, + .minor = MINOR_START, + .nr = AT91_NR_UART, + .cons = AT91_CONSOLE_DEVICE, +}; + +static int __init at91_serial_init(void) +{ + int ret, i; + + at91_init_ports(); + + ret = uart_register_driver(&at91_uart); + if (ret) + return ret; + + for (i = 0; i < AT91_NR_UART; i++) { + if (at91_serial_map[i] >= 0) + uart_add_one_port(&at91_uart, &at91_ports[i]); + } + + return 0; +} + +static void __exit at91_serial_exit(void) +{ + int i; + + for (i = 0; i < AT91_NR_UART; i++) { + if (at91_serial_map[i] >= 0) + uart_remove_one_port(&at91_uart, &at91_ports[i]); + } + + uart_unregister_driver(&at91_uart); +} + +module_init(at91_serial_init); 
+module_exit(at91_serial_exit); + +MODULE_AUTHOR("Rick Bronson"); +MODULE_DESCRIPTION("AT91 generic serial port driver"); +MODULE_LICENSE("GPL"); diff --git a/include/asm-arm/arch-at91rm9200/at91rm9200_pdc.h b/include/asm-arm/arch-at91rm9200/at91rm9200_pdc.h new file mode 100644 index 000000000000..ce1150d4438d --- /dev/null +++ b/include/asm-arm/arch-at91rm9200/at91rm9200_pdc.h @@ -0,0 +1,36 @@ +/* + * include/asm-arm/arch-at91rm9200/at91rm9200_pdc.h + * + * Copyright (C) 2005 Ivan Kokshaysky + * Copyright (C) SAN People + * + * Peripheral Data Controller (PDC) registers. + * Based on AT91RM9200 datasheet revision E. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef AT91RM9200_PDC_H +#define AT91RM9200_PDC_H + +#define AT91_PDC_RPR 0x100 /* Receive Pointer Register */ +#define AT91_PDC_RCR 0x104 /* Receive Counter Register */ +#define AT91_PDC_TPR 0x108 /* Transmit Pointer Register */ +#define AT91_PDC_TCR 0x10c /* Transmit Counter Register */ +#define AT91_PDC_RNPR 0x110 /* Receive Next Pointer Register */ +#define AT91_PDC_RNCR 0x114 /* Receive Next Counter Register */ +#define AT91_PDC_TNPR 0x118 /* Transmit Next Pointer Register */ +#define AT91_PDC_TNCR 0x11c /* Transmit Next Counter Register */ + +#define AT91_PDC_PTCR 0x120 /* Transfer Control Register */ +#define AT91_PDC_RXTEN (1 << 0) /* Receiver Transfer Enable */ +#define AT91_PDC_RXTDIS (1 << 1) /* Receiver Transfer Disable */ +#define AT91_PDC_TXTEN (1 << 8) /* Transmitter Transfer Enable */ +#define AT91_PDC_TXTDIS (1 << 9) /* Transmitter Transfer Disable */ + +#define AT91_PDC_PTSR 0x124 /* Transfer Status Register */ + +#endif diff --git a/include/asm-arm/arch-at91rm9200/at91rm9200_usart.h b/include/asm-arm/arch-at91rm9200/at91rm9200_usart.h new file mode 100644 index 
000000000000..79f851e31b9c --- /dev/null +++ b/include/asm-arm/arch-at91rm9200/at91rm9200_usart.h @@ -0,0 +1,123 @@ +/* + * include/asm-arm/arch-at91rm9200/at91rm9200_usart.h + * + * Copyright (C) 2005 Ivan Kokshaysky + * Copyright (C) SAN People + * + * USART registers. + * Based on AT91RM9200 datasheet revision E. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef AT91RM9200_USART_H +#define AT91RM9200_USART_H + +#define AT91_US_CR 0x00 /* Control Register */ +#define AT91_US_RSTRX (1 << 2) /* Reset Receiver */ +#define AT91_US_RSTTX (1 << 3) /* Reset Transmitter */ +#define AT91_US_RXEN (1 << 4) /* Receiver Enable */ +#define AT91_US_RXDIS (1 << 5) /* Receiver Disable */ +#define AT91_US_TXEN (1 << 6) /* Transmitter Enable */ +#define AT91_US_TXDIS (1 << 7) /* Transmitter Disable */ +#define AT91_US_RSTSTA (1 << 8) /* Reset Status Bits */ +#define AT91_US_STTBRK (1 << 9) /* Start Break */ +#define AT91_US_STPBRK (1 << 10) /* Stop Break */ +#define AT91_US_STTTO (1 << 11) /* Start Time-out */ +#define AT91_US_SENDA (1 << 12) /* Send Address */ +#define AT91_US_RSTIT (1 << 13) /* Reset Iterations */ +#define AT91_US_RSTNACK (1 << 14) /* Reset Non Acknowledge */ +#define AT91_US_RETTO (1 << 15) /* Rearm Time-out */ +#define AT91_US_DTREN (1 << 16) /* Data Terminal Ready Enable */ +#define AT91_US_DTRDIS (1 << 17) /* Data Terminal Ready Disable */ +#define AT91_US_RTSEN (1 << 18) /* Request To Send Enable */ +#define AT91_US_RTSDIS (1 << 19) /* Request To Send Disable */ + +#define AT91_US_MR 0x04 /* Mode Register */ +#define AT91_US_USMODE (0xf << 0) /* Mode of the USART */ +#define AT91_US_USMODE_NORMAL 0 +#define AT91_US_USMODE_RS485 1 +#define AT91_US_USMODE_HWHS 2 +#define AT91_US_USMODE_MODEM 3 +#define AT91_US_USMODE_ISO7816_T0 4 +#define 
AT91_US_USMODE_ISO7816_T1 6 +#define AT91_US_USMODE_IRDA 8 +#define AT91_US_USCLKS (3 << 4) /* Clock Selection */ +#define AT91_US_CHRL (3 << 6) /* Character Length */ +#define AT91_US_CHRL_5 (0 << 6) +#define AT91_US_CHRL_6 (1 << 6) +#define AT91_US_CHRL_7 (2 << 6) +#define AT91_US_CHRL_8 (3 << 6) +#define AT91_US_SYNC (1 << 8) /* Synchronous Mode Select */ +#define AT91_US_PAR (7 << 9) /* Parity Type */ +#define AT91_US_PAR_EVEN (0 << 9) +#define AT91_US_PAR_ODD (1 << 9) +#define AT91_US_PAR_SPACE (2 << 9) +#define AT91_US_PAR_MARK (3 << 9) +#define AT91_US_PAR_NONE (4 << 9) +#define AT91_US_PAR_MULTI_DROP (6 << 9) +#define AT91_US_NBSTOP (3 << 12) /* Number of Stop Bits */ +#define AT91_US_NBSTOP_1 (0 << 12) +#define AT91_US_NBSTOP_1_5 (1 << 12) +#define AT91_US_NBSTOP_2 (2 << 12) +#define AT91_US_CHMODE (3 << 14) /* Channel Mode */ +#define AT91_US_CHMODE_NORMAL (0 << 14) +#define AT91_US_CHMODE_ECHO (1 << 14) +#define AT91_US_CHMODE_LOC_LOOP (2 << 14) +#define AT91_US_CHMODE_REM_LOOP (3 << 14) +#define AT91_US_MSBF (1 << 16) /* Bit Order */ +#define AT91_US_MODE9 (1 << 17) /* 9-bit Character Length */ +#define AT91_US_CLKO (1 << 18) /* Clock Output Select */ +#define AT91_US_OVER (1 << 19) /* Oversampling Mode */ +#define AT91_US_INACK (1 << 20) /* Inhibit Non Acknowledge */ +#define AT91_US_DSNACK (1 << 21) /* Disable Successive NACK */ +#define AT91_US_MAX_ITER (7 << 24) /* Max Iterations */ +#define AT91_US_FILTER (1 << 28) /* Infrared Receive Line Filter */ + +#define AT91_US_IER 0x08 /* Interrupt Enable Register */ +#define AT91_US_RXRDY (1 << 0) /* Receiver Ready */ +#define AT91_US_TXRDY (1 << 1) /* Transmitter Ready */ +#define AT91_US_RXBRK (1 << 2) /* Break Received / End of Break */ +#define AT91_US_ENDRX (1 << 3) /* End of Receiver Transfer */ +#define AT91_US_ENDTX (1 << 4) /* End of Transmitter Transfer */ +#define AT91_US_OVRE (1 << 5) /* Overrun Error */ +#define AT91_US_FRAME (1 << 6) /* Framing Error */ +#define AT91_US_PARE (1 << 7) /* 
Parity Error */ +#define AT91_US_TIMEOUT (1 << 8) /* Receiver Time-out */ +#define AT91_US_TXEMPTY (1 << 9) /* Transmitter Empty */ +#define AT91_US_ITERATION (1 << 10) /* Max number of Repetitions Reached */ +#define AT91_US_TXBUFE (1 << 11) /* Transmission Buffer Empty */ +#define AT91_US_RXBUFF (1 << 12) /* Reception Buffer Full */ +#define AT91_US_NACK (1 << 13) /* Non Acknowledge */ +#define AT91_US_RIIC (1 << 16) /* Ring Indicator Input Change */ +#define AT91_US_DSRIC (1 << 17) /* Data Set Ready Input Change */ +#define AT91_US_DCDIC (1 << 18) /* Data Carrier Detect Input Change */ +#define AT91_US_CTSIC (1 << 19) /* Clear to Send Input Change */ +#define AT91_US_RI (1 << 20) /* RI */ +#define AT91_US_DSR (1 << 21) /* DSR */ +#define AT91_US_DCD (1 << 22) /* DCD */ +#define AT91_US_CTS (1 << 23) /* CTS */ + +#define AT91_US_IDR 0x0c /* Interrupt Disable Register */ +#define AT91_US_IMR 0x10 /* Interrupt Mask Register */ +#define AT91_US_CSR 0x14 /* Channel Status Register */ +#define AT91_US_RHR 0x18 /* Receiver Holding Register */ +#define AT91_US_THR 0x1c /* Transmitter Holding Register */ + +#define AT91_US_BRGR 0x20 /* Baud Rate Generator Register */ +#define AT91_US_CD (0xffff << 0) /* Clock Divider */ + +#define AT91_US_RTOR 0x24 /* Receiver Time-out Register */ +#define AT91_US_TO (0xffff << 0) /* Time-out Value */ + +#define AT91_US_TTGR 0x28 /* Transmitter Timeguard Register */ +#define AT91_US_TG (0xff << 0) /* Timeguard Value */ + +#define AT91_US_FIDI 0x40 /* FI DI Ratio Register */ +#define AT91_US_NER 0x44 /* Number of Errors Register */ +#define AT91_US_IF 0x4c /* IrDA Filter Register */ + +#endif diff --git a/include/asm-arm/mach/serial_at91rm9200.h b/include/asm-arm/mach/serial_at91rm9200.h new file mode 100644 index 000000000000..98f4b0cb883c --- /dev/null +++ b/include/asm-arm/mach/serial_at91rm9200.h @@ -0,0 +1,36 @@ +/* + * linux/include/asm-arm/mach/serial_at91rm9200.h + * + * Based on serial_sa1100.h by Nicolas Pitre + * + * Copyright 
(C) 2002 ATMEL Rousset + * + * Low level machine dependent UART functions. + */ +#include + +struct uart_port; + +/* + * This is a temporary structure for registering these + * functions; it is intended to be discarded after boot. + */ +struct at91rm9200_port_fns { + void (*set_mctrl)(struct uart_port *, u_int); + u_int (*get_mctrl)(struct uart_port *); + void (*enable_ms)(struct uart_port *); + void (*pm)(struct uart_port *, u_int, u_int); + int (*set_wake)(struct uart_port *, u_int); + int (*open)(struct uart_port *); + void (*close)(struct uart_port *); +}; + +#if defined(CONFIG_SERIAL_AT91) +void at91_register_uart_fns(struct at91rm9200_port_fns *fns); +void at91_register_uart(int idx, int port); +#else +#define at91_register_uart_fns(fns) do { } while (0) +#define at91_register_uart(idx,port) do { } while (0) +#endif + + diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index e3710d7e260a..a8187c3c8a7b 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -67,6 +67,9 @@ /* Parisc type numbers. */ #define PORT_MUX 48 +/* Atmel AT91RM9200 SoC */ +#define PORT_AT91RM9200 49 + /* Macintosh Zilog type numbers */ #define PORT_MAC_ZILOG 50 /* m68k : not yet implemented */ #define PORT_PMAC_ZILOG 51 -- cgit v1.2.3-71-gd317 From bb94aa169eaa6e713a429370d37388722f08666f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 9 Jan 2006 16:43:13 -0800 Subject: [NETFILTER]: net/ipv[46]/netfilter.c cleanups Don't wrap entire file in #ifdef CONFIG_NETFILTER, remove a few unneccessary includes. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv6.h | 5 +++++ net/ipv4/Makefile | 4 ++-- net/ipv4/netfilter.c | 10 ---------- net/ipv6/Makefile | 5 ++--- net/ipv6/netfilter.c | 19 ++----------------- 5 files changed, 11 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 53b2983f6278..14f2bd010884 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -72,7 +72,12 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_LAST = INT_MAX, }; +#ifdef CONFIG_NETFILTER extern int ipv6_netfilter_init(void); extern void ipv6_netfilter_fini(void); +#else /* CONFIG_NETFILTER */ +static inline int ipv6_netfilter_init(void) { return 0; } +static inline void ipv6_netfilter_fini(void) { return; } +#endif /* CONFIG_NETFILTER */ #endif /*__LINUX_IP6_NETFILTER_H*/ diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index c54edd76de09..35e5f5999092 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -9,7 +9,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ - sysctl_net_ipv4.o fib_frontend.o fib_semantics.o netfilter.o + sysctl_net_ipv4.o fib_frontend.o fib_semantics.o obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o @@ -28,7 +28,7 @@ obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o -obj-$(CONFIG_NETFILTER) += netfilter/ +obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ obj-$(CONFIG_IP_VS) += ipvs/ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 3321092b0914..52a3d7c57907 100644 --- a/net/ipv4/netfilter.c +++ 
b/net/ipv4/netfilter.c @@ -1,16 +1,8 @@ /* IPv4 specific functions of netfilter core */ - -#include -#ifdef CONFIG_NETFILTER - #include #include #include - #include -#include -#include -#include #include #include #include @@ -146,5 +138,3 @@ static void fini(void) module_init(init); module_exit(fini); - -#endif /* CONFIG_NETFILTER */ diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 9601fd7f9d66..bf18cff13120 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -8,8 +8,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \ protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \ - ip6_flowlabel.o ipv6_syms.o netfilter.o \ - inet6_connection_sock.o + ip6_flowlabel.o ipv6_syms.o inet6_connection_sock.o ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o @@ -19,7 +18,7 @@ obj-$(CONFIG_INET6_AH) += ah6.o obj-$(CONFIG_INET6_ESP) += esp6.o obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o obj-$(CONFIG_INET6_TUNNEL) += xfrm6_tunnel.o -obj-$(CONFIG_NETFILTER) += netfilter/ +obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index b63678328a3b..1ab62f033664 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -1,9 +1,5 @@ -#include -#include - -#ifdef CONFIG_NETFILTER - #include +#include #include #include #include @@ -94,18 +90,7 @@ int __init ipv6_netfilter_init(void) return nf_register_queue_rerouter(PF_INET6, &ip6_reroute); } -void ipv6_netfilter_fini(void) +void __exit ipv6_netfilter_fini(void) { nf_unregister_queue_rerouter(PF_INET6); } - -#else /* CONFIG_NETFILTER */ -int __init ipv6_netfilter_init(void) -{ - return 0; -} - -void ipv6_netfilter_fini(void) -{ -} -#endif /* CONFIG_NETFILTER */ -- cgit v1.2.3-71-gd317 From 9d28026b7ec0f3e2a407d5c03fcb37d0b59d1add Mon Sep 17 00:00:00 2001 From: Patrick 
McHardy Date: Mon, 9 Jan 2006 16:44:36 -0800 Subject: [NETFILTER]: Remove unused function from NAT protocol helpers ->print and ->print_range are not used (and apparently never were). Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_nat_protocol.h | 7 ----- net/ipv4/netfilter/ip_nat_proto_gre.c | 38 -------------------------- net/ipv4/netfilter/ip_nat_proto_icmp.c | 34 ----------------------- net/ipv4/netfilter/ip_nat_proto_tcp.c | 36 ------------------------ net/ipv4/netfilter/ip_nat_proto_udp.c | 36 ------------------------ net/ipv4/netfilter/ip_nat_proto_unknown.c | 16 ----------- 6 files changed, 167 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_nat_protocol.h b/include/linux/netfilter_ipv4/ip_nat_protocol.h index ef63aa991a06..612a43614e7b 100644 --- a/include/linux/netfilter_ipv4/ip_nat_protocol.h +++ b/include/linux/netfilter_ipv4/ip_nat_protocol.h @@ -42,13 +42,6 @@ struct ip_nat_protocol enum ip_nat_manip_type maniptype, const struct ip_conntrack *conntrack); - unsigned int (*print)(char *buffer, - const struct ip_conntrack_tuple *match, - const struct ip_conntrack_tuple *mask); - - unsigned int (*print_range)(char *buffer, - const struct ip_nat_range *range); - int (*range_to_nfattr)(struct sk_buff *skb, const struct ip_nat_range *range); diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c index f7cad7cf1aec..6c4899d8046a 100644 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -151,42 +151,6 @@ gre_manip_pkt(struct sk_buff **pskb, return 1; } -/* print out a nat tuple */ -static unsigned int -gre_print(char *buffer, - const struct ip_conntrack_tuple *match, - const struct ip_conntrack_tuple *mask) -{ - unsigned int len = 0; - - if (mask->src.u.gre.key) - len += sprintf(buffer + len, "srckey=0x%x ", - ntohl(match->src.u.gre.key)); - - if (mask->dst.u.gre.key) - len += sprintf(buffer + 
len, "dstkey=0x%x ", - ntohl(match->src.u.gre.key)); - - return len; -} - -/* print a range of keys */ -static unsigned int -gre_print_range(char *buffer, const struct ip_nat_range *range) -{ - if (range->min.gre.key != 0 - || range->max.gre.key != 0xFFFF) { - if (range->min.gre.key == range->max.gre.key) - return sprintf(buffer, "key 0x%x ", - ntohl(range->min.gre.key)); - else - return sprintf(buffer, "keys 0x%u-0x%u ", - ntohl(range->min.gre.key), - ntohl(range->max.gre.key)); - } else - return 0; -} - /* nat helper struct */ static struct ip_nat_protocol gre = { .name = "GRE", @@ -194,8 +158,6 @@ static struct ip_nat_protocol gre = { .manip_pkt = gre_manip_pkt, .in_range = gre_in_range, .unique_tuple = gre_unique_tuple, - .print = gre_print, - .print_range = gre_print_range, #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) .range_to_nfattr = ip_nat_port_range_to_nfattr, diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index 938719043999..31a3f4ccb99c 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -74,38 +74,6 @@ icmp_manip_pkt(struct sk_buff **pskb, return 1; } -static unsigned int -icmp_print(char *buffer, - const struct ip_conntrack_tuple *match, - const struct ip_conntrack_tuple *mask) -{ - unsigned int len = 0; - - if (mask->src.u.icmp.id) - len += sprintf(buffer + len, "id=%u ", - ntohs(match->src.u.icmp.id)); - - if (mask->dst.u.icmp.type) - len += sprintf(buffer + len, "type=%u ", - ntohs(match->dst.u.icmp.type)); - - if (mask->dst.u.icmp.code) - len += sprintf(buffer + len, "code=%u ", - ntohs(match->dst.u.icmp.code)); - - return len; -} - -static unsigned int -icmp_print_range(char *buffer, const struct ip_nat_range *range) -{ - if (range->min.icmp.id != 0 || range->max.icmp.id != 0xFFFF) - return sprintf(buffer, "id %u-%u ", - ntohs(range->min.icmp.id), - ntohs(range->max.icmp.id)); - else return 0; -} - 
struct ip_nat_protocol ip_nat_protocol_icmp = { .name = "ICMP", .protonum = IPPROTO_ICMP, @@ -113,8 +81,6 @@ struct ip_nat_protocol ip_nat_protocol_icmp = { .manip_pkt = icmp_manip_pkt, .in_range = icmp_in_range, .unique_tuple = icmp_unique_tuple, - .print = icmp_print, - .print_range = icmp_print_range, #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) .range_to_nfattr = ip_nat_port_range_to_nfattr, diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index 1d381bf68574..a3d14079eba6 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -136,40 +136,6 @@ tcp_manip_pkt(struct sk_buff **pskb, return 1; } -static unsigned int -tcp_print(char *buffer, - const struct ip_conntrack_tuple *match, - const struct ip_conntrack_tuple *mask) -{ - unsigned int len = 0; - - if (mask->src.u.tcp.port) - len += sprintf(buffer + len, "srcpt=%u ", - ntohs(match->src.u.tcp.port)); - - - if (mask->dst.u.tcp.port) - len += sprintf(buffer + len, "dstpt=%u ", - ntohs(match->dst.u.tcp.port)); - - return len; -} - -static unsigned int -tcp_print_range(char *buffer, const struct ip_nat_range *range) -{ - if (range->min.tcp.port != 0 || range->max.tcp.port != 0xFFFF) { - if (range->min.tcp.port == range->max.tcp.port) - return sprintf(buffer, "port %u ", - ntohs(range->min.tcp.port)); - else - return sprintf(buffer, "ports %u-%u ", - ntohs(range->min.tcp.port), - ntohs(range->max.tcp.port)); - } - else return 0; -} - struct ip_nat_protocol ip_nat_protocol_tcp = { .name = "TCP", .protonum = IPPROTO_TCP, @@ -177,8 +143,6 @@ struct ip_nat_protocol ip_nat_protocol_tcp = { .manip_pkt = tcp_manip_pkt, .in_range = tcp_in_range, .unique_tuple = tcp_unique_tuple, - .print = tcp_print, - .print_range = tcp_print_range, #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) .range_to_nfattr = ip_nat_port_range_to_nfattr, diff --git 
a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index c4906e1aa24a..ec6053fdc867 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -122,40 +122,6 @@ udp_manip_pkt(struct sk_buff **pskb, return 1; } -static unsigned int -udp_print(char *buffer, - const struct ip_conntrack_tuple *match, - const struct ip_conntrack_tuple *mask) -{ - unsigned int len = 0; - - if (mask->src.u.udp.port) - len += sprintf(buffer + len, "srcpt=%u ", - ntohs(match->src.u.udp.port)); - - - if (mask->dst.u.udp.port) - len += sprintf(buffer + len, "dstpt=%u ", - ntohs(match->dst.u.udp.port)); - - return len; -} - -static unsigned int -udp_print_range(char *buffer, const struct ip_nat_range *range) -{ - if (range->min.udp.port != 0 || range->max.udp.port != 0xFFFF) { - if (range->min.udp.port == range->max.udp.port) - return sprintf(buffer, "port %u ", - ntohs(range->min.udp.port)); - else - return sprintf(buffer, "ports %u-%u ", - ntohs(range->min.udp.port), - ntohs(range->max.udp.port)); - } - else return 0; -} - struct ip_nat_protocol ip_nat_protocol_udp = { .name = "UDP", .protonum = IPPROTO_UDP, @@ -163,8 +129,6 @@ struct ip_nat_protocol ip_nat_protocol_udp = { .manip_pkt = udp_manip_pkt, .in_range = udp_in_range, .unique_tuple = udp_unique_tuple, - .print = udp_print, - .print_range = udp_print_range, #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) .range_to_nfattr = ip_nat_port_range_to_nfattr, diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c index f0099a646a0b..3bf049517246 100644 --- a/net/ipv4/netfilter/ip_nat_proto_unknown.c +++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c @@ -46,26 +46,10 @@ unknown_manip_pkt(struct sk_buff **pskb, return 1; } -static unsigned int -unknown_print(char *buffer, - const struct ip_conntrack_tuple *match, - const struct ip_conntrack_tuple *mask) -{ - return 0; -} - -static unsigned 
int -unknown_print_range(char *buffer, const struct ip_nat_range *range) -{ - return 0; -} - struct ip_nat_protocol ip_nat_unknown_protocol = { .name = "unknown", /* .me isn't set: getting a ref to this cannot fail. */ .manip_pkt = unknown_manip_pkt, .in_range = unknown_in_range, .unique_tuple = unknown_unique_tuple, - .print = unknown_print, - .print_range = unknown_print_range }; -- cgit v1.2.3-71-gd317 From 8039de10aae3cd4cf0ef0ccebd58aff0e8810df2 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 10 Jan 2006 20:35:03 -0500 Subject: [PARISC] Add __read_mostly section for parisc Flag a whole bunch of things as __read_mostly on parisc. Also flag a few branches as unlikely() and cleanup a bit of code. Signed-off-by: Helge Deller Signed-off-by: Kyle McMartin --- arch/parisc/kernel/cache.c | 12 ++++++------ arch/parisc/kernel/drivers.c | 2 +- arch/parisc/kernel/firmware.c | 2 +- arch/parisc/kernel/inventory.c | 6 +++--- arch/parisc/kernel/pci-dma.c | 6 +++--- arch/parisc/kernel/pdc_chassis.c | 13 +++++++------ arch/parisc/kernel/perf.c | 6 +++--- arch/parisc/kernel/process.c | 2 +- arch/parisc/kernel/processor.c | 8 ++++---- arch/parisc/kernel/setup.c | 10 +++++----- arch/parisc/kernel/smp.c | 8 ++++---- arch/parisc/kernel/time.c | 4 ++-- arch/parisc/kernel/topology.c | 3 ++- arch/parisc/kernel/unaligned.c | 2 +- arch/parisc/kernel/unwind.c | 2 +- arch/parisc/kernel/vmlinux.lds.S | 4 ++++ arch/parisc/mm/init.c | 20 ++++++++++---------- drivers/parisc/eisa.c | 4 ++-- drivers/parisc/lasi.c | 2 +- drivers/parisc/lba_pci.c | 2 +- drivers/parisc/led.c | 18 +++++++++--------- drivers/parisc/pdc_stable.c | 2 +- drivers/parisc/power.c | 12 ++++++------ include/linux/cache.h | 2 +- 24 files changed, 79 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index a065349aee37..63047c6d2d04 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -29,9 +29,9 @@ #include #include 
-int split_tlb; -int dcache_stride; -int icache_stride; +int split_tlb __read_mostly; +int dcache_stride __read_mostly; +int icache_stride __read_mostly; EXPORT_SYMBOL(dcache_stride); @@ -45,9 +45,9 @@ DEFINE_SPINLOCK(pa_tlb_lock); EXPORT_SYMBOL(pa_tlb_lock); #endif -struct pdc_cache_info cache_info; +struct pdc_cache_info cache_info __read_mostly; #ifndef CONFIG_PA20 -static struct pdc_btlb_info btlb_info; +static struct pdc_btlb_info btlb_info __read_mostly; #endif #ifdef CONFIG_SMP @@ -332,7 +332,7 @@ void clear_user_page_asm(void *page, unsigned long vaddr) } #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */ -int parisc_cache_flush_threshold = FLUSH_THRESHOLD; +int parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD; void parisc_setup_cache_timing(void) { diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index d016d672ec2b..041524d24ef1 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -39,7 +39,7 @@ #include /* See comments in include/asm-parisc/pci.h */ -struct hppa_dma_ops *hppa_dma_ops; +struct hppa_dma_ops *hppa_dma_ops __read_mostly; EXPORT_SYMBOL(hppa_dma_ops); static struct device root = { diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 553f8fe03224..2dc06b8e1817 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -80,7 +80,7 @@ static unsigned long pdc_result2[32] __attribute__ ((aligned (8))); /* Firmware needs to be initially set to narrow to determine the * actual firmware width. 
*/ -int parisc_narrow_firmware = 1; +int parisc_narrow_firmware __read_mostly = 1; #endif /* On most currently-supported platforms, IODC I/O calls are 32-bit calls diff --git a/arch/parisc/kernel/inventory.c b/arch/parisc/kernel/inventory.c index 8f563871e83c..4e847ba53180 100644 --- a/arch/parisc/kernel/inventory.c +++ b/arch/parisc/kernel/inventory.c @@ -38,7 +38,7 @@ */ #undef DEBUG_PAT -int pdc_type = PDC_TYPE_ILLEGAL; +int pdc_type __read_mostly = PDC_TYPE_ILLEGAL; void __init setup_pdc(void) { @@ -120,8 +120,8 @@ set_pmem_entry(physmem_range_t *pmem_ptr, unsigned long start, * pdc info is bad in this case). */ - if ( ((start & (PAGE_SIZE - 1)) != 0) - || ((pages4k & ((1UL << PDC_PAGE_ADJ_SHIFT) - 1)) != 0) ) { + if (unlikely( ((start & (PAGE_SIZE - 1)) != 0) + || ((pages4k & ((1UL << PDC_PAGE_ADJ_SHIFT) - 1)) != 0) )) { panic("Memory range doesn't align with page size!\n"); } diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index f94a02ef3d95..a6caf1073085 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -33,10 +33,10 @@ #include #include /* for purge_tlb_*() macros */ -static struct proc_dir_entry * proc_gsc_root = NULL; +static struct proc_dir_entry * proc_gsc_root __read_mostly = NULL; static int pcxl_proc_info(char *buffer, char **start, off_t offset, int length); -static unsigned long pcxl_used_bytes = 0; -static unsigned long pcxl_used_pages = 0; +static unsigned long pcxl_used_bytes __read_mostly = 0; +static unsigned long pcxl_used_pages __read_mostly = 0; extern unsigned long pcxl_dma_start; /* Start of pcxl dma mapping area */ static spinlock_t pcxl_res_lock; diff --git a/arch/parisc/kernel/pdc_chassis.c b/arch/parisc/kernel/pdc_chassis.c index 52004ae28d20..2a01fe1bdc98 100644 --- a/arch/parisc/kernel/pdc_chassis.c +++ b/arch/parisc/kernel/pdc_chassis.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -38,8 +39,8 @@ #ifdef CONFIG_PDC_CHASSIS -static int 
pdc_chassis_old = 0; -static unsigned int pdc_chassis_enabled = 1; +static int pdc_chassis_old __read_mostly = 0; +static unsigned int pdc_chassis_enabled __read_mostly = 1; /** @@ -132,7 +133,7 @@ void __init parisc_pdc_chassis_init(void) { #ifdef CONFIG_PDC_CHASSIS int handle = 0; - if (pdc_chassis_enabled) { + if (likely(pdc_chassis_enabled)) { DPRINTK(KERN_DEBUG "%s: parisc_pdc_chassis_init()\n", __FILE__); /* Let see if we have something to handle... */ @@ -142,7 +143,7 @@ void __init parisc_pdc_chassis_init(void) printk(KERN_INFO "Enabling PDC_PAT chassis codes support.\n"); handle = 1; } - else if (pdc_chassis_old) { + else if (unlikely(pdc_chassis_old)) { printk(KERN_INFO "Enabling old style chassis LED panel support.\n"); handle = 1; } @@ -178,7 +179,7 @@ int pdc_chassis_send_status(int message) /* Maybe we should do that in an other way ? */ int retval = 0; #ifdef CONFIG_PDC_CHASSIS - if (pdc_chassis_enabled) { + if (likely(pdc_chassis_enabled)) { DPRINTK(KERN_DEBUG "%s: pdc_chassis_send_status(%d)\n", __FILE__, message); @@ -214,7 +215,7 @@ int pdc_chassis_send_status(int message) } } else retval = -1; #else - if (pdc_chassis_old) { + if (unlikely(pdc_chassis_old)) { switch (message) { case PDC_CHASSIS_DIRECT_BSTART: case PDC_CHASSIS_DIRECT_BCOMPLETE: diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index f6fec62b6a2f..79dcbcccecb8 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -66,10 +66,10 @@ struct rdr_tbl_ent { uint8_t write_control; }; -static int perf_processor_interface = UNKNOWN_INTF; -static int perf_enabled = 0; +static int perf_processor_interface __read_mostly = UNKNOWN_INTF; +static int perf_enabled __read_mostly = 0; static spinlock_t perf_lock; -struct parisc_device *cpu_device = NULL; +struct parisc_device *cpu_device __read_mostly = NULL; /* RDRs to write for PCX-W */ static int perf_rdrs_W[] = diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 
fee4f1f09adc..4eb70a40ec7e 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -54,7 +54,7 @@ #include #include -static int hlt_counter; +static int hlt_counter __read_mostly; /* * Power off function, if any diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 4f5bbcf1f5a4..6df9f62cecb5 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -44,10 +44,10 @@ #include /* for struct irq_region */ #include -struct system_cpuinfo_parisc boot_cpu_data; +struct system_cpuinfo_parisc boot_cpu_data __read_mostly; EXPORT_SYMBOL(boot_cpu_data); -struct cpuinfo_parisc cpu_data[NR_CPUS]; +struct cpuinfo_parisc cpu_data[NR_CPUS] __read_mostly; /* ** PARISC CPU driver - claim "device" and initialize CPU data structures. @@ -378,12 +378,12 @@ show_cpuinfo (struct seq_file *m, void *v) return 0; } -static struct parisc_device_id processor_tbl[] = { +static struct parisc_device_id processor_tbl[] __read_mostly = { { HPHW_NPROC, HVERSION_REV_ANY_ID, HVERSION_ANY_ID, SVERSION_ANY_ID }, { 0, } }; -static struct parisc_driver cpu_driver = { +static struct parisc_driver cpu_driver __read_mostly = { .name = "CPU", .id_table = processor_tbl, .probe = processor_probe diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 73e9c34b0948..4a36ec3f6ac1 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -46,15 +46,15 @@ #include #include -char command_line[COMMAND_LINE_SIZE]; +char command_line[COMMAND_LINE_SIZE] __read_mostly; /* Intended for ccio/sba/cpu statistics under /proc/bus/{runway|gsc} */ -struct proc_dir_entry * proc_runway_root = NULL; -struct proc_dir_entry * proc_gsc_root = NULL; -struct proc_dir_entry * proc_mckinley_root = NULL; +struct proc_dir_entry * proc_runway_root __read_mostly = NULL; +struct proc_dir_entry * proc_gsc_root __read_mostly = NULL; +struct proc_dir_entry * proc_mckinley_root __read_mostly = NULL; #if !defined(CONFIG_PA20) && 
(defined(CONFIG_IOMMU_CCIO) || defined(CONFIG_IOMMU_SBA)) -int parisc_bus_is_phys = 1; /* Assume no IOMMU is present */ +int parisc_bus_is_phys __read_mostly = 1; /* Assume no IOMMU is present */ EXPORT_SYMBOL(parisc_bus_is_phys); #endif diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index ce89da0f654d..fb3ca84f1b97 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -58,9 +58,9 @@ DEFINE_SPINLOCK(smp_lock); volatile struct task_struct *smp_init_current_idle_task; -static volatile int cpu_now_booting = 0; /* track which CPU is booting */ +static volatile int cpu_now_booting __read_mostly = 0; /* track which CPU is booting */ -static int parisc_max_cpus = 1; +static int parisc_max_cpus __read_mostly = 1; /* online cpus are ones that we've managed to bring up completely * possible cpus are all valid cpu @@ -71,8 +71,8 @@ static int parisc_max_cpus = 1; * empty in the beginning. */ -cpumask_t cpu_online_map = CPU_MASK_NONE; /* Bitmap of online CPUs */ -cpumask_t cpu_possible_map = CPU_MASK_ALL; /* Bitmap of Present CPUs */ +cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; /* Bitmap of online CPUs */ +cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; /* Bitmap of Present CPUs */ EXPORT_SYMBOL(cpu_online_map); EXPORT_SYMBOL(cpu_possible_map); diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index cded25680787..594930bc4bcf 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -36,8 +36,8 @@ /* xtime and wall_jiffies keep wall-clock time */ extern unsigned long wall_jiffies; -static long clocktick; /* timer cycles per tick */ -static long halftick; +static long clocktick __read_mostly; /* timer cycles per tick */ +static long halftick __read_mostly; #ifdef CONFIG_SMP extern void smp_do_timer(struct pt_regs *regs); diff --git a/arch/parisc/kernel/topology.c b/arch/parisc/kernel/topology.c index ac2a40681414..3ba040050e4c 100644 --- a/arch/parisc/kernel/topology.c +++ 
b/arch/parisc/kernel/topology.c @@ -20,8 +20,9 @@ #include #include #include +#include -static struct cpu cpu_devices[NR_CPUS]; +static struct cpu cpu_devices[NR_CPUS] __read_mostly; static int __init topology_init(void) { diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index eaae8a021f9f..de0a1b21cb40 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -122,7 +122,7 @@ #define ERR_NOTHANDLED -1 #define ERR_PAGEFAULT -2 -int unaligned_enabled = 1; +int unaligned_enabled __read_mostly = 1; void die_if_kernel (char *str, struct pt_regs *regs, long err); diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index db141108412e..cc1c1afc3187 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -35,7 +35,7 @@ static spinlock_t unwind_lock; * we can call unwind_init as early in the bootup process as * possible (before the slab allocator is initialized) */ -static struct unwind_table kernel_unwind_table; +static struct unwind_table kernel_unwind_table __read_mostly; static LIST_HEAD(unwind_tables); static inline const struct unwind_table_entry * diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index e5fac3e08c7a..b8b9174f6425 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -105,6 +105,10 @@ SECTIONS . = ALIGN(16); .data.lock_aligned : { *(.data.lock_aligned) } + /* rarely changed data like cpu maps */ + . = ALIGN(16); + .data.read_mostly : { *(.data.read_mostly) } + _edata = .; /* End of data section */ . 
= ALIGN(16384); /* init_task */ diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 29b998e430e6..a992cb8cfe61 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -36,9 +36,9 @@ extern char _end; /* end of BSS, defined by linker */ extern char __init_begin, __init_end; #ifdef CONFIG_DISCONTIGMEM -struct node_map_data node_data[MAX_NUMNODES]; -bootmem_data_t bmem_data[MAX_NUMNODES]; -unsigned char pfnnid_map[PFNNID_MAP_MAX]; +struct node_map_data node_data[MAX_NUMNODES] __read_mostly; +bootmem_data_t bmem_data[MAX_NUMNODES] __read_mostly; +unsigned char pfnnid_map[PFNNID_MAP_MAX] __read_mostly; #endif static struct resource data_resource = { @@ -58,14 +58,14 @@ static struct resource pdcdata_resource = { .flags = IORESOURCE_BUSY | IORESOURCE_MEM, }; -static struct resource sysram_resources[MAX_PHYSMEM_RANGES]; +static struct resource sysram_resources[MAX_PHYSMEM_RANGES] __read_mostly; /* The following array is initialized from the firmware specific * information retrieved in kernel/inventory.c. 
*/ -physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES]; -int npmem_ranges; +physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __read_mostly; +int npmem_ranges __read_mostly; #ifdef __LP64__ #define MAX_MEM (~0UL) @@ -73,7 +73,7 @@ int npmem_ranges; #define MAX_MEM (3584U*1024U*1024U) #endif /* !__LP64__ */ -static unsigned long mem_limit = MAX_MEM; +static unsigned long mem_limit __read_mostly = MAX_MEM; static void __init mem_limit_func(void) { @@ -431,11 +431,11 @@ void free_initmem(void) #define SET_MAP_OFFSET(x) ((void *)(((unsigned long)(x) + VM_MAP_OFFSET) \ & ~(VM_MAP_OFFSET-1))) -void *vmalloc_start; +void *vmalloc_start __read_mostly; EXPORT_SYMBOL(vmalloc_start); #ifdef CONFIG_PA11 -unsigned long pcxl_dma_start; +unsigned long pcxl_dma_start __read_mostly; #endif void __init mem_init(void) @@ -475,7 +475,7 @@ int do_check_pgt_cache(int low, int high) return 0; } -unsigned long *empty_zero_page; +unsigned long *empty_zero_page __read_mostly; void show_mem(void) { diff --git a/drivers/parisc/eisa.c b/drivers/parisc/eisa.c index 6362bf99eff6..3d94d86c1c9f 100644 --- a/drivers/parisc/eisa.c +++ b/drivers/parisc/eisa.c @@ -57,7 +57,7 @@ static DEFINE_SPINLOCK(eisa_irq_lock); -void __iomem *eisa_eeprom_addr; +void __iomem *eisa_eeprom_addr __read_mostly; /* We can only have one EISA adapter in the system because neither * implementation can be flexed. @@ -141,7 +141,7 @@ static int slave_mask; * in the furure. 
*/ /* irq 13,8,2,1,0 must be edge */ -static unsigned int eisa_irq_level; /* default to edge triggered */ +static unsigned int eisa_irq_level __read_mostly; /* default to edge triggered */ /* called by free irq */ diff --git a/drivers/parisc/lasi.c b/drivers/parisc/lasi.c index a8c20396ffbe..2b3ba1dcf332 100644 --- a/drivers/parisc/lasi.c +++ b/drivers/parisc/lasi.c @@ -150,7 +150,7 @@ void __init lasi_led_init(unsigned long lasi_hpa) * */ -static unsigned long lasi_power_off_hpa; +static unsigned long lasi_power_off_hpa __read_mostly; static void lasi_power_off(void) { diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c index 5e495dcbc58a..4f6bdf0881b5 100644 --- a/drivers/parisc/lba_pci.c +++ b/drivers/parisc/lba_pci.c @@ -167,7 +167,7 @@ /* non-postable I/O port space, densely packed */ #define LBA_PORT_BASE (PCI_F_EXTEND | 0xfee00000UL) -static void __iomem *astro_iop_base; +static void __iomem *astro_iop_base __read_mostly; #define ELROY_HVERS 0x782 #define MERCURY_HVERS 0x783 diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index 315be4770d3e..f357d3f60360 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -3,7 +3,7 @@ * * (c) Copyright 2000 Red Hat Software * (c) Copyright 2000 Helge Deller - * (c) Copyright 2001-2004 Helge Deller + * (c) Copyright 2001-2005 Helge Deller * (c) Copyright 2001 Randolph Chung * * This program is free software; you can redistribute it and/or modify @@ -56,13 +56,13 @@ relatively large amount of CPU time, some of the calculations can be turned off with the following variables (controlled via procfs) */ -static int led_type = -1; +static int led_type __read_mostly = -1; static unsigned char lastleds; /* LED state from most recent update */ -static unsigned int led_heartbeat = 1; -static unsigned int led_diskio = 1; -static unsigned int led_lanrxtx = 1; -static char lcd_text[32]; -static char lcd_text_default[32]; +static unsigned int led_heartbeat __read_mostly = 1; +static unsigned int led_diskio 
__read_mostly = 1; +static unsigned int led_lanrxtx __read_mostly = 1; +static char lcd_text[32] __read_mostly; +static char lcd_text_default[32] __read_mostly; static struct workqueue_struct *led_wq; @@ -108,7 +108,7 @@ struct pdc_chassis_lcd_info_ret_block { /* lcd_info is pre-initialized to the values needed to program KittyHawk LCD's * HP seems to have used Sharp/Hitachi HD44780 LCDs most of the time. */ static struct pdc_chassis_lcd_info_ret_block -lcd_info __attribute__((aligned(8))) = +lcd_info __attribute__((aligned(8))) __read_mostly = { .model = DISPLAY_MODEL_LCD, .lcd_width = 16, @@ -144,7 +144,7 @@ static int start_task(void) device_initcall(start_task); /* ptr to LCD/LED-specific function */ -static void (*led_func_ptr) (unsigned char); +static void (*led_func_ptr) (unsigned char) __read_mostly; #ifdef CONFIG_PROC_FS static int led_proc_read(char *page, char **start, off_t off, int count, diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index 273a74179720..11750cbb05c6 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c @@ -70,7 +70,7 @@ MODULE_DESCRIPTION("sysfs interface to HP PDC Stable Storage data"); MODULE_LICENSE("GPL"); MODULE_VERSION(PDCS_VERSION); -static unsigned long pdcs_size = 0; +static unsigned long pdcs_size __read_mostly; /* This struct defines what we need to deal with a parisc pdc path entry */ struct pdcspath_entry { diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c index ff75e9296df9..54b2b7f20b96 100644 --- a/drivers/parisc/power.c +++ b/drivers/parisc/power.c @@ -2,7 +2,7 @@ * linux/arch/parisc/kernel/power.c * HP PARISC soft power switch support driver * - * Copyright (c) 2001-2002 Helge Deller + * Copyright (c) 2001-2005 Helge Deller * All rights reserved. 
* * @@ -102,7 +102,7 @@ static DECLARE_WORK(poweroff_work, deferred_poweroff, NULL); static void poweroff(void) { - static int powering_off; + static int powering_off __read_mostly; if (powering_off) return; @@ -113,7 +113,7 @@ static void poweroff(void) /* local time-counter for shutdown */ -static int shutdown_timer; +static int shutdown_timer __read_mostly; /* check, give feedback and start shutdown after one second */ static void process_shutdown(void) @@ -139,7 +139,7 @@ static void process_shutdown(void) DECLARE_TASKLET_DISABLED(power_tasklet, NULL, 0); /* soft power switch enabled/disabled */ -int pwrsw_enabled = 1; +int pwrsw_enabled __read_mostly = 1; /* * On gecko style machines (e.g. 712/xx and 715/xx) @@ -149,7 +149,7 @@ int pwrsw_enabled = 1; */ static void gecko_tasklet_func(unsigned long unused) { - if (!pwrsw_enabled) + if (unlikely(!pwrsw_enabled)) return; if (__getDIAG(25) & 0x80000000) { @@ -173,7 +173,7 @@ static void polling_tasklet_func(unsigned long soft_power_reg) { unsigned long current_status; - if (!pwrsw_enabled) + if (unlikely(!pwrsw_enabled)) return; current_status = gsc_readl(soft_power_reg); diff --git a/include/linux/cache.h b/include/linux/cache.h index ffe52210fc4f..d22e632f41fb 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -13,7 +13,7 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES #endif -#if defined(CONFIG_X86) || defined(CONFIG_SPARC64) || defined(CONFIG_IA64) +#if defined(CONFIG_X86) || defined(CONFIG_SPARC64) || defined(CONFIG_IA64) || defined(CONFIG_PARISC) #define __read_mostly __attribute__((__section__(".data.read_mostly"))) #else #define __read_mostly -- cgit v1.2.3-71-gd317 From a8b9ee7396ccc8db3bdb4108993556acbe2d3527 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jan 2006 00:15:16 -0800 Subject: [MUTEX]: linux/mutex.h needs linux/linkage.h too Signed-off-by: David S. 
Miller --- include/linux/mutex.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 9bce0fee68d4..f1c84b1252f5 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -12,6 +12,7 @@ #include #include +#include #include -- cgit v1.2.3-71-gd317 From a4fc7ab1d065a9dd89ed0e74439ef87d4a16e980 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 11 Jan 2006 14:41:26 +0000 Subject: [PATCH] fix/simplify mutex debugging code Let's switch mutex_debug_check_no_locks_freed() to take (addr, len) as arguments instead, since all its callers were just calculating the 'to' address for themselves anyway... (and sometimes doing so badly). Signed-off-by: David Woodhouse Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- arch/i386/mm/pageattr.c | 2 +- include/linux/mm.h | 2 +- include/linux/mutex-debug.h | 2 +- include/linux/mutex.h | 2 +- kernel/mutex-debug.c | 5 +++-- mm/page_alloc.c | 2 +- mm/slab.c | 2 +- 7 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index e8a53552b13d..d0cadb33b54c 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c @@ -224,7 +224,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) return; if (!enable) mutex_debug_check_no_locks_freed(page_address(page), - page_address(page+numpages)); + numpages * PAGE_SIZE); /* the return value is ignored - the calls cannot fail, * large pages are disabled at boot time. 
diff --git a/include/linux/mm.h b/include/linux/mm.h index 3f1fafc0245e..e53d2c6fd5f4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1027,7 +1027,7 @@ kernel_map_pages(struct page *page, int numpages, int enable) { if (!PageHighMem(page) && !enable) mutex_debug_check_no_locks_freed(page_address(page), - page_address(page + numpages)); + numpages * PAGE_SIZE); } #endif diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h index 8138d9eb58ec..8b5769f00467 100644 --- a/include/linux/mutex-debug.h +++ b/include/linux/mutex-debug.h @@ -18,6 +18,6 @@ extern void FASTCALL(mutex_destroy(struct mutex *lock)); extern void mutex_debug_show_all_locks(void); extern void mutex_debug_show_held_locks(struct task_struct *filter); extern void mutex_debug_check_no_locks_held(struct task_struct *task); -extern void mutex_debug_check_no_locks_freed(const void *from, const void *to); +extern void mutex_debug_check_no_locks_freed(const void *from, unsigned long len); #endif diff --git a/include/linux/mutex.h b/include/linux/mutex.h index f1c84b1252f5..f1ac507fa20d 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -79,7 +79,7 @@ struct mutex_waiter { # define mutex_debug_show_all_locks() do { } while (0) # define mutex_debug_show_held_locks(p) do { } while (0) # define mutex_debug_check_no_locks_held(task) do { } while (0) -# define mutex_debug_check_no_locks_freed(from, to) do { } while (0) +# define mutex_debug_check_no_locks_freed(from, len) do { } while (0) #endif #define __MUTEX_INITIALIZER(lockname) \ diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index 6f829058ae4a..f4913c376950 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -333,9 +333,10 @@ void mutex_debug_check_no_locks_held(struct task_struct *task) * is destroyed or reinitialized - this code checks whether there is * any held lock in the memory range of to : */ -void mutex_debug_check_no_locks_freed(const void *from, const void *to) +void 
mutex_debug_check_no_locks_freed(const void *from, unsigned long len) { struct list_head *curr, *next; + const void *to = from + len; unsigned long flags; struct mutex *lock; void *lock_addr; @@ -437,7 +438,7 @@ void debug_mutex_init(struct mutex *lock, const char *name) /* * Make sure we are not reinitializing a held lock: */ - mutex_debug_check_no_locks_freed((void *)lock, (void *)(lock + 1)); + mutex_debug_check_no_locks_freed((void *)lock, sizeof(*lock)); lock->owner = NULL; INIT_LIST_HEAD(&lock->held_list); lock->name = name; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a5e6891f7bb6..8e363536e2da 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -417,7 +417,7 @@ static void __free_pages_ok(struct page *page, unsigned int order) arch_free_page(page, order); if (!PageHighMem(page)) mutex_debug_check_no_locks_freed(page_address(page), - page_address(page+(1< Date: Wed, 11 Jan 2006 12:17:19 -0800 Subject: [PATCH] mm: gfp_atomic comments Clarify in comments that GFP_ATOMIC means both "don't sleep" and "use emergency pools", hence both ALLOC_HARDER and ALLOC_HIGH. 
Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 1 + mm/page_alloc.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 8b2eab90abb6..da7ce8730e97 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -57,6 +57,7 @@ struct vm_area_struct; __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ __GFP_NOMEMALLOC|__GFP_HARDWALL) +/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */ #define GFP_ATOMIC (__GFP_HIGH) #define GFP_NOIO (__GFP_WAIT) #define GFP_NOFS (__GFP_WAIT | __GFP_IO) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ce991b173aa9..d41a0662d4da 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -931,7 +931,8 @@ restart: * * The caller may dip into page reserves a bit more if the caller * cannot run direct reclaim, or if the caller has realtime scheduling - * policy. + * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will + * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH). */ alloc_flags = ALLOC_WMARK_MIN; if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait) -- cgit v1.2.3-71-gd317 From df019b1d8b893d0f0ee5a9b0f71486f0892561ae Mon Sep 17 00:00:00 2001 From: Keshavamurthy Anil S Date: Wed, 11 Jan 2006 12:17:41 -0800 Subject: [PATCH] kprobes: fix unloading of self probed module When a kprobes module is written in such a way that probes are inserted on itself, then unload of that module was not possible due to reference counting on the same module. The below patch makes a check and increments the module refcount only if it is not a self probed module. We need to allow modules to probe themselves for kprobes performance measurements. This patch has been tested on several x86_64, ppc64 and IA64 architectures. 
Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kprobes.h | 3 +++ kernel/kprobes.c | 42 ++++++++++++++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 10005bc92a31..669756bc20a2 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -68,6 +68,9 @@ struct kprobe { /* list of kprobes for multi-handler support */ struct list_head list; + /* Indicates that the corresponding module has been ref counted */ + unsigned int mod_refcounted; + /*count the number of times this probe was temporarily disarmed */ unsigned long nmissed; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 34a885bb82e0..3ea6325228da 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -449,19 +449,32 @@ static int __kprobes in_kprobes_functions(unsigned long addr) return 0; } -int __kprobes register_kprobe(struct kprobe *p) +static int __kprobes __register_kprobe(struct kprobe *p, + unsigned long called_from) { int ret = 0; struct kprobe *old_p; - struct module *mod; + struct module *probed_mod; if ((!kernel_text_address((unsigned long) p->addr)) || in_kprobes_functions((unsigned long) p->addr)) return -EINVAL; - if ((mod = module_text_address((unsigned long) p->addr)) && - (unlikely(!try_module_get(mod)))) - return -EINVAL; + p->mod_refcounted = 0; + /* Check are we probing a module */ + if ((probed_mod = module_text_address((unsigned long) p->addr))) { + struct module *calling_mod = module_text_address(called_from); + /* We must allow modules to probe themself and + * in this case avoid incrementing the module refcount, + * so as to allow unloading of self probing modules. 
+ */ + if (calling_mod && (calling_mod != probed_mod)) { + if (unlikely(!try_module_get(probed_mod))) + return -EINVAL; + p->mod_refcounted = 1; + } else + probed_mod = NULL; + } p->nmissed = 0; down(&kprobe_mutex); @@ -483,11 +496,17 @@ int __kprobes register_kprobe(struct kprobe *p) out: up(&kprobe_mutex); - if (ret && mod) - module_put(mod); + if (ret && probed_mod) + module_put(probed_mod); return ret; } +int __kprobes register_kprobe(struct kprobe *p) +{ + return __register_kprobe(p, + (unsigned long)__builtin_return_address(0)); +} + void __kprobes unregister_kprobe(struct kprobe *p) { struct module *mod; @@ -524,7 +543,8 @@ valid_p: up(&kprobe_mutex); synchronize_sched(); - if ((mod = module_text_address((unsigned long)p->addr))) + if (p->mod_refcounted && + (mod = module_text_address((unsigned long)p->addr))) module_put(mod); if (cleanup_p) { @@ -547,7 +567,8 @@ int __kprobes register_jprobe(struct jprobe *jp) jp->kp.pre_handler = setjmp_pre_handler; jp->kp.break_handler = longjmp_break_handler; - return register_kprobe(&jp->kp); + return __register_kprobe(&jp->kp, + (unsigned long)__builtin_return_address(0)); } void __kprobes unregister_jprobe(struct jprobe *jp) @@ -587,7 +608,8 @@ int __kprobes register_kretprobe(struct kretprobe *rp) rp->nmissed = 0; /* Establish function entry probe point */ - if ((ret = register_kprobe(&rp->kp)) != 0) + if ((ret = __register_kprobe(&rp->kp, + (unsigned long)__builtin_return_address(0))) != 0) free_rp_inst(rp); return ret; } -- cgit v1.2.3-71-gd317 From e16885c5ad624a6efe1b1bf764e075d75f65a788 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 11 Jan 2006 12:17:45 -0800 Subject: [PATCH] uninline capable() Uninline capable(). Saves 2K of kernel text on a generic .config, and 1K on a tiny config. 
In addition it makes the use of capable more consistent between CONFIG_SECURITY and !CONFIG_SECURITY Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 13 +------------ kernel/sys.c | 12 ++++++++++++ 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c4ee35dd18ae..2ae8711bfba1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1113,19 +1113,8 @@ static inline int sas_ss_flags(unsigned long sp) } -#ifdef CONFIG_SECURITY -/* code is in security.c */ +/* code is in security.c or kernel/sys.c if !SECURITY */ extern int capable(int cap); -#else -static inline int capable(int cap) -{ - if (cap_raised(current->cap_effective, cap)) { - current->flags |= PF_SUPERPRIV; - return 1; - } - return 0; -} -#endif /* * Routines for handling mm_structs diff --git a/kernel/sys.c b/kernel/sys.c index b6941e06d5d5..9ccf713491f9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -223,6 +223,18 @@ int unregister_reboot_notifier(struct notifier_block * nb) EXPORT_SYMBOL(unregister_reboot_notifier); +#ifndef CONFIG_SECURITY +int capable(int cap) +{ + if (cap_raised(current->cap_effective, cap)) { + current->flags |= PF_SUPERPRIV; + return 1; + } + return 0; +} +EXPORT_SYMBOL(capable); +#endif + static int set_one_prio(struct task_struct *p, int niceval, int error) { int no_nice; -- cgit v1.2.3-71-gd317 From c59ede7b78db329949d9cdcd7064e22d357560ef Mon Sep 17 00:00:00 2001 From: "Randy.Dunlap" Date: Wed, 11 Jan 2006 12:17:46 -0800 Subject: [PATCH] move capable() to capability.h - Move capable() from sched.h to capability.h; - Use <linux/capability.h> where capable() is used (in include/, block/, ipc/, kernel/, a few drivers/, mm/, security/, & sound/; many more drivers/ to go) Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/ioctl.c | 2 +- block/scsi_ioctl.c | 1 + drivers/acorn/char/i2c.c | 1 + 
drivers/base/firmware_class.c | 1 + drivers/base/memory.c | 2 +- drivers/firmware/efivars.c | 2 +- drivers/oprofile/event_buffer.c | 1 + drivers/parisc/led.c | 1 + drivers/parisc/pdc_stable.c | 2 +- fs/xfs/linux-2.6/xfs_cred.h | 4 +++- include/linux/capability.h | 3 +++ include/linux/mm.h | 1 + include/linux/sched.h | 4 ---- ipc/mqueue.c | 1 + ipc/msg.c | 1 + ipc/sem.c | 1 + ipc/shm.c | 1 + ipc/util.c | 1 + kernel/acct.c | 1 + kernel/capability.c | 1 + kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/kexec.c | 1 + kernel/module.c | 1 + kernel/ptrace.c | 1 + kernel/sched.c | 1 + kernel/signal.c | 1 + kernel/sys.c | 1 + kernel/sysctl.c | 1 + kernel/time.c | 1 + kernel/uid16.c | 1 + mm/filemap.c | 1 + mm/mlock.c | 1 + mm/mmap.c | 1 + mm/mremap.c | 1 + mm/swapfile.c | 1 + security/commoncap.c | 1 + security/dummy.c | 1 + security/keys/keyctl.c | 1 + security/security.c | 1 + sound/pci/emu10k1/emufx.c | 1 + 41 files changed, 44 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/block/ioctl.c b/block/ioctl.c index 82030e1dfd63..e1109491c234 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -1,4 +1,4 @@ -#include /* for capable() */ +#include #include #include #include diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 18de84c8ccd8..cc72210687eb 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/acorn/char/i2c.c b/drivers/acorn/char/i2c.c index c22bb9dca1ec..c26c08b36829 100644 --- a/drivers/acorn/char/i2c.c +++ b/drivers/acorn/char/i2c.c @@ -12,6 +12,7 @@ * On Acorn machines, the following i2c devices are on the bus: * - PCF8583 real time clock & static RAM */ +#include #include #include #include diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 5b3d5e9ddcb6..3d384e3d34de 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -7,6 +7,7 @@ * */ +#include #include #include #include 
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 58801d718cc2..d1a05224627e 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -13,8 +13,8 @@ #include #include #include -#include /* capable() */ #include +#include #include #include #include diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index bda5bce681b6..343379f23a53 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -65,11 +65,11 @@ * v0.01 release to linux-ia64@linuxia64.org */ +#include #include #include #include #include -#include /* for capable() */ #include #include #include diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c index 166bca790133..b80318f03420 100644 --- a/drivers/oprofile/event_buffer.c +++ b/drivers/oprofile/event_buffer.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index f357d3f60360..3627a2d7f79f 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index 38bdca2fac6b..42a3c54e8e6c 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c @@ -42,9 +42,9 @@ #include #include -#include /* for capable() */ #include #include +#include #include #include #include diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h index 4af491024727..e7f3da61c6c3 100644 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ b/fs/xfs/linux-2.6/xfs_cred.h @@ -18,6 +18,8 @@ #ifndef __XFS_CRED_H__ #define __XFS_CRED_H__ +#include + /* * Credentials */ @@ -27,7 +29,7 @@ typedef struct cred { extern struct cred *sys_cred; -/* this is a hack.. (assums sys_cred is the only cred_t in the system) */ +/* this is a hack.. 
(assumes sys_cred is the only cred_t in the system) */ static __inline int capable_cred(cred_t *cr, int cid) { return (cr == sys_cred) ? 1 : capable(cid); diff --git a/include/linux/capability.h b/include/linux/capability.h index 6b4618902d3d..5a23ce752629 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -43,6 +43,7 @@ typedef struct __user_cap_data_struct { #ifdef __KERNEL__ #include +#include /* #define STRICT_CAP_T_TYPECHECKS */ @@ -356,6 +357,8 @@ static inline kernel_cap_t cap_invert(kernel_cap_t c) #define cap_is_fs_cap(c) (CAP_TO_MASK(c) & CAP_FS_MASK) +extern int capable(int cap); + #endif /* __KERNEL__ */ #endif /* !_LINUX_CAPABILITY_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index e53d2c6fd5f4..c643016499a1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3,6 +3,7 @@ #include #include +#include #ifdef __KERNEL__ diff --git a/include/linux/sched.h b/include/linux/sched.h index 2ae8711bfba1..3b74c4bf2934 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1112,10 +1112,6 @@ static inline int sas_ss_flags(unsigned long sp) : on_sig_stack(sp) ? SS_ONSTACK : 0); } - -/* code is in security.c or kernel/sys.c if !SECURITY */ -extern int capable(int cap); - /* * Routines for handling mm_structs */ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index a8aa6152eea6..4e776f9c80e7 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -11,6 +11,7 @@ * This file is released under the GPL. 
*/ +#include #include #include #include diff --git a/ipc/msg.c b/ipc/msg.c index d035bd2aba96..a91b64763b86 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -15,6 +15,7 @@ * (c) 1999 Manfred Spraul */ +#include #include #include #include diff --git a/ipc/sem.c b/ipc/sem.c index cb5bb2a5df96..46bb8a678dec 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include "util.h" diff --git a/ipc/shm.c b/ipc/shm.c index 0b92e874fc06..4c28d2d8e305 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/ipc/util.c b/ipc/util.c index 23f1cec150c1..38b9a0af3bd8 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/acct.c b/kernel/acct.c index 38d57fa6b78f..065d8b4e51ef 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/capability.c b/kernel/capability.c index 8986a37a67ea..bfa3c92e16f2 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -7,6 +7,7 @@ * 30 May 2002: Cleanup, Robert M. Love */ +#include #include #include #include diff --git a/kernel/exit.c b/kernel/exit.c index 802722814925..f8e609ff1893 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 3bdcab49998d..16a776ec2c0b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/kexec.c b/kernel/kexec.c index de1441656efd..bf39d28e4c0e 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -6,6 +6,7 @@ * Version 2. See the file COPYING for more details. 
*/ +#include #include #include #include diff --git a/kernel/module.c b/kernel/module.c index e4276046a1b6..618ed6e23ecc 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/ptrace.c b/kernel/ptrace.c index cceaf09ac413..5f33cdb6fff5 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -7,6 +7,7 @@ * to continually duplicate across every architecture. */ +#include #include #include #include diff --git a/kernel/sched.c b/kernel/sched.c index 34a945bcc022..d129e560cc0d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 08aa5b263f36..1da2e74beb97 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index 9ccf713491f9..d09cac23fdfd 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 03b0598f2369..62d4d9566876 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time.c b/kernel/time.c index 169e8329e0b6..7477b1d2079e 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -29,6 +29,7 @@ #include #include +#include #include #include #include diff --git a/kernel/uid16.c b/kernel/uid16.c index f669941e8b26..aa25605027c8 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/filemap.c b/mm/filemap.c index 96de772be487..a965b6b35f26 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/mlock.c b/mm/mlock.c index 
4ae3a46ff768..b90c59573abf 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -5,6 +5,7 @@ * (C) Copyright 2002 Christoph Hellwig */ +#include #include #include #include diff --git a/mm/mmap.c b/mm/mmap.c index 64ba4dbcb7de..47556d2b3e90 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/mremap.c b/mm/mremap.c index ddaeee9a0b69..1903bdf65e42 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/swapfile.c b/mm/swapfile.c index d8a5afc8b2a3..957fef43fa60 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/security/commoncap.c b/security/commoncap.c index 04c12f58d656..8a6e097f99ea 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -7,6 +7,7 @@ * */ +#include #include #include #include diff --git a/security/dummy.c b/security/dummy.c index a15c54709fde..f1a5bd98bf10 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -14,6 +14,7 @@ #undef DEBUG +#include #include #include #include diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 3d2ebae029c1..90db5c76cf6e 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include "internal.h" diff --git a/security/security.c b/security/security.c index ed5fb80769c3..f693e1f66b98 100644 --- a/security/security.c +++ b/security/security.c @@ -11,6 +11,7 @@ * (at your option) any later version. 
*/ +#include #include #include #include diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c index 1a903390ad6d..509837252735 100644 --- a/sound/pci/emu10k1/emufx.c +++ b/sound/pci/emu10k1/emufx.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3-71-gd317 From 1f6818b90dbb887261c616a318733703ed526f0a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 11 Jan 2006 22:42:26 +0100 Subject: [PATCH] x86_64: Minor GFP_DMA32 comment fix Pretty obvious Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7e4ae6ab1977..34cbefd2ebde 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -98,7 +98,7 @@ struct per_cpu_pageset { /* * On machines where it is needed (eg PCs) we divide physical memory - * into multiple physical zones. On a PC we have 4 zones: + * into multiple physical zones. On a 32bit PC we have 4 zones: * * ZONE_DMA < 16 MB ISA DMA capable memory * ZONE_DMA32 0 MB Empty -- cgit v1.2.3-71-gd317 From e99286744599a66195de4cd975d7ef4d643c2789 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 11 Jan 2006 22:43:33 +0100 Subject: [PATCH] x86_64: Generalize DMI and enable for x86-64 Some people need it now on 64bit so reuse the i386 code for x86-64. This will be also useful for future bug workarounds. It is a bit simplified there because there is no need to do it very early on x86-64. This means it doesn't need early ioremap et.al. We run it as a core initcall right now. I hope it's not needed for early setup. I added a general CONFIG_DMI symbol in case IA64 or someone else wants to reuse the code later too. 
Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 4 ++++ arch/i386/kernel/dmi_scan.c | 16 ++++++++-------- arch/i386/kernel/setup.c | 2 +- arch/x86_64/Kconfig | 4 ++++ arch/x86_64/kernel/Makefile | 5 ++++- arch/x86_64/kernel/setup.c | 9 +++++++++ include/asm-i386/io.h | 5 +++++ include/asm-x86_64/io.h | 5 +++++ include/linux/dmi.h | 5 ++++- 9 files changed, 44 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 815878ebd30f..81ae9627701d 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -41,6 +41,10 @@ config ARCH_MAY_HAVE_PC_FDC bool default y +config DMI + bool + default y + source "init/Kconfig" menu "Processor type and features" diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index 58516e2ac172..6a93d75db431 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -4,7 +4,7 @@ #include #include #include - +#include static char * __init dmi_string(struct dmi_header *dm, u8 s) { @@ -19,7 +19,7 @@ static char * __init dmi_string(struct dmi_header *dm, u8 s) } if (*bp != 0) { - str = alloc_bootmem(strlen(bp) + 1); + str = dmi_alloc(strlen(bp) + 1); if (str != NULL) strcpy(str, bp); else @@ -40,7 +40,7 @@ static int __init dmi_table(u32 base, int len, int num, u8 *buf, *data; int i = 0; - buf = bt_ioremap(base, len); + buf = dmi_ioremap(base, len); if (buf == NULL) return -1; @@ -65,7 +65,7 @@ static int __init dmi_table(u32 base, int len, int num, data += 2; i++; } - bt_iounmap(buf, len); + dmi_iounmap(buf, len); return 0; } @@ -112,7 +112,7 @@ static void __init dmi_save_devices(struct dmi_header *dm) if ((*d & 0x80) == 0) continue; - dev = alloc_bootmem(sizeof(*dev)); + dev = dmi_alloc(sizeof(*dev)); if (!dev) { printk(KERN_ERR "dmi_save_devices: out of memory.\n"); break; @@ -131,7 +131,7 @@ static void __init dmi_save_ipmi_device(struct dmi_header *dm) struct dmi_device *dev; void * data; - data = 
alloc_bootmem(dm->length); + data = dmi_alloc(dm->length); if (data == NULL) { printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); return; @@ -139,7 +139,7 @@ static void __init dmi_save_ipmi_device(struct dmi_header *dm) memcpy(data, dm, dm->length); - dev = alloc_bootmem(sizeof(*dev)); + dev = dmi_alloc(sizeof(*dev)); if (!dev) { printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); return; @@ -221,7 +221,7 @@ void __init dmi_scan_machine(void) } } -out: printk(KERN_INFO "DMI not present.\n"); +out: printk(KERN_INFO "DMI not present or invalid.\n"); } diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index f685637a100d..a720f743ea6e 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -45,6 +45,7 @@ #include #include #include +#include #include