From df46b9a44ceb5af2ea2351ce8e28ae7bd840b00f Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 20 Jun 2005 14:04:44 +0200 Subject: [PATCH] Add blk_rq_map_kern() Add blk_rq_map_kern which takes a kernel buffer and maps it into a request and bio. This can be used by the dm hw_handlers, old sg_scsi_ioctl, and one day scsi special requests so all requests comming into scsi will have bios. All requests having bios should allow scsi to use scatter lists for all IO and allow it to use block layer functions. Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 ++ include/linux/blkdev.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 038022763f09..1dd2bc2e84ae 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -282,6 +282,8 @@ extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, int); extern void bio_unmap_user(struct bio *); +extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, + unsigned int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4a99b76c5a33..67339bc5f6bc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -560,6 +560,8 @@ extern void blk_run_queue(request_queue_t *); extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int); extern int blk_rq_unmap_user(struct request *, struct bio *, unsigned int); +extern struct request *blk_rq_map_kern(request_queue_t *, int, void *, + unsigned int, unsigned int); extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) -- cgit v1.2.3-71-gd317 From dd1cab95f356f1395278633565f198463cf6bd24 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 20 Jun 2005 14:06:01 +0200 Subject: [PATCH] Cleanup blk_rq_map_* interfaces Change the blk_rq_map_user() and blk_rq_map_kern() interface to require a previously allocated request to be passed in. This is both more efficient for multiple iterations of mapping data to the same request, and it is also a much nicer API. Signed-off-by: Jens Axboe --- drivers/block/ll_rw_blk.c | 68 ++++++++++++++++++---------------------------- drivers/block/scsi_ioctl.c | 23 ++++++++++------ drivers/cdrom/cdrom.c | 13 ++++++--- include/linux/blkdev.h | 7 ++--- 4 files changed, 53 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 1471aca6fa18..42c4f3651cf8 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -2107,21 +2107,19 @@ EXPORT_SYMBOL(blk_insert_request); * original bio must be passed back in to blk_rq_unmap_user() for proper * unmapping. 
*/ -struct request *blk_rq_map_user(request_queue_t *q, int rw, void __user *ubuf, - unsigned int len) +int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, + unsigned int len) { unsigned long uaddr; - struct request *rq; struct bio *bio; + int reading; if (len > (q->max_sectors << 9)) - return ERR_PTR(-EINVAL); - if ((!len && ubuf) || (len && !ubuf)) - return ERR_PTR(-EINVAL); + return -EINVAL; + if (!len || !ubuf) + return -EINVAL; - rq = blk_get_request(q, rw, __GFP_WAIT); - if (!rq) - return ERR_PTR(-ENOMEM); + reading = rq_data_dir(rq) == READ; /* * if alignment requirement is satisfied, map in user pages for @@ -2129,9 +2127,9 @@ struct request *blk_rq_map_user(request_queue_t *q, int rw, void __user *ubuf, */ uaddr = (unsigned long) ubuf; if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q))) - bio = bio_map_user(q, NULL, uaddr, len, rw == READ); + bio = bio_map_user(q, NULL, uaddr, len, reading); else - bio = bio_copy_user(q, uaddr, len, rw == READ); + bio = bio_copy_user(q, uaddr, len, reading); if (!IS_ERR(bio)) { rq->bio = rq->biotail = bio; @@ -2139,14 +2137,13 @@ struct request *blk_rq_map_user(request_queue_t *q, int rw, void __user *ubuf, rq->buffer = rq->data = NULL; rq->data_len = len; - return rq; + return 0; } /* * bio is the err-ptr */ - blk_put_request(rq); - return (struct request *) bio; + return PTR_ERR(bio); } EXPORT_SYMBOL(blk_rq_map_user); @@ -2160,7 +2157,7 @@ EXPORT_SYMBOL(blk_rq_map_user); * Description: * Unmap a request previously mapped by blk_rq_map_user(). */ -int blk_rq_unmap_user(struct request *rq, struct bio *bio, unsigned int ulen) +int blk_rq_unmap_user(struct bio *bio, unsigned int ulen) { int ret = 0; @@ -2171,8 +2168,7 @@ int blk_rq_unmap_user(struct request *rq, struct bio *bio, unsigned int ulen) ret = bio_uncopy_user(bio); } - blk_put_request(rq); - return ret; + return 0; } EXPORT_SYMBOL(blk_rq_unmap_user); @@ -2184,39 +2180,29 @@ EXPORT_SYMBOL(blk_rq_unmap_user); * @kbuf: the kernel buffer * @len: length of user data */ -struct request *blk_rq_map_kern(request_queue_t *q, int rw, void *kbuf, - unsigned int len, unsigned int gfp_mask) +int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, + unsigned int len, unsigned int gfp_mask) { - struct request *rq; struct bio *bio; if (len > (q->max_sectors << 9)) - return ERR_PTR(-EINVAL); - if ((!len && kbuf) || (len && !kbuf)) - return ERR_PTR(-EINVAL); - - rq = blk_get_request(q, rw, gfp_mask); - if (!rq) - return ERR_PTR(-ENOMEM); + return -EINVAL; + if (!len || !kbuf) + return -EINVAL; bio = bio_map_kern(q, kbuf, len, gfp_mask); - if (!IS_ERR(bio)) { - if (rw) - bio->bi_rw |= (1 << BIO_RW); + if (IS_ERR(bio)) + return PTR_ERR(bio); - rq->bio = rq->biotail = bio; - blk_rq_bio_prep(q, rq, bio); + if (rq_data_dir(rq) == WRITE) + bio->bi_rw |= (1 << BIO_RW); - rq->buffer = rq->data = NULL; - rq->data_len = len; - return rq; - } + rq->bio = rq->biotail = bio; + blk_rq_bio_prep(q, rq, bio); - /* - * bio is the err-ptr - */ - blk_put_request(rq); - return (struct request *) bio; + rq->buffer = rq->data = NULL; + rq->data_len = len; + return 0; } EXPORT_SYMBOL(blk_rq_map_kern); diff --git a/drivers/block/scsi_ioctl.c b/drivers/block/scsi_ioctl.c index 681871ca5d60..93c4ca874be3 100644 --- a/drivers/block/scsi_ioctl.c +++ b/drivers/block/scsi_ioctl.c @@ -216,7 +216,7 @@ static int sg_io(struct file *file, request_queue_t *q, struct gendisk *bd_disk, struct sg_io_hdr *hdr) { unsigned long start_time; - int reading, writing; + int reading, 
writing, ret; struct request *rq; struct bio *bio; char sense[SCSI_SENSE_BUFFERSIZE]; @@ -255,14 +255,17 @@ static int sg_io(struct file *file, request_queue_t *q, reading = 1; break; } + } - rq = blk_rq_map_user(q, writing ? WRITE : READ, hdr->dxferp, - hdr->dxfer_len); + rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL); + if (!rq) + return -ENOMEM; - if (IS_ERR(rq)) - return PTR_ERR(rq); - } else - rq = blk_get_request(q, READ, __GFP_WAIT); + if (reading || writing) { + ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); + if (ret) + goto out; + } /* * fill in request structure @@ -321,11 +324,13 @@ static int sg_io(struct file *file, request_queue_t *q, } if (blk_rq_unmap_user(rq, bio, hdr->dxfer_len)) - return -EFAULT; + ret = -EFAULT; /* may not have succeeded, but output values written to control * structure (struct sg_io_hdr). */ - return 0; +out: + blk_put_request(rq); + return ret; } #define OMAX_SB_LEN 16 /* For backward compatibility */ diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index beaa561f2ed8..6a7d926774a1 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2097,6 +2097,10 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, if (!q) return -ENXIO; + rq = blk_get_request(q, READ, GFP_KERNEL); + if (!rq) + return -ENOMEM; + cdi->last_sense = 0; while (nframes) { @@ -2108,9 +2112,9 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, len = nr * CD_FRAMESIZE_RAW; - rq = blk_rq_map_user(q, READ, ubuf, len); - if (IS_ERR(rq)) - return PTR_ERR(rq); + ret = blk_rq_map_user(q, rq, ubuf, len); + if (ret) + break; memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd[0] = GPCMD_READ_CD; @@ -2138,7 +2142,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, cdi->last_sense = s->sense_key; } - if (blk_rq_unmap_user(rq, bio, len)) + if (blk_rq_unmap_user(bio, len)) ret = -EFAULT; if (ret) @@ -2149,6 +2153,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, ubuf += len; } + blk_put_request(rq); return ret; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 67339bc5f6bc..fc0dce078616 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -558,10 +558,9 @@ extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(request_queue_t *q); extern void blk_run_queue(request_queue_t *); extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); -extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int); -extern int blk_rq_unmap_user(struct request *, struct bio *, unsigned int); -extern struct request *blk_rq_map_kern(request_queue_t *, int, void *, - unsigned int, unsigned int); +extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int); +extern int blk_rq_unmap_user(struct bio *, unsigned int); +extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, unsigned int); extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) -- cgit v1.2.3-71-gd317 From f1970baf6d74e03bd32072ab453f2fc01bc1b8d3 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 20 Jun 2005 14:06:52 +0200 Subject: [PATCH] Add scatter-gather support for the block layer SG_IO Signed-off-by: Jens Axboe --- drivers/block/ll_rw_blk.c | 64 +++++++++++++++++-- drivers/block/scsi_ioctl.c | 34 ++++++---- 
fs/bio.c | 150 +++++++++++++++++++++++++++++++-------------- include/linux/bio.h | 4 ++ include/linux/blkdev.h | 1 + 5 files changed, 191 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 42c4f3651cf8..874e46fc3748 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -2148,6 +2148,50 @@ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, EXPORT_SYMBOL(blk_rq_map_user); +/** + * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage + * @q: request queue where request should be inserted + * @rq: request to map data to + * @iov: pointer to the iovec + * @iov_count: number of elements in the iovec + * + * Description: + * Data will be mapped directly for zero copy io, if possible. Otherwise + * a kernel bounce buffer is used. + * + * A matching blk_rq_unmap_user() must be issued at the end of io, while + * still in process context. + * + * Note: The mapped bio may need to be bounced through blk_queue_bounce() + * before being submitted to the device, as pages mapped may be out of + * reach. It's the callers responsibility to make sure this happens. The + * original bio must be passed back in to blk_rq_unmap_user() for proper + * unmapping. + */ +int blk_rq_map_user_iov(request_queue_t *q, struct request *rq, + struct sg_iovec *iov, int iov_count) +{ + struct bio *bio; + + if (!iov || iov_count <= 0) + return -EINVAL; + + /* we don't allow misaligned data like bio_map_user() does. If the + * user is using sg, they're expected to know the alignment constraints + * and respect them accordingly */ + bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); + if (IS_ERR(bio)) + return PTR_ERR(bio); + + rq->bio = rq->biotail = bio; + blk_rq_bio_prep(q, rq, bio); + rq->buffer = rq->data = NULL; + rq->data_len = bio->bi_size; + return 0; +} + +EXPORT_SYMBOL(blk_rq_map_user_iov); + /** * blk_rq_unmap_user - unmap a request with user data * @rq: request to be unmapped @@ -2207,6 +2251,19 @@ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, EXPORT_SYMBOL(blk_rq_map_kern); +void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, + struct request *rq, int at_head, + void (*done)(struct request *)) +{ + int where = at_head ? 
ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; + + rq->rq_disk = bd_disk; + rq->flags |= REQ_NOMERGE; + rq->end_io = done; + elv_add_request(q, rq, where, 1); + generic_unplug_device(q); +} + /** * blk_execute_rq - insert a request into queue for execution * @q: queue to insert the request in @@ -2224,8 +2281,6 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, char sense[SCSI_SENSE_BUFFERSIZE]; int err = 0; - rq->rq_disk = bd_disk; - /* * we need an extra reference to the request, so we can look at * it after io completion @@ -2238,11 +2293,8 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, rq->sense_len = 0; } - rq->flags |= REQ_NOMERGE; rq->waiting = &wait; - rq->end_io = blk_end_sync_rq; - elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1); - generic_unplug_device(q); + blk_execute_rq_nowait(q, bd_disk, rq, 0, blk_end_sync_rq); wait_for_completion(&wait); rq->waiting = NULL; diff --git a/drivers/block/scsi_ioctl.c b/drivers/block/scsi_ioctl.c index 93c4ca874be3..09a7e73a0812 100644 --- a/drivers/block/scsi_ioctl.c +++ b/drivers/block/scsi_ioctl.c @@ -231,17 +231,11 @@ static int sg_io(struct file *file, request_queue_t *q, if (verify_command(file, cmd)) return -EPERM; - /* - * we'll do that later - */ - if (hdr->iovec_count) - return -EOPNOTSUPP; - if (hdr->dxfer_len > (q->max_sectors << 9)) return -EIO; reading = writing = 0; - if (hdr->dxfer_len) { + if (hdr->dxfer_len) switch (hdr->dxfer_direction) { default: return -EINVAL; @@ -261,11 +255,29 @@ static int sg_io(struct file *file, request_queue_t *q, if (!rq) return -ENOMEM; - if (reading || writing) { - ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); - if (ret) + if (hdr->iovec_count) { + const int size = sizeof(struct sg_iovec) * hdr->iovec_count; + struct sg_iovec *iov; + + iov = kmalloc(size, GFP_KERNEL); + if (!iov) { + ret = -ENOMEM; goto out; - } + } + + if (copy_from_user(iov, hdr->dxferp, size)) { + kfree(iov); + ret = -EFAULT; + goto out; + } + + ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count); + kfree(iov); + } else if (hdr->dxfer_len) + ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); + + if (ret) + goto out; /* * fill in request structure diff --git a/fs/bio.c b/fs/bio.c index c0d9140e470c..24e4045788e2 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -25,6 +25,7 @@ #include #include #include +#include /* for struct sg_iovec */ #define BIO_POOL_SIZE 256 @@ -549,22 +550,34 @@ out_bmd: return ERR_PTR(ret); } -static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, - unsigned long uaddr, unsigned int len, - int write_to_vm) +static struct bio *__bio_map_user_iov(request_queue_t *q, + struct block_device *bdev, + struct sg_iovec *iov, int iov_count, + int write_to_vm) { - unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; - const int nr_pages = end - start; - int ret, offset, i; + int i, j; + int nr_pages = 0; struct page **pages; struct bio *bio; + int cur_page = 0; + int ret, offset; - /* - * transfer and buffer must be aligned to at least hardsector - * size for now, in the future we can relax this restriction - */ - if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) + for (i = 0; i < iov_count; i++) { + unsigned long uaddr = (unsigned long)iov[i].iov_base; + unsigned long len = iov[i].iov_len; + unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = uaddr >> PAGE_SHIFT; + + nr_pages += end - start; + /* + * transfer and buffer must 
be aligned to at least hardsector + * size for now, in the future we can relax this restriction + */ + if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) + return ERR_PTR(-EINVAL); + } + + if (!nr_pages) return ERR_PTR(-EINVAL); bio = bio_alloc(GFP_KERNEL, nr_pages); @@ -576,42 +589,54 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, if (!pages) goto out; - down_read(¤t->mm->mmap_sem); - ret = get_user_pages(current, current->mm, uaddr, nr_pages, - write_to_vm, 0, pages, NULL); - up_read(¤t->mm->mmap_sem); - - if (ret < nr_pages) - goto out; - - bio->bi_bdev = bdev; - - offset = uaddr & ~PAGE_MASK; - for (i = 0; i < nr_pages; i++) { - unsigned int bytes = PAGE_SIZE - offset; - - if (len <= 0) - break; - - if (bytes > len) - bytes = len; + memset(pages, 0, nr_pages * sizeof(struct page *)); + + for (i = 0; i < iov_count; i++) { + unsigned long uaddr = (unsigned long)iov[i].iov_base; + unsigned long len = iov[i].iov_len; + unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = uaddr >> PAGE_SHIFT; + const int local_nr_pages = end - start; + const int page_limit = cur_page + local_nr_pages; + + down_read(¤t->mm->mmap_sem); + ret = get_user_pages(current, current->mm, uaddr, + local_nr_pages, + write_to_vm, 0, &pages[cur_page], NULL); + up_read(¤t->mm->mmap_sem); + + if (ret < local_nr_pages) + goto out_unmap; + + + offset = uaddr & ~PAGE_MASK; + for (j = cur_page; j < page_limit; j++) { + unsigned int bytes = PAGE_SIZE - offset; + + if (len <= 0) + break; + + if (bytes > len) + bytes = len; + + /* + * sorry... + */ + if (__bio_add_page(q, bio, pages[j], bytes, offset) < bytes) + break; + + len -= bytes; + offset = 0; + } + cur_page = j; /* - * sorry... + * release the pages we didn't map into the bio, if any */ - if (__bio_add_page(q, bio, pages[i], bytes, offset) < bytes) - break; - - len -= bytes; - offset = 0; + while (j < page_limit) + page_cache_release(pages[j++]); } - /* - * release the pages we didn't map into the bio, if any - */ - while (i < nr_pages) - page_cache_release(pages[i++]); - kfree(pages); /* @@ -620,9 +645,17 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, if (!write_to_vm) bio->bi_rw |= (1 << BIO_RW); + bio->bi_bdev = bdev; bio->bi_flags |= (1 << BIO_USER_MAPPED); return bio; -out: + + out_unmap: + for (i = 0; i < nr_pages; i++) { + if(!pages[i]) + break; + page_cache_release(pages[i]); + } + out: kfree(pages); bio_put(bio); return ERR_PTR(ret); @@ -641,10 +674,34 @@ out: */ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, unsigned long uaddr, unsigned int len, int write_to_vm) +{ + struct sg_iovec iov; + + iov.iov_base = (__user void *)uaddr; + iov.iov_len = len; + + return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); +} + +/** + * bio_map_user_iov - map user sg_iovec table into bio + * @q: the request_queue_t for the bio + * @bdev: destination block device + * @iov: the iovec. + * @iov_count: number of elements in the iovec + * @write_to_vm: bool indicating writing to pages or not + * + * Map the user space address into a bio suitable for io to a block + * device. Returns an error pointer in case of error. 
+ */ +struct bio *bio_map_user_iov(request_queue_t *q, struct block_device *bdev, + struct sg_iovec *iov, int iov_count, + int write_to_vm) { struct bio *bio; + int len = 0, i; - bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm); + bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); if (IS_ERR(bio)) return bio; @@ -657,6 +714,9 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, */ bio_get(bio); + for (i = 0; i < iov_count; i++) + len += iov[i].iov_len; + if (bio->bi_size == len) return bio; diff --git a/include/linux/bio.h b/include/linux/bio.h index 1dd2bc2e84ae..ebcd03ba2e20 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -281,6 +281,10 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, int); +struct sg_iovec; +extern struct bio *bio_map_user_iov(struct request_queue *, + struct block_device *, + struct sg_iovec *, int, int); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, unsigned int); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fc0dce078616..0430ea3e5f2e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -561,6 +561,7 @@ extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int); extern int blk_rq_unmap_user(struct bio *, unsigned int); extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, unsigned int); +extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int); extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) -- cgit v1.2.3-71-gd317 From 994ca9a19616f0d4161a9e825f0835925d522426 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 20 Jun 2005 14:11:09 +0200 Subject: [PATCH] update blk_execute_rq to take an at_head parameter Original From: Mike Christie Modified to split out block changes (this patch) and SCSI pieces. Signed-off-by: Jens Axboe Signed-off-by: James Bottomley --- drivers/block/ll_rw_blk.c | 7 ++++--- drivers/block/scsi_ioctl.c | 6 +++--- drivers/cdrom/cdrom.c | 2 +- drivers/ide/ide-disk.c | 2 +- include/linux/blkdev.h | 4 ++-- 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 874e46fc3748..d260a2ce9a70 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -2269,13 +2269,14 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, * @q: queue to insert the request in * @bd_disk: matching gendisk * @rq: request to insert + * @at_head: insert request at head or tail of queue * * Description: * Insert a fully prepared request at the back of the io scheduler queue * for execution. 
*/ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, - struct request *rq) + struct request *rq, int at_head) { DECLARE_COMPLETION(wait); char sense[SCSI_SENSE_BUFFERSIZE]; @@ -2294,7 +2295,7 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, } rq->waiting = &wait; - blk_execute_rq_nowait(q, bd_disk, rq, 0, blk_end_sync_rq); + blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); wait_for_completion(&wait); rq->waiting = NULL; @@ -2361,7 +2362,7 @@ int blkdev_scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk, rq->data_len = 0; rq->timeout = 60 * HZ; - ret = blk_execute_rq(q, disk, rq); + ret = blk_execute_rq(q, disk, rq, 0); if (ret && error_sector) *error_sector = rq->sector; diff --git a/drivers/block/scsi_ioctl.c b/drivers/block/scsi_ioctl.c index 7717b76f7f20..abb2df249fd3 100644 --- a/drivers/block/scsi_ioctl.c +++ b/drivers/block/scsi_ioctl.c @@ -308,7 +308,7 @@ static int sg_io(struct file *file, request_queue_t *q, * (if he doesn't check that is his problem). * N.B. a non-zero SCSI status is _not_ necessarily an error. */ - blk_execute_rq(q, bd_disk, rq); + blk_execute_rq(q, bd_disk, rq, 0); /* write to all output members */ hdr->status = 0xff & rq->errors; @@ -420,7 +420,7 @@ static int sg_scsi_ioctl(struct file *file, request_queue_t *q, rq->data_len = bytes; rq->flags |= REQ_BLOCK_PC; - blk_execute_rq(q, bd_disk, rq); + blk_execute_rq(q, bd_disk, rq, 0); err = rq->errors & 0xff; /* only 8 bit SCSI status */ if (err) { if (rq->sense_len && rq->sense) { @@ -573,7 +573,7 @@ int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, rq->cmd[0] = GPCMD_START_STOP_UNIT; rq->cmd[4] = 0x02 + (close != 0); rq->cmd_len = 6; - err = blk_execute_rq(q, bd_disk, rq); + err = blk_execute_rq(q, bd_disk, rq, 0); blk_put_request(rq); break; default: diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 6a7d926774a1..153960348414 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2136,7 +2136,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, if (rq->bio) blk_queue_bounce(q, &rq->bio); - if (blk_execute_rq(q, cdi->disk, rq)) { + if (blk_execute_rq(q, cdi->disk, rq, 0)) { struct request_sense *s = rq->sense; ret = -EIO; cdi->last_sense = s->sense_key; diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 3302cd8eab4c..9176da7a9858 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -750,7 +750,7 @@ static int idedisk_issue_flush(request_queue_t *q, struct gendisk *disk, idedisk_prepare_flush(q, rq); - ret = blk_execute_rq(q, disk, rq); + ret = blk_execute_rq(q, disk, rq, 0); /* * if we failed and caller wants error offset, get it diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0430ea3e5f2e..a48dc12c6699 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -562,8 +562,8 @@ extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, u extern int blk_rq_unmap_user(struct bio *, unsigned int); extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, unsigned int); extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int); -extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); - +extern int blk_execute_rq(request_queue_t *, struct gendisk *, + struct request *, int); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) { return bdev->bd_disk->queue; -- cgit v1.2.3-71-gd317 From 
d0a7e574007fd547d72ec693bfa35778623d0738 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 14 Aug 2005 17:09:01 -0500 Subject: [SCSI] correct transport class abstraction to work outside SCSI I recently tried to construct a totally generic transport class and found there were certain features missing from the current abstract transport class. Most notable is that you have to hang the data on the class_device but most of the API is framed in terms of the generic device, not the class_device. These changes are two fold - Provide the class_device to all of the setup and configure APIs - Provide and extra API to take the device and the attribute class and return the corresponding class_device Signed-off-by: James Bottomley --- drivers/base/attribute_container.c | 38 +++++++++++++++++++++++++++++++++++++ drivers/base/transport_class.c | 17 +++++++++++------ drivers/scsi/scsi_transport_fc.c | 6 ++++-- drivers/scsi/scsi_transport_spi.c | 11 ++++++++--- include/linux/attribute_container.h | 9 +++------ include/linux/transport_class.h | 11 ++++++++--- 6 files changed, 72 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/attribute_container.c b/drivers/base/attribute_container.c index ec615d854be9..62c093db11e6 100644 --- a/drivers/base/attribute_container.c +++ b/drivers/base/attribute_container.c @@ -58,6 +58,7 @@ attribute_container_register(struct attribute_container *cont) { INIT_LIST_HEAD(&cont->node); INIT_LIST_HEAD(&cont->containers); + spin_lock_init(&cont->containers_lock); down(&attribute_container_mutex); list_add_tail(&cont->node, &attribute_container_list); @@ -77,11 +78,13 @@ attribute_container_unregister(struct attribute_container *cont) { int retval = -EBUSY; down(&attribute_container_mutex); + spin_lock(&cont->containers_lock); if (!list_empty(&cont->containers)) goto out; retval = 0; list_del(&cont->node); out: + spin_unlock(&cont->containers_lock); up(&attribute_container_mutex); return retval; @@ -151,7 +154,9 @@ attribute_container_add_device(struct device *dev, fn(cont, dev, &ic->classdev); else attribute_container_add_class_device(&ic->classdev); + spin_lock(&cont->containers_lock); list_add_tail(&ic->node, &cont->containers); + spin_unlock(&cont->containers_lock); } up(&attribute_container_mutex); } @@ -189,6 +194,7 @@ attribute_container_remove_device(struct device *dev, if (!cont->match(cont, dev)) continue; + spin_lock(&cont->containers_lock); list_for_each_entry_safe(ic, tmp, &cont->containers, node) { if (dev != ic->classdev.dev) continue; @@ -200,6 +206,7 @@ attribute_container_remove_device(struct device *dev, class_device_unregister(&ic->classdev); } } + spin_unlock(&cont->containers_lock); } up(&attribute_container_mutex); } @@ -230,10 +237,12 @@ attribute_container_device_trigger(struct device *dev, if (!cont->match(cont, dev)) continue; + spin_lock(&cont->containers_lock); list_for_each_entry_safe(ic, tmp, &cont->containers, node) { if (dev == ic->classdev.dev) fn(cont, dev, &ic->classdev); } + spin_unlock(&cont->containers_lock); } up(&attribute_container_mutex); } @@ -368,6 +377,35 @@ attribute_container_class_device_del(struct class_device *classdev) } EXPORT_SYMBOL_GPL(attribute_container_class_device_del); +/** + * attribute_container_find_class_device - find the corresponding class_device + * + * @cont: the container + * @dev: the generic device + * + * Looks up the device in the container's list of class devices and returns + * the corresponding class_device. 
+ */ +struct class_device * +attribute_container_find_class_device(struct attribute_container *cont, + struct device *dev) +{ + struct class_device *cdev = NULL; + struct internal_container *ic; + + spin_lock(&cont->containers_lock); + list_for_each_entry(ic, &cont->containers, node) { + if (ic->classdev.dev == dev) { + cdev = &ic->classdev; + break; + } + } + spin_unlock(&cont->containers_lock); + + return cdev; +} +EXPORT_SYMBOL_GPL(attribute_container_find_class_device); + int __init attribute_container_init(void) { diff --git a/drivers/base/transport_class.c b/drivers/base/transport_class.c index 6c2b447a3336..4fb4c5de8470 100644 --- a/drivers/base/transport_class.c +++ b/drivers/base/transport_class.c @@ -64,7 +64,9 @@ void transport_class_unregister(struct transport_class *tclass) } EXPORT_SYMBOL_GPL(transport_class_unregister); -static int anon_transport_dummy_function(struct device *dev) +static int anon_transport_dummy_function(struct transport_container *tc, + struct device *dev, + struct class_device *cdev) { /* do nothing */ return 0; @@ -115,9 +117,10 @@ static int transport_setup_classdev(struct attribute_container *cont, struct class_device *classdev) { struct transport_class *tclass = class_to_transport_class(cont->class); + struct transport_container *tcont = attribute_container_to_transport_container(cont); if (tclass->setup) - tclass->setup(dev); + tclass->setup(tcont, dev, classdev); return 0; } @@ -178,12 +181,14 @@ void transport_add_device(struct device *dev) EXPORT_SYMBOL_GPL(transport_add_device); static int transport_configure(struct attribute_container *cont, - struct device *dev) + struct device *dev, + struct class_device *cdev) { struct transport_class *tclass = class_to_transport_class(cont->class); + struct transport_container *tcont = attribute_container_to_transport_container(cont); if (tclass->configure) - tclass->configure(dev); + tclass->configure(tcont, dev, cdev); return 0; } @@ -202,7 +207,7 @@ static int transport_configure(struct attribute_container *cont, */ void transport_configure_device(struct device *dev) { - attribute_container_trigger(dev, transport_configure); + attribute_container_device_trigger(dev, transport_configure); } EXPORT_SYMBOL_GPL(transport_configure_device); @@ -215,7 +220,7 @@ static int transport_remove_classdev(struct attribute_container *cont, struct transport_class *tclass = class_to_transport_class(cont->class); if (tclass->remove) - tclass->remove(dev); + tclass->remove(tcont, dev, classdev); if (tclass->remove != anon_transport_dummy_function) { if (tcont->statistics) diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 35d1c1e8e345..96243c7fe110 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -252,7 +252,8 @@ struct fc_internal { #define to_fc_internal(tmpl) container_of(tmpl, struct fc_internal, t) -static int fc_target_setup(struct device *dev) +static int fc_target_setup(struct transport_container *tc, struct device *dev, + struct class_device *cdev) { struct scsi_target *starget = to_scsi_target(dev); struct fc_rport *rport = starget_to_rport(starget); @@ -281,7 +282,8 @@ static DECLARE_TRANSPORT_CLASS(fc_transport_class, NULL, NULL); -static int fc_host_setup(struct device *dev) +static int fc_host_setup(struct transport_container *tc, struct device *dev, + struct class_device *cdev) { struct Scsi_Host *shost = dev_to_shost(dev); diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c index 
02134fce2174..89f6b7feb9c2 100644 --- a/drivers/scsi/scsi_transport_spi.c +++ b/drivers/scsi/scsi_transport_spi.c @@ -162,7 +162,8 @@ static inline enum spi_signal_type spi_signal_to_value(const char *name) return SPI_SIGNAL_UNKNOWN; } -static int spi_host_setup(struct device *dev) +static int spi_host_setup(struct transport_container *tc, struct device *dev, + struct class_device *cdev) { struct Scsi_Host *shost = dev_to_shost(dev); @@ -196,7 +197,9 @@ static int spi_host_match(struct attribute_container *cont, return &i->t.host_attrs.ac == cont; } -static int spi_device_configure(struct device *dev) +static int spi_device_configure(struct transport_container *tc, + struct device *dev, + struct class_device *cdev) { struct scsi_device *sdev = to_scsi_device(dev); struct scsi_target *starget = sdev->sdev_target; @@ -214,7 +217,9 @@ static int spi_device_configure(struct device *dev) return 0; } -static int spi_setup_transport_attrs(struct device *dev) +static int spi_setup_transport_attrs(struct transport_container *tc, + struct device *dev, + struct class_device *cdev) { struct scsi_target *starget = to_scsi_target(dev); diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h index af1010b6dab7..f54b05b052b3 100644 --- a/include/linux/attribute_container.h +++ b/include/linux/attribute_container.h @@ -11,10 +11,12 @@ #include #include +#include struct attribute_container { struct list_head node; struct list_head containers; + spinlock_t containers_lock; struct class *class; struct class_device_attribute **attrs; int (*match)(struct attribute_container *, struct device *); @@ -62,12 +64,7 @@ int attribute_container_add_class_device_adapter(struct attribute_container *con struct class_device *classdev); void attribute_container_remove_attrs(struct class_device *classdev); void attribute_container_class_device_del(struct class_device *classdev); - - - - - - +struct class_device *attribute_container_find_class_device(struct attribute_container *, struct device *); struct class_device_attribute **attribute_container_classdev_to_attrs(const struct class_device *classdev); #endif diff --git a/include/linux/transport_class.h b/include/linux/transport_class.h index 87d98d1faefb..1d6cc22e5f42 100644 --- a/include/linux/transport_class.h +++ b/include/linux/transport_class.h @@ -12,11 +12,16 @@ #include #include +struct transport_container; + struct transport_class { struct class class; - int (*setup)(struct device *); - int (*configure)(struct device *); - int (*remove)(struct device *); + int (*setup)(struct transport_container *, struct device *, + struct class_device *); + int (*configure)(struct transport_container *, struct device *, + struct class_device *); + int (*remove)(struct transport_container *, struct device *, + struct class_device *); }; #define DECLARE_TRANSPORT_CLASS(cls, nm, su, rm, cfg) \ -- cgit v1.2.3-71-gd317 From caca1779870b1bcc0fb07e48ebd2403901f356b8 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 16 Aug 2005 17:26:10 -0500 Subject: [SCSI] add missing attribute container function prototype attribute_container_classdev_to_container is an exported function of the attribute_container.c file. However, there's no prototype for it. Now I actually want to use it, so add one. 
Signed-off-by: James Bottomley --- include/linux/attribute_container.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h index f54b05b052b3..ee83fe64a102 100644 --- a/include/linux/attribute_container.h +++ b/include/linux/attribute_container.h @@ -64,6 +64,7 @@ int attribute_container_add_class_device_adapter(struct attribute_container *con struct class_device *classdev); void attribute_container_remove_attrs(struct class_device *classdev); void attribute_container_class_device_del(struct class_device *classdev); +struct attribute_container *attribute_container_classdev_to_container(struct class_device *); struct class_device *attribute_container_find_class_device(struct attribute_container *, struct device *); struct class_device_attribute **attribute_container_classdev_to_attrs(const struct class_device *classdev); -- cgit v1.2.3-71-gd317 From 53c165e0a6c8a4ff7df316557528fa7a52d20711 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 22 Aug 2005 10:06:19 -0500 Subject: [SCSI] correct attribute_container list usage One of the changes in the attribute_container code in the scsi-misc tree was to add a lock to protect the list of devices per container. This, unfortunately, leads to potential scheduling while atomic problems if there's a sleep in the function called by a trigger. The correct solution is to use the kernel klist infrastructure instead which allows lockless traversal of a list. Signed-off-by: James Bottomley --- drivers/base/attribute_container.c | 51 +++++++++++++++++++++---------------- include/linux/attribute_container.h | 4 +-- 2 files changed, 31 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/attribute_container.c b/drivers/base/attribute_container.c index ebcae5c34133..6c0f49340eb2 100644 --- a/drivers/base/attribute_container.c +++ b/drivers/base/attribute_container.c @@ -22,7 +22,7 @@ /* This is a private structure used to tie the classdev and the * container .. 
it should never be visible outside this file */ struct internal_container { - struct list_head node; + struct klist_node node; struct attribute_container *cont; struct class_device classdev; }; @@ -57,8 +57,7 @@ int attribute_container_register(struct attribute_container *cont) { INIT_LIST_HEAD(&cont->node); - INIT_LIST_HEAD(&cont->containers); - spin_lock_init(&cont->containers_lock); + klist_init(&cont->containers); down(&attribute_container_mutex); list_add_tail(&cont->node, &attribute_container_list); @@ -78,13 +77,13 @@ attribute_container_unregister(struct attribute_container *cont) { int retval = -EBUSY; down(&attribute_container_mutex); - spin_lock(&cont->containers_lock); - if (!list_empty(&cont->containers)) + spin_lock(&cont->containers.k_lock); + if (!list_empty(&cont->containers.k_list)) goto out; retval = 0; list_del(&cont->node); out: - spin_unlock(&cont->containers_lock); + spin_unlock(&cont->containers.k_lock); up(&attribute_container_mutex); return retval; @@ -143,7 +142,6 @@ attribute_container_add_device(struct device *dev, continue; } memset(ic, 0, sizeof(struct internal_container)); - INIT_LIST_HEAD(&ic->node); ic->cont = cont; class_device_initialize(&ic->classdev); ic->classdev.dev = get_device(dev); @@ -154,13 +152,22 @@ attribute_container_add_device(struct device *dev, fn(cont, dev, &ic->classdev); else attribute_container_add_class_device(&ic->classdev); - spin_lock(&cont->containers_lock); - list_add_tail(&ic->node, &cont->containers); - spin_unlock(&cont->containers_lock); + klist_add_tail(&ic->node, &cont->containers); } up(&attribute_container_mutex); } +/* FIXME: can't break out of this unless klist_iter_exit is also + * called before doing the break + */ +#define klist_for_each_entry(pos, head, member, iter) \ + for (klist_iter_init(head, iter); (pos = ({ \ + struct klist_node *n = klist_next(iter); \ + n ? ({ klist_iter_exit(iter) ; NULL; }) : \ + container_of(n, typeof(*pos), member);\ + }) ) != NULL; ) + + /** * attribute_container_remove_device - make device eligible for removal. 
* @@ -187,18 +194,19 @@ attribute_container_remove_device(struct device *dev, down(&attribute_container_mutex); list_for_each_entry(cont, &attribute_container_list, node) { - struct internal_container *ic, *tmp; + struct internal_container *ic; + struct klist_iter iter; if (attribute_container_no_classdevs(cont)) continue; if (!cont->match(cont, dev)) continue; - spin_lock(&cont->containers_lock); - list_for_each_entry_safe(ic, tmp, &cont->containers, node) { + + klist_for_each_entry(ic, &cont->containers, node, &iter) { if (dev != ic->classdev.dev) continue; - list_del(&ic->node); + klist_remove(&ic->node); if (fn) fn(cont, dev, &ic->classdev); else { @@ -206,7 +214,6 @@ attribute_container_remove_device(struct device *dev, class_device_unregister(&ic->classdev); } } - spin_unlock(&cont->containers_lock); } up(&attribute_container_mutex); } @@ -232,7 +239,8 @@ attribute_container_device_trigger(struct device *dev, down(&attribute_container_mutex); list_for_each_entry(cont, &attribute_container_list, node) { - struct internal_container *ic, *tmp; + struct internal_container *ic; + struct klist_iter iter; if (!cont->match(cont, dev)) continue; @@ -242,12 +250,10 @@ attribute_container_device_trigger(struct device *dev, continue; } - spin_lock(&cont->containers_lock); - list_for_each_entry_safe(ic, tmp, &cont->containers, node) { + klist_for_each_entry(ic, &cont->containers, node, &iter) { if (dev == ic->classdev.dev) fn(cont, dev, &ic->classdev); } - spin_unlock(&cont->containers_lock); } up(&attribute_container_mutex); } @@ -397,15 +403,16 @@ attribute_container_find_class_device(struct attribute_container *cont, { struct class_device *cdev = NULL; struct internal_container *ic; + struct klist_iter iter; - spin_lock(&cont->containers_lock); - list_for_each_entry(ic, &cont->containers, node) { + klist_for_each_entry(ic, &cont->containers, node, &iter) { if (ic->classdev.dev == dev) { cdev = &ic->classdev; + /* FIXME: must exit iterator then break */ + klist_iter_exit(&iter); break; } } - spin_unlock(&cont->containers_lock); return cdev; } diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h index ee83fe64a102..93bfb0beb62a 100644 --- a/include/linux/attribute_container.h +++ b/include/linux/attribute_container.h @@ -11,12 +11,12 @@ #include #include +#include #include struct attribute_container { struct list_head node; - struct list_head containers; - spinlock_t containers_lock; + struct klist containers; struct class *class; struct class_device_attribute **attrs; int (*match)(struct attribute_container *, struct device *); -- cgit v1.2.3-71-gd317 From 61a7afa2c476a3be261cf88a95b0dea0c3bd29d4 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 16 Aug 2005 18:27:34 -0500 Subject: [SCSI] embryonic RAID class The idea behind a RAID class is to provide a uniform interface to all RAID subsystems (both hardware and software) in the kernel. To do that, I've made this class a transport class that's entirely subsystem independent (although the matching routines have to match per subsystem, as you'll see looking at the code). I put it in the scsi subdirectory purely because I needed somewhere to play with it, but it's not a scsi specific module. 
I used a fusion raid card as the test bed for this; with that kind of card, this is the type of class output you get: jejb@titanic> ls -l /sys/class/raid_devices/20\:0\:0\:0/ total 0 lrwxrwxrwx 1 root root 0 Aug 16 17:21 component-0 -> ../../../devices/pci0000:80/0000:80:04.0/host20/target20:1:0/20:1:0:0/ lrwxrwxrwx 1 root root 0 Aug 16 17:21 component-1 -> ../../../devices/pci0000:80/0000:80:04.0/host20/target20:1:1/20:1:1:0/ lrwxrwxrwx 1 root root 0 Aug 16 17:21 device -> ../../../devices/pci0000:80/0000:80:04.0/host20/target20:0:0/20:0:0:0/ -r--r--r-- 1 root root 16384 Aug 16 17:21 level -r--r--r-- 1 root root 16384 Aug 16 17:21 resync -r--r--r-- 1 root root 16384 Aug 16 17:21 state So it's really simple: for a SCSI device representing a hardware raid, it shows the raid level, the array state, the resync % complete (if the state is resyncing) and the underlying components of the RAID (these are exposed in fusion on the virtual channel 1). As you can see, this type of information can be exported by almost anything, including software raid. The more difficult trick, of course, is going to be getting it to perform configuration type actions with writable attributes. Signed-off-by: James Bottomley --- drivers/scsi/Kconfig | 6 ++ drivers/scsi/Makefile | 2 + drivers/scsi/raid_class.c | 250 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/raid_class.h | 59 +++++++++++ 4 files changed, 317 insertions(+) create mode 100644 drivers/scsi/raid_class.c create mode 100644 include/linux/raid_class.h (limited to 'include/linux') diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 96df148ed969..68adc3cc8ad2 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1,5 +1,11 @@ menu "SCSI device support" +config RAID_ATTRS + tristate "RAID Transport Class" + default n + ---help--- + Provides RAID + config SCSI tristate "SCSI device support" ---help--- diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 3746fb9fa2f5..85f9e6bb34b9 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -22,6 +22,8 @@ subdir-$(CONFIG_PCMCIA) += pcmcia obj-$(CONFIG_SCSI) += scsi_mod.o +obj-$(CONFIG_RAID_ATTRS) += raid_class.o + # --- NOTE ORDERING HERE --- # For kernel non-modular link, transport attributes need to # be initialised before drivers diff --git a/drivers/scsi/raid_class.c b/drivers/scsi/raid_class.c new file mode 100644 index 000000000000..f1ea5027865f --- /dev/null +++ b/drivers/scsi/raid_class.c @@ -0,0 +1,250 @@ +/* + * RAID Attributes + */ +#include +#include +#include +#include +#include +#include + +#define RAID_NUM_ATTRS 3 + +struct raid_internal { + struct raid_template r; + struct raid_function_template *f; + /* The actual attributes */ + struct class_device_attribute private_attrs[RAID_NUM_ATTRS]; + /* The array of null terminated pointers to attributes + * needed by scsi_sysfs.c */ + struct class_device_attribute *attrs[RAID_NUM_ATTRS + 1]; +}; + +struct raid_component { + struct list_head node; + struct device *dev; + int num; +}; + +#define to_raid_internal(tmpl) container_of(tmpl, struct raid_internal, r) + +#define tc_to_raid_internal(tcont) ({ \ + struct raid_template *r = \ + container_of(tcont, struct raid_template, raid_attrs); \ + to_raid_internal(r); \ +}) + +#define ac_to_raid_internal(acont) ({ \ + struct transport_container *tc = \ + container_of(acont, struct transport_container, ac); \ + tc_to_raid_internal(tc); \ +}) + +#define class_device_to_raid_internal(cdev) ({ \ + struct attribute_container *ac = \ + 
attribute_container_classdev_to_container(cdev); \ + ac_to_raid_internal(ac); \ +}) + + +static int raid_match(struct attribute_container *cont, struct device *dev) +{ + /* We have to look for every subsystem that could house + * emulated RAID devices, so start with SCSI */ + struct raid_internal *i = ac_to_raid_internal(cont); + + if (scsi_is_sdev_device(dev)) { + struct scsi_device *sdev = to_scsi_device(dev); + + if (i->f->cookie != sdev->host->hostt) + return 0; + + return i->f->is_raid(dev); + } + /* FIXME: look at other subsystems too */ + return 0; +} + +static int raid_setup(struct transport_container *tc, struct device *dev, + struct class_device *cdev) +{ + struct raid_data *rd; + + BUG_ON(class_get_devdata(cdev)); + + rd = kmalloc(sizeof(*rd), GFP_KERNEL); + if (!rd) + return -ENOMEM; + + memset(rd, 0, sizeof(*rd)); + INIT_LIST_HEAD(&rd->component_list); + class_set_devdata(cdev, rd); + + return 0; +} + +static int raid_remove(struct transport_container *tc, struct device *dev, + struct class_device *cdev) +{ + struct raid_data *rd = class_get_devdata(cdev); + struct raid_component *rc, *next; + class_set_devdata(cdev, NULL); + list_for_each_entry_safe(rc, next, &rd->component_list, node) { + char buf[40]; + snprintf(buf, sizeof(buf), "component-%d", rc->num); + list_del(&rc->node); + sysfs_remove_link(&cdev->kobj, buf); + kfree(rc); + } + kfree(class_get_devdata(cdev)); + return 0; +} + +static DECLARE_TRANSPORT_CLASS(raid_class, + "raid_devices", + raid_setup, + raid_remove, + NULL); + +static struct { + enum raid_state value; + char *name; +} raid_states[] = { + { RAID_ACTIVE, "active" }, + { RAID_DEGRADED, "degraded" }, + { RAID_RESYNCING, "resyncing" }, + { RAID_OFFLINE, "offline" }, +}; + +static const char *raid_state_name(enum raid_state state) +{ + int i; + char *name = NULL; + + for (i = 0; i < sizeof(raid_states)/sizeof(raid_states[0]); i++) { + if (raid_states[i].value == state) { + name = raid_states[i].name; + break; + } + } + return name; +} + + +#define raid_attr_show_internal(attr, fmt, var, code) \ +static ssize_t raid_show_##attr(struct class_device *cdev, char *buf) \ +{ \ + struct raid_data *rd = class_get_devdata(cdev); \ + code \ + return snprintf(buf, 20, #fmt "\n", var); \ +} + +#define raid_attr_ro_states(attr, states, code) \ +raid_attr_show_internal(attr, %s, name, \ + const char *name; \ + code \ + name = raid_##states##_name(rd->attr); \ +) \ +static CLASS_DEVICE_ATTR(attr, S_IRUGO, raid_show_##attr, NULL) + + +#define raid_attr_ro_internal(attr, code) \ +raid_attr_show_internal(attr, %d, rd->attr, code) \ +static CLASS_DEVICE_ATTR(attr, S_IRUGO, raid_show_##attr, NULL) + +#define ATTR_CODE(attr) \ + struct raid_internal *i = class_device_to_raid_internal(cdev); \ + if (i->f->get_##attr) \ + i->f->get_##attr(cdev->dev); + +#define raid_attr_ro(attr) raid_attr_ro_internal(attr, ) +#define raid_attr_ro_fn(attr) raid_attr_ro_internal(attr, ATTR_CODE(attr)) +#define raid_attr_ro_state(attr) raid_attr_ro_states(attr, attr, ATTR_CODE(attr)) + +raid_attr_ro(level); +raid_attr_ro_fn(resync); +raid_attr_ro_state(state); + +void raid_component_add(struct raid_template *r,struct device *raid_dev, + struct device *component_dev) +{ + struct class_device *cdev = + attribute_container_find_class_device(&r->raid_attrs.ac, + raid_dev); + struct raid_component *rc; + struct raid_data *rd = class_get_devdata(cdev); + char buf[40]; + + rc = kmalloc(sizeof(*rc), GFP_KERNEL); + if (!rc) + return; + + INIT_LIST_HEAD(&rc->node); + rc->dev = component_dev; + rc->num = 
rd->component_count++; + + snprintf(buf, sizeof(buf), "component-%d", rc->num); + list_add_tail(&rc->node, &rd->component_list); + sysfs_create_link(&cdev->kobj, &component_dev->kobj, buf); +} +EXPORT_SYMBOL(raid_component_add); + +struct raid_template * +raid_class_attach(struct raid_function_template *ft) +{ + struct raid_internal *i = kmalloc(sizeof(struct raid_internal), + GFP_KERNEL); + int count = 0; + + if (unlikely(!i)) + return NULL; + + memset(i, 0, sizeof(*i)); + + i->f = ft; + + i->r.raid_attrs.ac.class = &raid_class.class; + i->r.raid_attrs.ac.match = raid_match; + i->r.raid_attrs.ac.attrs = &i->attrs[0]; + + attribute_container_register(&i->r.raid_attrs.ac); + + i->attrs[count++] = &class_device_attr_level; + i->attrs[count++] = &class_device_attr_resync; + i->attrs[count++] = &class_device_attr_state; + + i->attrs[count] = NULL; + BUG_ON(count > RAID_NUM_ATTRS); + + return &i->r; +} +EXPORT_SYMBOL(raid_class_attach); + +void +raid_class_release(struct raid_template *r) +{ + struct raid_internal *i = to_raid_internal(r); + + attribute_container_unregister(&i->r.raid_attrs.ac); + + kfree(i); +} +EXPORT_SYMBOL(raid_class_release); + +static __init int raid_init(void) +{ + return transport_class_register(&raid_class); +} + +static __exit void raid_exit(void) +{ + transport_class_unregister(&raid_class); +} + +MODULE_AUTHOR("James Bottomley"); +MODULE_DESCRIPTION("RAID device class"); +MODULE_LICENSE("GPL"); + +module_init(raid_init); +module_exit(raid_exit); + diff --git a/include/linux/raid_class.h b/include/linux/raid_class.h new file mode 100644 index 000000000000..a71123c28272 --- /dev/null +++ b/include/linux/raid_class.h @@ -0,0 +1,59 @@ +/* + */ +#include + +struct raid_template { + struct transport_container raid_attrs; +}; + +struct raid_function_template { + void *cookie; + int (*is_raid)(struct device *); + void (*get_resync)(struct device *); + void (*get_state)(struct device *); +}; + +enum raid_state { + RAID_ACTIVE = 1, + RAID_DEGRADED, + RAID_RESYNCING, + RAID_OFFLINE, +}; + +struct raid_data { + struct list_head component_list; + int component_count; + int level; + enum raid_state state; + int resync; +}; + +#define DEFINE_RAID_ATTRIBUTE(type, attr) \ +static inline void \ +raid_set_##attr(struct raid_template *r, struct device *dev, type value) { \ + struct class_device *cdev = \ + attribute_container_find_class_device(&r->raid_attrs.ac, dev);\ + struct raid_data *rd; \ + BUG_ON(!cdev); \ + rd = class_get_devdata(cdev); \ + rd->attr = value; \ +} \ +static inline type \ +raid_get_##attr(struct raid_template *r, struct device *dev) { \ + struct class_device *cdev = \ + attribute_container_find_class_device(&r->raid_attrs.ac, dev);\ + struct raid_data *rd; \ + BUG_ON(!cdev); \ + rd = class_get_devdata(cdev); \ + return rd->attr; \ +} + +DEFINE_RAID_ATTRIBUTE(int, level) +DEFINE_RAID_ATTRIBUTE(int, resync) +DEFINE_RAID_ATTRIBUTE(enum raid_state, state) + +struct raid_template *raid_class_attach(struct raid_function_template *); +void raid_class_release(struct raid_template *); + +void raid_component_add(struct raid_template *, struct device *, + struct device *); -- cgit v1.2.3-71-gd317
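
For context, a minimal caller sketch of the reworked interfaces from the blk_rq_map_kern()/blk_rq_map_user() cleanup and the blk_execute_rq() at_head change above (2.6.13-era API). The helper name issue_inquiry(), the INQUIRY command bytes, the buffer and the timeout are illustrative assumptions; only blk_get_request(), blk_rq_map_kern(), blk_execute_rq() and blk_put_request() come from the patches themselves.

#include <linux/blkdev.h>

/* hypothetical helper: send an INQUIRY-style command with a kernel buffer */
static int issue_inquiry(request_queue_t *q, struct gendisk *disk,
                         void *buf, unsigned int len)
{
        struct request *rq;
        int err;

        /* the caller now allocates the request up front */
        rq = blk_get_request(q, READ, __GFP_WAIT);
        if (!rq)
                return -ENOMEM;

        /* map the kernel buffer into the request (builds the bio) */
        err = blk_rq_map_kern(q, rq, buf, len, __GFP_WAIT);
        if (err)
                goto out;

        memset(rq->cmd, 0, sizeof(rq->cmd));
        rq->cmd[0] = 0x12;              /* INQUIRY, assumed example command */
        rq->cmd[4] = len;               /* allocation length (assumed to fit in a byte) */
        rq->cmd_len = 6;
        rq->flags |= REQ_BLOCK_PC;
        rq->timeout = 60 * HZ;

        /* new signature: last argument selects head (1) or tail (0) insertion */
        err = blk_execute_rq(q, disk, rq, 0);
out:
        blk_put_request(rq);
        return err;
}

Note that the data direction now comes from the request itself (rq_data_dir()), which is why blk_rq_map_kern() and blk_rq_map_user() no longer take an rw argument.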
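
And a hypothetical driver-side sketch of the embryonic RAID class added in the final patch above. The host template, the my_* callbacks and the scsi_device pointers are illustrative assumptions; only struct raid_function_template, raid_class_attach(), raid_class_release(), raid_component_add() and the raid_set_*() helpers come from the patch.

#include <linux/raid_class.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>

static struct scsi_host_template my_host_template;     /* assumed LLDD template */
static struct raid_template *my_raid;

static int my_is_raid(struct device *dev)
{
        /* non-zero if this scsi_device represents a hardware RAID volume */
        return 1;
}

static void my_get_resync(struct device *dev)
{
        /* query the controller, then publish the value through the class */
        raid_set_resync(my_raid, dev, 42);
}

static void my_get_state(struct device *dev)
{
        raid_set_state(my_raid, dev, RAID_ACTIVE);
}

static struct raid_function_template my_raid_fns = {
        .cookie     = &my_host_template,   /* matched against sdev->host->hostt */
        .is_raid    = my_is_raid,
        .get_resync = my_get_resync,
        .get_state  = my_get_state,
};

/* during volume discovery, record the level and link in a member disk */
static void my_report_volume(struct scsi_device *vol, struct scsi_device *disk)
{
        raid_set_level(my_raid, &vol->sdev_gendev, 5);
        raid_component_add(my_raid, &vol->sdev_gendev, &disk->sdev_gendev);
}

static int __init my_init(void)
{
        my_raid = raid_class_attach(&my_raid_fns);
        return my_raid ? 0 : -ENOMEM;
}

static void __exit my_exit(void)
{
        raid_class_release(my_raid);
}

The class itself then creates the level/resync/state attributes and the component-N links under /sys/class/raid_devices, as shown in the commit message's example listing.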