From 492dfb489658dfe4a755fa29dd0e34e9c8bd8fb8 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Wed, 30 Aug 2006 15:48:45 -0400
Subject: [SCSI] block: add support for shared tag maps

The current block queue implementation already contains most of the
machinery for shared tag maps.  The only remaining pieces are a way to
allocate and destroy a tag map independently of the queues (so that
the maps can be managed on the life cycle of the overseeing entity)

Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 include/linux/blkdev.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/blkdev.h')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index aafe82788b4e..427b0d61be6c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -746,6 +746,8 @@ extern void blk_queue_free_tags(request_queue_t *);
 extern int blk_queue_resize_tags(request_queue_t *, int);
 extern void blk_queue_invalidate_tags(request_queue_t *);
 extern long blk_congestion_wait(int rw, long timeout);
+extern struct blk_queue_tag *blk_init_tags(int);
+extern void blk_free_tags(struct blk_queue_tag *);
 
 extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *);
 extern int blkdev_issue_flush(struct block_device *, sector_t *);
-- 
cgit v1.2.3-71-gd317


From 275a082fe9308e710324e26ccb5363c53d8fd45f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Aug 2006 20:06:24 -0400
Subject: Add a real API for dealing with blk_congestion_wait()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 block/ll_rw_blk.c         | 12 ++++++++++++
 fs/nfs/write.c            |  1 +
 include/linux/blkdev.h    |  1 +
 include/linux/writeback.h |  1 +
 mm/page-writeback.c       |  9 +++++++++
 5 files changed, 24 insertions(+)

(limited to 'include/linux/blkdev.h')

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index ddd9253f9d55..dcbd6ff1fa33 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -2734,6 +2734,18 @@ long blk_congestion_wait(int rw, long timeout)
 
 EXPORT_SYMBOL(blk_congestion_wait);
 
+/**
+ * blk_congestion_end - wake up sleepers on a congestion queue
+ * @rw: READ or WRITE
+ */
+void blk_congestion_end(int rw)
+{
+	wait_queue_head_t *wqh = &congestion_wqh[rw];
+
+	if (waitqueue_active(wqh))
+		wake_up(wqh);
+}
+
 /*
  * Has to be called with the request spinlock acquired
  */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 453d44666ea5..38ba5c09af08 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -396,6 +396,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 out:
 	clear_bit(BDI_write_congested, &bdi->state);
 	wake_up_all(&nfs_write_congestion);
+	writeback_congestion_end();
 	return err;
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index aafe82788b4e..96c9040c00a8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -746,6 +746,7 @@ extern void blk_queue_free_tags(request_queue_t *);
 extern int blk_queue_resize_tags(request_queue_t *, int);
 extern void blk_queue_invalidate_tags(request_queue_t *);
 extern long blk_congestion_wait(int rw, long timeout);
+extern void blk_congestion_end(int rw);
 
 extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *);
 extern int blkdev_issue_flush(struct block_device *, sector_t *);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 9e38b566d0e7..0422036af4eb 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -85,6 +85,7 @@ int wakeup_pdflush(long nr_pages);
 void laptop_io_completion(void);
 void laptop_sync_completion(void);
 void throttle_vm_writeout(void);
+void writeback_congestion_end(void);
 
 /* These are exported to sysctl. */
 extern int dirty_background_ratio;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e630188ccc40..77a0bc4e261a 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -802,6 +802,15 @@ int test_set_page_writeback(struct page *page)
 }
 EXPORT_SYMBOL(test_set_page_writeback);
 
+/*
+ * Wakes up tasks that are being throttled due to writeback congestion
+ */
+void writeback_congestion_end(void)
+{
+	blk_congestion_end(WRITE);
+}
+EXPORT_SYMBOL(writeback_congestion_end);
+
 /*
  * Return true if any of the pages in the mapping are marged with the
  * passed tag.
-- 
cgit v1.2.3-71-gd317


From 6c5c934153513dc72e2d6464f39e8ef1f27c0a3e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 29 Sep 2006 01:59:40 -0700
Subject: [PATCH] ifdef blktrace debugging fields

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 block/blktrace.c       | 6 ++++--
 block/ll_rw_blk.c      | 3 +--
 include/linux/blkdev.h | 4 ++--
 include/linux/sched.h  | 3 ++-
 kernel/fork.c          | 2 ++
 5 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/blktrace.c b/block/blktrace.c
index 2b4ef2b89b8d..8ff33441d8a2 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -450,8 +450,10 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
  **/
 void blk_trace_shutdown(request_queue_t *q)
 {
-	blk_trace_startstop(q, 0);
-	blk_trace_remove(q);
+	if (q->blk_trace) {
+		blk_trace_startstop(q, 0);
+		blk_trace_remove(q);
+	}
 }
 
 /*
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 9c3a06bcb7ba..51dc0edf76e0 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -1847,8 +1847,7 @@ static void blk_release_queue(struct kobject *kobj)
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
 
-	if (q->blk_trace)
-		blk_trace_shutdown(q);
+	blk_trace_shutdown(q);
 
 	kmem_cache_free(requestq_cachep, q);
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c773ee545ebd..cfde8b3ee919 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -417,9 +417,9 @@ struct request_queue
 	unsigned int		sg_timeout;
 	unsigned int		sg_reserved_size;
 	int			node;
-
+#ifdef CONFIG_BLK_DEV_IO_TRACE
 	struct blk_trace	*blk_trace;
-
+#endif
 	/*
 	 * reserved for flush operations
 	 */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 27122575d902..3696f2f7126d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -784,8 +784,9 @@ struct task_struct {
 	struct prio_array *array;
 
 	unsigned short ioprio;
+#ifdef CONFIG_BLK_DEV_IO_TRACE
 	unsigned int btrace_seq;
-
+#endif
 	unsigned long sleep_avg;
 	unsigned long long timestamp, last_ran;
 	unsigned long long sched_time; /* sched_clock time spent running */
diff --git a/kernel/fork.c b/kernel/fork.c
index 802b1cf0e63f..bca6ce6d3ded 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -183,7 +183,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	/* One for us, one for whoever does the "release_task()" (usually parent) */
 	atomic_set(&tsk->usage,2);
 	atomic_set(&tsk->fs_excl, 0);
+#ifdef CONFIG_BLK_DEV_IO_TRACE
 	tsk->btrace_seq = 0;
+#endif
 	tsk->splice_pipe = NULL;
 	return tsk;
 }
-- 
cgit v1.2.3-71-gd317


From 4aff5e2333c9a1609662f2091f55c3f6fffdad36 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 10 Aug 2006 08:44:47 +0200
Subject: [PATCH] Split struct request ->flags into two parts

Right now ->flags is a bit of a mess: some are request types, and
others are just modifiers. Clean this up by splitting it into
->cmd_type and ->cmd_flags. This allows introduction of generic
Linux block message types, useful for sending generic Linux commands
to block devices.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/as-iosched.c              |   2 +-
 block/elevator.c                |  26 +++---
 block/ll_rw_blk.c               | 101 ++++++++--------------
 block/scsi_ioctl.c              |   6 +-
 drivers/block/floppy.c          |   4 +-
 drivers/block/nbd.c             |   8 +-
 drivers/block/paride/pd.c       |   2 +-
 drivers/block/pktcdvd.c         |   6 +-
 drivers/block/xd.c              |   2 +-
 drivers/cdrom/cdrom.c           |   2 +-
 drivers/cdrom/cdu31a.c          |   4 +-
 drivers/ide/ide-cd.c            |  69 +++++++--------
 drivers/ide/ide-disk.c          |   5 +-
 drivers/ide/ide-dma.c           |   2 +-
 drivers/ide/ide-floppy.c        |  14 ++--
 drivers/ide/ide-io.c            |  36 ++++----
 drivers/ide/ide-lib.c           |   5 +-
 drivers/ide/ide-tape.c          |   8 +-
 drivers/ide/ide-taskfile.c      |   8 +-
 drivers/ide/ide.c               |   4 +-
 drivers/ide/legacy/hd.c         |   2 +-
 drivers/md/dm-emc.c             |   3 +-
 drivers/message/i2o/i2o_block.c |   7 +-
 drivers/mmc/mmc_queue.c         |   6 +-
 drivers/mtd/mtd_blkdevs.c       |   2 +-
 drivers/s390/block/dasd_diag.c  |   2 +-
 drivers/s390/block/dasd_eckd.c  |   2 +-
 drivers/s390/block/dasd_fba.c   |   2 +-
 drivers/scsi/aic7xxx_old.c      |   4 +-
 drivers/scsi/ide-scsi.c         |  14 ++--
 drivers/scsi/pluto.c            |   6 +-
 drivers/scsi/scsi_lib.c         |  37 +++++----
 drivers/scsi/sd.c               |   5 +-
 drivers/scsi/sun3_NCR5380.c     |   2 +-
 drivers/scsi/sun3_scsi.c        |   2 +-
 drivers/scsi/sun3_scsi_vme.c    |   2 +-
 include/linux/blkdev.h          | 180 ++++++++++++++++++++++------------------
 include/linux/blktrace_api.h    |   2 +-
 include/scsi/scsi_tcq.h         |   2 +-
 39 files changed, 295 insertions(+), 301 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index 5da56d48fbd3..ad1cc4077819 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1335,7 +1335,7 @@ static void as_add_request(request_queue_t *q, struct request *rq)
 	arq->state = AS_RQ_NEW;
 
 	if (rq_data_dir(arq->request) == READ
-			|| (arq->request->flags & REQ_RW_SYNC))
+			|| (arq->request->cmd_flags & REQ_RW_SYNC))
 		arq->is_sync = 1;
 	else
 		arq->is_sync = 0;
diff --git a/block/elevator.c b/block/elevator.c
index 9b72dc7c8a5c..4ac97b642042 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -242,7 +242,7 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 	list_for_each_prev(entry, &q->queue_head) {
 		struct request *pos = list_entry_rq(entry);
 
-		if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
+		if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
 			break;
 		if (rq->sector >= boundary) {
 			if (pos->sector < boundary)
@@ -313,7 +313,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
 			e->ops->elevator_deactivate_req_fn(q, rq);
 	}
 
-	rq->flags &= ~REQ_STARTED;
+	rq->cmd_flags &= ~REQ_STARTED;
 
 	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
 }
@@ -344,13 +344,13 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 
 	switch (where) {
 	case ELEVATOR_INSERT_FRONT:
-		rq->flags |= REQ_SOFTBARRIER;
+		rq->cmd_flags |= REQ_SOFTBARRIER;
 
 		list_add(&rq->queuelist, &q->queue_head);
 		break;
 
 	case ELEVATOR_INSERT_BACK:
-		rq->flags |= REQ_SOFTBARRIER;
+		rq->cmd_flags |= REQ_SOFTBARRIER;
 		elv_drain_elevator(q);
 		list_add_tail(&rq->queuelist, &q->queue_head);
 		/*
@@ -369,7 +369,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 
 	case ELEVATOR_INSERT_SORT:
 		BUG_ON(!blk_fs_request(rq));
-		rq->flags |= REQ_SORTED;
+		rq->cmd_flags |= REQ_SORTED;
 		q->nr_sorted++;
 		if (q->last_merge == NULL && rq_mergeable(rq))
 			q->last_merge = rq;
@@ -387,7 +387,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 		 * insertion; otherwise, requests should be requeued
 		 * in ordseq order.
 		 */
-		rq->flags |= REQ_SOFTBARRIER;
+		rq->cmd_flags |= REQ_SOFTBARRIER;
 
 		if (q->ordseq == 0) {
 			list_add(&rq->queuelist, &q->queue_head);
@@ -429,9 +429,9 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 		       int plug)
 {
 	if (q->ordcolor)
-		rq->flags |= REQ_ORDERED_COLOR;
+		rq->cmd_flags |= REQ_ORDERED_COLOR;
 
-	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
+	if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
 		/*
 		 * toggle ordered color
 		 */
@@ -452,7 +452,7 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 			q->end_sector = rq_end_sector(rq);
 			q->boundary_rq = rq;
 		}
-	} else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
+	} else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
 		where = ELEVATOR_INSERT_BACK;
 
 	if (plug)
@@ -493,7 +493,7 @@ struct request *elv_next_request(request_queue_t *q)
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
-		if (!(rq->flags & REQ_STARTED)) {
+		if (!(rq->cmd_flags & REQ_STARTED)) {
 			elevator_t *e = q->elevator;
 
 			/*
@@ -510,7 +510,7 @@ struct request *elv_next_request(request_queue_t *q)
 			 * it, a request that has been delayed should
 			 * not be passed by new incoming requests
 			 */
-			rq->flags |= REQ_STARTED;
+			rq->cmd_flags |= REQ_STARTED;
 			blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
 		}
 
@@ -519,7 +519,7 @@ struct request *elv_next_request(request_queue_t *q)
 			q->boundary_rq = NULL;
 		}
 
-		if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
+		if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
 			break;
 
 		ret = q->prep_rq_fn(q, rq);
@@ -541,7 +541,7 @@ struct request *elv_next_request(request_queue_t *q)
 				nr_bytes = rq->data_len;
 
 			blkdev_dequeue_request(rq);
-			rq->flags |= REQ_QUIET;
+			rq->cmd_flags |= REQ_QUIET;
 			end_that_request_chunk(rq, 0, nr_bytes);
 			end_that_request_last(rq, 0);
 		} else {
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 51dc0edf76e0..9b91bb70c5ed 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -382,8 +382,8 @@ unsigned blk_ordered_req_seq(struct request *rq)
 	if (rq == &q->post_flush_rq)
 		return QUEUE_ORDSEQ_POSTFLUSH;
 
-	if ((rq->flags & REQ_ORDERED_COLOR) ==
-	    (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))
+	if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
+	    (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
 		return QUEUE_ORDSEQ_DRAIN;
 	else
 		return QUEUE_ORDSEQ_DONE;
@@ -446,8 +446,8 @@ static void queue_flush(request_queue_t *q, unsigned which)
 		end_io = post_flush_end_io;
 	}
 
+	rq->cmd_flags = REQ_HARDBARRIER;
 	rq_init(q, rq);
-	rq->flags = REQ_HARDBARRIER;
 	rq->elevator_private = NULL;
 	rq->rq_disk = q->bar_rq.rq_disk;
 	rq->rl = NULL;
@@ -471,9 +471,11 @@ static inline struct request *start_ordered(request_queue_t *q,
 	blkdev_dequeue_request(rq);
 	q->orig_bar_rq = rq;
 	rq = &q->bar_rq;
+	rq->cmd_flags = 0;
 	rq_init(q, rq);
-	rq->flags = bio_data_dir(q->orig_bar_rq->bio);
-	rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
+	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
+		rq->cmd_flags |= REQ_RW;
+	rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
 	rq->elevator_private = NULL;
 	rq->rl = NULL;
 	init_request_from_bio(rq, q->orig_bar_rq->bio);
@@ -1124,7 +1126,7 @@ void blk_queue_end_tag(request_queue_t *q, struct request *rq)
 	}
 
 	list_del_init(&rq->queuelist);
-	rq->flags &= ~REQ_QUEUED;
+	rq->cmd_flags &= ~REQ_QUEUED;
 	rq->tag = -1;
 
 	if (unlikely(bqt->tag_index[tag] == NULL))
@@ -1160,7 +1162,7 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq)
 	struct blk_queue_tag *bqt = q->queue_tags;
 	int tag;
 
-	if (unlikely((rq->flags & REQ_QUEUED))) {
+	if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
 		printk(KERN_ERR 
 		       "%s: request %p for device [%s] already tagged %d",
 		       __FUNCTION__, rq,
@@ -1174,7 +1176,7 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq)
 
 	__set_bit(tag, bqt->tag_map);
 
-	rq->flags |= REQ_QUEUED;
+	rq->cmd_flags |= REQ_QUEUED;
 	rq->tag = tag;
 	bqt->tag_index[tag] = rq;
 	blkdev_dequeue_request(rq);
@@ -1210,65 +1212,31 @@ void blk_queue_invalidate_tags(request_queue_t *q)
 			printk(KERN_ERR
 			       "%s: bad tag found on list\n", __FUNCTION__);
 			list_del_init(&rq->queuelist);
-			rq->flags &= ~REQ_QUEUED;
+			rq->cmd_flags &= ~REQ_QUEUED;
 		} else
 			blk_queue_end_tag(q, rq);
 
-		rq->flags &= ~REQ_STARTED;
+		rq->cmd_flags &= ~REQ_STARTED;
 		__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
 	}
 }
 
 EXPORT_SYMBOL(blk_queue_invalidate_tags);
 
-static const char * const rq_flags[] = {
-	"REQ_RW",
-	"REQ_FAILFAST",
-	"REQ_SORTED",
-	"REQ_SOFTBARRIER",
-	"REQ_HARDBARRIER",
-	"REQ_FUA",
-	"REQ_CMD",
-	"REQ_NOMERGE",
-	"REQ_STARTED",
-	"REQ_DONTPREP",
-	"REQ_QUEUED",
-	"REQ_ELVPRIV",
-	"REQ_PC",
-	"REQ_BLOCK_PC",
-	"REQ_SENSE",
-	"REQ_FAILED",
-	"REQ_QUIET",
-	"REQ_SPECIAL",
-	"REQ_DRIVE_CMD",
-	"REQ_DRIVE_TASK",
-	"REQ_DRIVE_TASKFILE",
-	"REQ_PREEMPT",
-	"REQ_PM_SUSPEND",
-	"REQ_PM_RESUME",
-	"REQ_PM_SHUTDOWN",
-	"REQ_ORDERED_COLOR",
-};
-
 void blk_dump_rq_flags(struct request *rq, char *msg)
 {
 	int bit;
 
-	printk("%s: dev %s: flags = ", msg,
-		rq->rq_disk ? rq->rq_disk->disk_name : "?");
-	bit = 0;
-	do {
-		if (rq->flags & (1 << bit))
-			printk("%s ", rq_flags[bit]);
-		bit++;
-	} while (bit < __REQ_NR_BITS);
+	printk("%s: dev %s: type=%x, flags=%x\n", msg,
+		rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
+		rq->cmd_flags);
 
 	printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
 						       rq->nr_sectors,
 						       rq->current_nr_sectors);
 	printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
 
-	if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) {
+	if (blk_pc_request(rq)) {
 		printk("cdb: ");
 		for (bit = 0; bit < sizeof(rq->cmd); bit++)
 			printk("%02x ", rq->cmd[bit]);
@@ -1441,7 +1409,7 @@ static inline int ll_new_mergeable(request_queue_t *q,
 	int nr_phys_segs = bio_phys_segments(q, bio);
 
 	if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -1464,7 +1432,7 @@ static inline int ll_new_hw_segment(request_queue_t *q,
 
 	if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
 	    || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -1491,7 +1459,7 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req,
 		max_sectors = q->max_sectors;
 
 	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -1530,7 +1498,7 @@ static int ll_front_merge_fn(request_queue_t *q, struct request *req,
 
 
 	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -2029,7 +1997,7 @@ EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(request_queue_t *q, struct request *rq)
 {
-	if (rq->flags & REQ_ELVPRIV)
+	if (rq->cmd_flags & REQ_ELVPRIV)
 		elv_put_request(q, rq);
 	mempool_free(rq, q->rq.rq_pool);
 }
@@ -2044,17 +2012,17 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
 		return NULL;
 
 	/*
-	 * first three bits are identical in rq->flags and bio->bi_rw,
+	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
 	 * see bio.h and blkdev.h
 	 */
-	rq->flags = rw;
+	rq->cmd_flags = rw;
 
 	if (priv) {
 		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
 			mempool_free(rq, q->rq.rq_pool);
 			return NULL;
 		}
-		rq->flags |= REQ_ELVPRIV;
+		rq->cmd_flags |= REQ_ELVPRIV;
 	}
 
 	return rq;
@@ -2351,7 +2319,8 @@ void blk_insert_request(request_queue_t *q, struct request *rq,
 	 * must not attempt merges on this) and that it acts as a soft
 	 * barrier
 	 */
-	rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
+	rq->cmd_flags |= REQ_SOFTBARRIER;
 
 	rq->special = data;
 
@@ -2558,7 +2527,7 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
 	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
 
 	rq->rq_disk = bd_disk;
-	rq->flags |= REQ_NOMERGE;
+	rq->cmd_flags |= REQ_NOMERGE;
 	rq->end_io = done;
 	WARN_ON(irqs_disabled());
 	spin_lock_irq(q->queue_lock);
@@ -2728,7 +2697,7 @@ void __blk_put_request(request_queue_t *q, struct request *req)
 	 */
 	if (rl) {
 		int rw = rq_data_dir(req);
-		int priv = req->flags & REQ_ELVPRIV;
+		int priv = req->cmd_flags & REQ_ELVPRIV;
 
 		BUG_ON(!list_empty(&req->queuelist));
 
@@ -2890,22 +2859,22 @@ static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
 
 static void init_request_from_bio(struct request *req, struct bio *bio)
 {
-	req->flags |= REQ_CMD;
+	req->cmd_type = REQ_TYPE_FS;
 
 	/*
 	 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
 	 */
 	if (bio_rw_ahead(bio) || bio_failfast(bio))
-		req->flags |= REQ_FAILFAST;
+		req->cmd_flags |= REQ_FAILFAST;
 
 	/*
 	 * REQ_BARRIER implies no merging, but lets make it explicit
 	 */
 	if (unlikely(bio_barrier(bio)))
-		req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
+		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 
 	if (bio_sync(bio))
-		req->flags |= REQ_RW_SYNC;
+		req->cmd_flags |= REQ_RW_SYNC;
 
 	req->errors = 0;
 	req->hard_sector = req->sector = bio->bi_sector;
@@ -3306,7 +3275,7 @@ static int __end_that_request_first(struct request *req, int uptodate,
 		req->errors = 0;
 
 	if (!uptodate) {
-		if (blk_fs_request(req) && !(req->flags & REQ_QUIET))
+		if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))
 			printk("end_request: I/O error, dev %s, sector %llu\n",
 				req->rq_disk ? req->rq_disk->disk_name : "?",
 				(unsigned long long)req->sector);
@@ -3569,8 +3538,8 @@ EXPORT_SYMBOL(end_request);
 
 void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
 {
-	/* first two bits are identical in rq->flags and bio->bi_rw */
-	rq->flags |= (bio->bi_rw & 3);
+	/* first two bits are identical in rq->cmd_flags and bio->bi_rw */
+	rq->cmd_flags |= (bio->bi_rw & 3);
 
 	rq->nr_phys_segments = bio_phys_segments(q, bio);
 	rq->nr_hw_segments = bio_hw_segments(q, bio);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index b33eda26e205..2dc326421a24 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -294,7 +294,7 @@ static int sg_io(struct file *file, request_queue_t *q,
 	rq->sense = sense;
 	rq->sense_len = 0;
 
-	rq->flags |= REQ_BLOCK_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	bio = rq->bio;
 
 	/*
@@ -470,7 +470,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q,
 	memset(sense, 0, sizeof(sense));
 	rq->sense = sense;
 	rq->sense_len = 0;
-	rq->flags |= REQ_BLOCK_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	blk_execute_rq(q, disk, rq, 0);
 
@@ -502,7 +502,7 @@ static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int c
 	int err;
 
 	rq = blk_get_request(q, WRITE, __GFP_WAIT);
-	rq->flags |= REQ_BLOCK_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->data = NULL;
 	rq->data_len = 0;
 	rq->timeout = BLK_DEFAULT_TIMEOUT;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index ad1d7065a1b2..629c5769d994 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2991,8 +2991,8 @@ static void do_fd_request(request_queue_t * q)
 	if (usage_count == 0) {
 		printk("warning: usage count=0, current_req=%p exiting\n",
 		       current_req);
-		printk("sect=%ld flags=%lx\n", (long)current_req->sector,
-		       current_req->flags);
+		printk("sect=%ld type=%x flags=%x\n", (long)current_req->sector,
+		       current_req->cmd_type, current_req->cmd_flags);
 		return;
 	}
 	if (test_bit(0, &fdc_busy)) {
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index bdbade9a5cf5..9d1035e8d9d8 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -407,10 +407,10 @@ static void do_nbd_request(request_queue_t * q)
 		struct nbd_device *lo;
 
 		blkdev_dequeue_request(req);
-		dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%lx)\n",
-				req->rq_disk->disk_name, req, req->flags);
+		dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
+				req->rq_disk->disk_name, req, req->cmd_type);
 
-		if (!(req->flags & REQ_CMD))
+		if (!blk_fs_request(req))
 			goto error_out;
 
 		lo = req->rq_disk->private_data;
@@ -489,7 +489,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
 	switch (cmd) {
 	case NBD_DISCONNECT:
 	        printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
-		sreq.flags = REQ_SPECIAL;
+		sreq.cmd_type = REQ_TYPE_SPECIAL;
 		nbd_cmd(&sreq) = NBD_CMD_DISC;
 		/*
 		 * Set these to sane values in case server implementation
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 2403721f9db1..12ff1a274d91 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -437,7 +437,7 @@ static char *pd_buf;		/* buffer for request in progress */
 
 static enum action do_pd_io_start(void)
 {
-	if (pd_req->flags & REQ_SPECIAL) {
+	if (blk_special_request(pd_req)) {
 		phase = pd_special;
 		return pd_special();
 	}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 451b996bba91..42891d2b054e 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -365,16 +365,16 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 	rq->sense = sense;
 	memset(sense, 0, sizeof(sense));
 	rq->sense_len = 0;
-	rq->flags |= REQ_BLOCK_PC | REQ_HARDBARRIER;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	rq->cmd_flags |= REQ_HARDBARRIER;
 	if (cgc->quiet)
-		rq->flags |= REQ_QUIET;
+		rq->cmd_flags |= REQ_QUIET;
 	memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
 	if (sizeof(rq->cmd) > CDROM_PACKET_SIZE)
 		memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE);
 	rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
 
 	rq->ref_count++;
-	rq->flags |= REQ_NOMERGE;
 	rq->waiting = &wait;
 	rq->end_io = blk_end_sync_rq;
 	elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index e828e4cbd3e1..ebf3025721d1 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -313,7 +313,7 @@ static void do_xd_request (request_queue_t * q)
 		int res = 0;
 		int retry;
 
-		if (!(req->flags & REQ_CMD)) {
+		if (!blk_fs_request(req)) {
 			end_request(req, 0);
 			continue;
 		}
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index d239cf8b20bd..b38c84a7a8e3 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2129,7 +2129,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 		rq->cmd[9] = 0xf8;
 
 		rq->cmd_len = 12;
-		rq->flags |= REQ_BLOCK_PC;
+		rq->cmd_type = REQ_TYPE_BLOCK_PC;
 		rq->timeout = 60 * HZ;
 		bio = rq->bio;
 
diff --git a/drivers/cdrom/cdu31a.c b/drivers/cdrom/cdu31a.c
index 37bdb0163f0d..ccd91c1a84bd 100644
--- a/drivers/cdrom/cdu31a.c
+++ b/drivers/cdrom/cdu31a.c
@@ -1338,8 +1338,10 @@ static void do_cdu31a_request(request_queue_t * q)
 		}
 
 		/* WTF??? */
-		if (!(req->flags & REQ_CMD))
+		if (!blk_fs_request(req)) {
+			end_request(req, 0);
 			continue;
+		}
 		if (rq_data_dir(req) == WRITE) {
 			end_request(req, 0);
 			continue;
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 654d4cd09847..69bbb6206a00 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -372,7 +372,7 @@ static int cdrom_log_sense(ide_drive_t *drive, struct request *rq,
 {
 	int log = 0;
 
-	if (!sense || !rq || (rq->flags & REQ_QUIET))
+	if (!sense || !rq || (rq->cmd_flags & REQ_QUIET))
 		return 0;
 
 	switch (sense->sense_key) {
@@ -597,7 +597,7 @@ static void cdrom_prepare_request(ide_drive_t *drive, struct request *rq)
 	struct cdrom_info *cd = drive->driver_data;
 
 	ide_init_drive_cmd(rq);
-	rq->flags = REQ_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->rq_disk = cd->disk;
 }
 
@@ -617,7 +617,7 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
 	rq->cmd[0] = GPCMD_REQUEST_SENSE;
 	rq->cmd[4] = rq->data_len = 18;
 
-	rq->flags = REQ_SENSE;
+	rq->cmd_type = REQ_TYPE_SENSE;
 
 	/* NOTE! Save the failed command in "rq->buffer" */
 	rq->buffer = (void *) failed_command;
@@ -630,10 +630,10 @@ static void cdrom_end_request (ide_drive_t *drive, int uptodate)
 	struct request *rq = HWGROUP(drive)->rq;
 	int nsectors = rq->hard_cur_sectors;
 
-	if ((rq->flags & REQ_SENSE) && uptodate) {
+	if (blk_sense_request(rq) && uptodate) {
 		/*
-		 * For REQ_SENSE, "rq->buffer" points to the original failed
-		 * request
+		 * For REQ_TYPE_SENSE, "rq->buffer" points to the original
+		 * failed request
 		 */
 		struct request *failed = (struct request *) rq->buffer;
 		struct cdrom_info *info = drive->driver_data;
@@ -706,17 +706,17 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
 		return 1;
 	}
 
-	if (rq->flags & REQ_SENSE) {
+	if (blk_sense_request(rq)) {
 		/* We got an error trying to get sense info
 		   from the drive (probably while trying
 		   to recover from a former error).  Just give up. */
 
-		rq->flags |= REQ_FAILED;
+		rq->cmd_flags |= REQ_FAILED;
 		cdrom_end_request(drive, 0);
 		ide_error(drive, "request sense failure", stat);
 		return 1;
 
-	} else if (rq->flags & (REQ_PC | REQ_BLOCK_PC)) {
+	} else if (blk_pc_request(rq)) {
 		/* All other functions, except for READ. */
 		unsigned long flags;
 
@@ -724,7 +724,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
 		 * if we have an error, pass back CHECK_CONDITION as the
 		 * scsi status byte
 		 */
-		if ((rq->flags & REQ_BLOCK_PC) && !rq->errors)
+		if (!rq->errors)
 			rq->errors = SAM_STAT_CHECK_CONDITION;
 
 		/* Check for tray open. */
@@ -735,12 +735,12 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
 			cdrom_saw_media_change (drive);
 			/*printk("%s: media changed\n",drive->name);*/
 			return 0;
-		} else if (!(rq->flags & REQ_QUIET)) {
+		} else if (!(rq->cmd_flags & REQ_QUIET)) {
 			/* Otherwise, print an error. */
 			ide_dump_status(drive, "packet command error", stat);
 		}
 		
-		rq->flags |= REQ_FAILED;
+		rq->cmd_flags |= REQ_FAILED;
 
 		/*
 		 * instead of playing games with moving completions around,
@@ -881,7 +881,7 @@ static int cdrom_timer_expiry(ide_drive_t *drive)
 			wait = ATAPI_WAIT_PC;
 			break;
 		default:
-			if (!(rq->flags & REQ_QUIET))
+			if (!(rq->cmd_flags & REQ_QUIET))
 				printk(KERN_INFO "ide-cd: cmd 0x%x timed out\n", rq->cmd[0]);
 			wait = 0;
 			break;
@@ -1124,7 +1124,7 @@ static ide_startstop_t cdrom_read_intr (ide_drive_t *drive)
 		if (rq->current_nr_sectors > 0) {
 			printk (KERN_ERR "%s: cdrom_read_intr: data underrun (%d blocks)\n",
 				drive->name, rq->current_nr_sectors);
-			rq->flags |= REQ_FAILED;
+			rq->cmd_flags |= REQ_FAILED;
 			cdrom_end_request(drive, 0);
 		} else
 			cdrom_end_request(drive, 1);
@@ -1456,7 +1456,7 @@ static ide_startstop_t cdrom_pc_intr (ide_drive_t *drive)
 			printk ("%s: cdrom_pc_intr: data underrun %d\n",
 				drive->name, pc->buflen);
 			*/
-			rq->flags |= REQ_FAILED;
+			rq->cmd_flags |= REQ_FAILED;
 			cdrom_end_request(drive, 0);
 		}
 		return ide_stopped;
@@ -1509,7 +1509,7 @@ static ide_startstop_t cdrom_pc_intr (ide_drive_t *drive)
 		rq->data += thislen;
 		rq->data_len -= thislen;
 
-		if (rq->flags & REQ_SENSE)
+		if (blk_sense_request(rq))
 			rq->sense_len += thislen;
 	} else {
 confused:
@@ -1517,7 +1517,7 @@ confused:
 			"appears confused (ireason = 0x%02x). "
 			"Trying to recover by ending request.\n",
 			drive->name, ireason);
-		rq->flags |= REQ_FAILED;
+		rq->cmd_flags |= REQ_FAILED;
 		cdrom_end_request(drive, 0);
 		return ide_stopped;
 	}
@@ -1546,7 +1546,7 @@ static ide_startstop_t cdrom_do_packet_command (ide_drive_t *drive)
 	struct cdrom_info *info = drive->driver_data;
 
 	info->dma = 0;
-	rq->flags &= ~REQ_FAILED;
+	rq->cmd_flags &= ~REQ_FAILED;
 	len = rq->data_len;
 
 	/* Start sending the command to the drive. */
@@ -1558,7 +1558,7 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq)
 {
 	struct request_sense sense;
 	int retries = 10;
-	unsigned int flags = rq->flags;
+	unsigned int flags = rq->cmd_flags;
 
 	if (rq->sense == NULL)
 		rq->sense = &sense;
@@ -1567,14 +1567,14 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq)
 	do {
 		int error;
 		unsigned long time = jiffies;
-		rq->flags = flags;
+		rq->cmd_flags = flags;
 
 		error = ide_do_drive_cmd(drive, rq, ide_wait);
 		time = jiffies - time;
 
 		/* FIXME: we should probably abort/retry or something 
 		 * in case of failure */
-		if (rq->flags & REQ_FAILED) {
+		if (rq->cmd_flags & REQ_FAILED) {
 			/* The request failed.  Retry if it was due to a unit
 			   attention status
 			   (usually means media was changed). */
@@ -1596,10 +1596,10 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq)
 		}
 
 		/* End of retry loop. */
-	} while ((rq->flags & REQ_FAILED) && retries >= 0);
+	} while ((rq->cmd_flags & REQ_FAILED) && retries >= 0);
 
 	/* Return an error if the command failed. */
-	return (rq->flags & REQ_FAILED) ? -EIO : 0;
+	return (rq->cmd_flags & REQ_FAILED) ? -EIO : 0;
 }
 
 /*
@@ -1963,7 +1963,7 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 {
 	struct cdrom_info *info = drive->driver_data;
 
-	rq->flags |= REQ_QUIET;
+	rq->cmd_flags |= REQ_QUIET;
 
 	info->dma = 0;
 
@@ -2023,11 +2023,11 @@ ide_do_rw_cdrom (ide_drive_t *drive, struct request *rq, sector_t block)
 		}
 		info->last_block = block;
 		return action;
-	} else if (rq->flags & (REQ_PC | REQ_SENSE)) {
+	} else if (rq->cmd_type == REQ_TYPE_SENSE) {
 		return cdrom_do_packet_command(drive);
-	} else if (rq->flags & REQ_BLOCK_PC) {
+	} else if (blk_pc_request(rq)) {
 		return cdrom_do_block_pc(drive, rq);
-	} else if (rq->flags & REQ_SPECIAL) {
+	} else if (blk_special_request(rq)) {
 		/*
 		 * right now this can only be a reset...
 		 */
@@ -2105,7 +2105,7 @@ static int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense)
 
 	req.sense = sense;
 	req.cmd[0] = GPCMD_TEST_UNIT_READY;
-	req.flags |= REQ_QUIET;
+	req.cmd_flags |= REQ_QUIET;
 
 #if ! STANDARD_ATAPI
         /* the Sanyo 3 CD changer uses byte 7 of TEST_UNIT_READY to 
@@ -2207,7 +2207,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
 	req.cmd[0] = GPCMD_READ_CDVD_CAPACITY;
 	req.data = (char *)&capbuf;
 	req.data_len = sizeof(capbuf);
-	req.flags |= REQ_QUIET;
+	req.cmd_flags |= REQ_QUIET;
 
 	stat = cdrom_queue_packet_command(drive, &req);
 	if (stat == 0) {
@@ -2230,7 +2230,7 @@ static int cdrom_read_tocentry(ide_drive_t *drive, int trackno, int msf_flag,
 	req.sense = sense;
 	req.data =  buf;
 	req.data_len = buflen;
-	req.flags |= REQ_QUIET;
+	req.cmd_flags |= REQ_QUIET;
 	req.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
 	req.cmd[6] = trackno;
 	req.cmd[7] = (buflen >> 8);
@@ -2531,7 +2531,7 @@ static int ide_cdrom_packet(struct cdrom_device_info *cdi,
 	req.timeout = cgc->timeout;
 
 	if (cgc->quiet)
-		req.flags |= REQ_QUIET;
+		req.cmd_flags |= REQ_QUIET;
 
 	req.sense = cgc->sense;
 	cgc->stat = cdrom_queue_packet_command(drive, &req);
@@ -2629,7 +2629,8 @@ int ide_cdrom_reset (struct cdrom_device_info *cdi)
 	int ret;
 
 	cdrom_prepare_request(drive, &req);
-	req.flags = REQ_SPECIAL | REQ_QUIET;
+	req.cmd_type = REQ_TYPE_SPECIAL;
+	req.cmd_flags = REQ_QUIET;
 	ret = ide_do_drive_cmd(drive, &req, ide_wait);
 
 	/*
@@ -3116,9 +3117,9 @@ static int ide_cdrom_prep_pc(struct request *rq)
 
 static int ide_cdrom_prep_fn(request_queue_t *q, struct request *rq)
 {
-	if (rq->flags & REQ_CMD)
+	if (blk_fs_request(rq))
 		return ide_cdrom_prep_fs(q, rq);
-	else if (rq->flags & REQ_BLOCK_PC)
+	else if (blk_pc_request(rq))
 		return ide_cdrom_prep_pc(rq);
 
 	return 0;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 7cf3eb023521..0a05a377d66a 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -699,7 +699,8 @@ static void idedisk_prepare_flush(request_queue_t *q, struct request *rq)
 		rq->cmd[0] = WIN_FLUSH_CACHE;
 
 
-	rq->flags |= REQ_DRIVE_TASK;
+	rq->cmd_type = REQ_TYPE_ATA_TASK;
+	rq->cmd_flags |= REQ_SOFTBARRIER;
 	rq->buffer = rq->cmd;
 }
 
@@ -740,7 +741,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
 	if (drive->special.b.set_multmode)
 		return -EBUSY;
 	ide_init_drive_cmd (&rq);
-	rq.flags = REQ_DRIVE_CMD;
+	rq.cmd_type = REQ_TYPE_ATA_CMD;
 	drive->mult_req = arg;
 	drive->special.b.set_multmode = 1;
 	(void) ide_do_drive_cmd (drive, &rq, ide_wait);
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 7c3a13e1cf64..c3546fe9af63 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -205,7 +205,7 @@ int ide_build_sglist(ide_drive_t *drive, struct request *rq)
 	ide_hwif_t *hwif = HWIF(drive);
 	struct scatterlist *sg = hwif->sg_table;
 
-	BUG_ON((rq->flags & REQ_DRIVE_TASKFILE) && rq->nr_sectors > 256);
+	BUG_ON((rq->cmd_type == REQ_TYPE_ATA_TASKFILE) && rq->nr_sectors > 256);
 
 	ide_map_sg(drive, rq);
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index adbe9f76a505..0edc32204915 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -588,7 +588,7 @@ static int idefloppy_do_end_request(ide_drive_t *drive, int uptodate, int nsecs)
 	/* Why does this happen? */
 	if (!rq)
 		return 0;
-	if (!(rq->flags & REQ_SPECIAL)) { //if (!IDEFLOPPY_RQ_CMD (rq->cmd)) {
+	if (!blk_special_request(rq)) {
 		/* our real local end request function */
 		ide_end_request(drive, uptodate, nsecs);
 		return 0;
@@ -689,7 +689,7 @@ static void idefloppy_queue_pc_head (ide_drive_t *drive,idefloppy_pc_t *pc,struc
 
 	ide_init_drive_cmd(rq);
 	rq->buffer = (char *) pc;
-	rq->flags = REQ_SPECIAL;	//rq->cmd = IDEFLOPPY_PC_RQ;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->rq_disk = floppy->disk;
 	(void) ide_do_drive_cmd(drive, rq, ide_preempt);
 }
@@ -1250,7 +1250,7 @@ static void idefloppy_create_rw_cmd (idefloppy_floppy_t *floppy, idefloppy_pc_t
 	pc->callback = &idefloppy_rw_callback;
 	pc->rq = rq;
 	pc->b_count = cmd == READ ? 0 : rq->bio->bi_size;
-	if (rq->flags & REQ_RW)
+	if (rq->cmd_flags & REQ_RW)
 		set_bit(PC_WRITING, &pc->flags);
 	pc->buffer = NULL;
 	pc->request_transfer = pc->buffer_size = blocks * floppy->block_size;
@@ -1303,7 +1303,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
 		idefloppy_do_end_request(drive, 0, 0);
 		return ide_stopped;
 	}
-	if (rq->flags & REQ_CMD) {
+	if (blk_fs_request(rq)) {
 		if (((long)rq->sector % floppy->bs_factor) ||
 		    (rq->nr_sectors % floppy->bs_factor)) {
 			printk("%s: unsupported r/w request size\n",
@@ -1313,9 +1313,9 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
 		}
 		pc = idefloppy_next_pc_storage(drive);
 		idefloppy_create_rw_cmd(floppy, pc, rq, block);
-	} else if (rq->flags & REQ_SPECIAL) {
+	} else if (blk_special_request(rq)) {
 		pc = (idefloppy_pc_t *) rq->buffer;
-	} else if (rq->flags & REQ_BLOCK_PC) {
+	} else if (blk_pc_request(rq)) {
 		pc = idefloppy_next_pc_storage(drive);
 		if (idefloppy_blockpc_cmd(floppy, pc, rq)) {
 			idefloppy_do_end_request(drive, 0, 0);
@@ -1343,7 +1343,7 @@ static int idefloppy_queue_pc_tail (ide_drive_t *drive,idefloppy_pc_t *pc)
 
 	ide_init_drive_cmd (&rq);
 	rq.buffer = (char *) pc;
-	rq.flags = REQ_SPECIAL;		//	rq.cmd = IDEFLOPPY_PC_RQ;
+	rq.cmd_type = REQ_TYPE_SPECIAL;
 	rq.rq_disk = floppy->disk;
 
 	return ide_do_drive_cmd(drive, &rq, ide_wait);
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index fb6795236e76..3436b1f104eb 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -59,7 +59,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq,
 {
 	int ret = 1;
 
-	BUG_ON(!(rq->flags & REQ_STARTED));
+	BUG_ON(!blk_rq_started(rq));
 
 	/*
 	 * if failfast is set on a request, override number of sectors and
@@ -244,7 +244,7 @@ int ide_end_dequeued_request(ide_drive_t *drive, struct request *rq,
 
 	spin_lock_irqsave(&ide_lock, flags);
 
-	BUG_ON(!(rq->flags & REQ_STARTED));
+	BUG_ON(!blk_rq_started(rq));
 
 	/*
 	 * if failfast is set on a request, override number of sectors and
@@ -366,7 +366,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 	rq = HWGROUP(drive)->rq;
 	spin_unlock_irqrestore(&ide_lock, flags);
 
-	if (rq->flags & REQ_DRIVE_CMD) {
+	if (rq->cmd_type == REQ_TYPE_ATA_CMD) {
 		u8 *args = (u8 *) rq->buffer;
 		if (rq->errors == 0)
 			rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -376,7 +376,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 			args[1] = err;
 			args[2] = hwif->INB(IDE_NSECTOR_REG);
 		}
-	} else if (rq->flags & REQ_DRIVE_TASK) {
+	} else if (rq->cmd_type == REQ_TYPE_ATA_TASK) {
 		u8 *args = (u8 *) rq->buffer;
 		if (rq->errors == 0)
 			rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -390,7 +390,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 			args[5] = hwif->INB(IDE_HCYL_REG);
 			args[6] = hwif->INB(IDE_SELECT_REG);
 		}
-	} else if (rq->flags & REQ_DRIVE_TASKFILE) {
+	} else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
 		ide_task_t *args = (ide_task_t *) rq->special;
 		if (rq->errors == 0)
 			rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -587,7 +587,7 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat)
 		return ide_stopped;
 
 	/* retry only "normal" I/O: */
-	if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) {
+	if (!blk_fs_request(rq)) {
 		rq->errors = 1;
 		ide_end_drive_cmd(drive, stat, err);
 		return ide_stopped;
@@ -638,7 +638,7 @@ ide_startstop_t ide_abort(ide_drive_t *drive, const char *msg)
 		return ide_stopped;
 
 	/* retry only "normal" I/O: */
-	if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) {
+	if (!blk_fs_request(rq)) {
 		rq->errors = 1;
 		ide_end_drive_cmd(drive, BUSY_STAT, 0);
 		return ide_stopped;
@@ -808,7 +808,7 @@ void ide_map_sg(ide_drive_t *drive, struct request *rq)
 	if (hwif->sg_mapped)	/* needed by ide-scsi */
 		return;
 
-	if ((rq->flags & REQ_DRIVE_TASKFILE) == 0) {
+	if (rq->cmd_type != REQ_TYPE_ATA_TASKFILE) {
 		hwif->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
 	} else {
 		sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE);
@@ -844,7 +844,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
 		struct request *rq)
 {
 	ide_hwif_t *hwif = HWIF(drive);
-	if (rq->flags & REQ_DRIVE_TASKFILE) {
+	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
  		ide_task_t *args = rq->special;
  
 		if (!args)
@@ -866,7 +866,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
 		if (args->tf_out_flags.all != 0) 
 			return flagged_taskfile(drive, args);
 		return do_rw_taskfile(drive, args);
-	} else if (rq->flags & REQ_DRIVE_TASK) {
+	} else if (rq->cmd_type == REQ_TYPE_ATA_TASK) {
 		u8 *args = rq->buffer;
 		u8 sel;
  
@@ -892,7 +892,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
  		hwif->OUTB(sel, IDE_SELECT_REG);
  		ide_cmd(drive, args[0], args[2], &drive_cmd_intr);
  		return ide_started;
- 	} else if (rq->flags & REQ_DRIVE_CMD) {
+ 	} else if (rq->cmd_type == REQ_TYPE_ATA_CMD) {
  		u8 *args = rq->buffer;
 
 		if (!args)
@@ -980,7 +980,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 	ide_startstop_t startstop;
 	sector_t block;
 
-	BUG_ON(!(rq->flags & REQ_STARTED));
+	BUG_ON(!blk_rq_started(rq));
 
 #ifdef DEBUG
 	printk("%s: start_request: current=0x%08lx\n",
@@ -1013,9 +1013,9 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 	if (!drive->special.all) {
 		ide_driver_t *drv;
 
-		if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK))
-			return execute_drive_cmd(drive, rq);
-		else if (rq->flags & REQ_DRIVE_TASKFILE)
+		if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
+		    rq->cmd_type == REQ_TYPE_ATA_TASK ||
+		    rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
 			return execute_drive_cmd(drive, rq);
 		else if (blk_pm_request(rq)) {
 			struct request_pm_state *pm = rq->end_io_data;
@@ -1264,7 +1264,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 		 * We count how many times we loop here to make sure we service
 		 * all drives in the hwgroup without looping for ever
 		 */
-		if (drive->blocked && !blk_pm_request(rq) && !(rq->flags & REQ_PREEMPT)) {
+		if (drive->blocked && !blk_pm_request(rq) && !(rq->cmd_flags & REQ_PREEMPT)) {
 			drive = drive->next ? drive->next : hwgroup->drive;
 			if (loops++ < 4 && !blk_queue_plugged(drive->queue))
 				goto again;
@@ -1670,7 +1670,7 @@ irqreturn_t ide_intr (int irq, void *dev_id, struct pt_regs *regs)
 void ide_init_drive_cmd (struct request *rq)
 {
 	memset(rq, 0, sizeof(*rq));
-	rq->flags = REQ_DRIVE_CMD;
+	rq->cmd_type = REQ_TYPE_ATA_CMD;
 	rq->ref_count = 1;
 }
 
@@ -1727,7 +1727,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
 		hwgroup->rq = NULL;
 	if (action == ide_preempt || action == ide_head_wait) {
 		where = ELEVATOR_INSERT_FRONT;
-		rq->flags |= REQ_PREEMPT;
+		rq->cmd_flags |= REQ_PREEMPT;
 	}
 	__elv_add_request(drive->queue, rq, where, 0);
 	ide_do_request(hwgroup, IDE_NO_IRQ);
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index 1feff23487d4..850ef63cc986 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -456,13 +456,14 @@ static void ide_dump_opcode(ide_drive_t *drive)
 	spin_unlock(&ide_lock);
 	if (!rq)
 		return;
-	if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) {
+	if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
+	    rq->cmd_type == REQ_TYPE_ATA_TASK) {
 		char *args = rq->buffer;
 		if (args) {
 			opcode = args[0];
 			found = 1;
 		}
-	} else if (rq->flags & REQ_DRIVE_TASKFILE) {
+	} else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
 		ide_task_t *args = rq->special;
 		if (args) {
 			task_struct_t *tf = (task_struct_t *) args->tfRegister;
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 7067ab997927..643e4b9ac651 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -1776,7 +1776,7 @@ static void idetape_create_request_sense_cmd (idetape_pc_t *pc)
 static void idetape_init_rq(struct request *rq, u8 cmd)
 {
 	memset(rq, 0, sizeof(*rq));
-	rq->flags = REQ_SPECIAL;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd[0] = cmd;
 }
 
@@ -2433,12 +2433,12 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 			rq->sector, rq->nr_sectors, rq->current_nr_sectors);
 #endif /* IDETAPE_DEBUG_LOG */
 
-	if ((rq->flags & REQ_SPECIAL) == 0) {
+	if (!blk_special_request(rq)) {
 		/*
 		 * We do not support buffer cache originated requests.
 		 */
 		printk(KERN_NOTICE "ide-tape: %s: Unsupported request in "
-			"request queue (%ld)\n", drive->name, rq->flags);
+			"request queue (%d)\n", drive->name, rq->cmd_type);
 		ide_end_request(drive, 0, 0);
 		return ide_stopped;
 	}
@@ -2768,7 +2768,7 @@ static void idetape_wait_for_request (ide_drive_t *drive, struct request *rq)
 	idetape_tape_t *tape = drive->driver_data;
 
 #if IDETAPE_DEBUG_BUGS
-	if (rq == NULL || (rq->flags & REQ_SPECIAL) == 0) {
+	if (rq == NULL || !blk_special_request(rq)) {
 		printk (KERN_ERR "ide-tape: bug: Trying to sleep on non-valid request\n");
 		return;
 	}
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 97a9244312fc..1d0470c1f957 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -363,7 +363,7 @@ static ide_startstop_t task_error(ide_drive_t *drive, struct request *rq,
 
 static void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat)
 {
-	if (rq->flags & REQ_DRIVE_TASKFILE) {
+	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
 		ide_task_t *task = rq->special;
 
 		if (task->tf_out_flags.all) {
@@ -474,7 +474,7 @@ static int ide_diag_taskfile(ide_drive_t *drive, ide_task_t *args, unsigned long
 	struct request rq;
 
 	memset(&rq, 0, sizeof(rq));
-	rq.flags = REQ_DRIVE_TASKFILE;
+	rq.cmd_type = REQ_TYPE_ATA_TASKFILE;
 	rq.buffer = buf;
 
 	/*
@@ -499,7 +499,7 @@ static int ide_diag_taskfile(ide_drive_t *drive, ide_task_t *args, unsigned long
 		rq.hard_cur_sectors = rq.current_nr_sectors = rq.nr_sectors;
 
 		if (args->command_type == IDE_DRIVE_TASK_RAW_WRITE)
-			rq.flags |= REQ_RW;
+			rq.cmd_flags |= REQ_RW;
 	}
 
 	rq.special = args;
@@ -737,7 +737,7 @@ static int ide_wait_cmd_task(ide_drive_t *drive, u8 *buf)
 	struct request rq;
 
 	ide_init_drive_cmd(&rq);
-	rq.flags = REQ_DRIVE_TASK;
+	rq.cmd_type = REQ_TYPE_ATA_TASK;
 	rq.buffer = buf;
 	return ide_do_drive_cmd(drive, &rq, ide_wait);
 }
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 9c8468de1a75..9384a3fdde6c 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -1217,7 +1217,7 @@ static int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 	memset(&rq, 0, sizeof(rq));
 	memset(&rqpm, 0, sizeof(rqpm));
 	memset(&args, 0, sizeof(args));
-	rq.flags = REQ_PM_SUSPEND;
+	rq.cmd_type = REQ_TYPE_PM_SUSPEND;
 	rq.special = &args;
 	rq.end_io_data = &rqpm;
 	rqpm.pm_step = ide_pm_state_start_suspend;
@@ -1238,7 +1238,7 @@ static int generic_ide_resume(struct device *dev)
 	memset(&rq, 0, sizeof(rq));
 	memset(&rqpm, 0, sizeof(rqpm));
 	memset(&args, 0, sizeof(args));
-	rq.flags = REQ_PM_RESUME;
+	rq.cmd_type = REQ_TYPE_PM_RESUME;
 	rq.special = &args;
 	rq.end_io_data = &rqpm;
 	rqpm.pm_step = ide_pm_state_start_resume;
diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c
index aebecd8f51cc..4ab931145673 100644
--- a/drivers/ide/legacy/hd.c
+++ b/drivers/ide/legacy/hd.c
@@ -626,7 +626,7 @@ repeat:
 		req->rq_disk->disk_name, (req->cmd == READ)?"read":"writ",
 		cyl, head, sec, nsect, req->buffer);
 #endif
-	if (req->flags & REQ_CMD) {
+	if (blk_fs_request(req)) {
 		switch (rq_data_dir(req)) {
 		case READ:
 			hd_out(disk,nsect,sec,head,cyl,WIN_READ,&read_intr);
diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
index 2a374ccb30dd..2b2d45d7baaa 100644
--- a/drivers/md/dm-emc.c
+++ b/drivers/md/dm-emc.c
@@ -126,7 +126,8 @@ static struct request *get_failover_req(struct emc_handler *h,
 	memset(&rq->cmd, 0, BLK_MAX_CDB);
 
 	rq->timeout = EMC_FAILOVER_TIMEOUT;
-	rq->flags |= (REQ_BLOCK_PC | REQ_FAILFAST | REQ_NOMERGE);
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
 
 	return rq;
 }
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 1ddc2fb429d5..eaba81bf2eca 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -390,9 +390,9 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req)
 	}
 
 	/* request is already processed by us, so return */
-	if (req->flags & REQ_SPECIAL) {
+	if (blk_special_request(req)) {
 		osm_debug("REQ_SPECIAL already set!\n");
-		req->flags |= REQ_DONTPREP;
+		req->cmd_flags |= REQ_DONTPREP;
 		return BLKPREP_OK;
 	}
 
@@ -411,7 +411,8 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req)
 		ireq = req->special;
 
 	/* do not come back here */
-	req->flags |= REQ_DONTPREP | REQ_SPECIAL;
+	req->cmd_type = REQ_TYPE_SPECIAL;
+	req->cmd_flags |= REQ_DONTPREP;
 
 	return BLKPREP_OK;
 };
diff --git a/drivers/mmc/mmc_queue.c b/drivers/mmc/mmc_queue.c
index 74f8cdeeff0f..4ccdd82b680f 100644
--- a/drivers/mmc/mmc_queue.c
+++ b/drivers/mmc/mmc_queue.c
@@ -28,7 +28,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
 	struct mmc_queue *mq = q->queuedata;
 	int ret = BLKPREP_KILL;
 
-	if (req->flags & REQ_SPECIAL) {
+	if (blk_special_request(req)) {
 		/*
 		 * Special commands already have the command
 		 * blocks already setup in req->special.
@@ -36,7 +36,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
 		BUG_ON(!req->special);
 
 		ret = BLKPREP_OK;
-	} else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
+	} else if (blk_fs_request(req) || blk_pc_request(req)) {
 		/*
 		 * Block I/O requests need translating according
 		 * to the protocol.
@@ -50,7 +50,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
 	}
 
 	if (ret == BLKPREP_OK)
-		req->flags |= REQ_DONTPREP;
+		req->cmd_flags |= REQ_DONTPREP;
 
 	return ret;
 }
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 458d3c8ae1ee..6baf5fe14230 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -46,7 +46,7 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	nsect = req->current_nr_sectors;
 	buf = req->buffer;
 
-	if (!(req->flags & REQ_CMD))
+	if (!blk_fs_request(req))
 		return 0;
 
 	if (block + nsect > get_capacity(req->rq_disk))
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 9d051e5687ea..222a8a71a5e8 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -529,7 +529,7 @@ dasd_diag_build_cp(struct dasd_device * device, struct request *req)
 	}
 	cqr->retries = DIAG_MAX_RETRIES;
 	cqr->buildclk = get_clock();
-	if (req->flags & REQ_FAILFAST)
+	if (req->cmd_flags & REQ_FAILFAST)
 		set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
 	cqr->device = device;
 	cqr->expires = DIAG_TIMEOUT;
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index b7a7fac3f7c3..5ecea3e4fdef 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1266,7 +1266,7 @@ dasd_eckd_build_cp(struct dasd_device * device, struct request *req)
 			recid++;
 		}
 	}
-	if (req->flags & REQ_FAILFAST)
+	if (req->cmd_flags & REQ_FAILFAST)
 		set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
 	cqr->device = device;
 	cqr->expires = 5 * 60 * HZ;	/* 5 minutes */
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index e85015be109b..80926c548228 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -344,7 +344,7 @@ dasd_fba_build_cp(struct dasd_device * device, struct request *req)
 			recid++;
 		}
 	}
-	if (req->flags & REQ_FAILFAST)
+	if (req->cmd_flags & REQ_FAILFAST)
 		set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
 	cqr->device = device;
 	cqr->expires = 5 * 60 * HZ;	/* 5 minutes */
diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c
index 5dcef48d414f..10353379a074 100644
--- a/drivers/scsi/aic7xxx_old.c
+++ b/drivers/scsi/aic7xxx_old.c
@@ -2862,7 +2862,7 @@ aic7xxx_done(struct aic7xxx_host *p, struct aic7xxx_scb *scb)
       aic_dev->r_total++;
       ptr = aic_dev->r_bins;
     }
-    if(cmd->device->simple_tags && cmd->request->flags & REQ_HARDBARRIER)
+    if(cmd->device->simple_tags && cmd->request->cmd_flags & REQ_HARDBARRIER)
     {
       aic_dev->barrier_total++;
       if(scb->tag_action == MSG_ORDERED_Q_TAG)
@@ -10158,7 +10158,7 @@ aic7xxx_buildscb(struct aic7xxx_host *p, Scsi_Cmnd *cmd,
     /* We always force TEST_UNIT_READY to untagged */
     if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags)
     {
-      if (req->flags & REQ_HARDBARRIER)
+      if (req->cmd_flags & REQ_HARDBARRIER)
       {
 	if(sdptr->ordered_tags)
 	{
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 94d1de55607f..65b19695ebe2 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -344,7 +344,7 @@ static int idescsi_check_condition(ide_drive_t *drive, struct request *failed_co
 	pc->buffer = buf;
 	pc->c[0] = REQUEST_SENSE;
 	pc->c[4] = pc->request_transfer = pc->buffer_size = SCSI_SENSE_BUFFERSIZE;
-	rq->flags = REQ_SENSE;
+	rq->cmd_type = REQ_TYPE_SENSE;
 	pc->timeout = jiffies + WAIT_READY;
 	/* NOTE! Save the failed packet command in "rq->buffer" */
 	rq->buffer = (void *) failed_command->special;
@@ -398,12 +398,12 @@ static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs)
 	int errors = rq->errors;
 	unsigned long flags;
 
-	if (!(rq->flags & (REQ_SPECIAL|REQ_SENSE))) {
+	if (!blk_special_request(rq) && !blk_sense_request(rq)) {
 		ide_end_request(drive, uptodate, nrsecs);
 		return 0;
 	}
 	ide_end_drive_cmd (drive, 0, 0);
-	if (rq->flags & REQ_SENSE) {
+	if (blk_sense_request(rq)) {
 		idescsi_pc_t *opc = (idescsi_pc_t *) rq->buffer;
 		if (log) {
 			printk ("ide-scsi: %s: wrap up check %lu, rst = ", drive->name, opc->scsi_cmd->serial_number);
@@ -712,7 +712,7 @@ static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *r
 	printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors);
 #endif /* IDESCSI_DEBUG_LOG */
 
-	if (rq->flags & (REQ_SPECIAL|REQ_SENSE)) {
+	if (blk_sense_request(rq) || blk_special_request(rq)) {
 		return idescsi_issue_pc (drive, (idescsi_pc_t *) rq->special);
 	}
 	blk_dump_rq_flags(rq, "ide-scsi: unsup command");
@@ -938,7 +938,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
 
 	ide_init_drive_cmd (rq);
 	rq->special = (char *) pc;
-	rq->flags = REQ_SPECIAL;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
 	spin_unlock_irq(host->host_lock);
 	rq->rq_disk = scsi->disk;
 	(void) ide_do_drive_cmd (drive, rq, ide_end);
@@ -992,7 +992,7 @@ static int idescsi_eh_abort (struct scsi_cmnd *cmd)
 		 */
 		printk (KERN_ERR "ide-scsi: cmd aborted!\n");
 
-		if (scsi->pc->rq->flags & REQ_SENSE)
+		if (blk_sense_request(scsi->pc->rq))
 			kfree(scsi->pc->buffer);
 		kfree(scsi->pc->rq);
 		kfree(scsi->pc);
@@ -1042,7 +1042,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd)
 	/* kill current request */
 	blkdev_dequeue_request(req);
 	end_that_request_last(req, 0);
-	if (req->flags & REQ_SENSE)
+	if (blk_sense_request(req))
 		kfree(scsi->pc->buffer);
 	kfree(scsi->pc);
 	scsi->pc = NULL;
diff --git a/drivers/scsi/pluto.c b/drivers/scsi/pluto.c
index 0bd9c60e6455..aa60a5f1fbc3 100644
--- a/drivers/scsi/pluto.c
+++ b/drivers/scsi/pluto.c
@@ -67,7 +67,6 @@ static void __init pluto_detect_done(Scsi_Cmnd *SCpnt)
 
 static void __init pluto_detect_scsi_done(Scsi_Cmnd *SCpnt)
 {
-	SCpnt->request->rq_status = RQ_SCSI_DONE;
 	PLND(("Detect done %08lx\n", (long)SCpnt))
 	if (atomic_dec_and_test (&fcss))
 		up(&fc_sem);
@@ -166,7 +165,7 @@ int __init pluto_detect(struct scsi_host_template *tpnt)
 		
 		SCpnt->cmd_len = COMMAND_SIZE(INQUIRY);
 	
-		SCpnt->request->rq_status = RQ_SCSI_BUSY;
+		SCpnt->request->cmd_flags &= ~REQ_STARTED;
 		
 		SCpnt->done = pluto_detect_done;
 		SCpnt->request_bufflen = 256;
@@ -178,7 +177,8 @@ int __init pluto_detect(struct scsi_host_template *tpnt)
 	for (retry = 0; retry < 5; retry++) {
 		for (i = 0; i < fcscount; i++) {
 			if (!fcs[i].fc) break;
-			if (fcs[i].cmd.request->rq_status != RQ_SCSI_DONE) {
+			if (!(fcs[i].cmd.request->cmd_flags & REQ_STARTED)) {
+				fcs[i].cmd.request->cmd_flags |= REQ_STARTED;
 				disable_irq(fcs[i].fc->irq);
 				PLND(("queuecommand %d %d\n", retry, i))
 				fcp_scsi_queuecommand (&(fcs[i].cmd), 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d6743b959a72..71084728eb42 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -82,7 +82,7 @@ static void scsi_unprep_request(struct request *req)
 {
 	struct scsi_cmnd *cmd = req->special;
 
-	req->flags &= ~REQ_DONTPREP;
+	req->cmd_flags &= ~REQ_DONTPREP;
 	req->special = NULL;
 
 	scsi_put_command(cmd);
@@ -196,7 +196,8 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	req->sense_len = 0;
 	req->retries = retries;
 	req->timeout = timeout;
-	req->flags |= flags | REQ_BLOCK_PC | REQ_SPECIAL | REQ_QUIET;
+	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT;
 
 	/*
 	 * head injection *required* here otherwise quiesce won't work
@@ -397,7 +398,8 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
 	req = blk_get_request(sdev->request_queue, write, gfp);
 	if (!req)
 		goto free_sense;
-	req->flags |= REQ_BLOCK_PC | REQ_QUIET;
+	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	req->cmd_flags |= REQ_QUIET;
 
 	if (use_sg)
 		err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp);
@@ -933,7 +935,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 					break;
 				}
 			}
-			if (!(req->flags & REQ_QUIET)) {
+			if (!(req->cmd_flags & REQ_QUIET)) {
 				scmd_printk(KERN_INFO, cmd,
 					    "Device not ready: ");
 				scsi_print_sense_hdr("", &sshdr);
@@ -941,7 +943,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			scsi_end_request(cmd, 0, this_count, 1);
 			return;
 		case VOLUME_OVERFLOW:
-			if (!(req->flags & REQ_QUIET)) {
+			if (!(req->cmd_flags & REQ_QUIET)) {
 				scmd_printk(KERN_INFO, cmd,
 					    "Volume overflow, CDB: ");
 				__scsi_print_command(cmd->cmnd);
@@ -963,7 +965,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 		return;
 	}
 	if (result) {
-		if (!(req->flags & REQ_QUIET)) {
+		if (!(req->cmd_flags & REQ_QUIET)) {
 			scmd_printk(KERN_INFO, cmd,
 				    "SCSI error: return code = 0x%08x\n",
 				    result);
@@ -995,7 +997,7 @@ static int scsi_init_io(struct scsi_cmnd *cmd)
 	/*
 	 * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
 	 */
-	if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
+	if (blk_pc_request(req) && !req->bio) {
 		cmd->request_bufflen = req->data_len;
 		cmd->request_buffer = req->data;
 		req->buffer = req->data;
@@ -1139,13 +1141,12 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 	 * these two cases differently.  We differentiate by looking
 	 * at request->cmd, as this tells us the real story.
 	 */
-	if (req->flags & REQ_SPECIAL && req->special) {
+	if (blk_special_request(req) && req->special)
 		cmd = req->special;
-	} else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
-
-		if(unlikely(specials_only) && !(req->flags & REQ_SPECIAL)) {
-			if(specials_only == SDEV_QUIESCE ||
-					specials_only == SDEV_BLOCK)
+	else if (blk_pc_request(req) || blk_fs_request(req)) {
+		if (unlikely(specials_only) && !(req->cmd_flags & REQ_PREEMPT)){
+			if (specials_only == SDEV_QUIESCE ||
+			    specials_only == SDEV_BLOCK)
 				goto defer;
 			
 			sdev_printk(KERN_ERR, sdev,
@@ -1153,7 +1154,6 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 			goto kill;
 		}
 			
-			
 		/*
 		 * Now try and find a command block that we can use.
 		 */
@@ -1184,7 +1184,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 	 * lock.  We hope REQ_STARTED prevents anything untoward from
 	 * happening now.
 	 */
-	if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
+	if (blk_fs_request(req) || blk_pc_request(req)) {
 		int ret;
 
 		/*
@@ -1216,7 +1216,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 		/*
 		 * Initialize the actual SCSI command for this request.
 		 */
-		if (req->flags & REQ_BLOCK_PC) {
+		if (blk_pc_request(req)) {
 			scsi_setup_blk_pc_cmnd(cmd);
 		} else if (req->rq_disk) {
 			struct scsi_driver *drv;
@@ -1233,7 +1233,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 	/*
 	 * The request is now prepped, no need to come back here
 	 */
-	req->flags |= REQ_DONTPREP;
+	req->cmd_flags |= REQ_DONTPREP;
 	return BLKPREP_OK;
 
  defer:
@@ -1454,8 +1454,9 @@ static void scsi_request_fn(struct request_queue *q)
 		if (unlikely(cmd == NULL)) {
 			printk(KERN_CRIT "impossible request in %s.\n"
 					 "please mail a stack trace to "
-					 "linux-scsi@vger.kernel.org",
+					 "linux-scsi@vger.kernel.org\n",
 					 __FUNCTION__);
+			blk_dump_rq_flags(req, "foo");
 			BUG();
 		}
 		spin_lock(shost->host_lock);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 638cff41d436..10bc99c911fa 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -443,8 +443,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
 		SCpnt->cmnd[0] = READ_6;
 		SCpnt->sc_data_direction = DMA_FROM_DEVICE;
 	} else {
-		printk(KERN_ERR "sd: Unknown command %lx\n", rq->flags);
-/* overkill 	panic("Unknown sd command %lx\n", rq->flags); */
+		printk(KERN_ERR "sd: Unknown command %x\n", rq->cmd_flags);
 		return 0;
 	}
 
@@ -840,7 +839,7 @@ static int sd_issue_flush(struct device *dev, sector_t *error_sector)
 static void sd_prepare_flush(request_queue_t *q, struct request *rq)
 {
 	memset(rq->cmd, 0, sizeof(rq->cmd));
-	rq->flags |= REQ_BLOCK_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->timeout = SD_TIMEOUT;
 	rq->cmd[0] = SYNCHRONIZE_CACHE;
 	rq->cmd_len = 10;
diff --git a/drivers/scsi/sun3_NCR5380.c b/drivers/scsi/sun3_NCR5380.c
index 2f8073b73bf3..7f9bcef6adfa 100644
--- a/drivers/scsi/sun3_NCR5380.c
+++ b/drivers/scsi/sun3_NCR5380.c
@@ -2017,7 +2017,7 @@ static void NCR5380_information_transfer (struct Scsi_Host *instance)
 		if((count > SUN3_DMA_MINSIZE) && (sun3_dma_setup_done
 						  != cmd))
 		{
-			if(cmd->request->flags & REQ_CMD) {
+			if(blk_fs_request(cmd->request)) {
 				sun3scsi_dma_setup(d, count,
 						   rq_data_dir(cmd->request));
 				sun3_dma_setup_done = cmd;
diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c
index 837173415d4c..44a99aeb8180 100644
--- a/drivers/scsi/sun3_scsi.c
+++ b/drivers/scsi/sun3_scsi.c
@@ -524,7 +524,7 @@ static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
 static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd,
 				    int write_flag)
 {
-	if(cmd->request->flags & REQ_CMD)
+	if(blk_fs_request(cmd->request))
  		return wanted;
 	else
 		return 0;
diff --git a/drivers/scsi/sun3_scsi_vme.c b/drivers/scsi/sun3_scsi_vme.c
index 008a82ab8521..f5742b84b27a 100644
--- a/drivers/scsi/sun3_scsi_vme.c
+++ b/drivers/scsi/sun3_scsi_vme.c
@@ -458,7 +458,7 @@ static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
 static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd,
 				    int write_flag)
 {
-	if(cmd->request->flags & REQ_CMD)
+	if(blk_fs_request(cmd->request))
  		return wanted;
 	else
 		return 0;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cfde8b3ee919..b2a412cf468f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -120,6 +120,86 @@ struct request_list {
 	wait_queue_head_t wait[2];
 };
 
+/*
+ * request command types
+ */
+enum rq_cmd_type_bits {
+	REQ_TYPE_FS		= 1,	/* fs request */
+	REQ_TYPE_BLOCK_PC,		/* scsi command */
+	REQ_TYPE_SENSE,			/* sense request */
+	REQ_TYPE_PM_SUSPEND,		/* suspend request */
+	REQ_TYPE_PM_RESUME,		/* resume request */
+	REQ_TYPE_PM_SHUTDOWN,		/* shutdown request */
+	REQ_TYPE_FLUSH,			/* flush request */
+	REQ_TYPE_SPECIAL,		/* driver defined type */
+	REQ_TYPE_LINUX_BLOCK,		/* generic block layer message */
+	/*
+	 * for ATA/ATAPI devices. this really doesn't belong here, ide should
+	 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
+	 * private REQ_LB opcodes to differentiate what type of request this is
+	 */
+	REQ_TYPE_ATA_CMD,
+	REQ_TYPE_ATA_TASK,
+	REQ_TYPE_ATA_TASKFILE,
+};
+
+/*
+ * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
+ * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
+ * SCSI cdb.
+ *
+ * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need,
+ * typically to differentiate REQ_TYPE_SPECIAL requests.
+ *
+ */
+enum {
+	/*
+	 * just examples for now
+	 */
+	REQ_LB_OP_EJECT	= 0x40,		/* eject request */
+	REQ_LB_OP_FLUSH = 0x41,		/* flush device */
+};
+
+/*
+ * request type modified bits. first three bits match BIO_RW* bits, important
+ */
+enum rq_flag_bits {
+	__REQ_RW,		/* not set, read. set, write */
+	__REQ_FAILFAST,		/* no low level driver retries */
+	__REQ_SORTED,		/* elevator knows about this request */
+	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
+	__REQ_HARDBARRIER,	/* may not be passed by drive either */
+	__REQ_FUA,		/* forced unit access */
+	__REQ_NOMERGE,		/* don't touch this for merging */
+	__REQ_STARTED,		/* drive already may have started this one */
+	__REQ_DONTPREP,		/* don't call prep for this one */
+	__REQ_QUEUED,		/* uses queueing */
+	__REQ_ELVPRIV,		/* elevator private data attached */
+	__REQ_FAILED,		/* set if the request failed */
+	__REQ_QUIET,		/* don't worry about errors */
+	__REQ_PREEMPT,		/* set for "ide_preempt" requests */
+	__REQ_ORDERED_COLOR,	/* is before or after barrier */
+	__REQ_RW_SYNC,		/* request is sync (O_DIRECT) */
+	__REQ_NR_BITS,		/* stops here */
+};
+
+#define REQ_RW		(1 << __REQ_RW)
+#define REQ_FAILFAST	(1 << __REQ_FAILFAST)
+#define REQ_SORTED	(1 << __REQ_SORTED)
+#define REQ_SOFTBARRIER	(1 << __REQ_SOFTBARRIER)
+#define REQ_HARDBARRIER	(1 << __REQ_HARDBARRIER)
+#define REQ_FUA		(1 << __REQ_FUA)
+#define REQ_NOMERGE	(1 << __REQ_NOMERGE)
+#define REQ_STARTED	(1 << __REQ_STARTED)
+#define REQ_DONTPREP	(1 << __REQ_DONTPREP)
+#define REQ_QUEUED	(1 << __REQ_QUEUED)
+#define REQ_ELVPRIV	(1 << __REQ_ELVPRIV)
+#define REQ_FAILED	(1 << __REQ_FAILED)
+#define REQ_QUIET	(1 << __REQ_QUIET)
+#define REQ_PREEMPT	(1 << __REQ_PREEMPT)
+#define REQ_ORDERED_COLOR	(1 << __REQ_ORDERED_COLOR)
+#define REQ_RW_SYNC	(1 << __REQ_RW_SYNC)
+
 #define BLK_MAX_CDB	16
 
 /*
@@ -129,7 +209,8 @@ struct request {
 	struct list_head queuelist;
 	struct list_head donelist;
 
-	unsigned long flags;		/* see REQ_ bits below */
+	unsigned int cmd_flags;
+	enum rq_cmd_type_bits cmd_type;
 
 	/* Maintain bio traversal state for part by part I/O submission.
 	 * hard_* are block layer internals, no driver should touch them!
@@ -202,73 +283,7 @@ struct request {
 };
 
 /*
- * first three bits match BIO_RW* bits, important
- */
-enum rq_flag_bits {
-	__REQ_RW,		/* not set, read. set, write */
-	__REQ_FAILFAST,		/* no low level driver retries */
-	__REQ_SORTED,		/* elevator knows about this request */
-	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
-	__REQ_HARDBARRIER,	/* may not be passed by drive either */
-	__REQ_FUA,		/* forced unit access */
-	__REQ_CMD,		/* is a regular fs rw request */
-	__REQ_NOMERGE,		/* don't touch this for merging */
-	__REQ_STARTED,		/* drive already may have started this one */
-	__REQ_DONTPREP,		/* don't call prep for this one */
-	__REQ_QUEUED,		/* uses queueing */
-	__REQ_ELVPRIV,		/* elevator private data attached */
-	/*
-	 * for ATA/ATAPI devices
-	 */
-	__REQ_PC,		/* packet command (special) */
-	__REQ_BLOCK_PC,		/* queued down pc from block layer */
-	__REQ_SENSE,		/* sense retrival */
-
-	__REQ_FAILED,		/* set if the request failed */
-	__REQ_QUIET,		/* don't worry about errors */
-	__REQ_SPECIAL,		/* driver suplied command */
-	__REQ_DRIVE_CMD,
-	__REQ_DRIVE_TASK,
-	__REQ_DRIVE_TASKFILE,
-	__REQ_PREEMPT,		/* set for "ide_preempt" requests */
-	__REQ_PM_SUSPEND,	/* suspend request */
-	__REQ_PM_RESUME,	/* resume request */
-	__REQ_PM_SHUTDOWN,	/* shutdown request */
-	__REQ_ORDERED_COLOR,	/* is before or after barrier */
-	__REQ_RW_SYNC,		/* request is sync (O_DIRECT) */
-	__REQ_NR_BITS,		/* stops here */
-};
-
-#define REQ_RW		(1 << __REQ_RW)
-#define REQ_FAILFAST	(1 << __REQ_FAILFAST)
-#define REQ_SORTED	(1 << __REQ_SORTED)
-#define REQ_SOFTBARRIER	(1 << __REQ_SOFTBARRIER)
-#define REQ_HARDBARRIER	(1 << __REQ_HARDBARRIER)
-#define REQ_FUA		(1 << __REQ_FUA)
-#define REQ_CMD		(1 << __REQ_CMD)
-#define REQ_NOMERGE	(1 << __REQ_NOMERGE)
-#define REQ_STARTED	(1 << __REQ_STARTED)
-#define REQ_DONTPREP	(1 << __REQ_DONTPREP)
-#define REQ_QUEUED	(1 << __REQ_QUEUED)
-#define REQ_ELVPRIV	(1 << __REQ_ELVPRIV)
-#define REQ_PC		(1 << __REQ_PC)
-#define REQ_BLOCK_PC	(1 << __REQ_BLOCK_PC)
-#define REQ_SENSE	(1 << __REQ_SENSE)
-#define REQ_FAILED	(1 << __REQ_FAILED)
-#define REQ_QUIET	(1 << __REQ_QUIET)
-#define REQ_SPECIAL	(1 << __REQ_SPECIAL)
-#define REQ_DRIVE_CMD	(1 << __REQ_DRIVE_CMD)
-#define REQ_DRIVE_TASK	(1 << __REQ_DRIVE_TASK)
-#define REQ_DRIVE_TASKFILE	(1 << __REQ_DRIVE_TASKFILE)
-#define REQ_PREEMPT	(1 << __REQ_PREEMPT)
-#define REQ_PM_SUSPEND	(1 << __REQ_PM_SUSPEND)
-#define REQ_PM_RESUME	(1 << __REQ_PM_RESUME)
-#define REQ_PM_SHUTDOWN	(1 << __REQ_PM_SHUTDOWN)
-#define REQ_ORDERED_COLOR	(1 << __REQ_ORDERED_COLOR)
-#define REQ_RW_SYNC	(1 << __REQ_RW_SYNC)
-
-/*
- * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME
+ * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
  * requests. Some step values could eventually be made generic.
  */
 struct request_pm_state
@@ -490,25 +505,28 @@ enum {
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_flushing(q)	((q)->ordseq)
 
-#define blk_fs_request(rq)	((rq)->flags & REQ_CMD)
-#define blk_pc_request(rq)	((rq)->flags & REQ_BLOCK_PC)
-#define blk_noretry_request(rq)	((rq)->flags & REQ_FAILFAST)
-#define blk_rq_started(rq)	((rq)->flags & REQ_STARTED)
+#define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
+#define blk_pc_request(rq)	((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
+#define blk_special_request(rq)	((rq)->cmd_type == REQ_TYPE_SPECIAL)
+#define blk_sense_request(rq)	((rq)->cmd_type == REQ_TYPE_SENSE)
+
+#define blk_noretry_request(rq)	((rq)->cmd_flags & REQ_FAILFAST)
+#define blk_rq_started(rq)	((rq)->cmd_flags & REQ_STARTED)
 
 #define blk_account_rq(rq)	(blk_rq_started(rq) && blk_fs_request(rq))
 
-#define blk_pm_suspend_request(rq)	((rq)->flags & REQ_PM_SUSPEND)
-#define blk_pm_resume_request(rq)	((rq)->flags & REQ_PM_RESUME)
+#define blk_pm_suspend_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
+#define blk_pm_resume_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_RESUME)
 #define blk_pm_request(rq)	\
-	((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME))
+	(blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
 
-#define blk_sorted_rq(rq)	((rq)->flags & REQ_SORTED)
-#define blk_barrier_rq(rq)	((rq)->flags & REQ_HARDBARRIER)
-#define blk_fua_rq(rq)		((rq)->flags & REQ_FUA)
+#define blk_sorted_rq(rq)	((rq)->cmd_flags & REQ_SORTED)
+#define blk_barrier_rq(rq)	((rq)->cmd_flags & REQ_HARDBARRIER)
+#define blk_fua_rq(rq)		((rq)->cmd_flags & REQ_FUA)
 
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
-#define rq_data_dir(rq)		((rq)->flags & 1)
+#define rq_data_dir(rq)		((rq)->cmd_flags & 1)
 
 static inline int blk_queue_full(struct request_queue *q, int rw)
 {
@@ -541,7 +559,7 @@ static inline void blk_clear_queue_full(struct request_queue *q, int rw)
 #define RQ_NOMERGE_FLAGS	\
 	(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
 #define rq_mergeable(rq)	\
-	(!((rq)->flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
+	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
 
 /*
  * noop, requests are automagically marked as active/inactive by I/O
@@ -737,7 +755,7 @@ extern void blk_put_queue(request_queue_t *);
  */
 #define blk_queue_tag_depth(q)		((q)->queue_tags->busy)
 #define blk_queue_tag_queue(q)		((q)->queue_tags->busy < (q)->queue_tags->max_depth)
-#define blk_rq_tagged(rq)		((rq)->flags & REQ_QUEUED)
+#define blk_rq_tagged(rq)		((rq)->cmd_flags & REQ_QUEUED)
 extern int blk_queue_start_tag(request_queue_t *, struct request *);
 extern struct request *blk_queue_find_tag(request_queue_t *, int);
 extern void blk_queue_end_tag(request_queue_t *, struct request *);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 7520cc1ff9e2..ea48eb1b3fd3 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -148,7 +148,7 @@ static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq,
 				    u32 what)
 {
 	struct blk_trace *bt = q->blk_trace;
-	int rw = rq->flags & 0x03;
+	int rw = rq->cmd_flags & 0x03;
 
 	if (likely(!bt))
 		return;
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index d04d05adfa9b..bbf66219b769 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -100,7 +100,7 @@ static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg)
 	struct scsi_device *sdev = cmd->device;
 
         if (blk_rq_tagged(req)) {
-		if (sdev->ordered_tags && req->flags & REQ_HARDBARRIER)
+		if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER)
         	        *msg++ = MSG_ORDERED_TAG;
         	else
         	        *msg++ = MSG_SIMPLE_TAG;
-- 
cgit v1.2.3-71-gd317


From 9817064b68fef7e4580c6df1ea597e106b9ff88b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Fri, 28 Jul 2006 09:23:08 +0200
Subject: [PATCH] elevator: move the backmerging logic into the elevator core

Right now, every IO scheduler implements its own backmerging (except for
noop, which does no merging). That results in duplicated code for
essentially the same operation, which is never a good thing. This patch
moves the backmerging out of the io schedulers and into the elevator
core. We save 1.6kb of text and as a bonus get backmerging for noop as
well. Win-win!

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/as-iosched.c       | 139 +-------------------------------------------
 block/cfq-iosched.c      |  86 +--------------------------
 block/deadline-iosched.c | 128 +----------------------------------------
 block/elevator.c         | 147 ++++++++++++++++++++++++++++++++++++++++++-----
 block/ll_rw_blk.c        |   2 +
 include/linux/blkdev.h   |  17 +-----
 include/linux/elevator.h |   2 +
 7 files changed, 146 insertions(+), 375 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index ad1cc4077819..6db494333c3a 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -14,7 +14,6 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
-#include <linux/hash.h>
 #include <linux/rbtree.h>
 #include <linux/interrupt.h>
 
@@ -95,7 +94,6 @@ struct as_data {
 
 	struct as_rq *next_arq[2];	/* next in sort order */
 	sector_t last_sector[2];	/* last REQ_SYNC & REQ_ASYNC sectors */
-	struct hlist_head *hash;	/* request hash */
 
 	unsigned long exit_prob;	/* probability a task will exit while
 					   being waited on */
@@ -161,11 +159,6 @@ struct as_rq {
 
 	struct io_context *io_context;	/* The submitting task */
 
-	/*
-	 * request hash, key is the ending offset (for back merge lookup)
-	 */
-	struct hlist_node hash;
-
 	/*
 	 * expire fifo
 	 */
@@ -272,77 +265,6 @@ static void as_put_io_context(struct as_rq *arq)
 	put_io_context(arq->io_context);
 }
 
-/*
- * the back merge hash support functions
- */
-static const int as_hash_shift = 6;
-#define AS_HASH_BLOCK(sec)	((sec) >> 3)
-#define AS_HASH_FN(sec)		(hash_long(AS_HASH_BLOCK((sec)), as_hash_shift))
-#define AS_HASH_ENTRIES		(1 << as_hash_shift)
-#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
-
-static inline void __as_del_arq_hash(struct as_rq *arq)
-{
-	hlist_del_init(&arq->hash);
-}
-
-static inline void as_del_arq_hash(struct as_rq *arq)
-{
-	if (!hlist_unhashed(&arq->hash))
-		__as_del_arq_hash(arq);
-}
-
-static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
-{
-	struct request *rq = arq->request;
-
-	BUG_ON(!hlist_unhashed(&arq->hash));
-
-	hlist_add_head(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]);
-}
-
-/*
- * move hot entry to front of chain
- */
-static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq)
-{
-	struct request *rq = arq->request;
-	struct hlist_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))];
-
-	if (hlist_unhashed(&arq->hash)) {
-		WARN_ON(1);
-		return;
-	}
-
-	if (&arq->hash != head->first) {
-		hlist_del(&arq->hash);
-		hlist_add_head(&arq->hash, head);
-	}
-}
-
-static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
-{
-	struct hlist_head *hash_list = &ad->hash[AS_HASH_FN(offset)];
-	struct hlist_node *entry, *next;
-	struct as_rq *arq;
-
-	hlist_for_each_entry_safe(arq, entry, next, hash_list, hash) {
-		struct request *__rq = arq->request;
-
-		BUG_ON(hlist_unhashed(&arq->hash));
-
-		if (!rq_mergeable(__rq)) {
-			as_del_arq_hash(arq);
-			continue;
-		}
-
-		if (rq_hash_key(__rq) == offset)
-			return __rq;
-	}
-
-	return NULL;
-}
-
 /*
  * rb tree support functions
  */
@@ -1060,7 +982,6 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq)
 		ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
 
 	list_del_init(&arq->fifo);
-	as_del_arq_hash(arq);
 	as_del_arq_rb(ad, arq);
 }
 
@@ -1349,8 +1270,6 @@ static void as_add_request(request_queue_t *q, struct request *rq)
 	}
 
 	as_add_arq_rb(ad, arq);
-	if (rq_mergeable(arq->request))
-		as_add_arq_hash(ad, arq);
 
 	/*
 	 * set expire time (only used for reads) and add to fifo list
@@ -1428,42 +1347,17 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	struct as_data *ad = q->elevator->elevator_data;
 	sector_t rb_key = bio->bi_sector + bio_sectors(bio);
 	struct request *__rq;
-	int ret;
-
-	/*
-	 * see if the merge hash can satisfy a back merge
-	 */
-	__rq = as_find_arq_hash(ad, bio->bi_sector);
-	if (__rq) {
-		BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
-
-		if (elv_rq_merge_ok(__rq, bio)) {
-			ret = ELEVATOR_BACK_MERGE;
-			goto out;
-		}
-	}
 
 	/*
 	 * check for front merge
 	 */
 	__rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio));
-	if (__rq) {
-		BUG_ON(rb_key != rq_rb_key(__rq));
-
-		if (elv_rq_merge_ok(__rq, bio)) {
-			ret = ELEVATOR_FRONT_MERGE;
-			goto out;
-		}
+	if (__rq && elv_rq_merge_ok(__rq, bio)) {
+		*req = __rq;
+		return ELEVATOR_FRONT_MERGE;
 	}
 
 	return ELEVATOR_NO_MERGE;
-out:
-	if (ret) {
-		if (rq_mergeable(__rq))
-			as_hot_arq_hash(ad, RQ_DATA(__rq));
-	}
-	*req = __rq;
-	return ret;
 }
 
 static void as_merged_request(request_queue_t *q, struct request *req)
@@ -1471,12 +1365,6 @@ static void as_merged_request(request_queue_t *q, struct request *req)
 	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = RQ_DATA(req);
 
-	/*
-	 * hash always needs to be repositioned, key is end sector
-	 */
-	as_del_arq_hash(arq);
-	as_add_arq_hash(ad, arq);
-
 	/*
 	 * if the merge was a front merge, we need to reposition request
 	 */
@@ -1501,13 +1389,6 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
 	BUG_ON(!arq);
 	BUG_ON(!anext);
 
-	/*
-	 * reposition arq (this is the merged request) in hash, and in rbtree
-	 * in case of a front merge
-	 */
-	as_del_arq_hash(arq);
-	as_add_arq_hash(ad, arq);
-
 	if (rq_rb_key(req) != arq->rb_key) {
 		as_del_arq_rb(ad, arq);
 		as_add_arq_rb(ad, arq);
@@ -1591,7 +1472,6 @@ static int as_set_request(request_queue_t *q, struct request *rq,
 		arq->request = rq;
 		arq->state = AS_RQ_PRESCHED;
 		arq->io_context = NULL;
-		INIT_HLIST_NODE(&arq->hash);
 		INIT_LIST_HEAD(&arq->fifo);
 		rq->elevator_private = arq;
 		return 0;
@@ -1628,7 +1508,6 @@ static void as_exit_queue(elevator_t *e)
 
 	mempool_destroy(ad->arq_pool);
 	put_io_context(ad->io_context);
-	kfree(ad->hash);
 	kfree(ad);
 }
 
@@ -1639,7 +1518,6 @@ static void as_exit_queue(elevator_t *e)
 static void *as_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct as_data *ad;
-	int i;
 
 	if (!arq_pool)
 		return NULL;
@@ -1651,17 +1529,9 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e)
 
 	ad->q = q; /* Identify what queue the data belongs to */
 
-	ad->hash = kmalloc_node(sizeof(struct hlist_head)*AS_HASH_ENTRIES,
-				GFP_KERNEL, q->node);
-	if (!ad->hash) {
-		kfree(ad);
-		return NULL;
-	}
-
 	ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
 				mempool_free_slab, arq_pool, q->node);
 	if (!ad->arq_pool) {
-		kfree(ad->hash);
 		kfree(ad);
 		return NULL;
 	}
@@ -1672,9 +1542,6 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e)
 	init_timer(&ad->antic_timer);
 	INIT_WORK(&ad->antic_work, as_work_handler, q);
 
-	for (i = 0; i < AS_HASH_ENTRIES; i++)
-		INIT_HLIST_HEAD(&ad->hash[i]);
-
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
 	ad->sort_list[REQ_SYNC] = RB_ROOT;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3a3aee08ec5f..1b803c0c90f1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -41,16 +41,6 @@ static DEFINE_SPINLOCK(cfq_exit_lock);
 #define CFQ_QHASH_ENTRIES	(1 << CFQ_QHASH_SHIFT)
 #define list_entry_qhash(entry)	hlist_entry((entry), struct cfq_queue, cfq_hash)
 
-/*
- * for the hash of crq inside the cfqq
- */
-#define CFQ_MHASH_SHIFT		6
-#define CFQ_MHASH_BLOCK(sec)	((sec) >> 3)
-#define CFQ_MHASH_ENTRIES	(1 << CFQ_MHASH_SHIFT)
-#define CFQ_MHASH_FN(sec)	hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT)
-#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
-#define list_entry_hash(ptr)	hlist_entry((ptr), struct cfq_rq, hash)
-
 #define list_entry_cfqq(ptr)	list_entry((ptr), struct cfq_queue, cfq_list)
 #define list_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
 
@@ -112,11 +102,6 @@ struct cfq_data {
 	 */
 	struct hlist_head *cfq_hash;
 
-	/*
-	 * global crq hash for all queues
-	 */
-	struct hlist_head *crq_hash;
-
 	mempool_t *crq_pool;
 
 	int rq_in_driver;
@@ -203,7 +188,6 @@ struct cfq_rq {
 	struct rb_node rb_node;
 	sector_t rb_key;
 	struct request *request;
-	struct hlist_node hash;
 
 	struct cfq_queue *cfq_queue;
 	struct cfq_io_context *io_context;
@@ -271,42 +255,6 @@ static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsi
 static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
 static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask);
 
-/*
- * lots of deadline iosched dupes, can be abstracted later...
- */
-static inline void cfq_del_crq_hash(struct cfq_rq *crq)
-{
-	hlist_del_init(&crq->hash);
-}
-
-static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
-{
-	const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
-
-	hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
-}
-
-static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
-{
-	struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
-	struct hlist_node *entry, *next;
-
-	hlist_for_each_safe(entry, next, hash_list) {
-		struct cfq_rq *crq = list_entry_hash(entry);
-		struct request *__rq = crq->request;
-
-		if (!rq_mergeable(__rq)) {
-			cfq_del_crq_hash(crq);
-			continue;
-		}
-
-		if (rq_hash_key(__rq) == offset)
-			return __rq;
-	}
-
-	return NULL;
-}
-
 /*
  * scheduler run of queue, if there are requests pending and no one in the
  * driver that will restart queueing
@@ -677,7 +625,6 @@ static void cfq_remove_request(struct request *rq)
 
 	list_del_init(&rq->queuelist);
 	cfq_del_crq_rb(crq);
-	cfq_del_crq_hash(crq);
 }
 
 static int
@@ -685,34 +632,20 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct request *__rq;
-	int ret;
-
-	__rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
-	if (__rq && elv_rq_merge_ok(__rq, bio)) {
-		ret = ELEVATOR_BACK_MERGE;
-		goto out;
-	}
 
 	__rq = cfq_find_rq_fmerge(cfqd, bio);
 	if (__rq && elv_rq_merge_ok(__rq, bio)) {
-		ret = ELEVATOR_FRONT_MERGE;
-		goto out;
+		*req = __rq;
+		return ELEVATOR_FRONT_MERGE;
 	}
 
 	return ELEVATOR_NO_MERGE;
-out:
-	*req = __rq;
-	return ret;
 }
 
 static void cfq_merged_request(request_queue_t *q, struct request *req)
 {
-	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_rq *crq = RQ_DATA(req);
 
-	cfq_del_crq_hash(crq);
-	cfq_add_crq_hash(cfqd, crq);
-
 	if (rq_rb_key(req) != crq->rb_key) {
 		struct cfq_queue *cfqq = crq->cfq_queue;
 
@@ -1825,9 +1758,6 @@ static void cfq_insert_request(request_queue_t *q, struct request *rq)
 
 	list_add_tail(&rq->queuelist, &cfqq->fifo);
 
-	if (rq_mergeable(rq))
-		cfq_add_crq_hash(cfqd, crq);
-
 	cfq_crq_enqueued(cfqd, cfqq, crq);
 }
 
@@ -2055,7 +1985,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 		RB_CLEAR_NODE(&crq->rb_node);
 		crq->rb_key = 0;
 		crq->request = rq;
-		INIT_HLIST_NODE(&crq->hash);
 		crq->cfq_queue = cfqq;
 		crq->io_context = cic;
 
@@ -2221,7 +2150,6 @@ static void cfq_exit_queue(elevator_t *e)
 	cfq_shutdown_timer_wq(cfqd);
 
 	mempool_destroy(cfqd->crq_pool);
-	kfree(cfqd->crq_hash);
 	kfree(cfqd->cfq_hash);
 	kfree(cfqd);
 }
@@ -2246,20 +2174,14 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 	INIT_LIST_HEAD(&cfqd->empty_list);
 	INIT_LIST_HEAD(&cfqd->cic_list);
 
-	cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
-	if (!cfqd->crq_hash)
-		goto out_crqhash;
-
 	cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
 	if (!cfqd->cfq_hash)
-		goto out_cfqhash;
+		goto out_crqhash;
 
 	cfqd->crq_pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, crq_pool);
 	if (!cfqd->crq_pool)
 		goto out_crqpool;
 
-	for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
-		INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
 	for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
 		INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
 
@@ -2289,8 +2211,6 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 	return cfqd;
 out_crqpool:
 	kfree(cfqd->cfq_hash);
-out_cfqhash:
-	kfree(cfqd->crq_hash);
 out_crqhash:
 	kfree(cfqd);
 	return NULL;
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index c7ca9f0b6498..b66e820f544d 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -12,7 +12,6 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
-#include <linux/hash.h>
 #include <linux/rbtree.h>
 
 /*
@@ -24,13 +23,6 @@ static const int writes_starved = 2;    /* max times reads can starve a write */
 static const int fifo_batch = 16;       /* # of sequential requests treated as one
 				     by the above parameters. For throughput. */
 
-static const int deadline_hash_shift = 5;
-#define DL_HASH_BLOCK(sec)	((sec) >> 3)
-#define DL_HASH_FN(sec)		(hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift))
-#define DL_HASH_ENTRIES		(1 << deadline_hash_shift)
-#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
-#define ON_HASH(drq)		(!hlist_unhashed(&(drq)->hash))
-
 struct deadline_data {
 	/*
 	 * run time data
@@ -46,7 +38,6 @@ struct deadline_data {
 	 * next in sort order. read, write or both are NULL
 	 */
 	struct deadline_rq *next_drq[2];
-	struct hlist_head *hash;	/* request hash */
 	unsigned int batching;		/* number of sequential requests made */
 	sector_t last_sector;		/* head position */
 	unsigned int starved;		/* times reads have starved writes */
@@ -74,11 +65,6 @@ struct deadline_rq {
 
 	struct request *request;
 
-	/*
-	 * request hash, key is the ending offset (for back merge lookup)
-	 */
-	struct hlist_node hash;
-
 	/*
 	 * expire fifo
 	 */
@@ -92,69 +78,6 @@ static kmem_cache_t *drq_pool;
 
 #define RQ_DATA(rq)	((struct deadline_rq *) (rq)->elevator_private)
 
-/*
- * the back merge hash support functions
- */
-static inline void __deadline_del_drq_hash(struct deadline_rq *drq)
-{
-	hlist_del_init(&drq->hash);
-}
-
-static inline void deadline_del_drq_hash(struct deadline_rq *drq)
-{
-	if (ON_HASH(drq))
-		__deadline_del_drq_hash(drq);
-}
-
-static inline void
-deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
-{
-	struct request *rq = drq->request;
-
-	BUG_ON(ON_HASH(drq));
-
-	hlist_add_head(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]);
-}
-
-/*
- * move hot entry to front of chain
- */
-static inline void
-deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
-{
-	struct request *rq = drq->request;
-	struct hlist_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))];
-
-	if (ON_HASH(drq) && &drq->hash != head->first) {
-		hlist_del(&drq->hash);
-		hlist_add_head(&drq->hash, head);
-	}
-}
-
-static struct request *
-deadline_find_drq_hash(struct deadline_data *dd, sector_t offset)
-{
-	struct hlist_head *hash_list = &dd->hash[DL_HASH_FN(offset)];
-	struct hlist_node *entry, *next;
-	struct deadline_rq *drq;
-
-	hlist_for_each_entry_safe(drq, entry, next, hash_list, hash) {
-		struct request *__rq = drq->request;
-
-		BUG_ON(!ON_HASH(drq));
-
-		if (!rq_mergeable(__rq)) {
-			__deadline_del_drq_hash(drq);
-			continue;
-		}
-
-		if (rq_hash_key(__rq) == offset)
-			return __rq;
-	}
-
-	return NULL;
-}
-
 /*
  * rb tree support functions
  */
@@ -267,22 +190,19 @@ deadline_add_request(struct request_queue *q, struct request *rq)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct deadline_rq *drq = RQ_DATA(rq);
-
 	const int data_dir = rq_data_dir(drq->request);
 
 	deadline_add_drq_rb(dd, drq);
+
 	/*
 	 * set expire time (only used for reads) and add to fifo list
 	 */
 	drq->expires = jiffies + dd->fifo_expire[data_dir];
 	list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]);
-
-	if (rq_mergeable(rq))
-		deadline_add_drq_hash(dd, drq);
 }
 
 /*
- * remove rq from rbtree, fifo, and hash
+ * remove rq from rbtree and fifo.
  */
 static void deadline_remove_request(request_queue_t *q, struct request *rq)
 {
@@ -291,7 +211,6 @@ static void deadline_remove_request(request_queue_t *q, struct request *rq)
 
 	list_del_init(&drq->fifo);
 	deadline_del_drq_rb(dd, drq);
-	deadline_del_drq_hash(drq);
 }
 
 static int
@@ -301,19 +220,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	struct request *__rq;
 	int ret;
 
-	/*
-	 * see if the merge hash can satisfy a back merge
-	 */
-	__rq = deadline_find_drq_hash(dd, bio->bi_sector);
-	if (__rq) {
-		BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
-
-		if (elv_rq_merge_ok(__rq, bio)) {
-			ret = ELEVATOR_BACK_MERGE;
-			goto out;
-		}
-	}
-
 	/*
 	 * check for front merge
 	 */
@@ -333,8 +239,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
 	return ELEVATOR_NO_MERGE;
 out:
-	if (ret)
-		deadline_hot_drq_hash(dd, RQ_DATA(__rq));
 	*req = __rq;
 	return ret;
 }
@@ -344,12 +248,6 @@ static void deadline_merged_request(request_queue_t *q, struct request *req)
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct deadline_rq *drq = RQ_DATA(req);
 
-	/*
-	 * hash always needs to be repositioned, key is end sector
-	 */
-	deadline_del_drq_hash(drq);
-	deadline_add_drq_hash(dd, drq);
-
 	/*
 	 * if the merge was a front merge, we need to reposition request
 	 */
@@ -370,13 +268,6 @@ deadline_merged_requests(request_queue_t *q, struct request *req,
 	BUG_ON(!drq);
 	BUG_ON(!dnext);
 
-	/*
-	 * reposition drq (this is the merged request) in hash, and in rbtree
-	 * in case of a front merge
-	 */
-	deadline_del_drq_hash(drq);
-	deadline_add_drq_hash(dd, drq);
-
 	if (rq_rb_key(req) != drq->rb_key) {
 		deadline_del_drq_rb(dd, drq);
 		deadline_add_drq_rb(dd, drq);
@@ -594,7 +485,6 @@ static void deadline_exit_queue(elevator_t *e)
 	BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
 
 	mempool_destroy(dd->drq_pool);
-	kfree(dd->hash);
 	kfree(dd);
 }
 
@@ -605,7 +495,6 @@ static void deadline_exit_queue(elevator_t *e)
 static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct deadline_data *dd;
-	int i;
 
 	if (!drq_pool)
 		return NULL;
@@ -615,24 +504,13 @@ static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
 		return NULL;
 	memset(dd, 0, sizeof(*dd));
 
-	dd->hash = kmalloc_node(sizeof(struct hlist_head)*DL_HASH_ENTRIES,
-				GFP_KERNEL, q->node);
-	if (!dd->hash) {
-		kfree(dd);
-		return NULL;
-	}
-
 	dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
 					mempool_free_slab, drq_pool, q->node);
 	if (!dd->drq_pool) {
-		kfree(dd->hash);
 		kfree(dd);
 		return NULL;
 	}
 
-	for (i = 0; i < DL_HASH_ENTRIES; i++)
-		INIT_HLIST_HEAD(&dd->hash[i]);
-
 	INIT_LIST_HEAD(&dd->fifo_list[READ]);
 	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
 	dd->sort_list[READ] = RB_ROOT;
@@ -667,8 +545,6 @@ deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 		RB_CLEAR_NODE(&drq->rb_node);
 		drq->request = rq;
 
-		INIT_HLIST_NODE(&drq->hash);
-
 		INIT_LIST_HEAD(&drq->fifo);
 
 		rq->elevator_private = drq;
diff --git a/block/elevator.c b/block/elevator.c
index 4ac97b642042..cff1102dac9d 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -33,12 +33,23 @@
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
+#include <linux/hash.h>
 
 #include <asm/uaccess.h>
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
 
+/*
+ * Merge hash stuff.
+ */
+static const int elv_hash_shift = 6;
+#define ELV_HASH_BLOCK(sec)	((sec) >> 3)
+#define ELV_HASH_FN(sec)	(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
+#define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
+#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
+#define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
+
 /*
  * can we safely merge with this request?
  */
@@ -153,25 +164,41 @@ static struct kobj_type elv_ktype;
 
 static elevator_t *elevator_alloc(struct elevator_type *e)
 {
-	elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
-	if (eq) {
-		memset(eq, 0, sizeof(*eq));
-		eq->ops = &e->ops;
-		eq->elevator_type = e;
-		kobject_init(&eq->kobj);
-		snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
-		eq->kobj.ktype = &elv_ktype;
-		mutex_init(&eq->sysfs_lock);
-	} else {
-		elevator_put(e);
-	}
+	elevator_t *eq;
+	int i;
+
+	eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
+	if (unlikely(!eq))
+		goto err;
+
+	memset(eq, 0, sizeof(*eq));
+	eq->ops = &e->ops;
+	eq->elevator_type = e;
+	kobject_init(&eq->kobj);
+	snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
+	eq->kobj.ktype = &elv_ktype;
+	mutex_init(&eq->sysfs_lock);
+
+	eq->hash = kmalloc(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, GFP_KERNEL);
+	if (!eq->hash)
+		goto err;
+
+	for (i = 0; i < ELV_HASH_ENTRIES; i++)
+		INIT_HLIST_HEAD(&eq->hash[i]);
+
 	return eq;
+err:
+	kfree(eq);
+	elevator_put(e);
+	return NULL;
 }
 
 static void elevator_release(struct kobject *kobj)
 {
 	elevator_t *e = container_of(kobj, elevator_t, kobj);
+
 	elevator_put(e->elevator_type);
+	kfree(e->hash);
 	kfree(e);
 }
 
@@ -223,6 +250,53 @@ void elevator_exit(elevator_t *e)
 	kobject_put(&e->kobj);
 }
 
+static inline void __elv_rqhash_del(struct request *rq)
+{
+	hlist_del_init(&rq->hash);
+}
+
+static void elv_rqhash_del(request_queue_t *q, struct request *rq)
+{
+	if (ELV_ON_HASH(rq))
+		__elv_rqhash_del(rq);
+}
+
+static void elv_rqhash_add(request_queue_t *q, struct request *rq)
+{
+	elevator_t *e = q->elevator;
+
+	BUG_ON(ELV_ON_HASH(rq));
+	hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
+}
+
+static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
+{
+	__elv_rqhash_del(rq);
+	elv_rqhash_add(q, rq);
+}
+
+static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
+{
+	elevator_t *e = q->elevator;
+	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
+	struct hlist_node *entry, *next;
+	struct request *rq;
+
+	hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
+		BUG_ON(!ELV_ON_HASH(rq));
+
+		if (unlikely(!rq_mergeable(rq))) {
+			__elv_rqhash_del(rq);
+			continue;
+		}
+
+		if (rq_hash_key(rq) == offset)
+			return rq;
+	}
+
+	return NULL;
+}
+
 /*
  * Insert rq into dispatch queue of q.  Queue lock must be held on
  * entry.  If sort != 0, rq is sort-inserted; otherwise, rq will be
@@ -235,6 +309,9 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 
 	if (q->last_merge == rq)
 		q->last_merge = NULL;
+
+	elv_rqhash_del(q, rq);
+
 	q->nr_sorted--;
 
 	boundary = q->end_sector;
@@ -258,11 +335,32 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 	list_add(&rq->queuelist, entry);
 }
 
+/*
+ * This should be in elevator.h, but that requires pulling in rq and q
+ */
+void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
+{
+	if (q->last_merge == rq)
+		q->last_merge = NULL;
+
+	elv_rqhash_del(q, rq);
+
+	q->nr_sorted--;
+
+	q->end_sector = rq_end_sector(rq);
+	q->boundary_rq = rq;
+	list_add_tail(&rq->queuelist, &q->queue_head);
+}
+
 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
 	elevator_t *e = q->elevator;
+	struct request *__rq;
 	int ret;
 
+	/*
+	 * First try one-hit cache.
+	 */
 	if (q->last_merge) {
 		ret = elv_try_merge(q->last_merge, bio);
 		if (ret != ELEVATOR_NO_MERGE) {
@@ -271,6 +369,15 @@ int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 		}
 	}
 
+	/*
+	 * See if our hash lookup can find a potential backmerge.
+	 */
+	__rq = elv_rqhash_find(q, bio->bi_sector);
+	if (__rq && elv_rq_merge_ok(__rq, bio)) {
+		*req = __rq;
+		return ELEVATOR_BACK_MERGE;
+	}
+
 	if (e->ops->elevator_merge_fn)
 		return e->ops->elevator_merge_fn(q, req, bio);
 
@@ -284,6 +391,8 @@ void elv_merged_request(request_queue_t *q, struct request *rq)
 	if (e->ops->elevator_merged_fn)
 		e->ops->elevator_merged_fn(q, rq);
 
+	elv_rqhash_reposition(q, rq);
+
 	q->last_merge = rq;
 }
 
@@ -294,8 +403,11 @@ void elv_merge_requests(request_queue_t *q, struct request *rq,
 
 	if (e->ops->elevator_merge_req_fn)
 		e->ops->elevator_merge_req_fn(q, rq, next);
-	q->nr_sorted--;
 
+	elv_rqhash_reposition(q, rq);
+	elv_rqhash_del(q, next);
+
+	q->nr_sorted--;
 	q->last_merge = rq;
 }
 
@@ -371,8 +483,12 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 		BUG_ON(!blk_fs_request(rq));
 		rq->cmd_flags |= REQ_SORTED;
 		q->nr_sorted++;
-		if (q->last_merge == NULL && rq_mergeable(rq))
-			q->last_merge = rq;
+		if (rq_mergeable(rq)) {
+			elv_rqhash_add(q, rq);
+			if (!q->last_merge)
+				q->last_merge = rq;
+		}
+
 		/*
 		 * Some ioscheds (cfq) run q->request_fn directly, so
 		 * rq cannot be accessed after calling
@@ -557,6 +673,7 @@ struct request *elv_next_request(request_queue_t *q)
 void elv_dequeue_request(request_queue_t *q, struct request *rq)
 {
 	BUG_ON(list_empty(&rq->queuelist));
+	BUG_ON(ELV_ON_HASH(rq));
 
 	list_del_init(&rq->queuelist);
 
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 9b91bb70c5ed..9cbf7b550c78 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -281,6 +281,7 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 {
 	INIT_LIST_HEAD(&rq->queuelist);
 	INIT_LIST_HEAD(&rq->donelist);
+	INIT_HLIST_NODE(&rq->hash);
 
 	rq->errors = 0;
 	rq->rq_status = RQ_ACTIVE;
@@ -2700,6 +2701,7 @@ void __blk_put_request(request_queue_t *q, struct request *req)
 		int priv = req->cmd_flags & REQ_ELVPRIV;
 
 		BUG_ON(!list_empty(&req->queuelist));
+		BUG_ON(!hlist_unhashed(&req->hash));
 
 		blk_free_request(q, req);
 		freed_request(q, rw, priv);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b2a412cf468f..8f5486964671 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -229,6 +229,8 @@ struct request {
 	struct bio *bio;
 	struct bio *biotail;
 
+	struct hlist_node hash;	/* merge hash */
+
 	void *elevator_private;
 	void *completion_data;
 
@@ -696,21 +698,6 @@ static inline void blkdev_dequeue_request(struct request *req)
 	elv_dequeue_request(req->q, req);
 }
 
-/*
- * This should be in elevator.h, but that requires pulling in rq and q
- */
-static inline void elv_dispatch_add_tail(struct request_queue *q,
-					 struct request *rq)
-{
-	if (q->last_merge == rq)
-		q->last_merge = NULL;
-	q->nr_sorted--;
-
-	q->end_sector = rq_end_sector(rq);
-	q->boundary_rq = rq;
-	list_add_tail(&rq->queuelist, &q->queue_head);
-}
-
 /*
  * Access functions for manipulating queue properties
  */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 1713ace808bf..2c270e90b33e 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -82,12 +82,14 @@ struct elevator_queue
 	struct kobject kobj;
 	struct elevator_type *elevator_type;
 	struct mutex sysfs_lock;
+	struct hlist_head *hash;
 };
 
 /*
  * block elevator interface
  */
 extern void elv_dispatch_sort(request_queue_t *, struct request *);
+extern void elv_dispatch_add_tail(request_queue_t *, struct request *);
 extern void elv_add_request(request_queue_t *, struct request *, int, int);
 extern void __elv_add_request(request_queue_t *, struct request *, int, int);
 extern void elv_insert(request_queue_t *, struct request *, int);
-- 
cgit v1.2.3-71-gd317


From 2e662b65f05d550b6799ed6bfa9963b82279e6b7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 13 Jul 2006 11:55:04 +0200
Subject: [PATCH] elevator: abstract out the rbtree sort handling

The rbtree sort/lookup/reposition logic is mostly duplicated in
cfq/deadline/as, so move it to the elevator core. The io schedulers
still provide the actual rb root, as we don't want to impose any sort
of specific handling on the schedulers.

Introduce the helpers and rb_node in struct request to help migrate the
IO schedulers.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/elevator.c         | 123 +++++++++++++++++++++++++++++++++++++++++------
 block/ll_rw_blk.c        |   7 +--
 include/linux/blkdev.h   |   1 +
 include/linux/elevator.h |  18 ++++++-
 4 files changed, 130 insertions(+), 19 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/elevator.c b/block/elevator.c
index cff1102dac9d..cbbc36ba016a 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -239,6 +239,8 @@ int elevator_init(request_queue_t *q, char *name)
 	return ret;
 }
 
+EXPORT_SYMBOL(elevator_init);
+
 void elevator_exit(elevator_t *e)
 {
 	mutex_lock(&e->sysfs_lock);
@@ -250,6 +252,8 @@ void elevator_exit(elevator_t *e)
 	kobject_put(&e->kobj);
 }
 
+EXPORT_SYMBOL(elevator_exit);
+
 static inline void __elv_rqhash_del(struct request *rq)
 {
 	hlist_del_init(&rq->hash);
@@ -297,10 +301,69 @@ static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
 	return NULL;
 }
 
+/*
+ * RB-tree support functions for inserting/lookup/removal of requests
+ * in a sorted RB tree.
+ */
+struct request *elv_rb_add(struct rb_root *root, struct request *rq)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct request *__rq;
+
+	while (*p) {
+		parent = *p;
+		__rq = rb_entry(parent, struct request, rb_node);
+
+		if (rq->sector < __rq->sector)
+			p = &(*p)->rb_left;
+		else if (rq->sector > __rq->sector)
+			p = &(*p)->rb_right;
+		else
+			return __rq;
+	}
+
+	rb_link_node(&rq->rb_node, parent, p);
+	rb_insert_color(&rq->rb_node, root);
+	return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_add);
+
+void elv_rb_del(struct rb_root *root, struct request *rq)
+{
+	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
+	rb_erase(&rq->rb_node, root);
+	RB_CLEAR_NODE(&rq->rb_node);
+}
+
+EXPORT_SYMBOL(elv_rb_del);
+
+struct request *elv_rb_find(struct rb_root *root, sector_t sector)
+{
+	struct rb_node *n = root->rb_node;
+	struct request *rq;
+
+	while (n) {
+		rq = rb_entry(n, struct request, rb_node);
+
+		if (sector < rq->sector)
+			n = n->rb_left;
+		else if (sector > rq->sector)
+			n = n->rb_right;
+		else
+			return rq;
+	}
+
+	return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_find);
+
 /*
  * Insert rq into dispatch queue of q.  Queue lock must be held on
- * entry.  If sort != 0, rq is sort-inserted; otherwise, rq will be
- * appended to the dispatch queue.  To be used by specific elevators.
+ * entry.  rq is sort insted into the dispatch queue. To be used by
+ * specific elevators.
  */
 void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 {
@@ -335,8 +398,12 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 	list_add(&rq->queuelist, entry);
 }
 
+EXPORT_SYMBOL(elv_dispatch_sort);
+
 /*
- * This should be in elevator.h, but that requires pulling in rq and q
+ * Insert rq into dispatch queue of q.  Queue lock must be held on
+ * entry.  rq is added to the back of the dispatch queue. To be used by
+ * specific elevators.
  */
 void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
 {
@@ -352,6 +419,8 @@ void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
 	list_add_tail(&rq->queuelist, &q->queue_head);
 }
 
+EXPORT_SYMBOL(elv_dispatch_add_tail);
+
 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
 	elevator_t *e = q->elevator;
@@ -384,14 +453,15 @@ int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	return ELEVATOR_NO_MERGE;
 }
 
-void elv_merged_request(request_queue_t *q, struct request *rq)
+void elv_merged_request(request_queue_t *q, struct request *rq, int type)
 {
 	elevator_t *e = q->elevator;
 
 	if (e->ops->elevator_merged_fn)
-		e->ops->elevator_merged_fn(q, rq);
+		e->ops->elevator_merged_fn(q, rq, type);
 
-	elv_rqhash_reposition(q, rq);
+	if (type == ELEVATOR_BACK_MERGE)
+		elv_rqhash_reposition(q, rq);
 
 	q->last_merge = rq;
 }
@@ -577,6 +647,8 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 	elv_insert(q, rq, where);
 }
 
+EXPORT_SYMBOL(__elv_add_request);
+
 void elv_add_request(request_queue_t *q, struct request *rq, int where,
 		     int plug)
 {
@@ -587,6 +659,8 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where,
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
+EXPORT_SYMBOL(elv_add_request);
+
 static inline struct request *__elv_next_request(request_queue_t *q)
 {
 	struct request *rq;
@@ -670,6 +744,8 @@ struct request *elv_next_request(request_queue_t *q)
 	return rq;
 }
 
+EXPORT_SYMBOL(elv_next_request);
+
 void elv_dequeue_request(request_queue_t *q, struct request *rq)
 {
 	BUG_ON(list_empty(&rq->queuelist));
@@ -686,6 +762,8 @@ void elv_dequeue_request(request_queue_t *q, struct request *rq)
 		q->in_flight++;
 }
 
+EXPORT_SYMBOL(elv_dequeue_request);
+
 int elv_queue_empty(request_queue_t *q)
 {
 	elevator_t *e = q->elevator;
@@ -699,6 +777,8 @@ int elv_queue_empty(request_queue_t *q)
 	return 1;
 }
 
+EXPORT_SYMBOL(elv_queue_empty);
+
 struct request *elv_latter_request(request_queue_t *q, struct request *rq)
 {
 	elevator_t *e = q->elevator;
@@ -1025,11 +1105,26 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name)
 	return len;
 }
 
-EXPORT_SYMBOL(elv_dispatch_sort);
-EXPORT_SYMBOL(elv_add_request);
-EXPORT_SYMBOL(__elv_add_request);
-EXPORT_SYMBOL(elv_next_request);
-EXPORT_SYMBOL(elv_dequeue_request);
-EXPORT_SYMBOL(elv_queue_empty);
-EXPORT_SYMBOL(elevator_exit);
-EXPORT_SYMBOL(elevator_init);
+struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
+{
+	struct rb_node *rbprev = rb_prev(&rq->rb_node);
+
+	if (rbprev)
+		return rb_entry_rq(rbprev);
+
+	return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_former_request);
+
+struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
+{
+	struct rb_node *rbnext = rb_next(&rq->rb_node);
+
+	if (rbnext)
+		return rb_entry_rq(rbnext);
+
+	return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_latter_request);
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 9cbf7b550c78..d388486e98bb 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -281,11 +281,12 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 {
 	INIT_LIST_HEAD(&rq->queuelist);
 	INIT_LIST_HEAD(&rq->donelist);
-	INIT_HLIST_NODE(&rq->hash);
 
 	rq->errors = 0;
 	rq->rq_status = RQ_ACTIVE;
 	rq->bio = rq->biotail = NULL;
+	INIT_HLIST_NODE(&rq->hash);
+	RB_CLEAR_NODE(&rq->rb_node);
 	rq->ioprio = 0;
 	rq->buffer = NULL;
 	rq->ref_count = 1;
@@ -2943,7 +2944,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 			req->ioprio = ioprio_best(req->ioprio, prio);
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_back_merge(q, req))
-				elv_merged_request(q, req);
+				elv_merged_request(q, req, el_ret);
 			goto out;
 
 		case ELEVATOR_FRONT_MERGE:
@@ -2970,7 +2971,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 			req->ioprio = ioprio_best(req->ioprio, prio);
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_front_merge(q, req))
-				elv_merged_request(q, req);
+				elv_merged_request(q, req, el_ret);
 			goto out;
 
 		/* ELV_NO_MERGE: elevator says don't/can't merge. */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8f5486964671..a905c4934a55 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -230,6 +230,7 @@ struct request {
 	struct bio *biotail;
 
 	struct hlist_node hash;	/* merge hash */
+	struct rb_node rb_node;	/* sort/lookup */
 
 	void *elevator_private;
 	void *completion_data;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 2c270e90b33e..95b2a04b969c 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -6,7 +6,7 @@ typedef int (elevator_merge_fn) (request_queue_t *, struct request **,
 
 typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struct request *);
 
-typedef void (elevator_merged_fn) (request_queue_t *, struct request *);
+typedef void (elevator_merged_fn) (request_queue_t *, struct request *, int);
 
 typedef int (elevator_dispatch_fn) (request_queue_t *, int);
 
@@ -96,7 +96,7 @@ extern void elv_insert(request_queue_t *, struct request *, int);
 extern int elv_merge(request_queue_t *, struct request **, struct bio *);
 extern void elv_merge_requests(request_queue_t *, struct request *,
 			       struct request *);
-extern void elv_merged_request(request_queue_t *, struct request *);
+extern void elv_merged_request(request_queue_t *, struct request *, int);
 extern void elv_dequeue_request(request_queue_t *, struct request *);
 extern void elv_requeue_request(request_queue_t *, struct request *);
 extern int elv_queue_empty(request_queue_t *);
@@ -126,6 +126,19 @@ extern int elevator_init(request_queue_t *, char *);
 extern void elevator_exit(elevator_t *);
 extern int elv_rq_merge_ok(struct request *, struct bio *);
 
+/*
+ * Helper functions.
+ */
+extern struct request *elv_rb_former_request(request_queue_t *, struct request *);
+extern struct request *elv_rb_latter_request(request_queue_t *, struct request *);
+
+/*
+ * rb support functions.
+ */
+extern struct request *elv_rb_add(struct rb_root *, struct request *);
+extern void elv_rb_del(struct rb_root *, struct request *);
+extern struct request *elv_rb_find(struct rb_root *, sector_t);
+
 /*
  * Return values from elevator merger
  */
@@ -151,5 +164,6 @@ enum {
 };
 
 #define rq_end_sector(rq)	((rq)->sector + (rq)->nr_sectors)
+#define rb_entry_rq(node)	rb_entry((node), struct request, rb_node)
 
 #endif
-- 
cgit v1.2.3-71-gd317


From 9e2585a8a23f3a42f815b2a638725d85a921cd65 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Fri, 28 Jul 2006 09:26:13 +0200
Subject: [PATCH] as-iosched: remove arq->is_sync member

We can track this in struct request.

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
 block/as-iosched.c     | 36 ++++++++++++++----------------------
 include/linux/blkdev.h |  5 +++++
 2 files changed, 19 insertions(+), 22 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index c2665467950e..dca0b0563ca0 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -151,7 +151,6 @@ struct as_rq {
 
 	struct io_context *io_context;	/* The submitting task */
 
-	unsigned int is_sync;
 	enum arq_state state;
 };
 
@@ -241,7 +240,7 @@ static void as_put_io_context(struct as_rq *arq)
 
 	aic = arq->io_context->aic;
 
-	if (arq->is_sync == REQ_SYNC && aic) {
+	if (rq_is_sync(arq->request) && aic) {
 		spin_lock(&aic->lock);
 		set_bit(AS_TASK_IORUNNING, &aic->state);
 		aic->last_end_request = jiffies;
@@ -254,14 +253,13 @@ static void as_put_io_context(struct as_rq *arq)
 /*
  * rb tree support functions
  */
-#define ARQ_RB_ROOT(ad, arq)	(&(ad)->sort_list[(arq)->is_sync])
+#define RQ_RB_ROOT(ad, rq)	(&(ad)->sort_list[rq_is_sync((rq))])
 
 static void as_add_arq_rb(struct as_data *ad, struct request *rq)
 {
-	struct as_rq *arq = RQ_DATA(rq);
 	struct request *alias;
 
-	while ((unlikely(alias = elv_rb_add(ARQ_RB_ROOT(ad, arq), rq)))) {
+	while ((unlikely(alias = elv_rb_add(RQ_RB_ROOT(ad, rq), rq)))) {
 		as_move_to_dispatch(ad, RQ_DATA(alias));
 		as_antic_stop(ad);
 	}
@@ -269,7 +267,7 @@ static void as_add_arq_rb(struct as_data *ad, struct request *rq)
 
 static inline void as_del_arq_rb(struct as_data *ad, struct request *rq)
 {
-	elv_rb_del(ARQ_RB_ROOT(ad, RQ_DATA(rq)), rq);
+	elv_rb_del(RQ_RB_ROOT(ad, rq), rq);
 }
 
 /*
@@ -300,13 +298,13 @@ as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2)
 	if (arq2 == NULL)
 		return arq1;
 
-	data_dir = arq1->is_sync;
+	data_dir = rq_is_sync(arq1->request);
 
 	last = ad->last_sector[data_dir];
 	s1 = arq1->request->sector;
 	s2 = arq2->request->sector;
 
-	BUG_ON(data_dir != arq2->is_sync);
+	BUG_ON(data_dir != rq_is_sync(arq2->request));
 
 	/*
 	 * Strict one way elevator _except_ in the case where we allow
@@ -377,7 +375,7 @@ static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *arq)
 	if (rbnext)
 		next = RQ_DATA(rb_entry_rq(rbnext));
 	else {
-		const int data_dir = arq->is_sync;
+		const int data_dir = rq_is_sync(last);
 
 		rbnext = rb_first(&ad->sort_list[data_dir]);
 		if (rbnext && rbnext != &last->rb_node)
@@ -538,8 +536,7 @@ static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic,
 static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
 				struct request *rq)
 {
-	struct as_rq *arq = RQ_DATA(rq);
-	int data_dir = arq->is_sync;
+	int data_dir = rq_is_sync(rq);
 	unsigned long thinktime = 0;
 	sector_t seek_dist;
 
@@ -674,7 +671,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
 		return 1;
 	}
 
-	if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) {
+	if (arq && rq_is_sync(arq->request) && as_close_req(ad, aic, arq)) {
 		/*
 		 * Found a close request that is not one of ours.
 		 *
@@ -758,7 +755,7 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
  */
 static void as_update_arq(struct as_data *ad, struct as_rq *arq)
 {
-	const int data_dir = arq->is_sync;
+	const int data_dir = rq_is_sync(arq->request);
 
 	/* keep the next_arq cache up to date */
 	ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]);
@@ -835,7 +832,7 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
 	 * actually serviced. This should help devices with big TCQ windows
 	 * and writeback caches
 	 */
-	if (ad->new_batch && ad->batch_data_dir == arq->is_sync) {
+	if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) {
 		update_write_batch(ad);
 		ad->current_batch_expires = jiffies +
 				ad->batch_expire[REQ_SYNC];
@@ -868,7 +865,7 @@ out:
 static void as_remove_queued_request(request_queue_t *q, struct request *rq)
 {
 	struct as_rq *arq = RQ_DATA(rq);
-	const int data_dir = arq->is_sync;
+	const int data_dir = rq_is_sync(rq);
 	struct as_data *ad = q->elevator->elevator_data;
 
 	WARN_ON(arq->state != AS_RQ_QUEUED);
@@ -941,7 +938,7 @@ static inline int as_batch_expired(struct as_data *ad)
 static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 {
 	struct request *rq = arq->request;
-	const int data_dir = arq->is_sync;
+	const int data_dir = rq_is_sync(rq);
 
 	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
 
@@ -1158,12 +1155,7 @@ static void as_add_request(request_queue_t *q, struct request *rq)
 
 	arq->state = AS_RQ_NEW;
 
-	if (rq_data_dir(arq->request) == READ
-			|| (arq->request->cmd_flags & REQ_RW_SYNC))
-		arq->is_sync = 1;
-	else
-		arq->is_sync = 0;
-	data_dir = arq->is_sync;
+	data_dir = rq_is_sync(rq);
 
 	arq->io_context = as_get_io_context();
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a905c4934a55..55ef6efe3eb5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -531,6 +531,11 @@ enum {
 
 #define rq_data_dir(rq)		((rq)->cmd_flags & 1)
 
+/*
+ * We regard a request as sync, if it's a READ or a SYNC write.
+ */
+#define rq_is_sync(rq)		(rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC)
+
 static inline int blk_queue_full(struct request_queue *q, int rw)
 {
 	if (rw == READ)
-- 
cgit v1.2.3-71-gd317


From ff7d145fd911266ae42af7552edc32681c01addb Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Wed, 12 Jul 2006 14:04:37 +0200
Subject: [PATCH] Add one more pointer to struct request for IO scheduler usage

Then we have enough room in the request to get rid of the dynamic
allocations in CFQ/AS.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 include/linux/blkdev.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux/blkdev.h')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 55ef6efe3eb5..d2dc17151f6c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -232,7 +232,13 @@ struct request {
 	struct hlist_node hash;	/* merge hash */
 	struct rb_node rb_node;	/* sort/lookup */
 
+	/*
+	 * two pointers are available for the IO schedulers, if they need
+	 * more they have to dynamically allocate it.
+	 */
 	void *elevator_private;
+	void *elevator_private2;
+
 	void *completion_data;
 
 	int rq_status;	/* should split this into a few status bits */
-- 
cgit v1.2.3-71-gd317


From c00895ab2f08df7044e58ee01c38bf0a661ea0eb Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 30 Sep 2006 20:29:12 +0200
Subject: [PATCH] Remove ->waiting member from struct request

As the comments indicates in blkdev.h, we can fold it into ->end_io_data
usage as that is really what ->waiting is. Fixup the users of
blk_end_sync_rq().

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/elevator.c          |  3 +--
 block/ll_rw_blk.c         | 13 ++++++-------
 drivers/block/DAC960.c    |  2 +-
 drivers/block/paride/pd.c |  3 +--
 drivers/block/pktcdvd.c   |  2 +-
 drivers/ide/ide-io.c      | 13 ++++++-------
 drivers/ide/ide-tape.c    |  2 +-
 drivers/ide/ide.c         |  4 ++--
 include/linux/blkdev.h    |  3 +--
 9 files changed, 20 insertions(+), 25 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/elevator.c b/block/elevator.c
index cbbc36ba016a..924b81b08f86 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -67,8 +67,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
 	/*
 	 * same device and no special stuff set, merge is ok
 	 */
-	if (rq->rq_disk == bio->bi_bdev->bd_disk &&
-	    !rq->waiting && !rq->special)
+	if (rq->rq_disk == bio->bi_bdev->bd_disk && !rq->special)
 		return 1;
 
 	return 0;
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index d388486e98bb..3b6aad2affd8 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -291,7 +291,6 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 	rq->buffer = NULL;
 	rq->ref_count = 1;
 	rq->q = q;
-	rq->waiting = NULL;
 	rq->special = NULL;
 	rq->data_len = 0;
 	rq->data = NULL;
@@ -451,6 +450,7 @@ static void queue_flush(request_queue_t *q, unsigned which)
 	rq->cmd_flags = REQ_HARDBARRIER;
 	rq_init(q, rq);
 	rq->elevator_private = NULL;
+	rq->elevator_private2 = NULL;
 	rq->rq_disk = q->bar_rq.rq_disk;
 	rq->rl = NULL;
 	rq->end_io = end_io;
@@ -479,6 +479,7 @@ static inline struct request *start_ordered(request_queue_t *q,
 		rq->cmd_flags |= REQ_RW;
 	rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
 	rq->elevator_private = NULL;
+	rq->elevator_private2 = NULL;
 	rq->rl = NULL;
 	init_request_from_bio(rq, q->orig_bar_rq->bio);
 	rq->end_io = bar_end_io;
@@ -2569,10 +2570,9 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
 		rq->sense_len = 0;
 	}
 
-	rq->waiting = &wait;
+	rq->end_io_data = &wait;
 	blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
 	wait_for_completion(&wait);
-	rq->waiting = NULL;
 
 	if (rq->errors)
 		err = -EIO;
@@ -2736,9 +2736,9 @@ EXPORT_SYMBOL(blk_put_request);
  */
 void blk_end_sync_rq(struct request *rq, int error)
 {
-	struct completion *waiting = rq->waiting;
+	struct completion *waiting = rq->end_io_data;
 
-	rq->waiting = NULL;
+	rq->end_io_data = NULL;
 	__blk_put_request(rq->q, rq);
 
 	/*
@@ -2801,7 +2801,7 @@ static int attempt_merge(request_queue_t *q, struct request *req,
 
 	if (rq_data_dir(req) != rq_data_dir(next)
 	    || req->rq_disk != next->rq_disk
-	    || next->waiting || next->special)
+	    || next->special)
 		return 0;
 
 	/*
@@ -2886,7 +2886,6 @@ static void init_request_from_bio(struct request *req, struct bio *bio)
 	req->nr_phys_segments = bio_phys_segments(req->q, bio);
 	req->nr_hw_segments = bio_hw_segments(req->q, bio);
 	req->buffer = bio_data(bio);	/* see ->buffer comment above */
-	req->waiting = NULL;
 	req->bio = req->biotail = bio;
 	req->ioprio = bio_prio(bio);
 	req->rq_disk = bio->bi_bdev->bd_disk;
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index a360215dbce7..2568640430fb 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3331,7 +3331,7 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_
 		Command->DmaDirection = PCI_DMA_TODEVICE;
 		Command->CommandType = DAC960_WriteCommand;
 	}
-	Command->Completion = Request->waiting;
+	Command->Completion = Request->end_io_data;
 	Command->LogicalDriveNumber = (long)Request->rq_disk->private_data;
 	Command->BlockNumber = Request->sector;
 	Command->BlockCount = Request->nr_sectors;
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 12ff1a274d91..500d2ebb41e4 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -722,11 +722,10 @@ static int pd_special_command(struct pd_unit *disk,
 	rq.rq_status = RQ_ACTIVE;
 	rq.rq_disk = disk->gd;
 	rq.ref_count = 1;
-	rq.waiting = &wait;
+	rq.end_io_data = &wait;
 	rq.end_io = blk_end_sync_rq;
 	blk_insert_request(disk->gd->queue, &rq, 0, func);
 	wait_for_completion(&wait);
-	rq.waiting = NULL;
 	if (rq.errors)
 		err = -EIO;
 	blk_put_request(&rq);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 42891d2b054e..888d1aceeeff 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -375,7 +375,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 	rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
 
 	rq->ref_count++;
-	rq->waiting = &wait;
+	rq->end_io_data = &wait;
 	rq->end_io = blk_end_sync_rq;
 	elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
 	generic_unplug_device(q);
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 3436b1f104eb..a3ffb04436bd 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -141,7 +141,7 @@ enum {
 
 static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 stat, u8 error)
 {
-	struct request_pm_state *pm = rq->end_io_data;
+	struct request_pm_state *pm = rq->data;
 
 	if (drive->media != ide_disk)
 		return;
@@ -164,7 +164,7 @@ static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 s
 
 static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->end_io_data;
+	struct request_pm_state *pm = rq->data;
 	ide_task_t *args = rq->special;
 
 	memset(args, 0, sizeof(*args));
@@ -421,7 +421,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 			}
 		}
 	} else if (blk_pm_request(rq)) {
-		struct request_pm_state *pm = rq->end_io_data;
+		struct request_pm_state *pm = rq->data;
 #ifdef DEBUG_PM
 		printk("%s: complete_power_step(step: %d, stat: %x, err: %x)\n",
 			drive->name, rq->pm->pm_step, stat, err);
@@ -933,7 +933,7 @@ done:
 
 static void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->end_io_data;
+	struct request_pm_state *pm = rq->data;
 
 	if (blk_pm_suspend_request(rq) &&
 	    pm->pm_step == ide_pm_state_start_suspend)
@@ -1018,7 +1018,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 		    rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
 			return execute_drive_cmd(drive, rq);
 		else if (blk_pm_request(rq)) {
-			struct request_pm_state *pm = rq->end_io_data;
+			struct request_pm_state *pm = rq->data;
 #ifdef DEBUG_PM
 			printk("%s: start_power_step(step: %d)\n",
 				drive->name, rq->pm->pm_step);
@@ -1718,7 +1718,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
 	 */
 	if (must_wait) {
 		rq->ref_count++;
-		rq->waiting = &wait;
+		rq->end_io_data = &wait;
 		rq->end_io = blk_end_sync_rq;
 	}
 
@@ -1736,7 +1736,6 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
 	err = 0;
 	if (must_wait) {
 		wait_for_completion(&wait);
-		rq->waiting = NULL;
 		if (rq->errors)
 			err = -EIO;
 
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 643e4b9ac651..66f9678d2f10 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -2773,7 +2773,7 @@ static void idetape_wait_for_request (ide_drive_t *drive, struct request *rq)
 		return;
 	}
 #endif /* IDETAPE_DEBUG_BUGS */
-	rq->waiting = &wait;
+	rq->end_io_data = &wait;
 	rq->end_io = blk_end_sync_rq;
 	spin_unlock_irq(&tape->spinlock);
 	wait_for_completion(&wait);
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 9384a3fdde6c..2b1a1389c318 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -1219,7 +1219,7 @@ static int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 	memset(&args, 0, sizeof(args));
 	rq.cmd_type = REQ_TYPE_PM_SUSPEND;
 	rq.special = &args;
-	rq.end_io_data = &rqpm;
+	rq.data = &rqpm;
 	rqpm.pm_step = ide_pm_state_start_suspend;
 	if (mesg.event == PM_EVENT_PRETHAW)
 		mesg.event = PM_EVENT_FREEZE;
@@ -1240,7 +1240,7 @@ static int generic_ide_resume(struct device *dev)
 	memset(&args, 0, sizeof(args));
 	rq.cmd_type = REQ_TYPE_PM_RESUME;
 	rq.special = &args;
-	rq.end_io_data = &rqpm;
+	rq.data = &rqpm;
 	rqpm.pm_step = ide_pm_state_start_resume;
 	rqpm.pm_state = PM_EVENT_ON;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d2dc17151f6c..604f23189097 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -266,7 +266,6 @@ struct request {
 	request_queue_t *q;
 	struct request_list *rl;
 
-	struct completion *waiting;
 	void *special;
 	char *buffer;
 
@@ -285,7 +284,7 @@ struct request {
 	int retries;
 
 	/*
-	 * completion callback. end_io_data should be folded in with waiting
+	 * completion callback.
 	 */
 	rq_end_io_fn *end_io;
 	void *end_io_data;
-- 
cgit v1.2.3-71-gd317


From 49171e5c6f414d49a061b5c1c84967c2eb569822 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 10 Aug 2006 08:59:11 +0200
Subject: [PATCH] Remove struct request_list from struct request

It is always identical to &q->rq, and we only use it for detecting
whether this request came out of our mempool or not. So replace it
with an additional ->flags bit flag.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/ll_rw_blk.c      | 10 ++--------
 include/linux/blkdev.h |  3 ++-
 2 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 3b6aad2affd8..f7462502bfdf 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -452,7 +452,6 @@ static void queue_flush(request_queue_t *q, unsigned which)
 	rq->elevator_private = NULL;
 	rq->elevator_private2 = NULL;
 	rq->rq_disk = q->bar_rq.rq_disk;
-	rq->rl = NULL;
 	rq->end_io = end_io;
 	q->prepare_flush_fn(q, rq);
 
@@ -480,7 +479,6 @@ static inline struct request *start_ordered(request_queue_t *q,
 	rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
 	rq->elevator_private = NULL;
 	rq->elevator_private2 = NULL;
-	rq->rl = NULL;
 	init_request_from_bio(rq, q->orig_bar_rq->bio);
 	rq->end_io = bar_end_io;
 
@@ -2018,7 +2016,7 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
 	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
 	 * see bio.h and blkdev.h
 	 */
-	rq->cmd_flags = rw;
+	rq->cmd_flags = rw | REQ_ALLOCED;
 
 	if (priv) {
 		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
@@ -2196,7 +2194,6 @@ rq_starved:
 		ioc->nr_batch_requests--;
 	
 	rq_init(q, rq);
-	rq->rl = rl;
 
 	blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
 out:
@@ -2681,8 +2678,6 @@ EXPORT_SYMBOL_GPL(disk_round_stats);
  */
 void __blk_put_request(request_queue_t *q, struct request *req)
 {
-	struct request_list *rl = req->rl;
-
 	if (unlikely(!q))
 		return;
 	if (unlikely(--req->ref_count))
@@ -2691,13 +2686,12 @@ void __blk_put_request(request_queue_t *q, struct request *req)
 	elv_completed_request(q, req);
 
 	req->rq_status = RQ_INACTIVE;
-	req->rl = NULL;
 
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
 	 * it didn't come out of our reserved rq pools
 	 */
-	if (rl) {
+	if (req->cmd_flags & REQ_ALLOCED) {
 		int rw = rq_data_dir(req);
 		int priv = req->cmd_flags & REQ_ELVPRIV;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 604f23189097..d4c1dd046e27 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -180,6 +180,7 @@ enum rq_flag_bits {
 	__REQ_PREEMPT,		/* set for "ide_preempt" requests */
 	__REQ_ORDERED_COLOR,	/* is before or after barrier */
 	__REQ_RW_SYNC,		/* request is sync (O_DIRECT) */
+	__REQ_ALLOCED,		/* request came from our alloc pool */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -199,6 +200,7 @@ enum rq_flag_bits {
 #define REQ_PREEMPT	(1 << __REQ_PREEMPT)
 #define REQ_ORDERED_COLOR	(1 << __REQ_ORDERED_COLOR)
 #define REQ_RW_SYNC	(1 << __REQ_RW_SYNC)
+#define REQ_ALLOCED	(1 << __REQ_ALLOCED)
 
 #define BLK_MAX_CDB	16
 
@@ -264,7 +266,6 @@ struct request {
 
 	int ref_count;
 	request_queue_t *q;
-	struct request_list *rl;
 
 	void *special;
 	char *buffer;
-- 
cgit v1.2.3-71-gd317


From cdd6026217c0e4cda2efce1bdc318661bef1f66f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Fri, 28 Jul 2006 09:32:07 +0200
Subject: [PATCH] Remove ->rq_status from struct request

After Christophs SCSI change, the only usage left is RQ_ACTIVE
and RQ_INACTIVE. The block layer sets RQ_INACTIVE right before freeing
the request, so any check for RQ_INACTIVE in a driver is a bug and
indicates use-after-free.

So kill/clean the remaining users, straight forward.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 arch/um/drivers/ubd_kern.c |  2 --
 block/ll_rw_blk.c          |  3 ---
 drivers/block/paride/pd.c  |  1 -
 drivers/block/swim3.c      |  4 ++--
 drivers/block/swim_iop.c   |  4 ++--
 drivers/fc4/fc.c           |  1 -
 drivers/ide/ide-floppy.c   |  3 +--
 drivers/ide/ide-io.c       |  1 -
 drivers/ide/ide-tape.c     |  4 ++--
 drivers/scsi/ide-scsi.c    |  2 +-
 drivers/scsi/scsi.c        |  2 +-
 include/linux/blkdev.h     | 13 +++++--------
 12 files changed, 14 insertions(+), 26 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 5fa4c8e258a4..fda4a3940698 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -981,8 +981,6 @@ static int prepare_request(struct request *req, struct io_thread_req *io_req)
 	__u64 offset;
 	int len;
 
-	if(req->rq_status == RQ_INACTIVE) return(1);
-
 	/* This should be impossible now */
 	if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
 		printk("Write attempted on readonly ubd device %s\n",
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index f7462502bfdf..b94a396aa624 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -283,7 +283,6 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 	INIT_LIST_HEAD(&rq->donelist);
 
 	rq->errors = 0;
-	rq->rq_status = RQ_ACTIVE;
 	rq->bio = rq->biotail = NULL;
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
@@ -2685,8 +2684,6 @@ void __blk_put_request(request_queue_t *q, struct request *req)
 
 	elv_completed_request(q, req);
 
-	req->rq_status = RQ_INACTIVE;
-
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
 	 * it didn't come out of our reserved rq pools
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 500d2ebb41e4..38578b9dbfd1 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -719,7 +719,6 @@ static int pd_special_command(struct pd_unit *disk,
 
 	memset(&rq, 0, sizeof(rq));
 	rq.errors = 0;
-	rq.rq_status = RQ_ACTIVE;
 	rq.rq_disk = disk->gd;
 	rq.ref_count = 1;
 	rq.end_io_data = &wait;
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index cc42e762396f..f2305ee792a1 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -319,8 +319,8 @@ static void start_request(struct floppy_state *fs)
 		printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n",
 		       req->rq_disk->disk_name, req->cmd,
 		       (long)req->sector, req->nr_sectors, req->buffer);
-		printk("           rq_status=%d errors=%d current_nr_sectors=%ld\n",
-		       req->rq_status, req->errors, req->current_nr_sectors);
+		printk("           errors=%d current_nr_sectors=%ld\n",
+		       req->errors, req->current_nr_sectors);
 #endif
 
 		if (req->sector < 0 || req->sector >= fs->total_secs) {
diff --git a/drivers/block/swim_iop.c b/drivers/block/swim_iop.c
index 89e3c2f8b776..dfda796eba56 100644
--- a/drivers/block/swim_iop.c
+++ b/drivers/block/swim_iop.c
@@ -529,8 +529,8 @@ static void start_request(struct floppy_state *fs)
 		printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n",
 		       CURRENT->rq_disk->disk_name, CURRENT->cmd,
 		       CURRENT->sector, CURRENT->nr_sectors, CURRENT->buffer);
-		printk("           rq_status=%d errors=%d current_nr_sectors=%ld\n",
-		       CURRENT->rq_status, CURRENT->errors, CURRENT->current_nr_sectors);
+		printk("           errors=%d current_nr_sectors=%ld\n",
+		      CURRENT->errors, CURRENT->current_nr_sectors);
 #endif
 
 		if (CURRENT->sector < 0 || CURRENT->sector >= fs->total_secs) {
diff --git a/drivers/fc4/fc.c b/drivers/fc4/fc.c
index 1a159e8843ca..22d17474755f 100644
--- a/drivers/fc4/fc.c
+++ b/drivers/fc4/fc.c
@@ -974,7 +974,6 @@ int fcp_scsi_dev_reset(Scsi_Cmnd *SCpnt)
 	 */
 
 	fc->rst_pkt->device->host->eh_action = &sem;
-	fc->rst_pkt->request->rq_status = RQ_SCSI_BUSY;
 
 	fc->rst_pkt->done = fcp_scsi_reset_done;
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 0edc32204915..8ccee9c769f8 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -1281,8 +1281,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
 	idefloppy_pc_t *pc;
 	unsigned long block = (unsigned long)block_s;
 
-	debug_log(KERN_INFO "rq_status: %d, dev: %s, flags: %lx, errors: %d\n",
-			rq->rq_status,
+	debug_log(KERN_INFO "dev: %s, flags: %lx, errors: %d\n",
 			rq->rq_disk ? rq->rq_disk->disk_name : "?",
 			rq->flags, rq->errors);
 	debug_log(KERN_INFO "sector: %ld, nr_sectors: %ld, "
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index a3ffb04436bd..38479a29d3e1 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -1710,7 +1710,6 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
 	int must_wait = (action == ide_wait || action == ide_head_wait);
 
 	rq->errors = 0;
-	rq->rq_status = RQ_ACTIVE;
 
 	/*
 	 * we need to hold an extra reference to request for safe inspection
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 66f9678d2f10..2ebc3760f261 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -2423,8 +2423,8 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 #if IDETAPE_DEBUG_LOG
 #if 0
 	if (tape->debug_level >= 5)
-		printk(KERN_INFO "ide-tape: rq_status: %d, "
-			"dev: %s, cmd: %ld, errors: %d\n", rq->rq_status,
+		printk(KERN_INFO "ide-tape:  %d, "
+			"dev: %s, cmd: %ld, errors: %d\n",
 			 rq->rq_disk->disk_name, rq->cmd[0], rq->errors);
 #endif
 	if (tape->debug_level >= 2)
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 65b19695ebe2..1427a41e8441 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -708,7 +708,7 @@ static ide_startstop_t idescsi_issue_pc (ide_drive_t *drive, idescsi_pc_t *pc)
 static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block)
 {
 #if IDESCSI_DEBUG_LOG
-	printk (KERN_INFO "rq_status: %d, dev: %s, cmd: %x, errors: %d\n",rq->rq_status, rq->rq_disk->disk_name,rq->cmd[0],rq->errors);
+	printk (KERN_INFO "dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name,rq->cmd[0],rq->errors);
 	printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors);
 #endif /* IDESCSI_DEBUG_LOG */
 
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 7a054f9d1ee3..12f6639dda2d 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -1065,7 +1065,7 @@ int scsi_device_cancel(struct scsi_device *sdev, int recovery)
 
 	spin_lock_irqsave(&sdev->list_lock, flags);
 	list_for_each_entry(scmd, &sdev->cmd_list, list) {
-		if (scmd->request && scmd->request->rq_status != RQ_INACTIVE) {
+		if (scmd->request) {
 			/*
 			 * If we are unable to remove the timer, it means
 			 * that the command has already timed out or
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d4c1dd046e27..8a3e309e0842 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -243,8 +243,6 @@ struct request {
 
 	void *completion_data;
 
-	int rq_status;	/* should split this into a few status bits */
-	int errors;
 	struct gendisk *rq_disk;
 	unsigned long start_time;
 
@@ -262,14 +260,16 @@ struct request {
 
 	unsigned short ioprio;
 
-	int tag;
-
-	int ref_count;
 	request_queue_t *q;
 
 	void *special;
 	char *buffer;
 
+	int tag;
+	int errors;
+
+	int ref_count;
+
 	/*
 	 * when request is used as a packet command carrier
 	 */
@@ -456,9 +456,6 @@ struct request_queue
 	struct mutex		sysfs_lock;
 };
 
-#define RQ_INACTIVE		(-1)
-#define RQ_ACTIVE		1
-
 #define QUEUE_FLAG_CLUSTER	0	/* cluster several segments into 1 */
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
 #define QUEUE_FLAG_STOPPED	2	/* queue is stopped */
-- 
cgit v1.2.3-71-gd317


From e6a1c874a064e7d07f24986aba7cd537b7f4a25d Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 10 Aug 2006 09:00:21 +0200
Subject: [PATCH] struct request: shrink and optimize some more

Move some members around and unionize completion_data and rb_node since
they cannot ever be used at the same time.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 include/linux/blkdev.h | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8a3e309e0842..a1e288069e2e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -211,6 +211,8 @@ struct request {
 	struct list_head queuelist;
 	struct list_head donelist;
 
+	request_queue_t *q;
+
 	unsigned int cmd_flags;
 	enum rq_cmd_type_bits cmd_type;
 
@@ -219,12 +221,12 @@ struct request {
 	 */
 
 	sector_t sector;		/* next sector to submit */
+	sector_t hard_sector;		/* next sector to complete */
 	unsigned long nr_sectors;	/* no. of sectors left to submit */
+	unsigned long hard_nr_sectors;	/* no. of sectors left to complete */
 	/* no. of sectors left to submit in the current segment */
 	unsigned int current_nr_sectors;
 
-	sector_t hard_sector;		/* next sector to complete */
-	unsigned long hard_nr_sectors;	/* no. of sectors left to complete */
 	/* no. of sectors left to complete in the current segment */
 	unsigned int hard_cur_sectors;
 
@@ -232,7 +234,15 @@ struct request {
 	struct bio *biotail;
 
 	struct hlist_node hash;	/* merge hash */
-	struct rb_node rb_node;	/* sort/lookup */
+	/*
+	 * The rb_node is only used inside the io scheduler, requests
+	 * are pruned when moved to the dispatch queue. So let the
+	 * completion_data share space with the rb_node.
+	 */
+	union {
+		struct rb_node rb_node;	/* sort/lookup */
+		void *completion_data;
+	};
 
 	/*
 	 * two pointers are available for the IO schedulers, if they need
@@ -241,8 +251,6 @@ struct request {
 	void *elevator_private;
 	void *elevator_private2;
 
-	void *completion_data;
-
 	struct gendisk *rq_disk;
 	unsigned long start_time;
 
@@ -260,8 +268,6 @@ struct request {
 
 	unsigned short ioprio;
 
-	request_queue_t *q;
-
 	void *special;
 	char *buffer;
 
-- 
cgit v1.2.3-71-gd317


From fc46379daf90dce57bf765c81d3b39f55150aac2 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Tue, 29 Aug 2006 09:05:44 +0200
Subject: [PATCH] cfq-iosched: kill cfq_exit_lock

cfq_exit_lock is protecting two things now:

- The per-ioc rbtree of cfq_io_contexts

- The per-cfqd linked list of cfq_io_contexts

The per-cfqd linked list can be protected by the queue lock, as it is (by
definition) per cfqd as the queue lock is.

The per-ioc rbtree is mainly used and updated by the process itself only.
The only outside use is the io priority changing. If we move the
priority changing to not browsing the rbtree, we can remove any locking
from the rbtree updates and lookup completely. Let the sys_ioprio syscall
just mark processes as having the iopriority changed and lazily update
the private cfq io contexts the next time io is queued, and we can
remove this locking as well.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/cfq-iosched.c    | 54 ++++++++++++++++----------------------------------
 block/ll_rw_blk.c      |  2 +-
 fs/ioprio.c            |  4 ++--
 include/linux/blkdev.h |  2 +-
 4 files changed, 21 insertions(+), 41 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ec24284e9d39..33e0b0c5e31d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -31,8 +31,6 @@ static int cfq_slice_idle = HZ / 125;
 
 #define CFQ_KEY_ASYNC		(0)
 
-static DEFINE_SPINLOCK(cfq_exit_lock);
-
 /*
  * for the hash of cfqq inside the cfqd
  */
@@ -1084,12 +1082,6 @@ static void cfq_free_io_context(struct io_context *ioc)
 		complete(ioc_gone);
 }
 
-static void cfq_trim(struct io_context *ioc)
-{
-	ioc->set_ioprio = NULL;
-	cfq_free_io_context(ioc);
-}
-
 static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	if (unlikely(cfqq == cfqd->active_queue))
@@ -1101,6 +1093,10 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
 					 struct cfq_io_context *cic)
 {
+	list_del_init(&cic->queue_list);
+	smp_wmb();
+	cic->key = NULL;
+
 	if (cic->cfqq[ASYNC]) {
 		cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]);
 		cic->cfqq[ASYNC] = NULL;
@@ -1110,9 +1106,6 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
 		cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]);
 		cic->cfqq[SYNC] = NULL;
 	}
-
-	cic->key = NULL;
-	list_del_init(&cic->queue_list);
 }
 
 
@@ -1123,27 +1116,23 @@ static void cfq_exit_single_io_context(struct cfq_io_context *cic)
 {
 	struct cfq_data *cfqd = cic->key;
 
-	WARN_ON(!irqs_disabled());
-
 	if (cfqd) {
 		request_queue_t *q = cfqd->queue;
 
-		spin_lock(q->queue_lock);
+		spin_lock_irq(q->queue_lock);
 		__cfq_exit_single_io_context(cfqd, cic);
-		spin_unlock(q->queue_lock);
+		spin_unlock_irq(q->queue_lock);
 	}
 }
 
 static void cfq_exit_io_context(struct io_context *ioc)
 {
 	struct cfq_io_context *__cic;
-	unsigned long flags;
 	struct rb_node *n;
 
 	/*
 	 * put the reference this task is holding to the various queues
 	 */
-	spin_lock_irqsave(&cfq_exit_lock, flags);
 
 	n = rb_first(&ioc->cic_root);
 	while (n != NULL) {
@@ -1152,8 +1141,6 @@ static void cfq_exit_io_context(struct io_context *ioc)
 		cfq_exit_single_io_context(__cic);
 		n = rb_next(n);
 	}
-
-	spin_unlock_irqrestore(&cfq_exit_lock, flags);
 }
 
 static struct cfq_io_context *
@@ -1248,15 +1235,12 @@ static inline void changed_ioprio(struct cfq_io_context *cic)
 	spin_unlock(cfqd->queue->queue_lock);
 }
 
-/*
- * callback from sys_ioprio_set, irqs are disabled
- */
-static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
+static void cfq_ioc_set_ioprio(struct io_context *ioc)
 {
 	struct cfq_io_context *cic;
 	struct rb_node *n;
 
-	spin_lock(&cfq_exit_lock);
+	ioc->ioprio_changed = 0;
 
 	n = rb_first(&ioc->cic_root);
 	while (n != NULL) {
@@ -1265,10 +1249,6 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
 		changed_ioprio(cic);
 		n = rb_next(n);
 	}
-
-	spin_unlock(&cfq_exit_lock);
-
-	return 0;
 }
 
 static struct cfq_queue *
@@ -1336,10 +1316,8 @@ out:
 static void
 cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic)
 {
-	spin_lock(&cfq_exit_lock);
+	WARN_ON(!list_empty(&cic->queue_list));
 	rb_erase(&cic->rb_node, &ioc->cic_root);
-	list_del_init(&cic->queue_list);
-	spin_unlock(&cfq_exit_lock);
 	kmem_cache_free(cfq_ioc_pool, cic);
 	atomic_dec(&ioc_count);
 }
@@ -1385,7 +1363,6 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
 	cic->ioc = ioc;
 	cic->key = cfqd;
 
-	ioc->set_ioprio = cfq_ioc_set_ioprio;
 restart:
 	parent = NULL;
 	p = &ioc->cic_root.rb_node;
@@ -1407,11 +1384,12 @@ restart:
 			BUG();
 	}
 
-	spin_lock(&cfq_exit_lock);
 	rb_link_node(&cic->rb_node, parent, p);
 	rb_insert_color(&cic->rb_node, &ioc->cic_root);
+
+	spin_lock_irq(cfqd->queue->queue_lock);
 	list_add(&cic->queue_list, &cfqd->cic_list);
-	spin_unlock(&cfq_exit_lock);
+	spin_unlock_irq(cfqd->queue->queue_lock);
 }
 
 /*
@@ -1441,6 +1419,10 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 
 	cfq_cic_link(cfqd, ioc, cic);
 out:
+	smp_read_barrier_depends();
+	if (unlikely(ioc->ioprio_changed))
+		cfq_ioc_set_ioprio(ioc);
+
 	return cic;
 err:
 	put_io_context(ioc);
@@ -1945,7 +1927,6 @@ static void cfq_exit_queue(elevator_t *e)
 
 	cfq_shutdown_timer_wq(cfqd);
 
-	spin_lock(&cfq_exit_lock);
 	spin_lock_irq(q->queue_lock);
 
 	if (cfqd->active_queue)
@@ -1960,7 +1941,6 @@ static void cfq_exit_queue(elevator_t *e)
 	}
 
 	spin_unlock_irq(q->queue_lock);
-	spin_unlock(&cfq_exit_lock);
 
 	cfq_shutdown_timer_wq(cfqd);
 
@@ -2149,7 +2129,7 @@ static struct elevator_type iosched_cfq = {
 		.elevator_may_queue_fn =	cfq_may_queue,
 		.elevator_init_fn =		cfq_init_queue,
 		.elevator_exit_fn =		cfq_exit_queue,
-		.trim =				cfq_trim,
+		.trim =				cfq_free_io_context,
 	},
 	.elevator_attrs =	cfq_attrs,
 	.elevator_name =	"cfq",
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index e25b4cd2dcd1..508548b834f1 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3654,7 +3654,7 @@ struct io_context *current_io_context(gfp_t gfp_flags)
 	if (ret) {
 		atomic_set(&ret->refcount, 1);
 		ret->task = current;
-		ret->set_ioprio = NULL;
+		ret->ioprio_changed = 0;
 		ret->last_waited = jiffies; /* doesn't matter... */
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		ret->aic = NULL;
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 78b1deae3fa2..0fd1089d7bf6 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -47,8 +47,8 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
 	/* see wmb() in current_io_context() */
 	smp_read_barrier_depends();
 
-	if (ioc && ioc->set_ioprio)
-		ioc->set_ioprio(ioc, ioprio);
+	if (ioc)
+		ioc->ioprio_changed = 1;
 
 	task_unlock(task);
 	return 0;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a1e288069e2e..79cb9fa8034a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -90,7 +90,7 @@ struct io_context {
 	atomic_t refcount;
 	struct task_struct *task;
 
-	int (*set_ioprio)(struct io_context *, unsigned int);
+	unsigned int ioprio_changed;
 
 	/*
 	 * For request batching
-- 
cgit v1.2.3-71-gd317


From a3b05e8f58c95dfccbf2c824d0c68e5990571f24 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Fri, 28 Jul 2006 09:36:46 +0200
Subject: [PATCH] Kill various deprecated/unused block layer defines/functions

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 drivers/block/cciss.c    |  1 -
 drivers/block/cpqarray.c |  1 -
 include/linux/blkdev.h   | 29 -----------------------------
 include/linux/fs.h       |  1 -
 4 files changed, 32 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 2cd3391ff878..c211065ad829 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1229,7 +1229,6 @@ static inline void complete_buffers(struct bio *bio, int status)
 		int nr_sectors = bio_sectors(bio);
 
 		bio->bi_next = NULL;
-		blk_finished_io(len);
 		bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO);
 		bio = xbh;
 	}
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 78082edc14b4..4abc193314ee 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -989,7 +989,6 @@ static inline void complete_buffers(struct bio *bio, int ok)
 		xbh = bio->bi_next;
 		bio->bi_next = NULL;
 		
-		blk_finished_io(nr_sectors);
 		bio_endio(bio, nr_sectors << 9, ok ? 0 : -EIO);
 
 		bio = xbh;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 79cb9fa8034a..593386162f47 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -578,12 +578,6 @@ static inline void blk_clear_queue_full(struct request_queue *q, int rw)
 #define rq_mergeable(rq)	\
 	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
 
-/*
- * noop, requests are automagically marked as active/inactive by I/O
- * scheduler -- see elv_next_request
- */
-#define blk_queue_headactive(q, head_active)
-
 /*
  * q->prep_rq_fn return values
  */
@@ -621,11 +615,6 @@ static inline void blk_queue_bounce(request_queue_t *q, struct bio **bio)
 	if ((rq->bio))			\
 		for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
 
-struct sec_size {
-	unsigned block_size;
-	unsigned block_size_bits;
-};
-
 extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);
 extern void register_disk(struct gendisk *dev);
@@ -690,16 +679,6 @@ extern void end_that_request_last(struct request *, int);
 extern void end_request(struct request *req, int uptodate);
 extern void blk_complete_request(struct request *);
 
-static inline int rq_all_done(struct request *rq, unsigned int nr_bytes)
-{
-	if (blk_fs_request(rq))
-		return (nr_bytes >= (rq->hard_nr_sectors << 9));
-	else if (blk_pc_request(rq))
-		return nr_bytes >= rq->data_len;
-
-	return 0;
-}
-
 /*
  * end_that_request_first/chunk() takes an uptodate argument. we account
  * any value <= as an io error. 0 means -EIO for compatability reasons,
@@ -807,14 +786,6 @@ static inline int queue_dma_alignment(request_queue_t *q)
 	return retval;
 }
 
-static inline int bdev_dma_aligment(struct block_device *bdev)
-{
-	return queue_dma_alignment(bdev_get_queue(bdev));
-}
-
-#define blk_finished_io(nsects)	do { } while (0)
-#define blk_started_io(nsects)	do { } while (0)
-
 /* assumes size > 256 */
 static inline unsigned int blksize_bits(unsigned int size)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6eafbe309483..9306f63bf77e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -79,7 +79,6 @@ extern int dir_notify_enable;
 #define WRITE 1
 #define READA 2		/* read-ahead  - don't block if no resources */
 #define SWRITE 3	/* for ll_rw_block() - wait for buffer lock */
-#define SPECIAL 4	/* For non-blockdevice requests in request queue */
 #define READ_SYNC	(READ | (1 << BIO_RW_SYNC))
 #define WRITE_SYNC	(WRITE | (1 << BIO_RW_SYNC))
 #define WRITE_BARRIER	((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
-- 
cgit v1.2.3-71-gd317


From b5deef901282628d88c784f4c9d2f0583ec3b355 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Wed, 19 Jul 2006 23:39:40 +0200
Subject: [PATCH] Make sure all block/io scheduler setups are node aware

Some were kmalloc_node(), some were still kmalloc(). Change them all to
kmalloc_node().

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/as-iosched.c     |  8 ++++----
 block/cfq-iosched.c    | 13 +++++++------
 block/elevator.c       | 11 ++++++-----
 block/ll_rw_blk.c      | 13 +++++++------
 block/noop-iosched.c   |  2 +-
 include/linux/blkdev.h |  3 +--
 6 files changed, 26 insertions(+), 24 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index 8e1fef1eafc9..f6dc95489316 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -210,9 +210,9 @@ static struct as_io_context *alloc_as_io_context(void)
  * If the current task has no AS IO context then create one and initialise it.
  * Then take a ref on the task's io context and return it.
  */
-static struct io_context *as_get_io_context(void)
+static struct io_context *as_get_io_context(int node)
 {
-	struct io_context *ioc = get_io_context(GFP_ATOMIC);
+	struct io_context *ioc = get_io_context(GFP_ATOMIC, node);
 	if (ioc && !ioc->aic) {
 		ioc->aic = alloc_as_io_context();
 		if (!ioc->aic) {
@@ -1148,7 +1148,7 @@ static void as_add_request(request_queue_t *q, struct request *rq)
 
 	data_dir = rq_is_sync(rq);
 
-	rq->elevator_private = as_get_io_context();
+	rq->elevator_private = as_get_io_context(q->node);
 
 	if (RQ_IOC(rq)) {
 		as_update_iohist(ad, RQ_IOC(rq)->aic, rq);
@@ -1292,7 +1292,7 @@ static int as_may_queue(request_queue_t *q, int rw)
 	struct io_context *ioc;
 	if (ad->antic_status == ANTIC_WAIT_REQ ||
 			ad->antic_status == ANTIC_WAIT_NEXT) {
-		ioc = as_get_io_context();
+		ioc = as_get_io_context(q->node);
 		if (ad->io_context == ioc)
 			ret = ELV_MQUEUE_MUST;
 		put_io_context(ioc);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 85f1d87e86d4..0452108a932e 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1148,8 +1148,9 @@ static void cfq_exit_io_context(struct io_context *ioc)
 static struct cfq_io_context *
 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
-	struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
+	struct cfq_io_context *cic;
 
+	cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask, cfqd->queue->node);
 	if (cic) {
 		memset(cic, 0, sizeof(*cic));
 		cic->last_end_request = jiffies;
@@ -1277,11 +1278,11 @@ retry:
 			 * free memory.
 			 */
 			spin_unlock_irq(cfqd->queue->queue_lock);
-			new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask|__GFP_NOFAIL);
+			new_cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask|__GFP_NOFAIL, cfqd->queue->node);
 			spin_lock_irq(cfqd->queue->queue_lock);
 			goto retry;
 		} else {
-			cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
+			cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask, cfqd->queue->node);
 			if (!cfqq)
 				goto out;
 		}
@@ -1407,7 +1408,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
-	ioc = get_io_context(gfp_mask);
+	ioc = get_io_context(gfp_mask, cfqd->queue->node);
 	if (!ioc)
 		return NULL;
 
@@ -1955,7 +1956,7 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 	struct cfq_data *cfqd;
 	int i;
 
-	cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
+	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node);
 	if (!cfqd)
 		return NULL;
 
@@ -1970,7 +1971,7 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 	INIT_LIST_HEAD(&cfqd->empty_list);
 	INIT_LIST_HEAD(&cfqd->cic_list);
 
-	cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
+	cfqd->cfq_hash = kmalloc_node(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL, q->node);
 	if (!cfqd->cfq_hash)
 		goto out_free;
 
diff --git a/block/elevator.c b/block/elevator.c
index 788d2d81994c..e643291793a4 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -161,12 +161,12 @@ __setup("elevator=", elevator_setup);
 
 static struct kobj_type elv_ktype;
 
-static elevator_t *elevator_alloc(struct elevator_type *e)
+static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)
 {
 	elevator_t *eq;
 	int i;
 
-	eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
+	eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node);
 	if (unlikely(!eq))
 		goto err;
 
@@ -178,7 +178,8 @@ static elevator_t *elevator_alloc(struct elevator_type *e)
 	eq->kobj.ktype = &elv_ktype;
 	mutex_init(&eq->sysfs_lock);
 
-	eq->hash = kmalloc(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, GFP_KERNEL);
+	eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
+					GFP_KERNEL, q->node);
 	if (!eq->hash)
 		goto err;
 
@@ -224,7 +225,7 @@ int elevator_init(request_queue_t *q, char *name)
 		e = elevator_get("noop");
 	}
 
-	eq = elevator_alloc(e);
+	eq = elevator_alloc(q, e);
 	if (!eq)
 		return -ENOMEM;
 
@@ -987,7 +988,7 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 	/*
 	 * Allocate new elevator
 	 */
-	e = elevator_alloc(new_e);
+	e = elevator_alloc(q, new_e);
 	if (!e)
 		return 0;
 
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 4b7b4461e8d6..c6dfa889206c 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -39,6 +39,7 @@ static void blk_unplug_timeout(unsigned long data);
 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
 static void init_request_from_bio(struct request *req, struct bio *bio);
 static int __make_request(request_queue_t *q, struct bio *bio);
+static struct io_context *current_io_context(gfp_t gfp_flags, int node);
 
 /*
  * For the allocated request tables
@@ -2114,7 +2115,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
 
 	if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
 		if (rl->count[rw]+1 >= q->nr_requests) {
-			ioc = current_io_context(GFP_ATOMIC);
+			ioc = current_io_context(GFP_ATOMIC, q->node);
 			/*
 			 * The queue will fill after this allocation, so set
 			 * it as full, and mark this process as "batching".
@@ -2234,7 +2235,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw,
 			 * up to a big batch of them for a small period time.
 			 * See ioc_batching, ioc_set_batching
 			 */
-			ioc = current_io_context(GFP_NOIO);
+			ioc = current_io_context(GFP_NOIO, q->node);
 			ioc_set_batching(q, ioc);
 
 			spin_lock_irq(q->queue_lock);
@@ -3641,7 +3642,7 @@ void exit_io_context(void)
  * but since the current task itself holds a reference, the context can be
  * used in general code, so long as it stays within `current` context.
  */
-struct io_context *current_io_context(gfp_t gfp_flags)
+static struct io_context *current_io_context(gfp_t gfp_flags, int node)
 {
 	struct task_struct *tsk = current;
 	struct io_context *ret;
@@ -3650,7 +3651,7 @@ struct io_context *current_io_context(gfp_t gfp_flags)
 	if (likely(ret))
 		return ret;
 
-	ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
+	ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
 	if (ret) {
 		atomic_set(&ret->refcount, 1);
 		ret->task = current;
@@ -3674,10 +3675,10 @@ EXPORT_SYMBOL(current_io_context);
  *
  * This is always called in the context of the task which submitted the I/O.
  */
-struct io_context *get_io_context(gfp_t gfp_flags)
+struct io_context *get_io_context(gfp_t gfp_flags, int node)
 {
 	struct io_context *ret;
-	ret = current_io_context(gfp_flags);
+	ret = current_io_context(gfp_flags, node);
 	if (likely(ret))
 		atomic_inc(&ret->refcount);
 	return ret;
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index 56a7c620574f..79af43179421 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -69,7 +69,7 @@ static void *noop_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct noop_data *nd;
 
-	nd = kmalloc(sizeof(*nd), GFP_KERNEL);
+	nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node);
 	if (!nd)
 		return NULL;
 	INIT_LIST_HEAD(&nd->queue);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 593386162f47..6609371c303e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -104,8 +104,7 @@ struct io_context {
 
 void put_io_context(struct io_context *ioc);
 void exit_io_context(void);
-struct io_context *current_io_context(gfp_t gfp_flags);
-struct io_context *get_io_context(gfp_t gfp_flags);
+struct io_context *get_io_context(gfp_t gfp_flags, int node);
 void copy_io_context(struct io_context **pdst, struct io_context **psrc);
 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
 
-- 
cgit v1.2.3-71-gd317


From dc72ef4ae35c2016fb594bcc85ce871376682174 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 20 Jul 2006 14:54:05 +0200
Subject: [PATCH] Add blk_start_queueing() helper

CFQ implements this on its own now, but it's really block layer
knowledge. Tells a device queue to start dispatching requests to
the driver, taking care to unplug if needed. Also fixes the issue
where as/cfq will invoke a stopped queue, which we really don't
want.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/as-iosched.c     |  3 +--
 block/cfq-iosched.c    | 22 ++++------------------
 block/ll_rw_blk.c      | 25 ++++++++++++++++++++-----
 include/linux/blkdev.h |  1 +
 4 files changed, 26 insertions(+), 25 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index f6dc95489316..bc13dc0b29be 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1280,8 +1280,7 @@ static void as_work_handler(void *data)
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	if (!as_queue_empty(q))
-		q->request_fn(q);
+	blk_start_queueing(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6fb1613d44d7..9f684cc66bd1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1552,19 +1552,6 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	__cfq_set_active_queue(cfqd, cfqq);
 }
 
-/*
- * should really be a ll_rw_blk.c helper
- */
-static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
-	request_queue_t *q = cfqd->queue;
-
-	if (!blk_queue_plugged(q))
-		q->request_fn(q);
-	else
-		__generic_unplug_device(q);
-}
-
 /*
  * Called when a new fs request (rq) is added (to cfqq). Check if there's
  * something we should do about it
@@ -1593,7 +1580,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		if (cic == cfqd->active_cic &&
 		    del_timer(&cfqd->idle_slice_timer)) {
 			cfq_slice_expired(cfqd, 0);
-			cfq_start_queueing(cfqd, cfqq);
+			blk_start_queueing(cfqd->queue);
 		}
 		return;
 	}
@@ -1614,7 +1601,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		if (cfq_cfqq_wait_request(cfqq)) {
 			cfq_mark_cfqq_must_dispatch(cfqq);
 			del_timer(&cfqd->idle_slice_timer);
-			cfq_start_queueing(cfqd, cfqq);
+			blk_start_queueing(cfqd->queue);
 		}
 	} else if (cfq_should_preempt(cfqd, cfqq, rq)) {
 		/*
@@ -1624,7 +1611,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		 */
 		cfq_preempt_queue(cfqd, cfqq);
 		cfq_mark_cfqq_must_dispatch(cfqq);
-		cfq_start_queueing(cfqd, cfqq);
+		blk_start_queueing(cfqd->queue);
 	}
 }
 
@@ -1832,8 +1819,7 @@ static void cfq_kick_queue(void *data)
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	blk_remove_plug(q);
-	q->request_fn(q);
+	blk_start_queueing(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index c6dfa889206c..346be9ae31f6 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -2266,6 +2266,25 @@ struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(blk_get_request);
 
+/**
+ * blk_start_queueing - initiate dispatch of requests to device
+ * @q:		request queue to kick into gear
+ *
+ * This is basically a helper to remove the need to know whether a queue
+ * is plugged or not if someone just wants to initiate dispatch of requests
+ * for this queue.
+ *
+ * The queue lock must be held with interrupts disabled.
+ */
+void blk_start_queueing(request_queue_t *q)
+{
+	if (!blk_queue_plugged(q))
+		q->request_fn(q);
+	else
+		__generic_unplug_device(q);
+}
+EXPORT_SYMBOL(blk_start_queueing);
+
 /**
  * blk_requeue_request - put a request back on queue
  * @q:		request queue where request should be inserted
@@ -2333,11 +2352,7 @@ void blk_insert_request(request_queue_t *q, struct request *rq,
 
 	drive_stat_acct(rq, rq->nr_sectors, 1);
 	__elv_add_request(q, rq, where, 0);
-
-	if (blk_queue_plugged(q))
-		__generic_unplug_device(q);
-	else
-		q->request_fn(q);
+	blk_start_queueing(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6609371c303e..5d8e288db163 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -635,6 +635,7 @@ extern void blk_stop_queue(request_queue_t *q);
 extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(request_queue_t *q);
 extern void blk_run_queue(request_queue_t *);
+extern void blk_start_queueing(request_queue_t *);
 extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
 extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int);
 extern int blk_rq_unmap_user(struct bio *, unsigned int);
-- 
cgit v1.2.3-71-gd317


From 5404bc7a87b9949cf61e0174b21f80e73239ab25 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 10 Aug 2006 09:01:02 +0200
Subject: [PATCH] Allow file systems to differentiate between data and meta
 reads

We can use this information for making more intelligent priority
decisions, and it will also be useful for blktrace.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/ll_rw_blk.c      | 2 ++
 include/linux/bio.h    | 2 ++
 include/linux/blkdev.h | 3 +++
 include/linux/fs.h     | 1 +
 4 files changed, 8 insertions(+)

(limited to 'include/linux/blkdev.h')

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index e3980ec747c1..57992ae511c2 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -2884,6 +2884,8 @@ static void init_request_from_bio(struct request *req, struct bio *bio)
 
 	if (bio_sync(bio))
 		req->cmd_flags |= REQ_RW_SYNC;
+	if (bio_rw_meta(bio))
+		req->cmd_flags |= REQ_RW_META;
 
 	req->errors = 0;
 	req->hard_sector = req->sector = bio->bi_sector;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 76bdaeab6f62..711c321a7011 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -148,6 +148,7 @@ struct bio {
 #define BIO_RW_BARRIER	2
 #define BIO_RW_FAILFAST	3
 #define BIO_RW_SYNC	4
+#define BIO_RW_META	5
 
 /*
  * upper 16 bits of bi_rw define the io priority of this bio
@@ -178,6 +179,7 @@ struct bio {
 #define bio_sync(bio)		((bio)->bi_rw & (1 << BIO_RW_SYNC))
 #define bio_failfast(bio)	((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
 #define bio_rw_ahead(bio)	((bio)->bi_rw & (1 << BIO_RW_AHEAD))
+#define bio_rw_meta(bio)	((bio)->bi_rw & (1 << BIO_RW_META))
 
 /*
  * will die
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5d8e288db163..2c01a90998a7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -180,6 +180,7 @@ enum rq_flag_bits {
 	__REQ_ORDERED_COLOR,	/* is before or after barrier */
 	__REQ_RW_SYNC,		/* request is sync (O_DIRECT) */
 	__REQ_ALLOCED,		/* request came from our alloc pool */
+	__REQ_RW_META,		/* metadata io request */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -200,6 +201,7 @@ enum rq_flag_bits {
 #define REQ_ORDERED_COLOR	(1 << __REQ_ORDERED_COLOR)
 #define REQ_RW_SYNC	(1 << __REQ_RW_SYNC)
 #define REQ_ALLOCED	(1 << __REQ_ALLOCED)
+#define REQ_RW_META	(1 << __REQ_RW_META)
 
 #define BLK_MAX_CDB	16
 
@@ -543,6 +545,7 @@ enum {
  * We regard a request as sync, if it's a READ or a SYNC write.
  */
 #define rq_is_sync(rq)		(rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC)
+#define rq_is_meta(rq)		((rq)->cmd_flags & REQ_RW_META)
 
 static inline int blk_queue_full(struct request_queue *q, int rw)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9306f63bf77e..d68c37af4dfb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -80,6 +80,7 @@ extern int dir_notify_enable;
 #define READA 2		/* read-ahead  - don't block if no resources */
 #define SWRITE 3	/* for ll_rw_block() - wait for buffer lock */
 #define READ_SYNC	(READ | (1 << BIO_RW_SYNC))
+#define READ_META	(READ | (1 << BIO_RW_META))
 #define WRITE_SYNC	(WRITE | (1 << BIO_RW_SYNC))
 #define WRITE_BARRIER	((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
 
-- 
cgit v1.2.3-71-gd317


From 9361401eb7619c033e2394e4f9f6d410d6719ac7 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 30 Sep 2006 20:45:40 +0200
Subject: [PATCH] BLOCK: Make it possible to disable the block layer [try #6]

Make it possible to disable the block layer.  Not all embedded devices require
it, some can make do with just JFFS2, NFS, ramfs, etc - none of which require
the block layer to be present.

This patch does the following:

 (*) Introduces CONFIG_BLOCK to disable the block layer, buffering and blockdev
     support.

 (*) Adds dependencies on CONFIG_BLOCK to any configuration item that controls
     an item that uses the block layer.  This includes:

     (*) Block I/O tracing.

     (*) Disk partition code.

     (*) All filesystems that are block based, eg: Ext3, ReiserFS, ISOFS.

     (*) The SCSI layer.  As far as I can tell, even SCSI chardevs use the
     	 block layer to do scheduling.  Some drivers that use SCSI facilities -
     	 such as USB storage - end up disabled indirectly from this.

     (*) Various block-based device drivers, such as IDE and the old CDROM
     	 drivers.

     (*) MTD blockdev handling and FTL.

     (*) JFFS - which uses set_bdev_super(), something it could avoid doing by
     	 taking a leaf out of JFFS2's book.

 (*) Makes most of the contents of linux/blkdev.h, linux/buffer_head.h and
     linux/elevator.h contingent on CONFIG_BLOCK being set.  sector_div() is,
     however, still used in places, and so is still available.

 (*) Also made contingent are the contents of linux/mpage.h, linux/genhd.h and
     parts of linux/fs.h.

 (*) Makes a number of files in fs/ contingent on CONFIG_BLOCK.

 (*) Makes mm/bounce.c (bounce buffering) contingent on CONFIG_BLOCK.

 (*) set_page_dirty() doesn't call __set_page_dirty_buffers() if CONFIG_BLOCK
     is not enabled.

 (*) fs/no-block.c is created to hold out-of-line stubs and things that are
     required when CONFIG_BLOCK is not set:

     (*) Default blockdev file operations (to give error ENODEV on opening).

 (*) Makes some /proc changes:

     (*) /proc/devices does not list any blockdevs.

     (*) /proc/diskstats and /proc/partitions are contingent on CONFIG_BLOCK.

 (*) Makes some compat ioctl handling contingent on CONFIG_BLOCK.

 (*) If CONFIG_BLOCK is not defined, makes sys_quotactl() return -ENODEV if
     given command other than Q_SYNC or if a special device is specified.

 (*) In init/do_mounts.c, no reference is made to the blockdev routines if
     CONFIG_BLOCK is not defined.  This does not prohibit NFS roots or JFFS2.

 (*) The bdflush, ioprio_set and ioprio_get syscalls can now be absent (return
     error ENOSYS by way of cond_syscall if so).

 (*) The seclvl_bd_claim() and seclvl_bd_release() security calls do nothing if
     CONFIG_BLOCK is not set, since they can't then happen.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Kconfig                | 20 ++++++++++++++++++
 block/Kconfig.iosched        |  3 +++
 block/Makefile               |  2 +-
 drivers/block/Kconfig        |  4 ++++
 drivers/cdrom/Kconfig        |  2 +-
 drivers/char/Kconfig         |  1 +
 drivers/char/random.c        |  4 ++++
 drivers/ide/Kconfig          |  4 ++++
 drivers/md/Kconfig           |  3 +++
 drivers/message/i2o/Kconfig  |  2 +-
 drivers/mmc/Kconfig          |  2 +-
 drivers/mmc/Makefile         |  3 ++-
 drivers/mtd/Kconfig          | 12 +++++------
 drivers/mtd/devices/Kconfig  |  2 +-
 drivers/s390/block/Kconfig   |  2 +-
 drivers/scsi/Kconfig         |  2 ++
 fs/Kconfig                   | 31 ++++++++++++++++++++-------
 fs/Makefile                  | 14 +++++++++----
 fs/compat_ioctl.c            | 18 ++++++++++++++++
 fs/internal.h                |  6 ++++++
 fs/no-block.c                | 22 +++++++++++++++++++
 fs/partitions/Makefile       |  2 +-
 fs/proc/proc_misc.c          | 11 +++++++++-
 fs/quota.c                   | 44 ++++++++++++++++++++++++++------------
 fs/super.c                   |  4 ++++
 fs/xfs/Kconfig               |  1 +
 include/linux/blkdev.h       | 50 +++++++++++++++++++++++++++++++-------------
 include/linux/buffer_head.h  | 16 ++++++++++++++
 include/linux/compat_ioctl.h |  2 ++
 include/linux/elevator.h     |  3 +++
 include/linux/fs.h           | 25 +++++++++++++++++++---
 include/linux/genhd.h        |  4 ++++
 include/linux/mpage.h        |  3 +++
 include/linux/raid/md.h      |  3 +++
 include/linux/raid/md_k.h    |  3 +++
 include/scsi/scsi_tcq.h      |  3 ++-
 init/Kconfig                 |  2 +-
 init/do_mounts.c             | 13 +++++++++++-
 kernel/sys_ni.c              |  5 +++++
 mm/Makefile                  |  2 +-
 mm/filemap.c                 |  4 ++++
 mm/migrate.c                 |  2 ++
 mm/page-writeback.c          |  8 ++++---
 mm/truncate.c                |  2 ++
 44 files changed, 308 insertions(+), 63 deletions(-)
 create mode 100644 fs/no-block.c

(limited to 'include/linux/blkdev.h')

diff --git a/block/Kconfig b/block/Kconfig
index b6f5f0a79655..9af6c614dfde 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -1,6 +1,24 @@
 #
 # Block layer core configuration
 #
+config BLOCK
+       bool "Enable the block layer"
+       default y
+       help
+	 This permits the block layer to be removed from the kernel if it's not
+	 needed (on some embedded devices for example).  If this option is
+	 disabled, then blockdev files will become unusable and some
+	 filesystems (such as ext3) will become unavailable.
+
+	 This option will also disable SCSI character devices and USB storage
+	 since they make use of various block layer definitions and
+	 facilities.
+
+	 Say Y here unless you know you really don't want to mount disks and
+	 suchlike.
+
+if BLOCK
+
 #XXX - it makes sense to enable this only for 32-bit subarch's, not for x86_64
 #for instance.
 config LBD
@@ -33,4 +51,6 @@ config LSF
 
 	  If unsure, say Y.
 
+endif
+
 source block/Kconfig.iosched
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 48d090e266fc..903f0d3b6852 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -1,3 +1,4 @@
+if BLOCK
 
 menu "IO Schedulers"
 
@@ -67,3 +68,5 @@ config DEFAULT_IOSCHED
 	default "noop" if DEFAULT_NOOP
 
 endmenu
+
+endif
diff --git a/block/Makefile b/block/Makefile
index c05de0e0037f..4b84d0d5947b 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the kernel block layer
 #
 
-obj-y	:= elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o
+obj-$(CONFIG_BLOCK) := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o
 
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
 obj-$(CONFIG_IOSCHED_AS)	+= as-iosched.o
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index b5382cedf0c0..422e31d5f8e5 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -2,6 +2,8 @@
 # Block device driver configuration
 #
 
+if BLOCK
+
 menu "Block devices"
 
 config BLK_DEV_FD
@@ -468,3 +470,5 @@ config ATA_OVER_ETH
 	devices like the Coraid EtherDrive (R) Storage Blade.
 
 endmenu
+
+endif
diff --git a/drivers/cdrom/Kconfig b/drivers/cdrom/Kconfig
index ff5652d40619..4b12e9031fb3 100644
--- a/drivers/cdrom/Kconfig
+++ b/drivers/cdrom/Kconfig
@@ -3,7 +3,7 @@
 #
 
 menu "Old CD-ROM drivers (not SCSI, not IDE)"
-	depends on ISA
+	depends on ISA && BLOCK
 
 config CD_NO_IDESCSI
 	bool "Support non-SCSI/IDE/ATAPI CDROM drives"
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 4cc619edf424..bde1c665d9f4 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -1006,6 +1006,7 @@ config GPIO_VR41XX
 
 config RAW_DRIVER
 	tristate "RAW driver (/dev/raw/rawN) (OBSOLETE)"
+	depends on BLOCK
 	help
 	  The raw driver permits block devices to be bound to /dev/raw/rawN. 
 	  Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O. 
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 4c3a5ca9d8f7..b430a12eb819 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -655,6 +655,7 @@ void add_interrupt_randomness(int irq)
 	add_timer_randomness(irq_timer_state[irq], 0x100 + irq);
 }
 
+#ifdef CONFIG_BLOCK
 void add_disk_randomness(struct gendisk *disk)
 {
 	if (!disk || !disk->random)
@@ -667,6 +668,7 @@ void add_disk_randomness(struct gendisk *disk)
 }
 
 EXPORT_SYMBOL(add_disk_randomness);
+#endif
 
 #define EXTRACT_SIZE 10
 
@@ -918,6 +920,7 @@ void rand_initialize_irq(int irq)
 	}
 }
 
+#ifdef CONFIG_BLOCK
 void rand_initialize_disk(struct gendisk *disk)
 {
 	struct timer_rand_state *state;
@@ -932,6 +935,7 @@ void rand_initialize_disk(struct gendisk *disk)
 		disk->random = state;
 	}
 }
+#endif
 
 static ssize_t
 random_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index b6fb167e20f6..69d627bd537a 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -4,6 +4,8 @@
 # Andre Hedrick <andre@linux-ide.org>
 #
 
+if BLOCK
+
 menu "ATA/ATAPI/MFM/RLL support"
 
 config IDE
@@ -1082,3 +1084,5 @@ config BLK_DEV_HD
 endif
 
 endmenu
+
+endif
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index bf869ed03eed..6dd31a291d84 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -2,6 +2,8 @@
 # Block device driver configuration
 #
 
+if BLOCK
+
 menu "Multi-device support (RAID and LVM)"
 
 config MD
@@ -251,3 +253,4 @@ config DM_MULTIPATH_EMC
 
 endmenu
 
+endif
diff --git a/drivers/message/i2o/Kconfig b/drivers/message/i2o/Kconfig
index fef677103880..6443392bffff 100644
--- a/drivers/message/i2o/Kconfig
+++ b/drivers/message/i2o/Kconfig
@@ -88,7 +88,7 @@ config I2O_BUS
 
 config I2O_BLOCK
 	tristate "I2O Block OSM"
-	depends on I2O
+	depends on I2O && BLOCK
 	---help---
 	  Include support for the I2O Block OSM. The Block OSM presents disk
 	  and other structured block devices to the operating system. If you
diff --git a/drivers/mmc/Kconfig b/drivers/mmc/Kconfig
index 45bcf098e762..f540bd88dc5a 100644
--- a/drivers/mmc/Kconfig
+++ b/drivers/mmc/Kconfig
@@ -21,7 +21,7 @@ config MMC_DEBUG
 
 config MMC_BLOCK
 	tristate "MMC block device driver"
-	depends on MMC
+	depends on MMC && BLOCK
 	default y
 	help
 	  Say Y here to enable the MMC block device driver support.
diff --git a/drivers/mmc/Makefile b/drivers/mmc/Makefile
index d2957e35cc6f..b1f6e03e7aa9 100644
--- a/drivers/mmc/Makefile
+++ b/drivers/mmc/Makefile
@@ -24,7 +24,8 @@ obj-$(CONFIG_MMC_AU1X)		+= au1xmmc.o
 obj-$(CONFIG_MMC_OMAP)		+= omap.o
 obj-$(CONFIG_MMC_AT91RM9200)	+= at91_mci.o
 
-mmc_core-y := mmc.o mmc_queue.o mmc_sysfs.o
+mmc_core-y := mmc.o mmc_sysfs.o
+mmc_core-$(CONFIG_BLOCK) += mmc_queue.o
 
 ifeq ($(CONFIG_MMC_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index a03e862851db..a304b34c2632 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -166,7 +166,7 @@ config MTD_CHAR
 
 config MTD_BLOCK
 	tristate "Caching block device access to MTD devices"
-	depends on MTD
+	depends on MTD && BLOCK
 	---help---
 	  Although most flash chips have an erase size too large to be useful
 	  as block devices, it is possible to use MTD devices which are based
@@ -188,7 +188,7 @@ config MTD_BLOCK
 
 config MTD_BLOCK_RO
 	tristate "Readonly block device access to MTD devices"
-	depends on MTD_BLOCK!=y && MTD
+	depends on MTD_BLOCK!=y && MTD && BLOCK
 	help
 	  This allows you to mount read-only file systems (such as cramfs)
 	  from an MTD device, without the overhead (and danger) of the caching
@@ -199,7 +199,7 @@ config MTD_BLOCK_RO
 
 config FTL
 	tristate "FTL (Flash Translation Layer) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	---help---
 	  This provides support for the original Flash Translation Layer which
 	  is part of the PCMCIA specification. It uses a kind of pseudo-
@@ -215,7 +215,7 @@ config FTL
 
 config NFTL
 	tristate "NFTL (NAND Flash Translation Layer) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	---help---
 	  This provides support for the NAND Flash Translation Layer which is
 	  used on M-Systems' DiskOnChip devices. It uses a kind of pseudo-
@@ -238,7 +238,7 @@ config NFTL_RW
 
 config INFTL
 	tristate "INFTL (Inverse NAND Flash Translation Layer) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	---help---
 	  This provides support for the Inverse NAND Flash Translation
 	  Layer which is used on M-Systems' newer DiskOnChip devices. It
@@ -255,7 +255,7 @@ config INFTL
 
 config RFD_FTL
         tristate "Resident Flash Disk (Flash Translation Layer) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	---help---
 	  This provides support for the flash translation layer known
 	  as the Resident Flash Disk (RFD), as used by the Embedded BIOS
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 16c02b5ccf7e..440f6851da69 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -136,7 +136,7 @@ config MTDRAM_ABS_POS
 
 config MTD_BLOCK2MTD
 	tristate "MTD using block device"
-	depends on MTD
+	depends on MTD && BLOCK
 	help
 	  This driver allows a block device to appear as an MTD. It would
 	  generally be used in the following cases:
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index 929d6fff6152..b250c5354503 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -1,4 +1,4 @@
-if S390
+if S390 && BLOCK
 
 comment "S/390 block device drivers"
 	depends on S390
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index c4dfcc91ddda..dab082002e6f 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -3,11 +3,13 @@ menu "SCSI device support"
 config RAID_ATTRS
 	tristate "RAID Transport Class"
 	default n
+	depends on BLOCK
 	---help---
 	  Provides RAID
 
 config SCSI
 	tristate "SCSI device support"
+	depends on BLOCK
 	---help---
 	  If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or
 	  any other SCSI device under Linux, say Y and make sure that you know
diff --git a/fs/Kconfig b/fs/Kconfig
index 4fd9efac29ab..1453d2d164f7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -4,6 +4,8 @@
 
 menu "File systems"
 
+if BLOCK
+
 config EXT2_FS
 	tristate "Second extended fs support"
 	help
@@ -399,6 +401,8 @@ config ROMFS_FS
 	  If you don't know whether you need it, then you don't need it:
 	  answer N.
 
+endif
+
 config INOTIFY
 	bool "Inotify file change notification support"
 	default y
@@ -530,6 +534,7 @@ config FUSE_FS
 	  If you want to develop a userspace FS, or if you want to use
 	  a filesystem based on FUSE, answer Y or M.
 
+if BLOCK
 menu "CD-ROM/DVD Filesystems"
 
 config ISO9660_FS
@@ -597,7 +602,9 @@ config UDF_NLS
 	depends on (UDF_FS=m && NLS) || (UDF_FS=y && NLS=y)
 
 endmenu
+endif
 
+if BLOCK
 menu "DOS/FAT/NT Filesystems"
 
 config FAT_FS
@@ -782,6 +789,7 @@ config NTFS_RW
 	  It is perfectly safe to say N here.
 
 endmenu
+endif
 
 menu "Pseudo filesystems"
 
@@ -939,7 +947,7 @@ menu "Miscellaneous filesystems"
 
 config ADFS_FS
 	tristate "ADFS file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  The Acorn Disc Filing System is the standard file system of the
 	  RiscOS operating system which runs on Acorn's ARM-based Risc PC
@@ -967,7 +975,7 @@ config ADFS_FS_RW
 
 config AFFS_FS
 	tristate "Amiga FFS file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  The Fast File System (FFS) is the common file system used on hard
 	  disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20).  Say Y
@@ -989,7 +997,7 @@ config AFFS_FS
 
 config HFS_FS
 	tristate "Apple Macintosh file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	select NLS
 	help
 	  If you say Y here, you will be able to mount Macintosh-formatted
@@ -1002,6 +1010,7 @@ config HFS_FS
 
 config HFSPLUS_FS
 	tristate "Apple Extended HFS file system support"
+	depends on BLOCK
 	select NLS
 	select NLS_UTF8
 	help
@@ -1015,7 +1024,7 @@ config HFSPLUS_FS
 
 config BEFS_FS
 	tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	select NLS
 	help
 	  The BeOS File System (BeFS) is the native file system of Be, Inc's
@@ -1042,7 +1051,7 @@ config BEFS_DEBUG
 
 config BFS_FS
 	tristate "BFS file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  Boot File System (BFS) is a file system used under SCO UnixWare to
 	  allow the bootloader access to the kernel image and other important
@@ -1064,7 +1073,7 @@ config BFS_FS
 
 config EFS_FS
 	tristate "EFS file system support (read only) (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  EFS is an older file system used for non-ISO9660 CD-ROMs and hard
 	  disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer
@@ -1079,7 +1088,7 @@ config EFS_FS
 
 config JFFS_FS
 	tristate "Journalling Flash File System (JFFS) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	help
 	  JFFS is the Journaling Flash File System developed by Axis
 	  Communications in Sweden, aimed at providing a crash/powerdown-safe
@@ -1264,6 +1273,7 @@ endchoice
 
 config CRAMFS
 	tristate "Compressed ROM file system support (cramfs)"
+	depends on BLOCK
 	select ZLIB_INFLATE
 	help
 	  Saying Y here includes support for CramFs (Compressed ROM File
@@ -1283,6 +1293,7 @@ config CRAMFS
 
 config VXFS_FS
 	tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)"
+	depends on BLOCK
 	help
 	  FreeVxFS is a file system driver that support the VERITAS VxFS(TM)
 	  file system format.  VERITAS VxFS(TM) is the standard file system
@@ -1300,6 +1311,7 @@ config VXFS_FS
 
 config HPFS_FS
 	tristate "OS/2 HPFS file system support"
+	depends on BLOCK
 	help
 	  OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
 	  is the file system used for organizing files on OS/2 hard disk
@@ -1316,6 +1328,7 @@ config HPFS_FS
 
 config QNX4FS_FS
 	tristate "QNX4 file system support (read only)"
+	depends on BLOCK
 	help
 	  This is the file system used by the real-time operating systems
 	  QNX 4 and QNX 6 (the latter is also called QNX RTP).
@@ -1343,6 +1356,7 @@ config QNX4FS_RW
 
 config SYSV_FS
 	tristate "System V/Xenix/V7/Coherent file system support"
+	depends on BLOCK
 	help
 	  SCO, Xenix and Coherent are commercial Unix systems for Intel
 	  machines, and Version 7 was used on the DEC PDP-11. Saying Y
@@ -1381,6 +1395,7 @@ config SYSV_FS
 
 config UFS_FS
 	tristate "UFS file system support (read only)"
+	depends on BLOCK
 	help
 	  BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD,
 	  OpenBSD and NeXTstep) use a file system called UFS. Some System V
@@ -1959,11 +1974,13 @@ config GENERIC_ACL
 
 endmenu
 
+if BLOCK
 menu "Partition Types"
 
 source "fs/partitions/Kconfig"
 
 endmenu
+endif
 
 source "fs/nls/Kconfig"
 
diff --git a/fs/Makefile b/fs/Makefile
index 46b8cfe497b2..a503e6ce0f32 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -5,12 +5,18 @@
 # Rewritten to use lists instead of if-statements.
 # 
 
-obj-y :=	open.o read_write.o file_table.o buffer.o  bio.o super.o \
-		block_dev.o char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
+obj-y :=	open.o read_write.o file_table.o super.o \
+		char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
-		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-		ioprio.o pnode.o drop_caches.o splice.o sync.o
+		seq_file.o xattr.o libfs.o fs-writeback.o \
+		pnode.o drop_caches.o splice.o sync.o
+
+ifeq ($(CONFIG_BLOCK),y)
+obj-y +=	buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
+else
+obj-y +=	no-block.o
+endif
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index e1a56437040a..64b34533edea 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -645,6 +645,7 @@ out:
 }
 #endif
 
+#ifdef CONFIG_BLOCK
 struct hd_geometry32 {
 	unsigned char heads;
 	unsigned char sectors;
@@ -869,6 +870,7 @@ static int sg_grt_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
 	}
 	return err;
 }
+#endif /* CONFIG_BLOCK */
 
 struct sock_fprog32 {
 	unsigned short	len;
@@ -992,6 +994,7 @@ static int ppp_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
 }
 
 
+#ifdef CONFIG_BLOCK
 struct mtget32 {
 	compat_long_t	mt_type;
 	compat_long_t	mt_resid;
@@ -1164,6 +1167,7 @@ static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar
 
 	return err;
 }
+#endif /* CONFIG_BLOCK */
 
 #ifdef CONFIG_VT
 
@@ -1491,6 +1495,7 @@ ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg)
 	return -EINVAL;
 }
 
+#ifdef CONFIG_BLOCK
 static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
 	/* The mkswap binary hard codes it to Intel value :-((( */
@@ -1525,12 +1530,14 @@ static int blkpg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar
 
 	return sys_ioctl(fd, cmd, (unsigned long)a);
 }
+#endif
 
 static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
 	return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg);
 }
 
+#ifdef CONFIG_BLOCK
 /* Fix sizeof(sizeof()) breakage */
 #define BLKBSZGET_32   _IOR(0x12,112,int)
 #define BLKBSZSET_32   _IOW(0x12,113,int)
@@ -1551,6 +1558,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
 {
        return sys_ioctl(fd, BLKGETSIZE64, (unsigned long)compat_ptr(arg));
 }
+#endif
 
 /* Bluetooth ioctls */
 #define HCIUARTSETPROTO	_IOW('U', 200, int)
@@ -1571,6 +1579,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
 #define HIDPGETCONNLIST	_IOR('H', 210, int)
 #define HIDPGETCONNINFO	_IOR('H', 211, int)
 
+#ifdef CONFIG_BLOCK
 struct floppy_struct32 {
 	compat_uint_t	size;
 	compat_uint_t	sect;
@@ -1895,6 +1904,7 @@ out:
 	kfree(karg);
 	return err;
 }
+#endif
 
 struct mtd_oob_buf32 {
 	u_int32_t start;
@@ -1936,6 +1946,7 @@ static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg)
 	return err;
 }	
 
+#ifdef CONFIG_BLOCK
 struct raw32_config_request
 {
         compat_int_t    raw_minor;
@@ -2000,6 +2011,7 @@ static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
         }
         return ret;
 }
+#endif /* CONFIG_BLOCK */
 
 struct serial_struct32 {
         compat_int_t    type;
@@ -2606,6 +2618,7 @@ HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc)
 HANDLE_IOCTL(SIOCRTMSG, ret_einval)
 HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
 #endif
+#ifdef CONFIG_BLOCK
 HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
 HANDLE_IOCTL(BLKRAGET, w_long)
 HANDLE_IOCTL(BLKGETSIZE, w_long)
@@ -2631,14 +2644,17 @@ HANDLE_IOCTL(FDGETFDCSTAT32, fd_ioctl_trans)
 HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans)
 HANDLE_IOCTL(SG_IO,sg_ioctl_trans)
 HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans)
+#endif
 HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans)
+#ifdef CONFIG_BLOCK
 HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans)
 HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans)
 HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans)
 HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans)
+#endif
 #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
 HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout)
 #ifdef CONFIG_VT
@@ -2677,12 +2693,14 @@ HANDLE_IOCTL(SONET_SETFRAMING, do_atm_ioctl)
 HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
 HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
 /* block stuff */
+#ifdef CONFIG_BLOCK
 HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget)
 HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset)
 HANDLE_IOCTL(BLKGETSIZE64_32, do_blkgetsize64)
 /* Raw devices */
 HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
 HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
+#endif
 /* Serial */
 HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl)
 HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl)
diff --git a/fs/internal.h b/fs/internal.h
index f662b703bb97..f07147d63255 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -14,10 +14,16 @@
 /*
  * block_dev.c
  */
+#ifdef CONFIG_BLOCK
 extern struct super_block *blockdev_superblock;
 extern void __init bdev_cache_init(void);
 
 #define sb_is_blkdev_sb(sb) ((sb) == blockdev_superblock)
+#else
+static inline void bdev_cache_init(void) {}
+
+#define sb_is_blkdev_sb(sb) 0
+#endif
 
 /*
  * char_dev.c
diff --git a/fs/no-block.c b/fs/no-block.c
new file mode 100644
index 000000000000..d269a93d3467
--- /dev/null
+++ b/fs/no-block.c
@@ -0,0 +1,22 @@
+/* no-block.c: implementation of routines required for non-BLOCK configuration
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+
+static int no_blkdev_open(struct inode * inode, struct file * filp)
+{
+	return -ENODEV;
+}
+
+const struct file_operations def_blk_fops = {
+	.open		= no_blkdev_open,
+};
diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile
index d713ce6b3e12..67e665fdb7fc 100644
--- a/fs/partitions/Makefile
+++ b/fs/partitions/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y := check.o
+obj-$(CONFIG_BLOCK) := check.o
 
 obj-$(CONFIG_ACORN_PARTITION) += acorn.o
 obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 5bbd60896050..66bc425f2f3d 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -277,12 +277,15 @@ static int devinfo_show(struct seq_file *f, void *v)
 		if (i == 0)
 			seq_printf(f, "Character devices:\n");
 		chrdev_show(f, i);
-	} else {
+	}
+#ifdef CONFIG_BLOCK
+	else {
 		i -= CHRDEV_MAJOR_HASH_SIZE;
 		if (i == 0)
 			seq_printf(f, "\nBlock devices:\n");
 		blkdev_show(f, i);
 	}
+#endif
 	return 0;
 }
 
@@ -355,6 +358,7 @@ static int stram_read_proc(char *page, char **start, off_t off,
 }
 #endif
 
+#ifdef CONFIG_BLOCK
 extern struct seq_operations partitions_op;
 static int partitions_open(struct inode *inode, struct file *file)
 {
@@ -378,6 +382,7 @@ static struct file_operations proc_diskstats_operations = {
 	.llseek		= seq_lseek,
 	.release	= seq_release,
 };
+#endif
 
 #ifdef CONFIG_MODULES
 extern struct seq_operations modules_op;
@@ -695,7 +700,9 @@ void __init proc_misc_init(void)
 		entry->proc_fops = &proc_kmsg_operations;
 	create_seq_entry("devices", 0, &proc_devinfo_operations);
 	create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
+#ifdef CONFIG_BLOCK
 	create_seq_entry("partitions", 0, &proc_partitions_operations);
+#endif
 	create_seq_entry("stat", 0, &proc_stat_operations);
 	create_seq_entry("interrupts", 0, &proc_interrupts_operations);
 #ifdef CONFIG_SLAB
@@ -707,7 +714,9 @@ void __init proc_misc_init(void)
 	create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
 	create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
 	create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
+#ifdef CONFIG_BLOCK
 	create_seq_entry("diskstats", 0, &proc_diskstats_operations);
+#endif
 #ifdef CONFIG_MODULES
 	create_seq_entry("modules", 0, &proc_modules_operations);
 #endif
diff --git a/fs/quota.c b/fs/quota.c
index d6a2be826e29..b9dae76a0b6e 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -337,6 +337,34 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void
 	return 0;
 }
 
+/*
+ * look up a superblock on which quota ops will be performed
+ * - use the name of a block device to find the superblock thereon
+ */
+static inline struct super_block *quotactl_block(const char __user *special)
+{
+#ifdef CONFIG_BLOCK
+	struct block_device *bdev;
+	struct super_block *sb;
+	char *tmp = getname(special);
+
+	if (IS_ERR(tmp))
+		return ERR_PTR(PTR_ERR(tmp));
+	bdev = lookup_bdev(tmp);
+	putname(tmp);
+	if (IS_ERR(bdev))
+		return ERR_PTR(PTR_ERR(bdev));
+	sb = get_super(bdev);
+	bdput(bdev);
+	if (!sb)
+		return ERR_PTR(-ENODEV);
+
+	return sb;
+#else
+	return ERR_PTR(-ENODEV);
+#endif
+}
+
 /*
  * This is the system call interface. This communicates with
  * the user-level programs. Currently this only supports diskquota
@@ -347,25 +375,15 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t
 {
 	uint cmds, type;
 	struct super_block *sb = NULL;
-	struct block_device *bdev;
-	char *tmp;
 	int ret;
 
 	cmds = cmd >> SUBCMDSHIFT;
 	type = cmd & SUBCMDMASK;
 
 	if (cmds != Q_SYNC || special) {
-		tmp = getname(special);
-		if (IS_ERR(tmp))
-			return PTR_ERR(tmp);
-		bdev = lookup_bdev(tmp);
-		putname(tmp);
-		if (IS_ERR(bdev))
-			return PTR_ERR(bdev);
-		sb = get_super(bdev);
-		bdput(bdev);
-		if (!sb)
-			return -ENODEV;
+		sb = quotactl_block(special);
+		if (IS_ERR(sb))
+			return PTR_ERR(sb);
 	}
 
 	ret = check_quotactl_valid(sb, type, cmds, id);
diff --git a/fs/super.c b/fs/super.c
index 15671cd048b1..aec99ddbe53f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -571,8 +571,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 {
 	int retval;
 	
+#ifdef CONFIG_BLOCK
 	if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
 		return -EACCES;
+#endif
 	if (flags & MS_RDONLY)
 		acct_auto_close(sb);
 	shrink_dcache_sb(sb);
@@ -692,6 +694,7 @@ void kill_litter_super(struct super_block *sb)
 
 EXPORT_SYMBOL(kill_litter_super);
 
+#ifdef CONFIG_BLOCK
 static int set_bdev_super(struct super_block *s, void *data)
 {
 	s->s_bdev = data;
@@ -787,6 +790,7 @@ void kill_block_super(struct super_block *sb)
 }
 
 EXPORT_SYMBOL(kill_block_super);
+#endif
 
 int get_sb_nodev(struct file_system_type *fs_type,
 	int flags, void *data,
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 26b364c9d62c..35115bca036e 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -1,5 +1,6 @@
 config XFS_FS
 	tristate "XFS filesystem support"
+	depends on BLOCK
 	help
 	  XFS is a high performance journaling filesystem which originated
 	  on the SGI IRIX platform.  It is completely multi-threaded, can
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2c01a90998a7..3e36107d342a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -16,6 +16,22 @@
 
 #include <asm/scatterlist.h>
 
+#ifdef CONFIG_LBD
+# include <asm/div64.h>
+# define sector_div(a, b) do_div(a, b)
+#else
+# define sector_div(n, b)( \
+{ \
+	int _res; \
+	_res = (n) % (b); \
+	(n) /= (b); \
+	_res; \
+} \
+)
+#endif
+
+#ifdef CONFIG_BLOCK
+
 struct scsi_ioctl_command;
 
 struct request_queue;
@@ -818,24 +834,30 @@ struct work_struct;
 int kblockd_schedule_work(struct work_struct *work);
 void kblockd_flush(void);
 
-#ifdef CONFIG_LBD
-# include <asm/div64.h>
-# define sector_div(a, b) do_div(a, b)
-#else
-# define sector_div(n, b)( \
-{ \
-	int _res; \
-	_res = (n) % (b); \
-	(n) /= (b); \
-	_res; \
-} \
-)
-#endif 
-
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
 	MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
 	MODULE_ALIAS("block-major-" __stringify(major) "-*")
 
 
+#else /* CONFIG_BLOCK */
+/*
+ * stubs for when the block layer is configured out
+ */
+#define buffer_heads_over_limit 0
+
+static inline long blk_congestion_wait(int rw, long timeout)
+{
+	return timeout;
+}
+
+static inline long nr_blockdev_pages(void)
+{
+	return 0;
+}
+
+static inline void exit_io_context(void) {}
+
+#endif /* CONFIG_BLOCK */
+
 #endif
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 64b508e35d2a..131ffd37e716 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -14,6 +14,8 @@
 #include <linux/wait.h>
 #include <asm/atomic.h>
 
+#ifdef CONFIG_BLOCK
+
 enum bh_state_bits {
 	BH_Uptodate,	/* Contains valid data */
 	BH_Dirty,	/* Is dirty */
@@ -301,4 +303,18 @@ static inline void lock_buffer(struct buffer_head *bh)
 }
 
 extern int __set_page_dirty_buffers(struct page *page);
+
+#else /* CONFIG_BLOCK */
+
+static inline void buffer_init(void) {}
+static inline int try_to_free_buffers(struct page *page) { return 1; }
+static inline int sync_blockdev(struct block_device *bdev) { return 0; }
+static inline int inode_has_buffers(struct inode *inode) { return 0; }
+static inline void invalidate_inode_buffers(struct inode *inode) {}
+static inline int remove_inode_buffers(struct inode *inode) { return 1; }
+static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
+static inline void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) {}
+
+
+#endif /* CONFIG_BLOCK */
 #endif /* _LINUX_BUFFER_HEAD_H */
diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h
index 98d40e08ba6e..d61ef5951538 100644
--- a/include/linux/compat_ioctl.h
+++ b/include/linux/compat_ioctl.h
@@ -90,6 +90,7 @@ COMPATIBLE_IOCTL(FDTWADDLE)
 COMPATIBLE_IOCTL(FDFMTTRK)
 COMPATIBLE_IOCTL(FDRAWCMD)
 /* 0x12 */
+#ifdef CONFIG_BLOCK
 COMPATIBLE_IOCTL(BLKRASET)
 COMPATIBLE_IOCTL(BLKROSET)
 COMPATIBLE_IOCTL(BLKROGET)
@@ -103,6 +104,7 @@ COMPATIBLE_IOCTL(BLKTRACESETUP)
 COMPATIBLE_IOCTL(BLKTRACETEARDOWN)
 ULONG_IOCTL(BLKRASET)
 ULONG_IOCTL(BLKFRASET)
+#endif
 /* RAID */
 COMPATIBLE_IOCTL(RAID_VERSION)
 COMPATIBLE_IOCTL(GET_ARRAY_INFO)
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 9c5a04f6114c..b3370ef5164d 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -3,6 +3,8 @@
 
 #include <linux/percpu.h>
 
+#ifdef CONFIG_BLOCK
+
 typedef int (elevator_merge_fn) (request_queue_t *, struct request **,
 				 struct bio *);
 
@@ -203,4 +205,5 @@ enum {
 	__val;							\
 })
 
+#endif /* CONFIG_BLOCK */
 #endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b73a47582dbe..5baf3a153403 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1482,6 +1482,7 @@ extern void __init vfs_caches_init(unsigned long);
 extern void putname(const char *name);
 #endif
 
+#ifdef CONFIG_BLOCK
 extern int register_blkdev(unsigned int, const char *);
 extern int unregister_blkdev(unsigned int, const char *);
 extern struct block_device *bdget(dev_t);
@@ -1490,11 +1491,15 @@ extern void bd_forget(struct inode *inode);
 extern void bdput(struct block_device *);
 extern struct block_device *open_by_devnum(dev_t, unsigned);
 extern struct block_device *open_partition_by_devnum(dev_t, unsigned);
-extern const struct file_operations def_blk_fops;
 extern const struct address_space_operations def_blk_aops;
+#else
+static inline void bd_forget(struct inode *inode) {}
+#endif
+extern const struct file_operations def_blk_fops;
 extern const struct file_operations def_chr_fops;
 extern const struct file_operations bad_sock_fops;
 extern const struct file_operations def_fifo_fops;
+#ifdef CONFIG_BLOCK
 extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
 extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long);
 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
@@ -1510,6 +1515,7 @@ extern void bd_release_from_disk(struct block_device *, struct gendisk *);
 #define bd_claim_by_disk(bdev, holder, disk)	bd_claim(bdev, holder)
 #define bd_release_from_disk(bdev, disk)	bd_release(bdev)
 #endif
+#endif
 
 /* fs/char_dev.c */
 #define CHRDEV_MAJOR_HASH_SIZE	255
@@ -1523,14 +1529,19 @@ extern int chrdev_open(struct inode *, struct file *);
 extern void chrdev_show(struct seq_file *,off_t);
 
 /* fs/block_dev.c */
-#define BLKDEV_MAJOR_HASH_SIZE	255
 #define BDEVNAME_SIZE	32	/* Largest string for a blockdev identifier */
+
+#ifdef CONFIG_BLOCK
+#define BLKDEV_MAJOR_HASH_SIZE	255
 extern const char *__bdevname(dev_t, char *buffer);
 extern const char *bdevname(struct block_device *bdev, char *buffer);
 extern struct block_device *lookup_bdev(const char *);
 extern struct block_device *open_bdev_excl(const char *, int, void *);
 extern void close_bdev_excl(struct block_device *);
 extern void blkdev_show(struct seq_file *,off_t);
+#else
+#define BLKDEV_MAJOR_HASH_SIZE	0
+#endif
 
 extern void init_special_inode(struct inode *, umode_t, dev_t);
 
@@ -1544,6 +1555,7 @@ extern const struct file_operations rdwr_fifo_fops;
 
 extern int fs_may_remount_ro(struct super_block *);
 
+#ifdef CONFIG_BLOCK
 /*
  * return READ, READA, or WRITE
  */
@@ -1555,9 +1567,10 @@ extern int fs_may_remount_ro(struct super_block *);
 #define bio_data_dir(bio)	((bio)->bi_rw & 1)
 
 extern int check_disk_change(struct block_device *);
-extern int invalidate_inodes(struct super_block *);
 extern int __invalidate_device(struct block_device *);
 extern int invalidate_partition(struct gendisk *, int);
+#endif
+extern int invalidate_inodes(struct super_block *);
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
 					pgoff_t start, pgoff_t end);
 unsigned long invalidate_inode_pages(struct address_space *mapping);
@@ -1590,7 +1603,9 @@ extern void emergency_sync(void);
 extern void emergency_remount(void);
 extern int do_remount_sb(struct super_block *sb, int flags,
 			 void *data, int force);
+#ifdef CONFIG_BLOCK
 extern sector_t bmap(struct inode *, sector_t);
+#endif
 extern int notify_change(struct dentry *, struct iattr *);
 extern int permission(struct inode *, int, struct nameidata *);
 extern int generic_permission(struct inode *, int,
@@ -1673,9 +1688,11 @@ static inline void insert_inode_hash(struct inode *inode) {
 extern struct file * get_empty_filp(void);
 extern void file_move(struct file *f, struct list_head *list);
 extern void file_kill(struct file *f);
+#ifdef CONFIG_BLOCK
 struct bio;
 extern void submit_bio(int, struct bio *);
 extern int bdev_read_only(struct block_device *);
+#endif
 extern int set_blocksize(struct block_device *, int);
 extern int sb_set_blocksize(struct super_block *, int);
 extern int sb_min_blocksize(struct super_block *, int);
@@ -1756,6 +1773,7 @@ static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
 				actor);
 }
 
+#ifdef CONFIG_BLOCK
 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset,
 	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
@@ -1793,6 +1811,7 @@ static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb,
 	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
 				nr_segs, get_block, end_io, DIO_OWN_LOCKING);
 }
+#endif
 
 extern const struct file_operations generic_ro_fops;
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index e4af57e87c17..41f276fdd185 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -11,6 +11,8 @@
 
 #include <linux/types.h>
 
+#ifdef CONFIG_BLOCK
+
 enum {
 /* These three have identical behaviour; use the second one if DOS FDISK gets
    confused about extended/logical partitions starting past cylinder 1023. */
@@ -420,3 +422,5 @@ static inline struct block_device *bdget_disk(struct gendisk *disk, int index)
 #endif
 
 #endif
+
+#endif
diff --git a/include/linux/mpage.h b/include/linux/mpage.h
index 517c098fde20..cc5fb75af78a 100644
--- a/include/linux/mpage.h
+++ b/include/linux/mpage.h
@@ -9,6 +9,7 @@
  * (And no, it doesn't do the #ifdef __MPAGE_H thing, and it doesn't do
  * nested includes.  Get it right in the .c file).
  */
+#ifdef CONFIG_BLOCK
 
 struct writeback_control;
 typedef int (writepage_t)(struct page *page, struct writeback_control *wbc);
@@ -20,3 +21,5 @@ int mpage_writepages(struct address_space *mapping,
 		struct writeback_control *wbc, get_block_t get_block);
 int mpage_writepage(struct page *page, get_block_t *get_block,
 		struct writeback_control *wbc);
+
+#endif
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index eb3e547c8fee..c588709acbbc 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -53,6 +53,8 @@
 #include <linux/raid/md_u.h>
 #include <linux/raid/md_k.h>
 
+#ifdef CONFIG_MD
+
 /*
  * Different major versions are not compatible.
  * Different minor versions are only downward compatible.
@@ -95,5 +97,6 @@ extern void md_new_event(mddev_t *mddev);
 
 extern void md_update_sb(mddev_t * mddev);
 
+#endif /* CONFIG_MD */
 #endif 
 
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index d28890295852..920b94fe31fa 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -18,6 +18,8 @@
 /* and dm-bio-list.h is not under include/linux because.... ??? */
 #include "../../../drivers/md/dm-bio-list.h"
 
+#ifdef CONFIG_BLOCK
+
 #define	LEVEL_MULTIPATH		(-4)
 #define	LEVEL_LINEAR		(-1)
 #define	LEVEL_FAULTY		(-5)
@@ -362,5 +364,6 @@ static inline void safe_put_page(struct page *p)
 	if (p) put_page(p);
 }
 
+#endif /* CONFIG_BLOCK */
 #endif
 
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index bbf66219b769..c247a28259bc 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -6,7 +6,6 @@
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 
-
 #define MSG_SIMPLE_TAG	0x20
 #define MSG_HEAD_TAG	0x21
 #define MSG_ORDERED_TAG	0x22
@@ -14,6 +13,7 @@
 #define SCSI_NO_TAG	(-1)    /* identify no tag in use */
 
 
+#ifdef CONFIG_BLOCK
 
 /**
  * scsi_get_tag_type - get the type of tag the device supports
@@ -144,4 +144,5 @@ static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth)
 	return shost->bqt ? 0 : -ENOMEM;
 }
 
+#endif /* CONFIG_BLOCK */
 #endif /* _SCSI_SCSI_TCQ_H */
diff --git a/init/Kconfig b/init/Kconfig
index 4381006dd666..d2eb7a84a264 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -92,7 +92,7 @@ config LOCALVERSION_AUTO
 
 config SWAP
 	bool "Support for paging of anonymous memory (swap)"
-	depends on MMU
+	depends on MMU && BLOCK
 	default y
 	help
 	  This option allows you to choose whether you want to have support
diff --git a/init/do_mounts.c b/init/do_mounts.c
index b290aadb1d3f..dc1ec0803ef9 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -285,7 +285,11 @@ void __init mount_block_root(char *name, int flags)
 {
 	char *fs_names = __getname();
 	char *p;
+#ifdef CONFIG_BLOCK
 	char b[BDEVNAME_SIZE];
+#else
+	const char *b = name;
+#endif
 
 	get_fs_names(fs_names);
 retry:
@@ -304,7 +308,9 @@ retry:
 		 * Allow the user to distinguish between failed sys_open
 		 * and bad superblock on root device.
 		 */
+#ifdef CONFIG_BLOCK
 		__bdevname(ROOT_DEV, b);
+#endif
 		printk("VFS: Cannot open root device \"%s\" or %s\n",
 				root_device_name, b);
 		printk("Please append a correct \"root=\" boot option\n");
@@ -316,7 +322,10 @@ retry:
 	for (p = fs_names; *p; p += strlen(p)+1)
 		printk(" %s", p);
 	printk("\n");
-	panic("VFS: Unable to mount root fs on %s", __bdevname(ROOT_DEV, b));
+#ifdef CONFIG_BLOCK
+	__bdevname(ROOT_DEV, b);
+#endif
+	panic("VFS: Unable to mount root fs on %s", b);
 out:
 	putname(fs_names);
 }
@@ -387,8 +396,10 @@ void __init mount_root(void)
 			change_floppy("root floppy");
 	}
 #endif
+#ifdef CONFIG_BLOCK
 	create_dev("/dev/root", ROOT_DEV);
 	mount_block_root("/dev/root", root_mountflags);
+#endif
 }
 
 /*
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 6991bece67e8..7a3b2e75f040 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -134,3 +134,8 @@ cond_syscall(sys_madvise);
 cond_syscall(sys_mremap);
 cond_syscall(sys_remap_file_pages);
 cond_syscall(compat_sys_move_pages);
+
+/* block-layer dependent */
+cond_syscall(sys_bdflush);
+cond_syscall(sys_ioprio_set);
+cond_syscall(sys_ioprio_get);
diff --git a/mm/Makefile b/mm/Makefile
index 4f2166a833b9..12b3a4eee88d 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -12,7 +12,7 @@ obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 			   readahead.o swap.o truncate.o vmscan.o \
 			   prio_tree.o util.o mmzone.o vmstat.o $(mmu-y)
 
-ifeq ($(CONFIG_MMU),y)
+ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy)
 obj-y			+= bounce.o
 endif
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
diff --git a/mm/filemap.c b/mm/filemap.c
index d6846de08887..c4fe97f5ace0 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2020,6 +2020,7 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
 		if (unlikely(*pos + *count > inode->i_sb->s_maxbytes))
 			*count = inode->i_sb->s_maxbytes - *pos;
 	} else {
+#ifdef CONFIG_BLOCK
 		loff_t isize;
 		if (bdev_read_only(I_BDEV(inode)))
 			return -EPERM;
@@ -2031,6 +2032,9 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
 
 		if (*pos + *count > isize)
 			*count = isize - *pos;
+#else
+		return -EPERM;
+#endif
 	}
 	return 0;
 }
diff --git a/mm/migrate.c b/mm/migrate.c
index 7f50e3ff54cd..ba2453f9483d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -409,6 +409,7 @@ int migrate_page(struct address_space *mapping,
 }
 EXPORT_SYMBOL(migrate_page);
 
+#ifdef CONFIG_BLOCK
 /*
  * Migration function for pages with buffers. This function can only be used
  * if the underlying filesystem guarantees that no other references to "page"
@@ -466,6 +467,7 @@ int buffer_migrate_page(struct address_space *mapping,
 	return 0;
 }
 EXPORT_SYMBOL(buffer_migrate_page);
+#endif
 
 /*
  * Writeback a page to clean the dirty state
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ecf27839c203..c0d4ce144dec 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -807,9 +807,11 @@ int fastcall set_page_dirty(struct page *page)
 
 	if (likely(mapping)) {
 		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
-		if (spd)
-			return (*spd)(page);
-		return __set_page_dirty_buffers(page);
+#ifdef CONFIG_BLOCK
+		if (!spd)
+			spd = __set_page_dirty_buffers;
+#endif
+		return (*spd)(page);
 	}
 	if (!PageDirty(page)) {
 		if (!TestSetPageDirty(page))
diff --git a/mm/truncate.c b/mm/truncate.c
index cd3e34b816db..8fde6580657e 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -35,8 +35,10 @@ void do_invalidatepage(struct page *page, unsigned long offset)
 {
 	void (*invalidatepage)(struct page *, unsigned long);
 	invalidatepage = page->mapping->a_ops->invalidatepage;
+#ifdef CONFIG_BLOCK
 	if (!invalidatepage)
 		invalidatepage = block_invalidatepage;
+#endif
 	if (invalidatepage)
 		(*invalidatepage)(page, offset);
 }
-- 
cgit v1.2.3-71-gd317


From bcfd8d36151e531e1c6c731f1fbf792509a1c494 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 31 Aug 2006 12:56:06 +0200
Subject: [PATCH] CONFIG_BLOCK: blk_congestion_wait() fix

Don't just do nothing: it'll cause busywaits all over writeback and page
reclaim.

For now, take a fixed-length nap.  Will improve when NFS starts waking up
throttled processes.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux/blkdev.h')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3e36107d342a..1d79b8d4ca6d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_BLKDEV_H
 #define _LINUX_BLKDEV_H
 
+#include <linux/sched.h>
 #include <linux/major.h>
 #include <linux/genhd.h>
 #include <linux/list.h>
@@ -848,7 +849,7 @@ void kblockd_flush(void);
 
 static inline long blk_congestion_wait(int rw, long timeout)
 {
-	return timeout;
+	return io_schedule_timeout(timeout);
 }
 
 static inline long nr_blockdev_pages(void)
@@ -856,7 +857,9 @@ static inline long nr_blockdev_pages(void)
 	return 0;
 }
 
-static inline void exit_io_context(void) {}
+static inline void exit_io_context(void)
+{
+}
 
 #endif /* CONFIG_BLOCK */
 
-- 
cgit v1.2.3-71-gd317