From 275a082fe9308e710324e26ccb5363c53d8fd45f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Aug 2006 20:06:24 -0400 Subject: Add a real API for dealing with blk_congestion_wait() Signed-off-by: Trond Myklebust --- include/linux/writeback.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 9e38b566d0e7..0422036af4eb 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -85,6 +85,7 @@ int wakeup_pdflush(long nr_pages); void laptop_io_completion(void); void laptop_sync_completion(void); void throttle_vm_writeout(void); +void writeback_congestion_end(void); /* These are exported to sysctl. */ extern int dirty_background_ratio; -- cgit v1.2.3-71-gd317 From edc79b2a46ed854595e40edcf3f8b37f9f14aa3f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Sep 2006 23:30:58 -0700 Subject: [PATCH] mm: balance dirty pages Now that we can detect writers of shared mappings, throttle them. Avoids OOM by surprise. Signed-off-by: Peter Zijlstra Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 1 + mm/memory.c | 5 +++-- mm/page-writeback.c | 10 ++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 0422036af4eb..56a23a0e7f2e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -116,6 +116,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping, loff_t pos, loff_t count); int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, loff_t pos, loff_t count); +void set_page_dirty_balance(struct page *page); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl diff --git a/mm/memory.c b/mm/memory.c index fa941b169071..dd7d7fc5ed60 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -1571,7 +1572,7 @@ gotten: unlock: pte_unmap_unlock(page_table, ptl); if (dirty_page) { - set_page_dirty(dirty_page); + set_page_dirty_balance(dirty_page); put_page(dirty_page); } return ret; @@ -2218,7 +2219,7 @@ retry: unlock: pte_unmap_unlock(page_table, ptl); if (dirty_page) { - set_page_dirty(dirty_page); + set_page_dirty_balance(dirty_page); put_page(dirty_page); } return ret; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 1c87430b7a25..b9f4c6f1be86 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -244,6 +244,16 @@ static void balance_dirty_pages(struct address_space *mapping) pdflush_operation(background_writeout, 0); } +void set_page_dirty_balance(struct page *page) +{ + if (set_page_dirty(page)) { + struct address_space *mapping = page_mapping(page); + + if (mapping) + balance_dirty_pages_ratelimited(mapping); + } +} + /** * balance_dirty_pages_ratelimited_nr - balance dirty memory state * @mapping: address_space which was dirtied -- cgit v1.2.3-71-gd317 From 2d1d43f6a43b703587e759145f69467e7c6553a7 Mon Sep 17 00:00:00 2001 From: Chandra Seetharaman Date: Fri, 29 Sep 2006 02:01:25 -0700 Subject: [PATCH] call mm/page-writeback.c:set_ratelimit() when new pages are hot-added ratelimit_pages in page-writeback.c is recalculated (in set_ratelimit()) every time a CPU is hot-added/removed. But this value is not recalculated when new pages are hot-added. This patch fixes that problem by calling set_ratelimit() when new pages are hot-added. [akpm@osdl.org: cleanups] Signed-off-by: Chandra Seetharaman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 1 + mm/memory_hotplug.c | 2 ++ mm/page-writeback.c | 6 +++--- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 56a23a0e7f2e..9d4074ecd0cd 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -117,6 +117,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping, int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, loff_t pos, loff_t count); void set_page_dirty_balance(struct page *page); +void writeback_set_ratelimit(void); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9576ed920c0a..2053bb165a21 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -192,6 +193,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) if (need_zonelists_rebuild) build_all_zonelists(); vm_total_pages = nr_free_pagecache_pages(); + writeback_set_ratelimit(); return 0; } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index efd2705e4986..488b7088557c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -501,7 +501,7 @@ void laptop_sync_completion(void) * will write six megabyte chunks, max. */ -static void set_ratelimit(void) +void writeback_set_ratelimit(void) { ratelimit_pages = vm_total_pages / (num_online_cpus() * 32); if (ratelimit_pages < 16) @@ -513,7 +513,7 @@ static void set_ratelimit(void) static int __cpuinit ratelimit_handler(struct notifier_block *self, unsigned long u, void *v) { - set_ratelimit(); + writeback_set_ratelimit(); return 0; } @@ -546,7 +546,7 @@ void __init page_writeback_init(void) vm_dirty_ratio = 1; } mod_timer(&wb_timer, jiffies + dirty_writeback_interval); - set_ratelimit(); + writeback_set_ratelimit(); register_cpu_notifier(&ratelimit_nb); } -- cgit v1.2.3-71-gd317 From 811d736f9e8013966e1a5a930c0db09508bdbb15 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Aug 2006 19:06:09 +0100 Subject: [PATCH] BLOCK: Dissociate generic_writepages() from mpage stuff [try #6] Dissociate the generic_writepages() function from the mpage stuff, moving its declaration to linux/mm.h and actually emitting a full implementation into mm/page-writeback.c. The implementation is a partial duplicate of mpage_writepages() with all BIO references removed. It is used by NFS to do writeback. Signed-Off-By: David Howells Signed-off-by: Jens Axboe --- fs/block_dev.c | 1 + fs/mpage.c | 2 + include/linux/mpage.h | 6 --- include/linux/writeback.h | 2 + mm/page-writeback.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 139 insertions(+), 6 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/fs/block_dev.c b/fs/block_dev.c index 20f7333ba372..335c38bb86eb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/mpage.c b/fs/mpage.c index 1e4598247d0b..692a3e578fc8 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -693,6 +693,8 @@ out: * the call was made get new I/O started against them. If wbc->sync_mode is * WB_SYNC_ALL then we were called for data integrity and we must wait for * existing IO to complete. + * + * If you fix this you should check generic_writepages() also! */ int mpage_writepages(struct address_space *mapping, diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 3ca880463c47..517c098fde20 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h @@ -20,9 +20,3 @@ int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block); int mpage_writepage(struct page *page, get_block_t *get_block, struct writeback_control *wbc); - -static inline int -generic_writepages(struct address_space *mapping, struct writeback_control *wbc) -{ - return mpage_writepages(mapping, wbc, NULL); -} diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 9d4074ecd0cd..4f4d98addb44 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -111,6 +111,8 @@ balance_dirty_pages_ratelimited(struct address_space *mapping) } int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0); +extern int generic_writepages(struct address_space *mapping, + struct writeback_control *wbc); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); int sync_page_range(struct inode *inode, struct address_space *mapping, loff_t pos, loff_t count); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 9fdcc7903956..ecf27839c203 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * The maximum number of pages to writeout in a single bdflush/kupdate @@ -551,6 +552,139 @@ void __init page_writeback_init(void) register_cpu_notifier(&ratelimit_nb); } +/** + * generic_writepages - walk the list of dirty pages of the given + * address space and writepage() all of them. + * + * @mapping: address space structure to write + * @wbc: subtract the number of written pages from *@wbc->nr_to_write + * + * This is a library function, which implements the writepages() + * address_space_operation. + * + * If a page is already under I/O, generic_writepages() skips it, even + * if it's dirty. This is desirable behaviour for memory-cleaning writeback, + * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() + * and msync() need to guarantee that all the data which was dirty at the time + * the call was made get new I/O started against them. If wbc->sync_mode is + * WB_SYNC_ALL then we were called for data integrity and we must wait for + * existing IO to complete. + * + * Derived from mpage_writepages() - if you fix this you should check that + * also! + */ +int generic_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + int (*writepage)(struct page *page, struct writeback_control *wbc); + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; /* Inclusive */ + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + writepage = mapping->a_ops->writepage; + + /* deal with chardevs and other special file */ + if (!writepage) + return 0; + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + done = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + ret = (*writepage)(page, wbc); + if (ret) { + if (ret == -ENOSPC) + set_bit(AS_ENOSPC, &mapping->flags); + else + set_bit(AS_EIO, &mapping->flags); + } + + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) + unlock_page(page); + if (ret || (--(wbc->nr_to_write) <= 0)) + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + done = 1; + } + } + pagevec_release(&pvec); + cond_resched(); + } + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} + +EXPORT_SYMBOL(generic_writepages); + int do_writepages(struct address_space *mapping, struct writeback_control *wbc) { int ret; -- cgit v1.2.3-71-gd317