From 7f5aa215088b817add9c71914b83650bdd49f8a9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 10 Feb 2009 11:15:34 -0500 Subject: jbd2: Avoid possible NULL dereference in jbd2_journal_begin_ordered_truncate() If we race with commit code setting i_transaction to NULL, we could possibly dereference it. Proper locking requires the journal pointer (to access journal->j_list_lock), which we don't have. So we have to change the prototype of the function so that filesystem passes us the journal pointer. Also add a more detailed comment about why the function jbd2_journal_begin_ordered_truncate() does what it does and how it should be used. Thanks to Dan Carpenter for pointing to the suspitious code. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" Acked-by: Joel Becker CC: linux-ext4@vger.kernel.org CC: ocfs2-devel@oss.oracle.com CC: mfasheh@suse.de CC: Dan Carpenter --- include/linux/jbd2.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b28b37eb11c6..4d248b3f1323 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1150,7 +1150,8 @@ extern int jbd2_journal_clear_err (journal_t *); extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); extern int jbd2_journal_force_commit(journal_t *); extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode); -extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size); +extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, + struct jbd2_inode *inode, loff_t new_size); extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode); -- cgit v1.2.3-71-gd317 From 9f339e7028e2855717af3193c938f9960ad13b38 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Wed, 11 Feb 2009 15:10:27 +0100 Subject: x86, ptrace, mm: fix double-free on race Ptrace_detach() races with __ptrace_unlink() if the traced task is reaped while detaching. This might cause a double-free of the BTS buffer. Change the ptrace_detach() path to only do the memory accounting in ptrace_bts_detach() and leave the buffer free to ptrace_bts_untrace() which will be called from __ptrace_unlink(). The fix follows a proposal from Oleg Nesterov. Reported-by: Oleg Nesterov Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 16 ++++++++++------ include/linux/mm.h | 1 + mm/mlock.c | 7 ++++++- 3 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a5df5f82fb9..5a4c23d89892 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child) static void ptrace_bts_detach(struct task_struct *child) { - if (unlikely(child->bts)) { - ds_release_bts(child->bts); - child->bts = NULL; - - ptrace_bts_free_buffer(child); - } + /* + * Ptrace_detach() races with ptrace_untrace() in case + * the child dies and is reaped by another thread. + * + * We only do the memory accounting at this point and + * leave the buffer deallocation and the bts tracer + * release to ptrace_bts_untrace() which will be called + * later on with tasklist_lock held. + */ + release_locked_buffer(child->bts_buffer, child->bts_size); } #else static inline void ptrace_bts_fork(struct task_struct *tsk) {} diff --git a/include/linux/mm.h b/include/linux/mm.h index e8ddc98b8405..3d7fb44d7d7e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1305,5 +1305,6 @@ void vmemmap_populate_print_last(void); extern void *alloc_locked_buffer(size_t size); extern void free_locked_buffer(void *buffer, size_t size); +extern void release_locked_buffer(void *buffer, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/mlock.c b/mm/mlock.c index 028ec482fdd4..2b57f7e60390 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -657,7 +657,7 @@ void *alloc_locked_buffer(size_t size) return buffer; } -void free_locked_buffer(void *buffer, size_t size) +void release_locked_buffer(void *buffer, size_t size) { unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; @@ -667,6 +667,11 @@ void free_locked_buffer(void *buffer, size_t size) current->mm->locked_vm -= pgsz; up_write(¤t->mm->mmap_sem); +} + +void free_locked_buffer(void *buffer, size_t size) +{ + release_locked_buffer(buffer, size); kfree(buffer); } -- cgit v1.2.3-71-gd317 From 7a0eb1960e8ddcb68ea631caf16815485af0e228 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 19 Jan 2009 14:57:52 +0200 Subject: KVM: Avoid using CONFIG_ in userspace visible headers Kconfig symbols are not available in userspace, and are not stripped by headers-install. Avoid their use by adding #defines in to suit each architecture. Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm.h | 4 ++++ arch/x86/include/asm/kvm.h | 7 +++++++ include/linux/kvm.h | 10 +++++----- 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h index 68aa6da807c1..bfa86b6af7cd 100644 --- a/arch/ia64/include/asm/kvm.h +++ b/arch/ia64/include/asm/kvm.h @@ -25,6 +25,10 @@ #include +/* Select x86 specific features in */ +#define __KVM_HAVE_IOAPIC +#define __KVM_HAVE_DEVICE_ASSIGNMENT + /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index d2e3bf3608af..886c9402ec45 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -9,6 +9,13 @@ #include #include +/* Select x86 specific features in */ +#define __KVM_HAVE_PIT +#define __KVM_HAVE_IOAPIC +#define __KVM_HAVE_DEVICE_ASSIGNMENT +#define __KVM_HAVE_MSI +#define __KVM_HAVE_USER_NMI + /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 5715f1907601..0424326f1679 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -58,10 +58,10 @@ struct kvm_irqchip { __u32 pad; union { char dummy[512]; /* reserving space */ -#ifdef CONFIG_X86 +#ifdef __KVM_HAVE_PIT struct kvm_pic_state pic; #endif -#if defined(CONFIG_X86) || defined(CONFIG_IA64) +#ifdef __KVM_HAVE_IOAPIC struct kvm_ioapic_state ioapic; #endif } chip; @@ -384,16 +384,16 @@ struct kvm_trace_rec { #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ -#if defined(CONFIG_X86)||defined(CONFIG_IA64) +#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT #define KVM_CAP_DEVICE_ASSIGNMENT 17 #endif #define KVM_CAP_IOMMU 18 -#if defined(CONFIG_X86) +#ifdef __KVM_HAVE_MSI #define KVM_CAP_DEVICE_MSI 20 #endif /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 -#if defined(CONFIG_X86) +#ifdef __KVM_HAVE_USER_NMI #define KVM_CAP_USER_NMI 22 #endif -- cgit v1.2.3-71-gd317 From ad8ba2cd44d4d39fb3fe55d5dcc565b19fc3a7fb Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 6 Jan 2009 10:03:02 +0800 Subject: KVM: Add kvm_arch_sync_events to sync with asynchronize events kvm_arch_sync_events is introduced to quiet down all other events may happen contemporary with VM destroy process, like IRQ handler and work struct for assigned device. For kvm_arch_sync_events is called at the very beginning of kvm_destroy_vm(), so the state of KVM here is legal and can provide a environment to quiet down other events. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 4 ++++ arch/powerpc/kvm/powerpc.c | 4 ++++ arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 4 ++++ include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 1 + 6 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 4e586f6110aa..28f982045f29 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1337,6 +1337,10 @@ static void kvm_release_vm_pages(struct kvm *kvm) } } +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_iommu_unmap_guest(kvm); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2822c8ccfaaf..5f81256287f5 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -125,6 +125,10 @@ static void kvmppc_free_vcpus(struct kvm *kvm) } } +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + void kvm_arch_destroy_vm(struct kvm *kvm) { kvmppc_free_vcpus(kvm); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index be8497186b96..0d33893e1e89 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -212,6 +212,10 @@ static void kvm_free_vcpus(struct kvm *kvm) } } +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cc17546a2406..b0fc079f1bee 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4127,6 +4127,10 @@ static void kvm_free_vcpus(struct kvm *kvm) } +void kvm_arch_sync_events(struct kvm *kvm) +{ +} + void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_all_assigned_devices(kvm); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ec49d0be7f52..bf6f703642fc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -285,6 +285,7 @@ void kvm_free_physmem(struct kvm *kvm); struct kvm *kvm_arch_create_vm(void); void kvm_arch_destroy_vm(struct kvm *kvm); void kvm_free_all_assigned_devices(struct kvm *kvm); +void kvm_arch_sync_events(struct kvm *kvm); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *v); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0b6f2f71271f..68e3f1ec1674 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -891,6 +891,7 @@ static void kvm_destroy_vm(struct kvm *kvm) { struct mm_struct *mm = kvm->mm; + kvm_arch_sync_events(kvm); spin_lock(&kvm_lock); list_del(&kvm->vm_list); spin_unlock(&kvm_lock); -- cgit v1.2.3-71-gd317 From 5955c7a2cfb6a35429adea5dc480002b15ca8cfc Mon Sep 17 00:00:00 2001 From: Zlatko Calusic Date: Wed, 18 Feb 2009 01:33:34 +0100 Subject: Add support for VT6415 PCIE PATA IDE Host Controller Signed-off-by: Zlatko Calusic Signed-off-by: Linus Torvalds --- drivers/ata/pata_via.c | 4 +++- include/linux/pci_ids.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 79a6c9a0b721..ba556d3e6963 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -110,7 +110,8 @@ static const struct via_isa_bridge { { "vt8237s", PCI_DEVICE_ID_VIA_8237S, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8251", PCI_DEVICE_ID_VIA_8251, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_SATA_PATA }, - { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES}, + { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES }, + { "vt6415", PCI_DEVICE_ID_VIA_6415, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES }, { "vt8237a", PCI_DEVICE_ID_VIA_8237A, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8237", PCI_DEVICE_ID_VIA_8237, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8235", PCI_DEVICE_ID_VIA_8235, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, @@ -593,6 +594,7 @@ static int via_reinit_one(struct pci_dev *pdev) #endif static const struct pci_device_id via[] = { + { PCI_VDEVICE(VIA, 0x0415), }, { PCI_VDEVICE(VIA, 0x0571), }, { PCI_VDEVICE(VIA, 0x0581), }, { PCI_VDEVICE(VIA, 0x1571), }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 52a9fe08451c..918391b4b109 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1312,6 +1312,7 @@ #define PCI_DEVICE_ID_VIA_VT3351 0x0351 #define PCI_DEVICE_ID_VIA_VT3364 0x0364 #define PCI_DEVICE_ID_VIA_8371_0 0x0391 +#define PCI_DEVICE_ID_VIA_6415 0x0415 #define PCI_DEVICE_ID_VIA_8501_0 0x0501 #define PCI_DEVICE_ID_VIA_82C561 0x0561 #define PCI_DEVICE_ID_VIA_82C586_1 0x0571 -- cgit v1.2.3-71-gd317 From 93dbb393503d53cd226e5e1f0088fe8f4dbaa2b8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 16 Feb 2009 10:25:40 +0100 Subject: block: fix bad definition of BIO_RW_SYNC We can't OR shift values, so get rid of BIO_RW_SYNC and use BIO_RW_SYNCIO and BIO_RW_UNPLUG explicitly. This brings back the behaviour from before 213d9417fec62ef4c3675621b9364a667954d4dd. Signed-off-by: Jens Axboe --- block/blktrace.c | 2 +- drivers/md/dm-io.c | 2 +- drivers/md/dm-kcopyd.c | 2 +- drivers/md/md.c | 4 ++-- include/linux/bio.h | 2 -- include/linux/blktrace_api.h | 1 + include/linux/fs.h | 6 +++--- kernel/power/swap.c | 5 +++-- mm/page_io.c | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/block/blktrace.c b/block/blktrace.c index 39cc3bfe56e4..7cf9d1ff45a0 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -142,7 +142,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= ddir_act[rw & WRITE]; what |= MASK_TC_BIT(rw, BARRIER); - what |= MASK_TC_BIT(rw, SYNC); + what |= MASK_TC_BIT(rw, SYNCIO); what |= MASK_TC_BIT(rw, AHEAD); what |= MASK_TC_BIT(rw, META); what |= MASK_TC_BIT(rw, DISCARD); diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index a34338567a2a..f14813be4eff 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -328,7 +328,7 @@ static void dispatch_io(int rw, unsigned int num_regions, struct dpages old_pages = *dp; if (sync) - rw |= (1 << BIO_RW_SYNC); + rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); /* * For multiple regions we need to be careful to rewind diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 3073618269ea..0a225da21272 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -344,7 +344,7 @@ static int run_io_job(struct kcopyd_job *job) { int r; struct dm_io_request io_req = { - .bi_rw = job->rw | (1 << BIO_RW_SYNC), + .bi_rw = job->rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG), .mem.type = DM_IO_PAGE_LIST, .mem.ptr.pl = job->pages, .mem.offset = job->offset, diff --git a/drivers/md/md.c b/drivers/md/md.c index 4495104f6c9f..03b4cd0a6344 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -474,7 +474,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, * causes ENOTSUPP, we allocate a spare bio... */ struct bio *bio = bio_alloc(GFP_NOIO, 1); - int rw = (1<bi_bdev = rdev->bdev; bio->bi_sector = sector; @@ -531,7 +531,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size, struct completion event; int ret; - rw |= (1 << BIO_RW_SYNC); + rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); bio->bi_bdev = bdev; bio->bi_sector = sector; diff --git a/include/linux/bio.h b/include/linux/bio.h index 2aa283ab062b..1b16108a5417 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -171,8 +171,6 @@ struct bio { #define BIO_RW_FAILFAST_TRANSPORT 8 #define BIO_RW_FAILFAST_DRIVER 9 -#define BIO_RW_SYNC (BIO_RW_SYNCIO | BIO_RW_UNPLUG) - #define bio_rw_flagged(bio, flag) ((bio)->bi_rw & (1 << (flag))) /* diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 25379cba2370..6e915878e88c 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -15,6 +15,7 @@ enum blktrace_cat { BLK_TC_WRITE = 1 << 1, /* writes */ BLK_TC_BARRIER = 1 << 2, /* barrier */ BLK_TC_SYNC = 1 << 3, /* sync IO */ + BLK_TC_SYNCIO = BLK_TC_SYNC, BLK_TC_QUEUE = 1 << 4, /* queueing/merging */ BLK_TC_REQUEUE = 1 << 5, /* requeueing */ BLK_TC_ISSUE = 1 << 6, /* issue */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 6022f44043f2..67857dc16365 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -87,10 +87,10 @@ struct inodes_stat_t { #define WRITE 1 #define READA 2 /* read-ahead - don't block if no resources */ #define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ -#define READ_SYNC (READ | (1 << BIO_RW_SYNC)) +#define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) #define READ_META (READ | (1 << BIO_RW_META)) -#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) -#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) +#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) +#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 6da14358537c..505f319e489c 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -60,6 +60,7 @@ static struct block_device *resume_bdev; static int submit(int rw, pgoff_t page_off, struct page *page, struct bio **bio_chain) { + const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); struct bio *bio; bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); @@ -80,7 +81,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page, bio_get(bio); if (bio_chain == NULL) { - submit_bio(rw | (1 << BIO_RW_SYNC), bio); + submit_bio(bio_rw, bio); wait_on_page_locked(page); if (rw == READ) bio_set_pages_dirty(bio); @@ -90,7 +91,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page, get_page(page); /* These pages are freed later */ bio->bi_private = *bio_chain; *bio_chain = bio; - submit_bio(rw | (1 << BIO_RW_SYNC), bio); + submit_bio(bio_rw, bio); } return 0; } diff --git a/mm/page_io.c b/mm/page_io.c index dc6ce0afbded..3023c475e041 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -111,7 +111,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) goto out; } if (wbc->sync_mode == WB_SYNC_ALL) - rw |= (1 << BIO_RW_SYNC); + rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); count_vm_event(PSWPOUT); set_page_writeback(page); unlock_page(page); -- cgit v1.2.3-71-gd317 From c296861291669f305deef19b78042330d7135017 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 18 Feb 2009 14:48:12 -0800 Subject: vmalloc: add __get_vm_area_caller() We have get_vm_area_caller() and __get_vm_area() but not __get_vm_area_caller() On powerpc, I use __get_vm_area() to separate the ranges of addresses given to vmalloc vs. ioremap (various good reasons for that) so in order to be able to implement the new caller tracking in /proc/vmallocinfo, I need a "_caller" variant of it. (akpm: needed for ongoing powerpc development, so merge it early) [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Benjamin Herrenschmidt Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 4 ++++ mm/vmalloc.c | 8 ++++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 506e7620a986..9c0890c7a06a 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -84,6 +84,10 @@ extern struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, void *caller); extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, unsigned long start, unsigned long end); +extern struct vm_struct *__get_vm_area_caller(unsigned long size, + unsigned long flags, + unsigned long start, unsigned long end, + void *caller); extern struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, int node, gfp_t gfp_mask); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 75f49d312e8c..4dd2636d0b92 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1106,6 +1106,14 @@ struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, } EXPORT_SYMBOL_GPL(__get_vm_area); +struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, + unsigned long start, unsigned long end, + void *caller) +{ + return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL, + caller); +} + /** * get_vm_area - reserve a contiguous kernel virtual area * @size: size of the area -- cgit v1.2.3-71-gd317 From 55ec82176eca52e4e0530a82a0eb59160a1a95a1 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Wed, 18 Feb 2009 14:48:15 -0800 Subject: vfs: separate FMODE_PREAD/FMODE_PWRITE into separate flags Separate FMODE_PREAD and FMODE_PWRITE into separate flags to reflect the reality that the read and write paths may have independent restrictions. A git grep verifies that these flags are always cleared together so this new behavior will only apply to interfaces that change to clear flags individually. This is required for "seq_file: properly cope with pread", a post-2.6.25 regression fix. [akpm@linux-foundation.org: add comment] Signed-off-by: Paul Turner Cc: Eric Biederman Cc: Alexey Dobriyan Cc: Al Viro Cc: [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6022f44043f2..5852bd6afbe4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -54,24 +54,30 @@ struct inodes_stat_t { #define MAY_ACCESS 16 #define MAY_OPEN 32 +/* + * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond + * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() + */ + /* file is open for reading */ #define FMODE_READ ((__force fmode_t)1) /* file is open for writing */ #define FMODE_WRITE ((__force fmode_t)2) /* file is seekable */ #define FMODE_LSEEK ((__force fmode_t)4) -/* file can be accessed using pread/pwrite */ +/* file can be accessed using pread */ #define FMODE_PREAD ((__force fmode_t)8) -#define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ +/* file can be accessed using pwrite */ +#define FMODE_PWRITE ((__force fmode_t)16) /* File is opened for execution with sys_execve / sys_uselib */ -#define FMODE_EXEC ((__force fmode_t)16) +#define FMODE_EXEC ((__force fmode_t)32) /* File is opened with O_NDELAY (only set for block devices) */ -#define FMODE_NDELAY ((__force fmode_t)32) +#define FMODE_NDELAY ((__force fmode_t)64) /* File is opened with O_EXCL (only set for block devices) */ -#define FMODE_EXCL ((__force fmode_t)64) +#define FMODE_EXCL ((__force fmode_t)128) /* File is opened using open(.., 3, ..) and is writeable only for ioctls (specialy hack for floppy.c) */ -#define FMODE_WRITE_IOCTL ((__force fmode_t)128) +#define FMODE_WRITE_IOCTL ((__force fmode_t)256) /* * Don't update ctime and mtime. -- cgit v1.2.3-71-gd317 From 8f19d472935c83d823fa4cf02bcc0a7b9952db30 Mon Sep 17 00:00:00 2001 From: Eric Biederman Date: Wed, 18 Feb 2009 14:48:16 -0800 Subject: seq_file: properly cope with pread Currently seq_read assumes that the offset passed to it is always the offset it passed to user space. In the case pread this assumption is broken and we do the wrong thing when presented with pread. To solve this I introduce an offset cache inside of struct seq_file so we know where our logical file position is. Then in seq_read if we try to read from another offset we reset our data structures and attempt to go to the offset user space wanted. [akpm@linux-foundation.org: restore FMODE_PWRITE] [pjt@google.com: seq_open needs its fmode opened up to take advantage of this] Signed-off-by: Eric Biederman Cc: Alexey Dobriyan Cc: Al Viro Cc: Paul Turner Cc: [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/seq_file.c | 36 ++++++++++++++++++++++++++++++++---- include/linux/seq_file.h | 1 + 2 files changed, 33 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/seq_file.c b/fs/seq_file.c index 5267098532bf..a1a4cfe19210 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -48,8 +48,16 @@ int seq_open(struct file *file, const struct seq_operations *op) */ file->f_version = 0; - /* SEQ files support lseek, but not pread/pwrite */ - file->f_mode &= ~(FMODE_PREAD | FMODE_PWRITE); + /* + * seq_files support lseek() and pread(). They do not implement + * write() at all, but we clear FMODE_PWRITE here for historical + * reasons. + * + * If a client of seq_files a) implements file.write() and b) wishes to + * support pwrite() then that client will need to implement its own + * file.open() which calls seq_open() and then sets FMODE_PWRITE. + */ + file->f_mode &= ~FMODE_PWRITE; return 0; } EXPORT_SYMBOL(seq_open); @@ -131,6 +139,22 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) int err = 0; mutex_lock(&m->lock); + + /* Don't assume *ppos is where we left it */ + if (unlikely(*ppos != m->read_pos)) { + m->read_pos = *ppos; + while ((err = traverse(m, *ppos)) == -EAGAIN) + ; + if (err) { + /* With prejudice... */ + m->read_pos = 0; + m->version = 0; + m->index = 0; + m->count = 0; + goto Done; + } + } + /* * seq_file->op->..m_start/m_stop/m_next may do special actions * or optimisations based on the file->f_version, so we want to @@ -230,8 +254,10 @@ Fill: Done: if (!copied) copied = err; - else + else { *ppos += copied; + m->read_pos += copied; + } file->f_version = m->version; mutex_unlock(&m->lock); return copied; @@ -266,16 +292,18 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin) if (offset < 0) break; retval = offset; - if (offset != file->f_pos) { + if (offset != m->read_pos) { while ((retval=traverse(m, offset)) == -EAGAIN) ; if (retval) { /* with extreme prejudice... */ file->f_pos = 0; + m->read_pos = 0; m->version = 0; m->index = 0; m->count = 0; } else { + m->read_pos = offset; retval = file->f_pos = offset; } } diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 40ea5058c2ec..f616f31576d7 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -19,6 +19,7 @@ struct seq_file { size_t from; size_t count; loff_t index; + loff_t read_pos; u64 version; struct mutex lock; const struct seq_operations *op; -- cgit v1.2.3-71-gd317 From 610d18f4128ebbd88845d0fc60cce67b49af881e Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Wed, 18 Feb 2009 14:48:18 -0800 Subject: timerfd: add flags check As requested by Michael, add a missing check for valid flags in timerfd_settime(), and make it return EINVAL in case some extra bits are set. Michael said: If this is to be any use to userland apps that want to check flag support (perhaps it is too late already), then the sooner we get it into the kernel the better: 2.6.29 would be good; earlier stables as well would be even better. [akpm@linux-foundation.org: remove unused TFD_FLAGS_SET] Acked-by: Michael Kerrisk Signed-off-by: Davide Libenzi Cc: [2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/timerfd.c | 12 ++++++------ include/linux/timerfd.h | 16 ++++++++++++---- 2 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/timerfd.c b/fs/timerfd.c index 6a123b8ff3f5..b042bd7034b1 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -186,10 +186,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK); - if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) - return -EINVAL; - if (clockid != CLOCK_MONOTONIC && - clockid != CLOCK_REALTIME) + if ((flags & ~TFD_CREATE_FLAGS) || + (clockid != CLOCK_MONOTONIC && + clockid != CLOCK_REALTIME)) return -EINVAL; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -201,7 +200,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, - flags & (O_CLOEXEC | O_NONBLOCK)); + flags & TFD_SHARED_FCNTL_FLAGS); if (ufd < 0) kfree(ctx); @@ -219,7 +218,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) return -EFAULT; - if (!timespec_valid(&ktmr.it_value) || + if ((flags & ~TFD_SETTIME_FLAGS) || + !timespec_valid(&ktmr.it_value) || !timespec_valid(&ktmr.it_interval)) return -EINVAL; diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h index 86cb0501d3e2..2d0792983f8c 100644 --- a/include/linux/timerfd.h +++ b/include/linux/timerfd.h @@ -11,13 +11,21 @@ /* For O_CLOEXEC and O_NONBLOCK */ #include -/* Flags for timerfd_settime. */ +/* + * CAREFUL: Check include/asm-generic/fcntl.h when defining + * new flags, since they might collide with O_* ones. We want + * to re-use O_* flags that couldn't possibly have a meaning + * from eventfd, in order to leave a free define-space for + * shared O_* flags. + */ #define TFD_TIMER_ABSTIME (1 << 0) - -/* Flags for timerfd_create. */ #define TFD_CLOEXEC O_CLOEXEC #define TFD_NONBLOCK O_NONBLOCK +#define TFD_SHARED_FCNTL_FLAGS (TFD_CLOEXEC | TFD_NONBLOCK) +/* Flags for timerfd_create. */ +#define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS +/* Flags for timerfd_settime. */ +#define TFD_SETTIME_FLAGS TFD_TIMER_ABSTIME #endif /* _LINUX_TIMERFD_H */ - -- cgit v1.2.3-71-gd317 From 1cf6e7d83bf334cc5916137862c920a97aabc018 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Feb 2009 14:48:18 -0800 Subject: mm: task dirty accounting fix YAMAMOTO-san noticed that task_dirty_inc doesn't seem to be called properly for cases where set_page_dirty is not used to dirty a page (eg. mark_buffer_dirty). Additionally, there is some inconsistency about when task_dirty_inc is called. It is used for dirty balancing, however it even gets called for __set_page_dirty_no_writeback. So rather than increment it in a set_page_dirty wrapper, move it down to exactly where the dirty page accounting stats are incremented. Cc: YAMAMOTO Takashi Signed-off-by: Nick Piggin Acked-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 1 + include/linux/mm.h | 1 + mm/page-writeback.c | 13 +++---------- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 665d446b25bc..ff4d1cdd779b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -777,6 +777,7 @@ static int __set_page_dirty(struct page *page, __inc_zone_page_state(page, NR_FILE_DIRTY); __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); + task_dirty_inc(current); task_io_account_write(PAGE_CACHE_SIZE); } radix_tree_tag_set(&mapping->page_tree, diff --git a/include/linux/mm.h b/include/linux/mm.h index 7dc04ff5ab89..10074212a35b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1159,6 +1159,7 @@ extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); /* mm/page-writeback.c */ int write_one_page(struct page *page, int wait); +void task_dirty_inc(struct task_struct *tsk); /* readahead.c */ #define VM_MAX_READAHEAD 128 /* kbytes */ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3c84128596ba..74dc57c74349 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -240,7 +240,7 @@ void bdi_writeout_inc(struct backing_dev_info *bdi) } EXPORT_SYMBOL_GPL(bdi_writeout_inc); -static inline void task_dirty_inc(struct task_struct *tsk) +void task_dirty_inc(struct task_struct *tsk) { prop_inc_single(&vm_dirties, &tsk->dirties); } @@ -1230,6 +1230,7 @@ int __set_page_dirty_nobuffers(struct page *page) __inc_zone_page_state(page, NR_FILE_DIRTY); __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); + task_dirty_inc(current); task_io_account_write(PAGE_CACHE_SIZE); } radix_tree_tag_set(&mapping->page_tree, @@ -1262,7 +1263,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage); * If the mapping doesn't provide a set_page_dirty a_op, then * just fall through and assume that it wants buffer_heads. */ -static int __set_page_dirty(struct page *page) +int set_page_dirty(struct page *page) { struct address_space *mapping = page_mapping(page); @@ -1280,14 +1281,6 @@ static int __set_page_dirty(struct page *page) } return 0; } - -int set_page_dirty(struct page *page) -{ - int ret = __set_page_dirty(page); - if (ret) - task_dirty_inc(current); - return ret; -} EXPORT_SYMBOL(set_page_dirty); /* -- cgit v1.2.3-71-gd317 From 287d859222e0adbc67666a6154aaf42d7d5bbb54 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 Feb 2009 14:48:26 -0800 Subject: atmel-mci: fix initialization of dma slave data The conversion of atmel-mci to dma_request_channel missed the initialization of the channel dma_slave information. The filter_fn passed to dma_request_channel is responsible for initializing the channel's private data. This implementation has the additional benefit of enabling a generic client-channel data passing mechanism. Reviewed-by: Atsushi Nemoto Signed-off-by: Dan Williams Acked-by: Haavard Skinnemoen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/dma/dmaengine.c | 2 ++ drivers/dma/dw_dmac.c | 5 ++--- drivers/dma/dw_dmac_regs.h | 2 -- drivers/mmc/host/atmel-mci.c | 5 +++-- include/linux/dmaengine.h | 2 ++ 5 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index a58993011edb..280a9d263eb3 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -518,6 +518,7 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v dma_chan_name(chan), err); else break; + chan->private = NULL; chan = NULL; } } @@ -536,6 +537,7 @@ void dma_release_channel(struct dma_chan *chan) WARN_ONCE(chan->client_count != 1, "chan reference count %d != 1\n", chan->client_count); dma_chan_put(chan); + chan->private = NULL; mutex_unlock(&dma_list_mutex); } EXPORT_SYMBOL_GPL(dma_release_channel); diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 6b702cc46b3d..a97c07eef7ec 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -560,7 +560,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned long flags) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct dw_dma_slave *dws = dwc->dws; + struct dw_dma_slave *dws = chan->private; struct dw_desc *prev; struct dw_desc *first; u32 ctllo; @@ -790,7 +790,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) cfghi = DWC_CFGH_FIFO_MODE; cfglo = 0; - dws = dwc->dws; + dws = chan->private; if (dws) { /* * We need controller-specific data to set up slave @@ -866,7 +866,6 @@ static void dwc_free_chan_resources(struct dma_chan *chan) spin_lock_bh(&dwc->lock); list_splice_init(&dwc->free_list, &list); dwc->descs_allocated = 0; - dwc->dws = NULL; /* Disable interrupts */ channel_clear_bit(dw, MASK.XFER, dwc->mask); diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index 00fdd187bb0c..b252b202c5cf 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -139,8 +139,6 @@ struct dw_dma_chan { struct list_head queue; struct list_head free_list; - struct dw_dma_slave *dws; - unsigned int descs_allocated; }; diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index 76bfe16c09b1..2b1196e6142c 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -1548,9 +1548,10 @@ static bool filter(struct dma_chan *chan, void *slave) { struct dw_dma_slave *dws = slave; - if (dws->dma_dev == chan->device->dev) + if (dws->dma_dev == chan->device->dev) { + chan->private = dws; return true; - else + } else return false; } #endif diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 3e68469c1885..f0413845f20e 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -121,6 +121,7 @@ struct dma_chan_percpu { * @local: per-cpu pointer to a struct dma_chan_percpu * @client-count: how many clients are using this channel * @table_count: number of appearances in the mem-to-mem allocation table + * @private: private data for certain client-channel associations */ struct dma_chan { struct dma_device *device; @@ -134,6 +135,7 @@ struct dma_chan { struct dma_chan_percpu *local; int client_count; int table_count; + void *private; }; /** -- cgit v1.2.3-71-gd317 From f2dbcfa738368c8a40d4a5f0b65dc9879577cb21 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 18 Feb 2009 14:48:32 -0800 Subject: mm: clean up for early_pfn_to_nid() What's happening is that the assertion in mm/page_alloc.c:move_freepages() is triggering: BUG_ON(page_zone(start_page) != page_zone(end_page)); Once I knew this is what was happening, I added some annotations: if (unlikely(page_zone(start_page) != page_zone(end_page))) { printk(KERN_ERR "move_freepages: Bogus zones: " "start_page[%p] end_page[%p] zone[%p]\n", start_page, end_page, zone); printk(KERN_ERR "move_freepages: " "start_zone[%p] end_zone[%p]\n", page_zone(start_page), page_zone(end_page)); printk(KERN_ERR "move_freepages: " "start_pfn[0x%lx] end_pfn[0x%lx]\n", page_to_pfn(start_page), page_to_pfn(end_page)); printk(KERN_ERR "move_freepages: " "start_nid[%d] end_nid[%d]\n", page_to_nid(start_page), page_to_nid(end_page)); ... And here's what I got: move_freepages: Bogus zones: start_page[2207d0000] end_page[2207dffc0] zone[fffff8103effcb00] move_freepages: start_zone[fffff8103effcb00] end_zone[fffff8003fffeb00] move_freepages: start_pfn[0x81f600] end_pfn[0x81f7ff] move_freepages: start_nid[1] end_nid[0] My memory layout on this box is: [ 0.000000] Zone PFN ranges: [ 0.000000] Normal 0x00000000 -> 0x0081ff5d [ 0.000000] Movable zone start PFN for each node [ 0.000000] early_node_map[8] active PFN ranges [ 0.000000] 0: 0x00000000 -> 0x00020000 [ 0.000000] 1: 0x00800000 -> 0x0081f7ff [ 0.000000] 1: 0x0081f800 -> 0x0081fe50 [ 0.000000] 1: 0x0081fed1 -> 0x0081fed8 [ 0.000000] 1: 0x0081feda -> 0x0081fedb [ 0.000000] 1: 0x0081fedd -> 0x0081fee5 [ 0.000000] 1: 0x0081fee7 -> 0x0081ff51 [ 0.000000] 1: 0x0081ff59 -> 0x0081ff5d So it's a block move in that 0x81f600-->0x81f7ff region which triggers the problem. This patch: Declaration of early_pfn_to_nid() is scattered over per-arch include files, and it seems it's complicated to know when the declaration is used. I think it makes fix-for-memmap-init not easy. This patch moves all declaration to include/linux/mm.h After this, if !CONFIG_NODES_POPULATES_NODE_MAP && !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -> Use static definition in include/linux/mm.h else if !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -> Use generic definition in mm/page_alloc.c else -> per-arch back end function will be called. Signed-off-by: KAMEZAWA Hiroyuki Tested-by: KOSAKI Motohiro Reported-by: David Miller Cc: Mel Gorman Cc: Heiko Carstens Cc: [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/mmzone.h | 4 ---- arch/ia64/mm/numa.c | 2 +- arch/x86/include/asm/mmzone_32.h | 2 -- arch/x86/include/asm/mmzone_64.h | 2 -- arch/x86/mm/numa_64.c | 2 +- include/linux/mm.h | 19 ++++++++++++++++--- mm/page_alloc.c | 8 +++++++- 7 files changed, 25 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/include/asm/mmzone.h b/arch/ia64/include/asm/mmzone.h index 34efe88eb849..f2ca32069b3f 100644 --- a/arch/ia64/include/asm/mmzone.h +++ b/arch/ia64/include/asm/mmzone.h @@ -31,10 +31,6 @@ static inline int pfn_to_nid(unsigned long pfn) #endif } -#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -extern int early_pfn_to_nid(unsigned long pfn); -#endif - #ifdef CONFIG_IA64_DIG /* DIG systems are small */ # define MAX_PHYSNODE_ID 8 # define NR_NODE_MEMBLKS (MAX_NUMNODES * 8) diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index b73bf1838e57..5061c3fb6796 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -58,7 +58,7 @@ paddr_to_nid(unsigned long paddr) * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where * the section resides. */ -int early_pfn_to_nid(unsigned long pfn) +int __meminit __early_pfn_to_nid(unsigned long pfn) { int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec; diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 07f1af494ca5..105fb90a0635 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -32,8 +32,6 @@ static inline void get_memcfg_numa(void) get_memcfg_numa_flat(); } -extern int early_pfn_to_nid(unsigned long pfn); - extern void resume_map_numa_kva(pgd_t *pgd); #else /* !CONFIG_NUMA */ diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h index a5b3817d4b9e..a29f48c2a322 100644 --- a/arch/x86/include/asm/mmzone_64.h +++ b/arch/x86/include/asm/mmzone_64.h @@ -40,8 +40,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ NODE_DATA(nid)->node_spanned_pages) -extern int early_pfn_to_nid(unsigned long pfn); - #ifdef CONFIG_NUMA_EMU #define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024) #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 71a14f89f89e..f3516da035d1 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -145,7 +145,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes, return shift; } -int early_pfn_to_nid(unsigned long pfn) +int __meminit __early_pfn_to_nid(unsigned long pfn) { return phys_to_nid(pfn << PAGE_SHIFT); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 10074212a35b..065cdf8c09fb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1041,10 +1041,23 @@ extern void free_bootmem_with_active_regions(int nid, typedef int (*work_fn_t)(unsigned long, unsigned long, void *); extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); extern void sparse_memory_present_with_active_regions(int nid); -#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -extern int early_pfn_to_nid(unsigned long pfn); -#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ + +#if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \ + !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) +static inline int __early_pfn_to_nid(unsigned long pfn) +{ + return 0; +} +#else +/* please see mm/page_alloc.c */ +extern int __meminit early_pfn_to_nid(unsigned long pfn); +#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID +/* there is a per-arch backend function. */ +extern int __meminit __early_pfn_to_nid(unsigned long pfn); +#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ +#endif + extern void set_dma_reserve(unsigned long new_dma_reserve); extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, enum memmap_context); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5675b3073854..c5dd74602efc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2989,7 +2989,7 @@ static int __meminit next_active_region_index_in_nid(int index, int nid) * was used and there are no special requirements, this is a convenient * alternative */ -int __meminit early_pfn_to_nid(unsigned long pfn) +int __meminit __early_pfn_to_nid(unsigned long pfn) { int i; @@ -3005,6 +3005,12 @@ int __meminit early_pfn_to_nid(unsigned long pfn) } #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ +int __meminit early_pfn_to_nid(unsigned long pfn) +{ + return __early_pfn_to_nid(pfn); +} + + /* Basic iterator support to walk early_node_map[] */ #define for_each_active_range_index_in_nid(i, nid) \ for (i = first_active_region_index_in_nid(nid); i != -1; \ -- cgit v1.2.3-71-gd317 From cc2559bccc72767cb446f79b071d96c30c26439b Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 18 Feb 2009 14:48:33 -0800 Subject: mm: fix memmap init for handling memory hole Now, early_pfn_in_nid(PFN, NID) may returns false if PFN is a hole. and memmap initialization was not done. This was a trouble for sparc boot. To fix this, the PFN should be initialized and marked as PG_reserved. This patch changes early_pfn_in_nid() return true if PFN is a hole. Signed-off-by: KAMEZAWA Hiroyuki Reported-by: David Miller Tested-by: KOSAKI Motohiro Cc: Mel Gorman Cc: Heiko Carstens Cc: [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/numa.c | 2 +- include/linux/mmzone.h | 2 +- mm/page_alloc.c | 23 ++++++++++++++++++++--- 3 files changed, 22 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index 5061c3fb6796..3efea7d0a351 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -70,7 +70,7 @@ int __meminit __early_pfn_to_nid(unsigned long pfn) return node_memblk[i].nid; } - return 0; + return -1; } #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 09c14e213b63..1aca6cebbb78 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1071,7 +1071,7 @@ void sparse_init(void); #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_NODES_SPAN_OTHER_NODES -#define early_pfn_in_nid(pfn, nid) (early_pfn_to_nid(pfn) == (nid)) +bool early_pfn_in_nid(unsigned long pfn, int nid); #else #define early_pfn_in_nid(pfn, nid) (1) #endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c5dd74602efc..5c44ed49ca93 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3000,16 +3000,33 @@ int __meminit __early_pfn_to_nid(unsigned long pfn) if (start_pfn <= pfn && pfn < end_pfn) return early_node_map[i].nid; } - - return 0; + /* This is a memory hole */ + return -1; } #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ int __meminit early_pfn_to_nid(unsigned long pfn) { - return __early_pfn_to_nid(pfn); + int nid; + + nid = __early_pfn_to_nid(pfn); + if (nid >= 0) + return nid; + /* just returns 0 */ + return 0; } +#ifdef CONFIG_NODES_SPAN_OTHER_NODES +bool __meminit early_pfn_in_nid(unsigned long pfn, int node) +{ + int nid; + + nid = __early_pfn_to_nid(pfn); + if (nid >= 0 && nid != node) + return false; + return true; +} +#endif /* Basic iterator support to walk early_node_map[] */ #define for_each_active_range_index_in_nid(i, nid) \ -- cgit v1.2.3-71-gd317 From ffa7525c13eb3db0fd19a3e1cffe2ce6f561f5f3 Mon Sep 17 00:00:00 2001 From: Adam Lackorzynski Date: Wed, 18 Feb 2009 14:48:34 -0800 Subject: jsm: additional device support I have a Digi Neo 8 PCI card (114f:00b1) Serial controller: Digi International Digi Neo 8 (rev 05) that works with the jsm driver after using the following patch. Signed-off-by: Adam Lackorzynski Cc: Scott H Kilau Cc: Wendy Xiong Acked-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/jsm/jsm_driver.c | 3 +++ include/linux/pci_ids.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/drivers/serial/jsm/jsm_driver.c b/drivers/serial/jsm/jsm_driver.c index 92187e28608a..ac79cbe4c2cf 100644 --- a/drivers/serial/jsm/jsm_driver.c +++ b/drivers/serial/jsm/jsm_driver.c @@ -84,6 +84,8 @@ static int jsm_probe_one(struct pci_dev *pdev, const struct pci_device_id *ent) brd->pci_dev = pdev; if (pdev->device == PCIE_DEVICE_ID_NEO_4_IBM) brd->maxports = 4; + else if (pdev->device == PCI_DEVICE_ID_DIGI_NEO_8) + brd->maxports = 8; else brd->maxports = 2; @@ -212,6 +214,7 @@ static struct pci_device_id jsm_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_DIGI, PCI_DEVICE_ID_NEO_2RJ45), 0, 0, 2 }, { PCI_DEVICE(PCI_VENDOR_ID_DIGI, PCI_DEVICE_ID_NEO_2RJ45PRI), 0, 0, 3 }, { PCI_DEVICE(PCI_VENDOR_ID_DIGI, PCIE_DEVICE_ID_NEO_4_IBM), 0, 0, 4 }, + { PCI_DEVICE(PCI_VENDOR_ID_DIGI, PCI_DEVICE_ID_DIGI_NEO_8), 0, 0, 5 }, { 0, } }; MODULE_DEVICE_TABLE(pci, jsm_pci_tbl); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 918391b4b109..114b8192eab9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1445,6 +1445,7 @@ #define PCI_DEVICE_ID_DIGI_DF_M_E 0x0071 #define PCI_DEVICE_ID_DIGI_DF_M_IOM2_A 0x0072 #define PCI_DEVICE_ID_DIGI_DF_M_A 0x0073 +#define PCI_DEVICE_ID_DIGI_NEO_8 0x00B1 #define PCI_DEVICE_ID_NEO_2DB9 0x00C8 #define PCI_DEVICE_ID_NEO_2DB9PRI 0x00C9 #define PCI_DEVICE_ID_NEO_2RJ45 0x00CA -- cgit v1.2.3-71-gd317 From 97bef7dd05563807539122c488a5dd93ed327722 Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Wed, 18 Feb 2009 14:48:40 -0800 Subject: Bernhard has moved Since I don't work for SUSE any more and the bwalle@suse.de address is invalid, correct it in the copyright headers and documentation. Signed-off-by: Bernhard Walle Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-firmware-memmap | 2 +- drivers/firmware/memmap.c | 2 +- include/linux/firmware-map.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-firmware-memmap b/Documentation/ABI/testing/sysfs-firmware-memmap index 0d99ee6ae02e..eca0d65087dc 100644 --- a/Documentation/ABI/testing/sysfs-firmware-memmap +++ b/Documentation/ABI/testing/sysfs-firmware-memmap @@ -1,6 +1,6 @@ What: /sys/firmware/memmap/ Date: June 2008 -Contact: Bernhard Walle +Contact: Bernhard Walle Description: On all platforms, the firmware provides a memory map which the kernel reads. The resources from that memory map are registered diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 261b9aa3f248..05aa2d406ac6 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -1,7 +1,7 @@ /* * linux/drivers/firmware/memmap.c * Copyright (C) 2008 SUSE LINUX Products GmbH - * by Bernhard Walle + * by Bernhard Walle * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License v2.0 as published by diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h index 6e199c8dfacc..cca686b39123 100644 --- a/include/linux/firmware-map.h +++ b/include/linux/firmware-map.h @@ -1,7 +1,7 @@ /* * include/linux/firmware-map.h: * Copyright (C) 2008 SUSE LINUX Products GmbH - * by Bernhard Walle + * by Bernhard Walle * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License v2.0 as published by -- cgit v1.2.3-71-gd317