From c12d2da56d0e07d230968ee2305aaa86b93a6832 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Apr 2016 10:24:58 +0200 Subject: mm/gup: Remove the macro overload API migration helpers from the get_user*() APIs The pkeys changes brought about a truly hideous set of macros in: cde70140fed8 ("mm/gup: Overload get_user_pages() functions") ... which macros are (ab-)using the fact that __VA_ARGS__ can be used to shift parameter positions in macro arguments without breaking the build and so can be used to call separate C functions depending on the number of arguments of the macro. This allowed easy migration of these 3 GUP APIs, as both these variants worked at the C level: old: ret = get_user_pages(current, current->mm, address, 1, 1, 0, &page, NULL); new: ret = get_user_pages(address, 1, 1, 0, &page, NULL); ... while we also generated a (functionally harmless but noticeable) build time warning if the old API was used. As there are over 300 uses of these APIs, this trick eased the migration of the API and avoided excessive migration pain in linux-next. Now, with its work done, get rid of all of that complication and ugliness: 3 files changed, 16 insertions(+), 140 deletions(-) ... where the linecount of the migration hack was further inflated by the fact that there are NOMMU variants of these GUP APIs as well. Much of the conversion was done in linux-next over the past couple of months, and Linus recently removed all remaining old API uses from the upstream tree in the following upstrea commit: cb107161df3c ("Convert straggling drivers to new six-argument get_user_pages()") There was one more old-API usage in mm/gup.c, in the CONFIG_HAVE_GENERIC_RCU_GUP code path that ARM, ARM64 and PowerPC uses. After this commit any old API usage will break the build. [ Also fixed a PowerPC/HAVE_GENERIC_RCU_GUP warning reported by Stephen Rothwell. ] Cc: Andrew Morton Cc: Dave Hansen Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephen Rothwell Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- include/linux/mm.h | 64 +++--------------------------------------------------- 1 file changed, 3 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ed6407d1b7b5..d6508a025a31 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1250,78 +1250,20 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); -long get_user_pages6(unsigned long start, unsigned long nr_pages, +long get_user_pages(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); -long get_user_pages_locked6(unsigned long start, unsigned long nr_pages, +long get_user_pages_locked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, unsigned int gup_flags); -long get_user_pages_unlocked5(unsigned long start, unsigned long nr_pages, +long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); -/* suppress warnings from use in EXPORT_SYMBOL() */ -#ifndef __DISABLE_GUP_DEPRECATED -#define __gup_deprecated __deprecated -#else -#define __gup_deprecated -#endif -/* - * These macros provide backward-compatibility with the old - * get_user_pages() variants which took tsk/mm. These - * functions/macros provide both compile-time __deprecated so we - * can catch old-style use and not break the build. The actual - * functions also have WARN_ON()s to let us know at runtime if - * the get_user_pages() should have been the "remote" variant. - * - * These are hideous, but temporary. - * - * If you run into one of these __deprecated warnings, look - * at how you are calling get_user_pages(). If you are calling - * it with current/current->mm as the first two arguments, - * simply remove those arguments. The behavior will be the same - * as it is now. If you are calling it on another task, use - * get_user_pages_remote() instead. - * - * Any questions? Ask Dave Hansen - */ -long -__gup_deprecated -get_user_pages8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - struct vm_area_struct **vmas); -#define GUP_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages, ...) \ - get_user_pages -#define get_user_pages(...) GUP_MACRO(__VA_ARGS__, \ - get_user_pages8, x, \ - get_user_pages6, x, x, x, x, x)(__VA_ARGS__) - -__gup_deprecated -long get_user_pages_locked8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - int *locked); -#define GUPL_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages_locked, ...) \ - get_user_pages_locked -#define get_user_pages_locked(...) GUPL_MACRO(__VA_ARGS__, \ - get_user_pages_locked8, x, \ - get_user_pages_locked6, x, x, x, x)(__VA_ARGS__) - -__gup_deprecated -long get_user_pages_unlocked7(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages); -#define GUPU_MACRO(_1, _2, _3, _4, _5, _6, _7, get_user_pages_unlocked, ...) \ - get_user_pages_unlocked -#define get_user_pages_unlocked(...) GUPU_MACRO(__VA_ARGS__, \ - get_user_pages_unlocked7, x, \ - get_user_pages_unlocked5, x, x, x, x)(__VA_ARGS__) - /* Container for pinned pfns / pages */ struct frame_vector { unsigned int nr_allocated; /* Number of frames we have space for */ -- cgit v1.2.3-71-gd317 From b32e4482aadfd1322357f46d4ed8a990603664d9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 11 Apr 2016 15:51:57 -0700 Subject: fscrypto: don't let data integrity writebacks fail with ENOMEM This patch fixes the issue introduced by the ext4 crypto fix in a same manner. For F2FS, however, we flush the pending IOs and wait for a while to acquire free memory. Fixes: c9af28fdd4492 ("ext4 crypto: don't let data integrity writebacks fail with ENOMEM") Cc: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- fs/crypto/crypto.c | 36 ++++++++++++++++++++---------------- fs/f2fs/data.c | 16 +++++++++++++--- include/linux/fscrypto.h | 9 +++++---- 3 files changed, 38 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 58ae0ba91ca2..da70520f3ab4 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -81,13 +81,14 @@ EXPORT_SYMBOL(fscrypt_release_ctx); /** * fscrypt_get_ctx() - Gets an encryption context * @inode: The inode for which we are doing the crypto + * @gfp_flags: The gfp flag for memory allocation * * Allocates and initializes an encryption context. * * Return: An allocated and initialized encryption context on success; error * value or NULL otherwise. */ -struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode) +struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode, gfp_t gfp_flags) { struct fscrypt_ctx *ctx = NULL; struct fscrypt_info *ci = inode->i_crypt_info; @@ -113,7 +114,7 @@ struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode) list_del(&ctx->free_list); spin_unlock_irqrestore(&fscrypt_ctx_lock, flags); if (!ctx) { - ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, GFP_NOFS); + ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, gfp_flags); if (!ctx) return ERR_PTR(-ENOMEM); ctx->flags |= FS_CTX_REQUIRES_FREE_ENCRYPT_FL; @@ -147,7 +148,8 @@ typedef enum { static int do_page_crypto(struct inode *inode, fscrypt_direction_t rw, pgoff_t index, - struct page *src_page, struct page *dest_page) + struct page *src_page, struct page *dest_page, + gfp_t gfp_flags) { u8 xts_tweak[FS_XTS_TWEAK_SIZE]; struct skcipher_request *req = NULL; @@ -157,7 +159,7 @@ static int do_page_crypto(struct inode *inode, struct crypto_skcipher *tfm = ci->ci_ctfm; int res = 0; - req = skcipher_request_alloc(tfm, GFP_NOFS); + req = skcipher_request_alloc(tfm, gfp_flags); if (!req) { printk_ratelimited(KERN_ERR "%s: crypto_request_alloc() failed\n", @@ -199,10 +201,9 @@ static int do_page_crypto(struct inode *inode, return 0; } -static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx) +static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags) { - ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, - GFP_NOWAIT); + ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, gfp_flags); if (ctx->w.bounce_page == NULL) return ERR_PTR(-ENOMEM); ctx->flags |= FS_WRITE_PATH_FL; @@ -213,6 +214,7 @@ static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx) * fscypt_encrypt_page() - Encrypts a page * @inode: The inode for which the encryption should take place * @plaintext_page: The page to encrypt. Must be locked. + * @gfp_flags: The gfp flag for memory allocation * * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx * encryption context. @@ -225,7 +227,7 @@ static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx) * error value or NULL. */ struct page *fscrypt_encrypt_page(struct inode *inode, - struct page *plaintext_page) + struct page *plaintext_page, gfp_t gfp_flags) { struct fscrypt_ctx *ctx; struct page *ciphertext_page = NULL; @@ -233,18 +235,19 @@ struct page *fscrypt_encrypt_page(struct inode *inode, BUG_ON(!PageLocked(plaintext_page)); - ctx = fscrypt_get_ctx(inode); + ctx = fscrypt_get_ctx(inode, gfp_flags); if (IS_ERR(ctx)) return (struct page *)ctx; /* The encryption operation will require a bounce page. */ - ciphertext_page = alloc_bounce_page(ctx); + ciphertext_page = alloc_bounce_page(ctx, gfp_flags); if (IS_ERR(ciphertext_page)) goto errout; ctx->w.control_page = plaintext_page; err = do_page_crypto(inode, FS_ENCRYPT, plaintext_page->index, - plaintext_page, ciphertext_page); + plaintext_page, ciphertext_page, + gfp_flags); if (err) { ciphertext_page = ERR_PTR(err); goto errout; @@ -275,7 +278,7 @@ int fscrypt_decrypt_page(struct page *page) BUG_ON(!PageLocked(page)); return do_page_crypto(page->mapping->host, - FS_DECRYPT, page->index, page, page); + FS_DECRYPT, page->index, page, page, GFP_NOFS); } EXPORT_SYMBOL(fscrypt_decrypt_page); @@ -289,11 +292,11 @@ int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk, BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE); - ctx = fscrypt_get_ctx(inode); + ctx = fscrypt_get_ctx(inode, GFP_NOFS); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ciphertext_page = alloc_bounce_page(ctx); + ciphertext_page = alloc_bounce_page(ctx, GFP_NOWAIT); if (IS_ERR(ciphertext_page)) { err = PTR_ERR(ciphertext_page); goto errout; @@ -301,11 +304,12 @@ int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk, while (len--) { err = do_page_crypto(inode, FS_ENCRYPT, lblk, - ZERO_PAGE(0), ciphertext_page); + ZERO_PAGE(0), ciphertext_page, + GFP_NOFS); if (err) goto errout; - bio = bio_alloc(GFP_KERNEL, 1); + bio = bio_alloc(GFP_NOWAIT, 1); if (!bio) { err = -ENOMEM; goto errout; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 53fec0872e60..5dafb9cef12e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -992,7 +992,7 @@ submit_and_realloc: if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { - ctx = fscrypt_get_ctx(inode); + ctx = fscrypt_get_ctx(inode, GFP_NOFS); if (IS_ERR(ctx)) goto set_error_page; @@ -1092,14 +1092,24 @@ int do_write_data_page(struct f2fs_io_info *fio) } if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + gfp_t gfp_flags = GFP_NOFS; /* wait for GCed encrypted page writeback */ f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode), fio->old_blkaddr); - - fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page); +retry_encrypt: + fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, + gfp_flags); if (IS_ERR(fio->encrypted_page)) { err = PTR_ERR(fio->encrypted_page); + if (err == -ENOMEM) { + /* flush pending ios and wait for a while */ + f2fs_flush_merged_bios(F2FS_I_SB(inode)); + congestion_wait(BLK_RW_ASYNC, HZ/50); + gfp_flags |= __GFP_NOFAIL; + err = 0; + goto retry_encrypt; + } goto out_writepage; } } diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h index cd91f75de49b..6027f6bbb061 100644 --- a/include/linux/fscrypto.h +++ b/include/linux/fscrypto.h @@ -263,9 +263,9 @@ static inline void fscrypt_set_d_op(struct dentry *dentry) extern struct kmem_cache *fscrypt_info_cachep; int fscrypt_initialize(void); -extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *); +extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *, gfp_t); extern void fscrypt_release_ctx(struct fscrypt_ctx *); -extern struct page *fscrypt_encrypt_page(struct inode *, struct page *); +extern struct page *fscrypt_encrypt_page(struct inode *, struct page *, gfp_t); extern int fscrypt_decrypt_page(struct page *); extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *); extern void fscrypt_pullback_bio_page(struct page **, bool); @@ -299,7 +299,8 @@ extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, #endif /* crypto.c */ -static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i) +static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i, + gfp_t f) { return ERR_PTR(-EOPNOTSUPP); } @@ -310,7 +311,7 @@ static inline void fscrypt_notsupp_release_ctx(struct fscrypt_ctx *c) } static inline struct page *fscrypt_notsupp_encrypt_page(struct inode *i, - struct page *p) + struct page *p, gfp_t f) { return ERR_PTR(-EOPNOTSUPP); } -- cgit v1.2.3-71-gd317 From 1363074667a6b7d0507527742ccd7bbed5e3ceaa Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Apr 2016 12:27:09 +0200 Subject: USB: uas: Add a new NO_REPORT_LUNS quirk Add a new NO_REPORT_LUNS quirk and set it for Seagate drives with an usb-id of: 0bc2:331a, as these will fail to respond to a REPORT_LUNS command. Cc: stable@vger.kernel.org Reported-and-tested-by: David Webb Signed-off-by: Hans de Goede Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 2 ++ drivers/usb/storage/uas.c | 14 +++++++++++++- drivers/usb/storage/unusual_uas.h | 7 +++++++ drivers/usb/storage/usb.c | 5 ++++- include/linux/usb_usual.h | 2 ++ 5 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ecc74fa4bfde..0b3de80ec8f6 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -4077,6 +4077,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. sector if the number is odd); i = IGNORE_DEVICE (don't bind to this device); + j = NO_REPORT_LUNS (don't use report luns + command, uas only); l = NOT_LOCKABLE (don't try to lock and unlock ejectable media); m = MAX_SECTORS_64 (don't transfer more diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index b1ec7499166d..16bc679dc2fc 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -2,7 +2,7 @@ * USB Attached SCSI * Note that this is not the same as the USB Mass Storage driver * - * Copyright Hans de Goede for Red Hat, Inc. 2013 - 2014 + * Copyright Hans de Goede for Red Hat, Inc. 2013 - 2016 * Copyright Matthew Wilcox for Intel Corp, 2010 * Copyright Sarah Sharp for Intel Corp, 2010 * @@ -781,6 +781,17 @@ static int uas_eh_bus_reset_handler(struct scsi_cmnd *cmnd) return SUCCESS; } +static int uas_target_alloc(struct scsi_target *starget) +{ + struct uas_dev_info *devinfo = (struct uas_dev_info *) + dev_to_shost(starget->dev.parent)->hostdata; + + if (devinfo->flags & US_FL_NO_REPORT_LUNS) + starget->no_report_luns = 1; + + return 0; +} + static int uas_slave_alloc(struct scsi_device *sdev) { struct uas_dev_info *devinfo = @@ -831,6 +842,7 @@ static struct scsi_host_template uas_host_template = { .module = THIS_MODULE, .name = "uas", .queuecommand = uas_queuecommand, + .target_alloc = uas_target_alloc, .slave_alloc = uas_slave_alloc, .slave_configure = uas_slave_configure, .eh_abort_handler = uas_eh_abort_handler, diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index ccc113e83d88..53341a77d89f 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -64,6 +64,13 @@ UNUSUAL_DEV(0x0bc2, 0x3312, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_ATA_1X), +/* Reported-by: David Webb */ +UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999, + "Seagate", + "Expansion Desk", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_REPORT_LUNS), + /* Reported-by: Hans de Goede */ UNUSUAL_DEV(0x0bc2, 0x3320, 0x0000, 0x9999, "Seagate", diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 43576ed31ccd..9de988a0f856 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -482,7 +482,7 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags) US_FL_NO_READ_DISC_INFO | US_FL_NO_READ_CAPACITY_16 | US_FL_INITIAL_READ10 | US_FL_WRITE_CACHE | US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES | - US_FL_MAX_SECTORS_240); + US_FL_MAX_SECTORS_240 | US_FL_NO_REPORT_LUNS); p = quirks; while (*p) { @@ -532,6 +532,9 @@ void usb_stor_adjust_quirks(struct usb_device *udev, unsigned long *fflags) case 'i': f |= US_FL_IGNORE_DEVICE; break; + case 'j': + f |= US_FL_NO_REPORT_LUNS; + break; case 'l': f |= US_FL_NOT_LOCKABLE; break; diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 7f5f78bd15ad..245f57dbbb61 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -79,6 +79,8 @@ /* Cannot handle MI_REPORT_SUPPORTED_OPERATION_CODES */ \ US_FLAG(MAX_SECTORS_240, 0x08000000) \ /* Sets max_sectors to 240 */ \ + US_FLAG(NO_REPORT_LUNS, 0x10000000) \ + /* Cannot handle REPORT_LUNS */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3-71-gd317 From cba2e47abcbd80e3f46f460899290402f98090ec Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 12 Apr 2016 18:10:52 -0600 Subject: pmem: fix BUG() error in pmem.h:48 on X86_32 After 'commit fc0c2028135c ("x86, pmem: use memcpy_mcsafe() for memcpy_from_pmem()")', probing a PMEM device hits the BUG() error below on X86_32 kernel. kernel BUG at include/linux/pmem.h:48! memcpy_from_pmem() calls arch_memcpy_from_pmem(), which is unimplemented since CONFIG_ARCH_HAS_PMEM_API is undefined on X86_32. Fix the BUG() error by adding default_memcpy_from_pmem(). Acked-by: Dan Williams Signed-off-by: Toshi Kani Signed-off-by: Ross Zwisler Cc: Dan Williams Cc: Ross Zwisler --- include/linux/pmem.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index ac6d872ce067..57d146fe44dd 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -72,6 +72,18 @@ static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) } #endif +static inline bool arch_has_pmem_api(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); +} + +static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src, + size_t size) +{ + memcpy(dst, (void __force *) src, size); + return 0; +} + /* * memcpy_from_pmem - read from persistent memory with error handling * @dst: destination buffer @@ -83,12 +95,10 @@ static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) static inline int memcpy_from_pmem(void *dst, void __pmem const *src, size_t size) { - return arch_memcpy_from_pmem(dst, src, size); -} - -static inline bool arch_has_pmem_api(void) -{ - return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); + if (arch_has_pmem_api()) + return arch_memcpy_from_pmem(dst, src, size); + else + return default_memcpy_from_pmem(dst, src, size); } /** -- cgit v1.2.3-71-gd317 From 34dbbcdbf63360661ff7bda6c5f52f99ac515f92 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 14 Apr 2016 11:22:00 -0700 Subject: Make file credentials available to the seqfile interfaces A lot of seqfile users seem to be using things like %pK that uses the credentials of the current process, but that is actually completely wrong for filesystem interfaces. The unix semantics for permission checking files is to check permissions at _open_ time, not at read or write time, and that is not just a small detail: passing off stdin/stdout/stderr to a suid application and making the actual IO happen in privileged context is a classic exploit technique. So if we want to be able to look at permissions at read time, we need to use the file open credentials, not the current ones. Normal file accesses can just use "f_cred" (or any of the helper functions that do that, like file_ns_capable()), but the seqfile interfaces do not have any such options. It turns out that seq_file _does_ save away the user_ns information of the file, though. Since user_ns is just part of the full credential information, replace that special case with saving off the cred pointer instead, and suddenly seq_file has all the permission information it needs. Signed-off-by: Linus Torvalds --- fs/seq_file.c | 7 ++++--- include/linux/seq_file.h | 13 ++++--------- 2 files changed, 8 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/seq_file.c b/fs/seq_file.c index e85664b7c7d9..19f532e7d35e 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -72,9 +72,10 @@ int seq_open(struct file *file, const struct seq_operations *op) mutex_init(&p->lock); p->op = op; -#ifdef CONFIG_USER_NS - p->user_ns = file->f_cred->user_ns; -#endif + + // No refcounting: the lifetime of 'p' is constrained + // to the lifetime of the file. + p->file = file; /* * Wrappers around seq_open(e.g. swaps_open) need to be diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index dde00defbaa5..f3d45dd42695 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -7,13 +7,10 @@ #include #include #include +#include +#include struct seq_operations; -struct file; -struct path; -struct inode; -struct dentry; -struct user_namespace; struct seq_file { char *buf; @@ -27,9 +24,7 @@ struct seq_file { struct mutex lock; const struct seq_operations *op; int poll_event; -#ifdef CONFIG_USER_NS - struct user_namespace *user_ns; -#endif + const struct file *file; void *private; }; @@ -147,7 +142,7 @@ int seq_release_private(struct inode *, struct file *); static inline struct user_namespace *seq_user_ns(struct seq_file *seq) { #ifdef CONFIG_USER_NS - return seq->user_ns; + return seq->file->f_cred->user_ns; #else extern struct user_namespace init_user_ns; return &init_user_ns; -- cgit v1.2.3-71-gd317