From 5a798493b8f30121363359bba834392f044c169b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 14 Jul 2021 14:47:22 -0400 Subject: fs: add a filemap_fdatawrite_wbc helper Btrfs sometimes needs to flush dirty pages on a bunch of dirty inodes in order to reclaim metadata reservations. Unfortunately most helpers in this area are too smart for us: 1) The normal filemap_fdata* helpers only take range and sync modes, and don't give any indication of how much was written, so we can only flush full inodes, which isn't what we want in most cases. 2) The normal writeback path requires us to have the s_umount sem held, but we can't unconditionally take it in this path because we could deadlock. 3) The normal writeback path also skips inodes with I_SYNC set if we write with WB_SYNC_NONE. This isn't the behavior we want under heavy ENOSPC pressure, we want to actually make sure the pages are under writeback before returning, and if another thread is in the middle of writing the file we may return before they're under writeback and miss our ordered extents and not properly wait for completion. 4) sync_inode() uses the normal writeback path and has the same problem as #3. What we really want is to call do_writepages() with our wbc. This way we can make sure that writeback is actually started on the pages, and we can control how many pages are written as a whole as we write many inodes using the same wbc. Accomplish this with a new helper that does just that so we can use it for our ENOSPC flushing infrastructure. 
Reviewed-by: Nikolay Borisov Reviewed-by: Christoph Hellwig Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 640574294216..452cd4843843 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2891,6 +2891,8 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end); extern int filemap_check_errors(struct address_space *mapping); extern void __filemap_set_wb_err(struct address_space *mapping, int err); +int filemap_fdatawrite_wbc(struct address_space *mapping, + struct writeback_control *wbc); static inline int filemap_write_and_wait(struct address_space *mapping) { -- cgit v1.2.3-71-gd317 From 5662c967c69dfd162a0667d69bad776939bedf85 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 14 Jul 2021 14:47:25 -0400 Subject: fs: kill sync_inode Now that all users of sync_inode() have been deleted, remove sync_inode(). Reviewed-by: Christoph Hellwig Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/fs-writeback.c | 19 +------------------ include/linux/fs.h | 1 - 2 files changed, 1 insertion(+), 19 deletions(-) (limited to 'include/linux') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 4c3370548982..eb57dade6076 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2729,23 +2729,6 @@ int write_inode_now(struct inode *inode, int sync) } EXPORT_SYMBOL(write_inode_now); -/** - * sync_inode - write an inode and its pages to disk. - * @inode: the inode to sync - * @wbc: controls the writeback mode - * - * sync_inode() will write an inode and its pages to disk. It will also - * correctly update the inode on its superblock's dirty inode lists and will - * update inode->i_state. - * - * The caller must have a ref on the inode. 
- */ -int sync_inode(struct inode *inode, struct writeback_control *wbc) -{ - return writeback_single_inode(inode, wbc); -} -EXPORT_SYMBOL(sync_inode); - /** * sync_inode_metadata - write an inode to disk * @inode: the inode to sync @@ -2762,6 +2745,6 @@ int sync_inode_metadata(struct inode *inode, int wait) .nr_to_write = 0, /* metadata-only */ }; - return sync_inode(inode, &wbc); + return writeback_single_inode(inode, &wbc); } EXPORT_SYMBOL(sync_inode_metadata); diff --git a/include/linux/fs.h b/include/linux/fs.h index 452cd4843843..1751addcb36e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2457,7 +2457,6 @@ static inline void file_accessed(struct file *file) extern int file_modified(struct file *file); -int sync_inode(struct inode *inode, struct writeback_control *wbc); int sync_inode_metadata(struct inode *inode, int wait); struct file_system_type { -- cgit v1.2.3-71-gd317 From e83502ca5f1e1f03fb1393008ec22d17e7dc9882 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 21 Jul 2021 21:43:32 +0900 Subject: block: fix argument type of bio_trim() The function bio_trim has offset and size arguments that are declared as int. The callers of this function use sector_t type when passing the offset and size, e.g. drivers/md/raid1.c:narrow_write_error() and drivers/md/raid10.c:narrow_write_error(). Change offset and size arguments to sector_t type for bio_trim(). Also, add WARN_ON_ONCE() to catch their overflow. 
Reviewed-by: Christoph Hellwig Signed-off-by: Chaitanya Kulkarni Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- block/bio.c | 12 +++++++----- include/linux/bio.h | 2 +- include/linux/blk_types.h | 1 + 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index 1fab762e079b..77cadcba93b9 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1463,12 +1463,15 @@ EXPORT_SYMBOL(bio_split); * @bio: bio to trim * @offset: number of sectors to trim from the front of @bio * @size: size we want to trim @bio to, in sectors + * + * This function is typically used for bios that are cloned and submitted + * to the underlying device in parts. */ -void bio_trim(struct bio *bio, int offset, int size) +void bio_trim(struct bio *bio, sector_t offset, sector_t size) { - /* 'bio' is a cloned bio which we need to trim to match - * the given offset and size. - */ + if (WARN_ON_ONCE(offset > BIO_MAX_SECTORS || size > BIO_MAX_SECTORS || + offset + size > bio->bi_iter.bi_size)) + return; size <<= 9; if (offset == 0 && size == bio->bi_iter.bi_size) @@ -1479,7 +1482,6 @@ void bio_trim(struct bio *bio, int offset, int size) if (bio_integrity(bio)) bio_integrity_trim(bio); - } EXPORT_SYMBOL_GPL(bio_trim); diff --git a/include/linux/bio.h b/include/linux/bio.h index 2203b686e1f0..8a451d77b573 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -375,7 +375,7 @@ static inline void bip_set_seed(struct bio_integrity_payload *bip, #endif /* CONFIG_BLK_DEV_INTEGRITY */ -extern void bio_trim(struct bio *bio, int offset, int size); +void bio_trim(struct bio *bio, sector_t offset, sector_t size); extern struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 290f9061b29a..bca4d33876d4 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -281,6 +281,7 @@ struct bio { }; #define 
BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) +#define BIO_MAX_SECTORS (UINT_MAX >> SECTOR_SHIFT) /* * bio flags -- cgit v1.2.3-71-gd317 From c2fd68b6b2b00f0a6280b5971028c10c8f0ba70f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 27 Jul 2021 12:48:40 +0200 Subject: namei: add mapping aware lookup helper Various filesystems rely on the lookup_one_len() helper to lookup a single path component relative to a well-known starting point. Allow such filesystems to support idmapped mounts by adding a version of this helper to take the idmap into account when calling inode_permission(). This change is required to let btrfs (and other filesystems) support idmapped mounts. Cc: Christoph Hellwig Cc: Al Viro Cc: Matthew Wilcox (Oracle) Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Josef Bacik Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner Signed-off-by: David Sterba --- fs/namei.c | 43 +++++++++++++++++++++++++++++++++++++------ include/linux/namei.h | 1 + 2 files changed, 38 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index bf6d8a738c59..902df46e7dd3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2575,8 +2575,9 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, } EXPORT_SYMBOL(vfs_path_lookup); -static int lookup_one_len_common(const char *name, struct dentry *base, - int len, struct qstr *this) +static int lookup_one_common(struct user_namespace *mnt_userns, + const char *name, struct dentry *base, int len, + struct qstr *this) { this->name = name; this->len = len; @@ -2604,7 +2605,7 @@ static int lookup_one_len_common(const char *name, struct dentry *base, return err; } - return inode_permission(&init_user_ns, base->d_inode, MAY_EXEC); + return inode_permission(mnt_userns, base->d_inode, MAY_EXEC); } /** @@ -2628,7 +2629,7 @@ struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len WARN_ON_ONCE(!inode_is_locked(base->d_inode)); - err = 
lookup_one_len_common(name, base, len, &this); + err = lookup_one_common(&init_user_ns, name, base, len, &this); if (err) return ERR_PTR(err); @@ -2655,7 +2656,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) WARN_ON_ONCE(!inode_is_locked(base->d_inode)); - err = lookup_one_len_common(name, base, len, &this); + err = lookup_one_common(&init_user_ns, name, base, len, &this); if (err) return ERR_PTR(err); @@ -2664,6 +2665,36 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) } EXPORT_SYMBOL(lookup_one_len); +/** + * lookup_one - filesystem helper to lookup single pathname component + * @mnt_userns: user namespace of the mount the lookup is performed from + * @name: pathname component to lookup + * @base: base directory to lookup from + * @len: maximum length @name should be interpreted to + * + * Note that this routine is purely a helper for filesystem usage and should + * not be called by generic code. + * + * The caller must hold base->i_mutex. + */ +struct dentry *lookup_one(struct user_namespace *mnt_userns, const char *name, + struct dentry *base, int len) +{ + struct dentry *dentry; + struct qstr this; + int err; + + WARN_ON_ONCE(!inode_is_locked(base->d_inode)); + + err = lookup_one_common(mnt_userns, name, base, len, &this); + if (err) + return ERR_PTR(err); + + dentry = lookup_dcache(&this, base, 0); + return dentry ? 
dentry : __lookup_slow(&this, base, 0); +} +EXPORT_SYMBOL(lookup_one); + /** * lookup_one_len_unlocked - filesystem helper to lookup single pathname component * @name: pathname component to lookup @@ -2683,7 +2714,7 @@ struct dentry *lookup_one_len_unlocked(const char *name, int err; struct dentry *ret; - err = lookup_one_len_common(name, base, len, &this); + err = lookup_one_common(&init_user_ns, name, base, len, &this); if (err) return ERR_PTR(err); diff --git a/include/linux/namei.h b/include/linux/namei.h index be9a2b349ca7..e89329bb3134 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -68,6 +68,7 @@ extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int); +struct dentry *lookup_one(struct user_namespace *, const char *, struct dentry *, int); extern int follow_down_one(struct path *); extern int follow_down(struct path *); -- cgit v1.2.3-71-gd317