From 6ddf5f165f13ab623d04aee2a473d35818255199 Mon Sep 17 00:00:00 2001 From: Milind Changire Date: Mon, 14 Feb 2022 05:01:01 +0000 Subject: ceph: add getvxattr op Problem: Some directory vxattrs (e.g. ceph.dir.pin.random) are governed by information that isn't necessarily shared with the client. Add support for the new GETVXATTR operation, which allows the client to query the MDS directly for vxattrs. When the client is queried for a vxattr that doesn't have a special handler, have it issue a GETVXATTR to the MDS directly. Solution: Adds new getvxattr op to fetch ceph.dir.pin*, ceph.dir.layout* and ceph.file.layout* vxattrs. If the entire layout for a dir or a file is being set, then it is expected that the layout be set in standard JSON format. Individual field value retrieval is not wrapped in JSON. The JSON format also applies while setting the vxattr if the entire layout is being set in one go. As a temporary measure, setting a vxattr can also be done in the old format. The old format will be deprecated in the future. URL: https://tracker.ceph.com/issues/51062 Signed-off-by: Milind Changire Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 7ad6c3d0db7d..66db21ac5f0c 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -328,6 +328,7 @@ enum { CEPH_MDS_OP_LOOKUPPARENT = 0x00103, CEPH_MDS_OP_LOOKUPINO = 0x00104, CEPH_MDS_OP_LOOKUPNAME = 0x00105, + CEPH_MDS_OP_GETVXATTR = 0x00106, CEPH_MDS_OP_SETXATTR = 0x01105, CEPH_MDS_OP_RMXATTR = 0x01106, -- cgit v1.2.3-71-gd317 From ab58a5a1c0487b67f7409f39d3c8593d416d4e7f Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 15 Feb 2022 20:23:14 +0800 Subject: ceph: move to a dedicated slabcache for ceph_cap_snap There could be huge number of capsnaps around at any given time. On x86_64 the structure is 248 bytes, which will be rounded up to 256 bytes by kzalloc. Move this to a dedicated slabcache to save 8 bytes for each. [ jlayton: use kmem_cache_zalloc ] Signed-off-by: Xiubo Li Signed-off-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/snap.c | 5 +++-- fs/ceph/super.c | 7 +++++++ fs/ceph/super.h | 2 +- include/linux/ceph/libceph.h | 1 + 4 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index b41e6724c591..bc5ec72d958c 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -482,7 +482,7 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci) struct ceph_buffer *old_blob = NULL; int used, dirty; - capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); + capsnap = kmem_cache_zalloc(ceph_cap_snap_cachep, GFP_NOFS); if (!capsnap) { pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode); return; @@ -603,7 +603,8 @@ update_snapc: spin_unlock(&ci->i_ceph_lock); ceph_buffer_put(old_blob); - kfree(capsnap); + if (capsnap) + kmem_cache_free(ceph_cap_snap_cachep, capsnap); ceph_put_snap_context(old_snapc); } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index bf79f369aec6..978463fa822c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -864,6 +864,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) */ struct kmem_cache *ceph_inode_cachep; struct kmem_cache *ceph_cap_cachep; +struct kmem_cache *ceph_cap_snap_cachep; struct kmem_cache *ceph_cap_flush_cachep; struct kmem_cache *ceph_dentry_cachep; struct kmem_cache *ceph_file_cachep; @@ -892,6 +893,9 @@ static int __init init_caches(void) ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); if (!ceph_cap_cachep) goto bad_cap; + ceph_cap_snap_cachep = KMEM_CACHE(ceph_cap_snap, SLAB_MEM_SPREAD); + if (!ceph_cap_snap_cachep) + goto bad_cap_snap; ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); if (!ceph_cap_flush_cachep) @@ -931,6 +935,8 @@ bad_file: bad_dentry: kmem_cache_destroy(ceph_cap_flush_cachep); bad_cap_flush: + kmem_cache_destroy(ceph_cap_snap_cachep); +bad_cap_snap: kmem_cache_destroy(ceph_cap_cachep); bad_cap: kmem_cache_destroy(ceph_inode_cachep); @@ -947,6 +953,7 @@ static void destroy_caches(void) kmem_cache_destroy(ceph_inode_cachep); kmem_cache_destroy(ceph_cap_cachep); + kmem_cache_destroy(ceph_cap_snap_cachep); kmem_cache_destroy(ceph_cap_flush_cachep); kmem_cache_destroy(ceph_dentry_cachep); kmem_cache_destroy(ceph_file_cachep); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 4569f802ddbb..a2caa7beca4f 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -230,7 +230,7 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) if (refcount_dec_and_test(&capsnap->nref)) { if (capsnap->xattr_blob) ceph_buffer_put(capsnap->xattr_blob); - kfree(capsnap); + kmem_cache_free(ceph_cap_snap_cachep, capsnap); } } diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index edf62eaa6285..00af2c98da75 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -284,6 +284,7 @@ DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) extern struct kmem_cache *ceph_inode_cachep; extern struct kmem_cache *ceph_cap_cachep; +extern struct kmem_cache *ceph_cap_snap_cachep; extern struct kmem_cache *ceph_cap_flush_cachep; extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; -- cgit v1.2.3-71-gd317 From 1753629ea0f34900467185b7d8b0db11a64f4728 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 23 Feb 2022 09:04:55 +0800 Subject: ceph: remove incorrect and unused CEPH_INO_DOTDOT macro Ceph have removed this macro and the 0x3 will be use for global dummy snaprealm. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 66db21ac5f0c..f14f9bc290e6 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -29,7 +29,6 @@ #define CEPH_INO_ROOT 1 #define CEPH_INO_CEPH 2 /* hidden .ceph dir */ -#define CEPH_INO_DOTDOT 3 /* used by ceph fuse for parent (..) */ /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ #define CEPH_MAX_MON 31 -- cgit v1.2.3-71-gd317 From 5ed91587e201c77b35a5555c8c082655bb5834fe Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 23 Feb 2022 09:04:56 +0800 Subject: ceph: do not release the global snaprealm until unmounting The global snaprealm would be created and then destroyed immediately every time when updating it. URL: https://tracker.ceph.com/issues/54362 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 2 +- fs/ceph/snap.c | 13 +++++++++++-- fs/ceph/super.h | 2 +- include/linux/ceph/ceph_fs.h | 3 ++- 4 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index ef9145477aae..fa38c013126d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4838,7 +4838,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) mutex_unlock(&mdsc->mutex); ceph_cleanup_snapid_map(mdsc); - ceph_cleanup_empty_realms(mdsc); + ceph_cleanup_global_and_empty_realms(mdsc); cancel_work_sync(&mdsc->cap_reclaim_work); cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 66a1a92cf579..cc9097c27052 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -121,7 +121,11 @@ static struct ceph_snap_realm *ceph_create_snap_realm( if (!realm) return ERR_PTR(-ENOMEM); - atomic_set(&realm->nref, 1); /* for caller */ + /* Do not release the global dummy snaprealm until unmouting */ + if (ino == CEPH_INO_GLOBAL_SNAPREALM) + atomic_set(&realm->nref, 2); + else + atomic_set(&realm->nref, 1); realm->ino = ino; INIT_LIST_HEAD(&realm->children); INIT_LIST_HEAD(&realm->child_item); @@ -261,9 +265,14 @@ static void __cleanup_empty_realms(struct ceph_mds_client *mdsc) spin_unlock(&mdsc->snap_empty_lock); } -void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc) +void ceph_cleanup_global_and_empty_realms(struct ceph_mds_client *mdsc) { + struct ceph_snap_realm *global_realm; + down_write(&mdsc->snap_rwsem); + global_realm = __lookup_snap_realm(mdsc, CEPH_INO_GLOBAL_SNAPREALM); + if (global_realm) + ceph_put_snap_realm(mdsc, global_realm); __cleanup_empty_realms(mdsc); up_write(&mdsc->snap_rwsem); } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ef9f32ec905e..0b4b519682f1 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -940,7 +940,7 @@ extern void ceph_handle_snap(struct ceph_mds_client *mdsc, struct ceph_msg *msg); extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap); -extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc); +extern void ceph_cleanup_global_and_empty_realms(struct ceph_mds_client *mdsc); extern struct ceph_snapid_map *ceph_get_snapid_map(struct ceph_mds_client *mdsc, u64 snap); diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index f14f9bc290e6..86bf82dbd8b8 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -28,7 +28,8 @@ #define CEPH_INO_ROOT 1 -#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_GLOBAL_SNAPREALM 3 /* global dummy snaprealm */ /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ #define CEPH_MAX_MON 31 -- cgit v1.2.3-71-gd317