cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

super.h (42884B)


      1/* SPDX-License-Identifier: GPL-2.0 */
      2#ifndef _FS_CEPH_SUPER_H
      3#define _FS_CEPH_SUPER_H
      4
      5#include <linux/ceph/ceph_debug.h>
      6
      7#include <asm/unaligned.h>
      8#include <linux/backing-dev.h>
      9#include <linux/completion.h>
     10#include <linux/exportfs.h>
     11#include <linux/fs.h>
     12#include <linux/mempool.h>
     13#include <linux/pagemap.h>
     14#include <linux/wait.h>
     15#include <linux/writeback.h>
     16#include <linux/slab.h>
     17#include <linux/posix_acl.h>
     18#include <linux/refcount.h>
     19#include <linux/security.h>
     20#include <linux/netfs.h>
     21#include <linux/fscache.h>
     22
     23#include <linux/ceph/libceph.h>
     24
/*
 * Large granularity for statfs utilization stats to facilitate
 * large volume sizes on 32-bit machines.
 */
#define CEPH_BLOCK_SHIFT   22  /* 4 MB */
#define CEPH_BLOCK         (1 << CEPH_BLOCK_SHIFT)
#define CEPH_4K_BLOCK_SHIFT 12  /* 4 KB */
     30
/*
 * Mount option bits.  They live in ceph_mount_options.flags and are
 * accessed through the ceph_{set,clear,test}_mount_opt() macros, which
 * paste the name after the CEPH_MOUNT_OPT_ prefix.
 */
#define CEPH_MOUNT_OPT_CLEANRECOVER    (1<<1) /* auto reconnect (clean mode) after blocklisted */
#define CEPH_MOUNT_OPT_DIRSTAT         (1<<4) /* `cat dirname` for stats */
#define CEPH_MOUNT_OPT_RBYTES          (1<<5) /* dir st_bytes = rbytes */
#define CEPH_MOUNT_OPT_NOASYNCREADDIR  (1<<7) /* no dcache readdir */
#define CEPH_MOUNT_OPT_INO32           (1<<8) /* 32 bit inos */
#define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
#define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
#define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
#define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<12) /* mount waits if no mds is up */
#define CEPH_MOUNT_OPT_NOQUOTADF       (1<<13) /* no root dir quota in statfs */
#define CEPH_MOUNT_OPT_NOCOPYFROM      (1<<14) /* don't use RADOS 'copy-from' op */
#define CEPH_MOUNT_OPT_ASYNC_DIROPS    (1<<15) /* allow async directory ops */
#define CEPH_MOUNT_OPT_NOPAGECACHE     (1<<16) /* bypass pagecache altogether */
     44
/*
 * Default flag set: DCACHE, NOCOPYFROM and ASYNC_DIROPS.
 * NOTE(review): where this default is applied is not visible in this header.
 */
#define CEPH_MOUNT_OPT_DEFAULT			\
	(CEPH_MOUNT_OPT_DCACHE |		\
	 CEPH_MOUNT_OPT_NOCOPYFROM |		\
	 CEPH_MOUNT_OPT_ASYNC_DIROPS)
     49
     50#define ceph_set_mount_opt(fsc, opt) \
     51	(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt
     52#define ceph_clear_mount_opt(fsc, opt) \
     53	(fsc)->mount_options->flags &= ~CEPH_MOUNT_OPT_##opt
     54#define ceph_test_mount_opt(fsc, opt) \
     55	(!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
     56
/* max size of osd read request, limited by libceph */
#define CEPH_MAX_READ_SIZE              CEPH_MSG_MAX_DATA_LEN
/*
 * The osd has a configurable limitation on max write size.
 * CEPH_MSG_MAX_DATA_LEN should be small enough.
 */
#define CEPH_MAX_WRITE_SIZE		CEPH_MSG_MAX_DATA_LEN
#define CEPH_RASIZE_DEFAULT             (8192*1024)    /* max readahead */
#define CEPH_MAX_READDIR_DEFAULT        1024
#define CEPH_MAX_READDIR_BYTES_DEFAULT  (512*1024)
#define CEPH_SNAPDIRNAME_DEFAULT        ".snap"
     66
/*
 * Delay telling the MDS we no longer want caps, in case we reopen
 * the file.  Delay a minimum amount of time, even if we send a cap
 * message for some other reason.  Otherwise, take the opportunity to
 * update the mds to avoid sending another message later.
 *
 * NOTE(review): the unit of these defaults (presumably seconds) is not
 * stated in this header -- confirm against the users.
 */
#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT      5  /* cap release delay */
#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT     60  /* cap release delay */
     75
/*
 * Per-mount options, reached through ceph_fs_client.mount_options.
 *
 * Field order matters: the scalar members at the top are compared as raw
 * memory, while the string pointers at the bottom are compared
 * individually (see the marker comment in the middle of the struct).
 */
struct ceph_mount_options {
	unsigned int flags;            /* CEPH_MOUNT_OPT_* bits */

	unsigned int wsize;            /* max write size */
	unsigned int rsize;            /* max read size */
	unsigned int rasize;           /* max readahead */
	unsigned int congestion_kb;    /* max writeback in flight */
	unsigned int caps_wanted_delay_min, caps_wanted_delay_max;
	int caps_max;
	unsigned int max_readdir;       /* max readdir result (entries) */
	unsigned int max_readdir_bytes; /* max readdir result (bytes) */

	bool new_dev_syntax;

	/*
	 * everything above this point can be memcmp'd; everything below
	 * is handled in compare_mount_options()
	 */

	char *snapdir_name;   /* default ".snap" */
	char *mds_namespace;  /* default NULL */
	char *server_path;    /* default NULL (means "/") */
	char *fscache_uniq;   /* default NULL */
	char *mon_addr;
};
    101
/*
 * Per-superblock ceph filesystem client state.  This is the object that
 * ceph_sb_to_client() / ceph_inode_to_client() recover from
 * sb->s_fs_info.
 */
struct ceph_fs_client {
	struct super_block *sb;

	struct list_head metric_wakeup;

	struct ceph_mount_options *mount_options; /* see ceph_*_mount_opt() */
	struct ceph_client *client;

	int mount_state;

	bool blocklisted;

	bool have_copy_from2;

	u32 filp_gen;
	loff_t max_file_size;

	struct ceph_mds_client *mdsc;   /* returned by ceph_sb_to_mdsc() */

	atomic_long_t writeback_count;
	bool write_congested;

	struct workqueue_struct *inode_wq;
	struct workqueue_struct *cap_wq;

	/* debugfs entries; only present when CONFIG_DEBUG_FS is enabled */
#ifdef CONFIG_DEBUG_FS
	struct dentry *debugfs_dentry_lru, *debugfs_caps;
	struct dentry *debugfs_congestion_kb;
	struct dentry *debugfs_bdi;
	struct dentry *debugfs_mdsc, *debugfs_mdsmap;
	struct dentry *debugfs_status;
	struct dentry *debugfs_mds_sessions;
	struct dentry *debugfs_metrics_dir;
#endif

	/* fscache volume; only present when CONFIG_CEPH_FSCACHE is enabled */
#ifdef CONFIG_CEPH_FSCACHE
	struct fscache_volume *fscache;
#endif
};
    141
    142
/*
 * File i/o capability.  This tracks shared state with the metadata
 * server that allows us to cache or writeback attributes or to read
 * and write data.  For any given inode, we should have one or more
 * capabilities, one issued by each metadata server, and our
 * cumulative access is the OR of all issued capabilities.
 *
 * Each cap is referenced by the inode's i_caps rbtree and by per-mds
 * session capability lists.
 *
 * The anonymous union overlays two mutually exclusive views of the same
 * storage: the fields of an in-use cap and the fields of a cap that is
 * queued for release.
 */
struct ceph_cap {
	struct ceph_inode_info *ci;
	struct rb_node ci_node;          /* per-ci cap tree */
	struct ceph_mds_session *session;
	struct list_head session_caps;   /* per-session caplist */
	u64 cap_id;       /* unique cap id (mds provided) */
	union {
		/* in-use caps */
		struct {
			int issued;       /* latest, from the mds */
			int implemented;  /* implemented superset of
					     issued (for revocation) */
			int mds;	  /* mds index for this cap */
			int mds_wanted;   /* caps wanted from this mds */
		};
		/* caps to release */
		struct {
			u64 cap_ino;
			int queue_release;
		};
	};
	u32 seq, issue_seq, mseq;
	u32 cap_gen;      /* active/stale cycle */
	unsigned long last_used;
	struct list_head caps_item;
};
    179
/*
 * Distinct power-of-two flag bits, so they can be OR-ed together.
 * NOTE(review): the function consuming them is not visible in this chunk.
 */
#define CHECK_CAPS_AUTHONLY   1  /* only check auth cap */
#define CHECK_CAPS_FLUSH      2  /* flush any dirty caps */
#define CHECK_CAPS_NOINVAL    4  /* don't invalidate pagecache */
    183
/*
 * One cap flush record.  Embedded in struct ceph_cap_snap (cap_flush) and
 * preallocated per inode (ceph_inode_info.i_prealloc_cap_flush); standalone
 * instances come from ceph_alloc_cap_flush() / ceph_free_cap_flush().
 */
struct ceph_cap_flush {
	u64 tid;
	int caps;
	bool wake; /* wake up flush waiters when finish ? */
	bool is_capsnap; /* true means capsnap */
	struct list_head g_list; // global
	struct list_head i_list; // per inode
};
    192
/*
 * Snapped cap state that is pending flush to mds.  When a snapshot occurs,
 * we first complete any in-process sync writes and writeback any dirty
 * data before flushing the snapped state (tracked here) back to the MDS.
 *
 * Reference counted through nref; ceph_put_cap_snap() drops a reference
 * and frees the object (and its xattr_blob) on the last put.
 */
struct ceph_cap_snap {
	refcount_t nref;
	struct list_head ci_item;

	struct ceph_cap_flush cap_flush;

	u64 follows;
	int issued, dirty;
	struct ceph_snap_context *context;

	umode_t mode;
	kuid_t uid;
	kgid_t gid;

	struct ceph_buffer *xattr_blob; /* may be NULL; put on final release */
	u64 xattr_version;

	u64 size;
	u64 change_attr;
	struct timespec64 mtime, atime, ctime, btime;
	u64 time_warp_seq;
	u64 truncate_size;
	u32 truncate_seq;
	int writing;   /* a sync write is still in progress */
	int dirty_pages;     /* dirty pages awaiting writeback */
	bool inline_data;
	bool need_flush;
};
    226
    227static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
    228{
    229	if (refcount_dec_and_test(&capsnap->nref)) {
    230		if (capsnap->xattr_blob)
    231			ceph_buffer_put(capsnap->xattr_blob);
    232		kmem_cache_free(ceph_cap_snap_cachep, capsnap);
    233	}
    234}
    235
/*
 * The frag tree describes how a directory is fragmented, potentially across
 * multiple metadata servers.  It is also used to indicate points where
 * metadata authority is delegated, and whether/where metadata is replicated.
 *
 * A _leaf_ frag will be present in the i_fragtree IFF there is
 * delegation info.  That is, if mds >= 0 || ndist > 0.
 */
#define CEPH_MAX_DIRFRAG_REP 4	/* capacity of ceph_inode_frag.dist[] */

/* One node of ceph_inode_info.i_fragtree. */
struct ceph_inode_frag {
	struct rb_node node;

	/* fragtree state */
	u32 frag;
	int split_by;         /* i.e. 2^(split_by) children */

	/* delegation and replication info */
	int mds;              /* -1 if same authority as parent */
	int ndist;            /* >0 if replicated */
	int dist[CEPH_MAX_DIRFRAG_REP];
};
    258
/*
 * We cache inode xattrs as an encoded blob until they are first used,
 * at which point we parse them into an rbtree.
 *
 * This is one parsed entry of that rbtree.
 * NOTE(review): should_free_name / should_free_val presumably record
 * whether name / val are owned by this entry and must be freed with it --
 * confirm against the xattr code.
 */
struct ceph_inode_xattr {
	struct rb_node node;

	const char *name;
	int name_len;
	const char *val;
	int val_len;
	int dirty;

	int should_free_name;
	int should_free_val;
};
    275
/*
 * Ceph dentry state.  Stored in dentry->d_fsdata and retrieved with
 * ceph_dentry().
 */
struct ceph_dentry_info {
	struct dentry *dentry;
	struct ceph_mds_session *lease_session;
	struct list_head lease_list;
	unsigned flags;	/* presumably CEPH_DENTRY_* bits -- TODO confirm */
	int lease_shared_gen;
	u32 lease_gen;
	u32 lease_seq;
	unsigned long lease_renew_after, lease_renew_from;
	unsigned long time;
	u64 offset;
};
    291
/*
 * Distinct power-of-two flag bits.
 * NOTE(review): presumably stored in ceph_dentry_info.flags -- confirm.
 */
#define CEPH_DENTRY_REFERENCED		1
#define CEPH_DENTRY_LEASE_LIST		2
#define CEPH_DENTRY_SHRINK_LIST		4
#define CEPH_DENTRY_PRIMARY_LINK	8
    296
/* Per-inode xattr cache; embedded in ceph_inode_info as i_xattrs. */
struct ceph_inode_xattrs_info {
	/*
	 * (still encoded) xattr blob. we avoid the overhead of parsing
	 * this until someone actually calls getxattr, etc.
	 *
	 * blob->vec.iov_len == 4 implies there are no xattrs; blob ==
	 * NULL means we don't know.
	 */
	struct ceph_buffer *blob, *prealloc_blob;

	struct rb_root index;	/* rbtree root for the parsed xattrs */
	bool dirty;
	int count;
	int names_size;
	int vals_size;
	u64 version, index_version;
};
    314
/*
 * Ceph inode.
 *
 * The VFS inode is embedded inside the netfs member (netfs.inode);
 * ceph_inode() maps a struct inode back to this structure with
 * container_of().
 */
struct ceph_inode_info {
	struct netfs_inode netfs; /* Netfslib context and vfs inode */
	struct ceph_vino i_vino;   /* ceph ino + snap */

	spinlock_t i_ceph_lock;

	u64 i_version;
	u64 i_inline_version;
	u32 i_time_warp_seq;

	unsigned long i_ceph_flags;	/* CEPH_I_* bits */
	/*
	 * Directory completeness tracking: __ceph_dir_clear_complete() and
	 * __ceph_dir_clear_ordered() bump the two counters, and
	 * __ceph_dir_set_complete() records values in i_complete_seq[0]
	 * (release count) and [1] (ordered count) for later comparison.
	 */
	atomic64_t i_release_count;
	atomic64_t i_ordered_count;
	atomic64_t i_complete_seq[2];

	struct ceph_dir_layout i_dir_layout;
	struct ceph_file_layout i_layout;
	struct ceph_file_layout i_cached_layout;	// for async creates
	char *i_symlink;

	/* for dirs */
	struct timespec64 i_rctime;
	u64 i_rbytes, i_rfiles, i_rsubdirs, i_rsnaps;
	u64 i_files, i_subdirs;

	/* quotas */
	u64 i_max_bytes, i_max_files;

	s32 i_dir_pin;

	struct rb_root i_fragtree;
	int i_fragtree_nsplits;
	struct mutex i_fragtree_mutex;

	struct ceph_inode_xattrs_info i_xattrs;

	/* capabilities.  protected _both_ by i_ceph_lock and cap->session's
	 * s_mutex. */
	struct rb_root i_caps;           /* cap list */
	struct ceph_cap *i_auth_cap;     /* authoritative cap, if any */
	unsigned i_dirty_caps, i_flushing_caps;     /* mask of dirtied fields */

	/*
	 * Link to the auth cap's session's s_cap_dirty list. s_cap_dirty
	 * is protected by the mdsc->cap_dirty_lock, but each individual item
	 * is also protected by the inode's i_ceph_lock. Walking s_cap_dirty
	 * requires the mdsc->cap_dirty_lock. List presence for an item can
	 * be tested under the i_ceph_lock. Changing anything requires both.
	 */
	struct list_head i_dirty_item;

	/*
	 * Link to session's s_cap_flushing list. Protected in a similar
	 * fashion to i_dirty_item, but also by the s_mutex for changes. The
	 * s_cap_flushing list can be walked while holding either the s_mutex
	 * or mdsc->cap_dirty_lock. List presence can also be checked while
	 * holding the i_ceph_lock for this inode.
	 */
	struct list_head i_flushing_item;

	/* we need to track cap writeback on a per-cap-bit basis, to allow
	 * overlapping, pipelined cap flushes to the mds.  we can probably
	 * reduce the tid to 8 bits if we're concerned about inode size. */
	struct ceph_cap_flush *i_prealloc_cap_flush;
	struct list_head i_cap_flush_list;
	wait_queue_head_t i_cap_wq;      /* threads waiting on a capability */
	unsigned long i_hold_caps_max; /* jiffies */
	struct list_head i_cap_delay_list;  /* for delayed cap release to mds */
	struct ceph_cap_reservation i_cap_migration_resv;
	struct list_head i_cap_snaps;   /* snapped state pending flush to mds */
	struct ceph_snap_context *i_head_snapc;  /* set if wr_buffer_head > 0 or
						    dirty|flushing caps */
	unsigned i_snap_caps;           /* cap bits for snapped files */

	unsigned long i_last_rd;
	unsigned long i_last_wr;
	int i_nr_by_mode[CEPH_FILE_MODE_BITS];  /* open file counts */

	struct mutex i_truncate_mutex;
	u32 i_truncate_seq;        /* last truncate to smaller size */
	u64 i_truncate_size;       /*  and the size we last truncated down to */
	int i_truncate_pending;    /*  still need to call vmtruncate */

	u64 i_max_size;            /* max file size authorized by mds */
	u64 i_reported_size; /* (max_)size reported to or requested of mds */
	u64 i_wanted_max_size;     /* offset we'd like to write to */
	u64 i_requested_max_size;  /* max_size we've requested */

	/* held references to caps */
	int i_pin_ref;
	int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref, i_fx_ref;
	int i_wrbuffer_ref, i_wrbuffer_ref_head;
	atomic_t i_filelock_ref;
	atomic_t i_shared_gen;       /* increment each time we get FILE_SHARED */
	u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
	u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */

	struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */
	struct list_head i_unsafe_iops;   /* uncommitted mds inode ops */
	spinlock_t i_unsafe_lock;

	/* the two pointers share storage; only one is meaningful at a time */
	union {
		struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */
		struct ceph_snapid_map *i_snapid_map; /* snapid -> dev_t */
	};
	struct list_head i_snap_realm_item;
	struct list_head i_snap_flush_item;
	struct timespec64 i_btime;
	struct timespec64 i_snap_btime;

	struct work_struct i_work;
	/* presumably indexed by the CEPH_I_WORK_* bit numbers -- TODO confirm */
	unsigned long  i_work_mask;
};
    431
    432static inline struct ceph_inode_info *
    433ceph_inode(const struct inode *inode)
    434{
    435	return container_of(inode, struct ceph_inode_info, netfs.inode);
    436}
    437
    438static inline struct ceph_fs_client *
    439ceph_inode_to_client(const struct inode *inode)
    440{
    441	return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
    442}
    443
    444static inline struct ceph_fs_client *
    445ceph_sb_to_client(const struct super_block *sb)
    446{
    447	return (struct ceph_fs_client *)sb->s_fs_info;
    448}
    449
    450static inline struct ceph_mds_client *
    451ceph_sb_to_mdsc(const struct super_block *sb)
    452{
    453	return (struct ceph_mds_client *)ceph_sb_to_client(sb)->mdsc;
    454}
    455
    456static inline struct ceph_vino
    457ceph_vino(const struct inode *inode)
    458{
    459	return ceph_inode(inode)->i_vino;
    460}
    461
    462static inline u32 ceph_ino_to_ino32(u64 vino)
    463{
    464	u32 ino = vino & 0xffffffff;
    465	ino ^= vino >> 32;
    466	if (!ino)
    467		ino = 2;
    468	return ino;
    469}
    470
    471/*
    472 * Inode numbers in cephfs are 64 bits, but inode->i_ino is 32-bits on
    473 * some arches. We generally do not use this value inside the ceph driver, but
    474 * we do want to set it to something, so that generic vfs code has an
    475 * appropriate value for tracepoints and the like.
    476 */
    477static inline ino_t ceph_vino_to_ino_t(struct ceph_vino vino)
    478{
    479	if (sizeof(ino_t) == sizeof(u32))
    480		return ceph_ino_to_ino32(vino.ino);
    481	return (ino_t)vino.ino;
    482}
    483
/*
 * for printf-style formatting
 *
 * Expands to TWO comma-separated arguments (ino, snap), so it is
 * deliberately not parenthesized.  Note that @i is evaluated twice.
 */
#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
    486
    487static inline u64 ceph_ino(struct inode *inode)
    488{
    489	return ceph_inode(inode)->i_vino.ino;
    490}
    491
    492static inline u64 ceph_snap(struct inode *inode)
    493{
    494	return ceph_inode(inode)->i_vino.snap;
    495}
    496
    497/**
    498 * ceph_present_ino - format an inode number for presentation to userland
    499 * @sb: superblock where the inode lives
    500 * @ino: inode number to (possibly) convert
    501 *
    502 * If the user mounted with the ino32 option, then the 64-bit value needs
    503 * to be converted to something that can fit inside 32 bits. Note that
    504 * internal kernel code never uses this value, so this is entirely for
    505 * userland consumption.
    506 */
    507static inline u64 ceph_present_ino(struct super_block *sb, u64 ino)
    508{
    509	if (unlikely(ceph_test_mount_opt(ceph_sb_to_client(sb), INO32)))
    510		return ceph_ino_to_ino32(ino);
    511	return ino;
    512}
    513
    514static inline u64 ceph_present_inode(struct inode *inode)
    515{
    516	return ceph_present_ino(inode->i_sb, ceph_ino(inode));
    517}
    518
    519static inline int ceph_ino_compare(struct inode *inode, void *data)
    520{
    521	struct ceph_vino *pvino = (struct ceph_vino *)data;
    522	struct ceph_inode_info *ci = ceph_inode(inode);
    523	return ci->i_vino.ino == pvino->ino &&
    524		ci->i_vino.snap == pvino->snap;
    525}
    526
/*
 * The MDS reserves a set of inodes for its own usage. These should never
 * be accessible by clients, and so the MDS has no reason to ever hand these
 * out. The range is CEPH_MDS_INO_MDSDIR_OFFSET..CEPH_INO_SYSTEM_BASE
 * (lower bound inclusive, upper bound exclusive, as checked by
 * ceph_vino_is_reserved()).
 *
 * These come from src/mds/mdstypes.h in the ceph sources.
 */
#define CEPH_MAX_MDS			0x100
#define CEPH_NUM_STRAY			10
#define CEPH_MDS_INO_MDSDIR_OFFSET	(1 * CEPH_MAX_MDS)
#define CEPH_MDS_INO_LOG_OFFSET		(2 * CEPH_MAX_MDS)
#define CEPH_INO_SYSTEM_BASE		((6*CEPH_MAX_MDS) + (CEPH_MAX_MDS * CEPH_NUM_STRAY))
    539
    540static inline bool ceph_vino_is_reserved(const struct ceph_vino vino)
    541{
    542	if (vino.ino >= CEPH_INO_SYSTEM_BASE ||
    543	    vino.ino < CEPH_MDS_INO_MDSDIR_OFFSET)
    544		return false;
    545
    546	/* Don't warn on mdsdirs */
    547	WARN_RATELIMIT(vino.ino >= CEPH_MDS_INO_LOG_OFFSET,
    548			"Attempt to access reserved inode number 0x%llx",
    549			vino.ino);
    550	return true;
    551}
    552
    553static inline struct inode *ceph_find_inode(struct super_block *sb,
    554					    struct ceph_vino vino)
    555{
    556	if (ceph_vino_is_reserved(vino))
    557		return NULL;
    558
    559	/*
    560	 * NB: The hashval will be run through the fs/inode.c hash function
    561	 * anyway, so there is no need to squash the inode number down to
    562	 * 32-bits first. Just use low-order bits on arches with 32-bit long.
    563	 */
    564	return ilookup5(sb, (unsigned long)vino.ino, ceph_ino_compare, &vino);
    565}
    566
    567
/*
 * Ceph inode flags.  CEPH_I_ERROR_WRITE is set and cleared in
 * ceph_inode_info.i_ceph_flags by ceph_{set,clear}_error_write();
 * presumably the other CEPH_I_* bits live in the same field -- confirm.
 * CEPH_ASYNC_CREATE_BIT is a bit number, not a mask; CEPH_I_ASYNC_CREATE
 * is the matching mask.
 */
#define CEPH_I_DIR_ORDERED	(1 << 0)  /* dentries in dir are ordered */
#define CEPH_I_FLUSH		(1 << 2)  /* do not delay flush of dirty metadata */
#define CEPH_I_POOL_PERM	(1 << 3)  /* pool rd/wr bits are valid */
#define CEPH_I_POOL_RD		(1 << 4)  /* can read from pool */
#define CEPH_I_POOL_WR		(1 << 5)  /* can write to pool */
#define CEPH_I_SEC_INITED	(1 << 6)  /* security initialized */
#define CEPH_I_KICK_FLUSH	(1 << 7)  /* kick flushing caps */
#define CEPH_I_FLUSH_SNAPS	(1 << 8)  /* need flush snaps */
#define CEPH_I_ERROR_WRITE	(1 << 9) /* have seen write errors */
#define CEPH_I_ERROR_FILELOCK	(1 << 10) /* have seen file lock errors */
#define CEPH_I_ODIRECT		(1 << 11) /* inode in direct I/O mode */
#define CEPH_ASYNC_CREATE_BIT	(12)	  /* async create in flight for this */
#define CEPH_I_ASYNC_CREATE	(1 << CEPH_ASYNC_CREATE_BIT)
#define CEPH_I_SHUTDOWN		(1 << 13) /* inode is no longer usable */
    585
/*
 * Masks of ceph inode work.
 *
 * NOTE(review): the values are consecutive integers, so they look like
 * bit numbers (presumably into ceph_inode_info.i_work_mask) rather than
 * ready-made masks -- confirm against the users.
 */
#define CEPH_I_WORK_WRITEBACK		0
#define CEPH_I_WORK_INVALIDATE_PAGES	1
#define CEPH_I_WORK_VMTRUNCATE		2
#define CEPH_I_WORK_CHECK_CAPS		3
#define CEPH_I_WORK_FLUSH_SNAPS		4
    594
    595/*
    596 * We set the ERROR_WRITE bit when we start seeing write errors on an inode
    597 * and then clear it when they start succeeding. Note that we do a lockless
    598 * check first, and only take the lock if it looks like it needs to be changed.
    599 * The write submission code just takes this as a hint, so we're not too
    600 * worried if a few slip through in either direction.
    601 */
    602static inline void ceph_set_error_write(struct ceph_inode_info *ci)
    603{
    604	if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE)) {
    605		spin_lock(&ci->i_ceph_lock);
    606		ci->i_ceph_flags |= CEPH_I_ERROR_WRITE;
    607		spin_unlock(&ci->i_ceph_lock);
    608	}
    609}
    610
    611static inline void ceph_clear_error_write(struct ceph_inode_info *ci)
    612{
    613	if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ERROR_WRITE) {
    614		spin_lock(&ci->i_ceph_lock);
    615		ci->i_ceph_flags &= ~CEPH_I_ERROR_WRITE;
    616		spin_unlock(&ci->i_ceph_lock);
    617	}
    618}
    619
/*
 * Record @release_count and @ordered_count in i_complete_seq[0] and [1].
 * __ceph_dir_is_complete() and __ceph_dir_is_complete_ordered() later
 * compare these against the live i_release_count / i_ordered_count.
 */
static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
					   long long release_count,
					   long long ordered_count)
{
	/*
	 * Makes sure operations that setup readdir cache (update page
	 * cache and i_size) are strongly ordered w.r.t. the following
	 * atomic64_set() operations.
	 */
	smp_mb();
	atomic64_set(&ci->i_complete_seq[0], release_count);
	atomic64_set(&ci->i_complete_seq[1], ordered_count);
}
    633
/*
 * Bump i_release_count.  Any value previously stored in i_complete_seq[0]
 * then no longer matches, so __ceph_dir_is_complete() returns false until
 * __ceph_dir_set_complete() is called with the new count.
 */
static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci)
{
	atomic64_inc(&ci->i_release_count);
}
    638
/*
 * Bump i_ordered_count.  Any value previously stored in i_complete_seq[1]
 * then no longer matches, so __ceph_dir_is_complete_ordered() returns
 * false until __ceph_dir_set_complete() is called with the new count.
 */
static inline void __ceph_dir_clear_ordered(struct ceph_inode_info *ci)
{
	atomic64_inc(&ci->i_ordered_count);
}
    643
/*
 * True when the release count recorded by __ceph_dir_set_complete()
 * (i_complete_seq[0]) still equals the live i_release_count.  Both values
 * are read with plain atomic64_read(); no lock is taken here.
 */
static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci)
{
	return atomic64_read(&ci->i_complete_seq[0]) ==
		atomic64_read(&ci->i_release_count);
}
    649
/*
 * True when both recorded counts (i_complete_seq[0] and [1]) still equal
 * the live i_release_count and i_ordered_count respectively.  The ordered
 * pair is only read if the release pair matches (short-circuit &&).
 */
static inline bool __ceph_dir_is_complete_ordered(struct ceph_inode_info *ci)
{
	return  atomic64_read(&ci->i_complete_seq[0]) ==
		atomic64_read(&ci->i_release_count) &&
		atomic64_read(&ci->i_complete_seq[1]) ==
		atomic64_read(&ci->i_ordered_count);
}
    657
    658static inline void ceph_dir_clear_complete(struct inode *inode)
    659{
    660	__ceph_dir_clear_complete(ceph_inode(inode));
    661}
    662
    663static inline void ceph_dir_clear_ordered(struct inode *inode)
    664{
    665	__ceph_dir_clear_ordered(ceph_inode(inode));
    666}
    667
/*
 * struct inode flavour of __ceph_dir_is_complete_ordered(), followed by a
 * read barrier so that the counter reads above are ordered before any
 * reads the caller performs afterwards.
 * NOTE(review): presumably pairs with the smp_mb() in
 * __ceph_dir_set_complete() -- confirm.
 */
static inline bool ceph_dir_is_complete_ordered(struct inode *inode)
{
	bool ret = __ceph_dir_is_complete_ordered(ceph_inode(inode));
	smp_rmb();
	return ret;
}
    674
/*
 * Frag tree lookups; both functions are only declared here and are
 * defined outside this header.
 */

/* find a specific frag @f */
extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci,
						u32 f);

/*
 * choose fragment for value @v.  copy frag content to pfrag, if leaf
 * exists
 */
extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
			    struct ceph_inode_frag *pfrag,
			    int *found);
    686
    687static inline struct ceph_dentry_info *ceph_dentry(const struct dentry *dentry)
    688{
    689	return (struct ceph_dentry_info *)dentry->d_fsdata;
    690}
    691
    692/*
    693 * caps helpers
    694 */
    695static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci)
    696{
    697	return !RB_EMPTY_ROOT(&ci->i_caps);
    698}
    699
/*
 * Issued-caps queries, declared here and defined outside this header.
 * The inline wrappers in this header (ceph_caps_issued(),
 * ceph_caps_issued_mask_metric()) call the __-prefixed variants with
 * ci->i_ceph_lock held.
 */
extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented);
extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t);
extern int __ceph_caps_issued_mask_metric(struct ceph_inode_info *ci, int mask,
					  int t);
extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
				    struct ceph_cap *cap);
    706
    707static inline int ceph_caps_issued(struct ceph_inode_info *ci)
    708{
    709	int issued;
    710	spin_lock(&ci->i_ceph_lock);
    711	issued = __ceph_caps_issued(ci, NULL);
    712	spin_unlock(&ci->i_ceph_lock);
    713	return issued;
    714}
    715
    716static inline int ceph_caps_issued_mask_metric(struct ceph_inode_info *ci,
    717					       int mask, int touch)
    718{
    719	int r;
    720	spin_lock(&ci->i_ceph_lock);
    721	r = __ceph_caps_issued_mask_metric(ci, mask, touch);
    722	spin_unlock(&ci->i_ceph_lock);
    723	return r;
    724}
    725
    726static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
    727{
    728	return ci->i_dirty_caps | ci->i_flushing_caps;
    729}
/*
 * Cap flush allocation, dirty marking and revocation/usage queries.
 * Declared here; defined outside this header.
 */
extern struct ceph_cap_flush *ceph_alloc_cap_flush(void);
extern void ceph_free_cap_flush(struct ceph_cap_flush *cf);
extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
				  struct ceph_cap_flush **pcf);

extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
				      struct ceph_cap *ocap, int mask);
extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
extern int __ceph_caps_used(struct ceph_inode_info *ci);
    739
    740static inline bool __ceph_is_file_opened(struct ceph_inode_info *ci)
    741{
    742	return ci->i_nr_by_mode[0];
    743}
/*
 * Wanted-caps queries plus cap pool setup and reservation management.
 * Declared here; defined outside this header.
 */
extern int __ceph_caps_file_wanted(struct ceph_inode_info *ci);
extern int __ceph_caps_wanted(struct ceph_inode_info *ci);

/* what the mds thinks we want */
extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check);

extern void ceph_caps_init(struct ceph_mds_client *mdsc);
extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
extern void ceph_adjust_caps_max_min(struct ceph_mds_client *mdsc,
				     struct ceph_mount_options *fsopt);
extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
			     struct ceph_cap_reservation *ctx, int need);
extern void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
			       struct ceph_cap_reservation *ctx);
/* results are handed back through the five int out-parameters */
extern void ceph_reservation_status(struct ceph_fs_client *client,
				    int *total, int *avail, int *used,
				    int *reserved, int *min);
    761
    762
    763
/*
 * we keep buffered readdir results attached to file->private_data
 */
#define CEPH_F_SYNC     1
#define CEPH_F_ATEND    2

/*
 * Per-open-file state.  Also embedded as the first member of
 * struct ceph_dir_file_info.
 */
struct ceph_file_info {
	short fmode;     /* initialized on open */
	short flags;     /* CEPH_F_* */

	/*
	 * List of struct ceph_rw_context (linked through ->list);
	 * rw_contexts_lock is taken by ceph_{add,del,find}_rw_context()
	 * around every list access.
	 */
	spinlock_t rw_contexts_lock;
	struct list_head rw_contexts;

	u32 filp_gen;
	atomic_t num_locks;
};
    780
/*
 * Per-open-directory state: the generic ceph_file_info followed by
 * readdir position tracking.
 * NOTE(review): dir_release_count / dir_ordered_count presumably hold
 * values of the inode's i_release_count / i_ordered_count for a later
 * __ceph_dir_set_complete() -- confirm against the readdir code.
 */
struct ceph_dir_file_info {
	struct ceph_file_info file_info;

	/* readdir: position within the dir */
	u32 frag;
	struct ceph_mds_request *last_readdir;

	/* readdir: position within a frag */
	unsigned next_offset;  /* offset of next chunk (last_name's + 1) */
	char *last_name;       /* last entry in previous chunk */
	long long dir_release_count;
	long long dir_ordered_count;
	int readdir_cache_idx;

	/* used for -o dirstat read() on directory thing */
	char *dir_info;
	int dir_info_len;
};
    799
/*
 * One entry on ceph_file_info.rw_contexts.  ceph_find_rw_context()
 * locates the entry whose ->thread equals the calling task (current).
 */
struct ceph_rw_context {
	struct list_head list;		/* link in ceph_file_info.rw_contexts */
	struct task_struct *thread;	/* task this context belongs to */
	int caps;
};
    805
/*
 * Define a struct ceph_rw_context variable called @_name whose ->thread is
 * the current task and whose ->caps is @_caps.  The ->list member is not
 * named in the initializer and is therefore zero-initialized.
 */
#define CEPH_DEFINE_RW_CONTEXT(_name, _caps)	\
	struct ceph_rw_context _name = {	\
		.thread = current,		\
		.caps = _caps,			\
	}
    811
    812static inline void ceph_add_rw_context(struct ceph_file_info *cf,
    813				       struct ceph_rw_context *ctx)
    814{
    815	spin_lock(&cf->rw_contexts_lock);
    816	list_add(&ctx->list, &cf->rw_contexts);
    817	spin_unlock(&cf->rw_contexts_lock);
    818}
    819
    820static inline void ceph_del_rw_context(struct ceph_file_info *cf,
    821				       struct ceph_rw_context *ctx)
    822{
    823	spin_lock(&cf->rw_contexts_lock);
    824	list_del(&ctx->list);
    825	spin_unlock(&cf->rw_contexts_lock);
    826}
    827
    828static inline struct ceph_rw_context*
    829ceph_find_rw_context(struct ceph_file_info *cf)
    830{
    831	struct ceph_rw_context *ctx, *found = NULL;
    832	spin_lock(&cf->rw_contexts_lock);
    833	list_for_each_entry(ctx, &cf->rw_contexts, list) {
    834		if (ctx->thread == current) {
    835			found = ctx;
    836			break;
    837		}
    838	}
    839	spin_unlock(&cf->rw_contexts_lock);
    840	return found;
    841}
    842
/*
 * Cursor used while working with the readdir cache.
 * NOTE(review): presumably @dentries points at the dentry-pointer array
 * held in @page and @index identifies the current page -- confirm against
 * the readdir cache code; nothing in this header uses the struct.
 */
struct ceph_readdir_cache_control {
	struct page  *page;
	struct dentry **dentries;
	int index;
};
    848
/*
 * A "snap realm" describes a subset of the file hierarchy sharing
 * the same set of snapshots that apply to it.  The realms themselves
 * are organized into a hierarchy, such that children inherit (some of)
 * the snapshots of their parents.
 *
 * All inodes within the realm that have capabilities are linked into a
 * per-realm list.
 */
struct ceph_snap_realm {
	u64 ino;
	struct inode *inode;
	atomic_t nref;
	struct rb_node node;

	u64 created, seq;
	u64 parent_ino;
	u64 parent_since;   /* snapid when our current parent became so */

	u64 *prior_parent_snaps;      /* snaps inherited from any parents we */
	u32 num_prior_parent_snaps;   /*  had prior to parent_since */
	u64 *snaps;                   /* snaps specific to this realm */
	u32 num_snaps;

	struct ceph_snap_realm *parent;
	struct list_head children;       /* list of child realms */
	struct list_head child_item;     /* presumably our link in parent->children -- confirm */

	struct list_head empty_item;     /* if i have ref==0 */

	struct list_head dirty_item;     /* if realm needs new context */

	struct list_head rebuild_item;   /* rebuild snap realms _downward_ in hierarchy */

	/* the current set of snaps for this realm */
	struct ceph_snap_context *cached_context;

	/* the per-realm list of inodes with caps mentioned above */
	struct list_head inodes_with_caps;
	spinlock_t inodes_with_caps_lock; /* presumably guards inodes_with_caps -- confirm */
};
    889
    890static inline int default_congestion_kb(void)
    891{
    892	int congestion_kb;
    893
    894	/*
    895	 * Copied from NFS
    896	 *
    897	 * congestion size, scale with available memory.
    898	 *
    899	 *  64MB:    8192k
    900	 * 128MB:   11585k
    901	 * 256MB:   16384k
    902	 * 512MB:   23170k
    903	 *   1GB:   32768k
    904	 *   2GB:   46340k
    905	 *   4GB:   65536k
    906	 *   8GB:   92681k
    907	 *  16GB:  131072k
    908	 *
    909	 * This allows larger machines to have larger/more transfers.
    910	 * Limit the default to 256M
    911	 */
    912	congestion_kb = (16*int_sqrt(totalram_pages())) << (PAGE_SHIFT-10);
    913	if (congestion_kb > 256*1024)
    914		congestion_kb = 256*1024;
    915
    916	return congestion_kb;
    917}
    918
    919
    920/* super.c */
    921extern int ceph_force_reconnect(struct super_block *sb);
    922/* snap.c */
    923struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
    924					       u64 ino);
    925extern void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
    926				struct ceph_snap_realm *realm);
    927extern void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
    928				struct ceph_snap_realm *realm);
    929extern int ceph_update_snap_trace(struct ceph_mds_client *m,
    930				  void *p, void *e, bool deletion,
    931				  struct ceph_snap_realm **realm_ret);
    932void ceph_change_snap_realm(struct inode *inode, struct ceph_snap_realm *realm);
    933extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
    934			     struct ceph_mds_session *session,
    935			     struct ceph_msg *msg);
    936extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
    937				  struct ceph_cap_snap *capsnap);
    938extern void ceph_cleanup_global_and_empty_realms(struct ceph_mds_client *mdsc);
    939
    940extern struct ceph_snapid_map *ceph_get_snapid_map(struct ceph_mds_client *mdsc,
    941						   u64 snap);
    942extern void ceph_put_snapid_map(struct ceph_mds_client* mdsc,
    943				struct ceph_snapid_map *sm);
    944extern void ceph_trim_snapid_map(struct ceph_mds_client *mdsc);
    945extern void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc);
    946void ceph_umount_begin(struct super_block *sb);
    947
    948
    949/*
    950 * a cap_snap is "pending" if it is still awaiting an in-progress
    951 * sync write (that may/may not still update size, mtime, etc.).
    952 */
    953static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci)
    954{
    955	return !list_empty(&ci->i_cap_snaps) &&
    956	       list_last_entry(&ci->i_cap_snaps, struct ceph_cap_snap,
    957			       ci_item)->writing;
    958}
    959
    960/* inode.c */
    961struct ceph_mds_reply_info_in;
    962struct ceph_mds_reply_dirfrag;
    963
    964extern const struct inode_operations ceph_file_iops;
    965
    966extern struct inode *ceph_alloc_inode(struct super_block *sb);
    967extern void ceph_evict_inode(struct inode *inode);
    968extern void ceph_free_inode(struct inode *inode);
    969
    970extern struct inode *ceph_get_inode(struct super_block *sb,
    971				    struct ceph_vino vino);
    972extern struct inode *ceph_get_snapdir(struct inode *parent);
    973extern int ceph_fill_file_size(struct inode *inode, int issued,
    974			       u32 truncate_seq, u64 truncate_size, u64 size);
    975extern void ceph_fill_file_time(struct inode *inode, int issued,
    976				u64 time_warp_seq, struct timespec64 *ctime,
    977				struct timespec64 *mtime,
    978				struct timespec64 *atime);
    979extern int ceph_fill_inode(struct inode *inode, struct page *locked_page,
    980		    struct ceph_mds_reply_info_in *iinfo,
    981		    struct ceph_mds_reply_dirfrag *dirinfo,
    982		    struct ceph_mds_session *session, int cap_fmode,
    983		    struct ceph_cap_reservation *caps_reservation);
    984extern int ceph_fill_trace(struct super_block *sb,
    985			   struct ceph_mds_request *req);
    986extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
    987				    struct ceph_mds_session *session);
    988
    989extern int ceph_inode_holds_cap(struct inode *inode, int mask);
    990
    991extern bool ceph_inode_set_size(struct inode *inode, loff_t size);
    992extern void __ceph_do_pending_vmtruncate(struct inode *inode);
    993
    994void ceph_queue_inode_work(struct inode *inode, int work_bit);
    995
/* Pass @inode to ceph_queue_inode_work() with the CEPH_I_WORK_VMTRUNCATE bit. */
static inline void ceph_queue_vmtruncate(struct inode *inode)
{
	ceph_queue_inode_work(inode, CEPH_I_WORK_VMTRUNCATE);
}
   1000
/* Pass @inode to ceph_queue_inode_work() with the CEPH_I_WORK_INVALIDATE_PAGES bit. */
static inline void ceph_queue_invalidate(struct inode *inode)
{
	ceph_queue_inode_work(inode, CEPH_I_WORK_INVALIDATE_PAGES);
}
   1005
/* Pass @inode to ceph_queue_inode_work() with the CEPH_I_WORK_WRITEBACK bit. */
static inline void ceph_queue_writeback(struct inode *inode)
{
	ceph_queue_inode_work(inode, CEPH_I_WORK_WRITEBACK);
}
   1010
/* Pass @inode to ceph_queue_inode_work() with the CEPH_I_WORK_CHECK_CAPS bit. */
static inline void ceph_queue_check_caps(struct inode *inode)
{
	ceph_queue_inode_work(inode, CEPH_I_WORK_CHECK_CAPS);
}
   1015
/* Pass @inode to ceph_queue_inode_work() with the CEPH_I_WORK_FLUSH_SNAPS bit. */
static inline void ceph_queue_flush_snaps(struct inode *inode)
{
	ceph_queue_inode_work(inode, CEPH_I_WORK_FLUSH_SNAPS);
}
   1020
   1021extern int ceph_try_to_choose_auth_mds(struct inode *inode, int mask);
   1022extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
   1023			     int mask, bool force);
   1024static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
   1025{
   1026	return __ceph_do_getattr(inode, NULL, mask, force);
   1027}
   1028extern int ceph_permission(struct user_namespace *mnt_userns,
   1029			   struct inode *inode, int mask);
   1030extern int __ceph_setattr(struct inode *inode, struct iattr *attr);
   1031extern int ceph_setattr(struct user_namespace *mnt_userns,
   1032			struct dentry *dentry, struct iattr *attr);
   1033extern int ceph_getattr(struct user_namespace *mnt_userns,
   1034			const struct path *path, struct kstat *stat,
   1035			u32 request_mask, unsigned int flags);
   1036void ceph_inode_shutdown(struct inode *inode);
   1037
   1038static inline bool ceph_inode_is_shutdown(struct inode *inode)
   1039{
   1040	unsigned long flags = READ_ONCE(ceph_inode(inode)->i_ceph_flags);
   1041	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
   1042	int state = READ_ONCE(fsc->mount_state);
   1043
   1044	return (flags & CEPH_I_SHUTDOWN) || state >= CEPH_MOUNT_SHUTDOWN;
   1045}
   1046
   1047/* xattr.c */
   1048int __ceph_setxattr(struct inode *, const char *, const void *, size_t, int);
   1049int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, size_t size);
   1050ssize_t __ceph_getxattr(struct inode *, const char *, void *, size_t);
   1051extern ssize_t ceph_listxattr(struct dentry *, char *, size_t);
   1052extern struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci);
   1053extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci);
   1054extern const struct xattr_handler *ceph_xattr_handlers[];
   1055
/*
 * Bundle of ACL and security-label state.  Which members exist depends
 * on CONFIG_CEPH_FS_POSIX_ACL and CONFIG_CEPH_FS_SECURITY_LABEL; only
 * @pagelist is always present.  This is the context type taken by
 * ceph_pre_init_acls(), ceph_init_inode_acls(),
 * ceph_security_init_secctx() and ceph_release_acl_sec_ctx().
 */
struct ceph_acl_sec_ctx {
#ifdef CONFIG_CEPH_FS_POSIX_ACL
	void *default_acl;
	void *acl;
#endif
#ifdef CONFIG_CEPH_FS_SECURITY_LABEL
	void *sec_ctx;
	u32 sec_ctxlen;	/* NOTE(review): presumably the length of sec_ctx -- confirm */
#endif
	struct ceph_pagelist *pagelist;
};
   1067
   1068#ifdef CONFIG_SECURITY
   1069extern bool ceph_security_xattr_deadlock(struct inode *in);
   1070extern bool ceph_security_xattr_wanted(struct inode *in);
   1071#else
   1072static inline bool ceph_security_xattr_deadlock(struct inode *in)
   1073{
   1074	return false;
   1075}
   1076static inline bool ceph_security_xattr_wanted(struct inode *in)
   1077{
   1078	return false;
   1079}
   1080#endif
   1081
   1082#ifdef CONFIG_CEPH_FS_SECURITY_LABEL
   1083extern int ceph_security_init_secctx(struct dentry *dentry, umode_t mode,
   1084				     struct ceph_acl_sec_ctx *ctx);
   1085static inline void ceph_security_invalidate_secctx(struct inode *inode)
   1086{
   1087	security_inode_invalidate_secctx(inode);
   1088}
   1089#else
   1090static inline int ceph_security_init_secctx(struct dentry *dentry, umode_t mode,
   1091					    struct ceph_acl_sec_ctx *ctx)
   1092{
   1093	return 0;
   1094}
   1095static inline void ceph_security_invalidate_secctx(struct inode *inode)
   1096{
   1097}
   1098#endif
   1099
   1100void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx);
   1101
   1102/* acl.c */
   1103#ifdef CONFIG_CEPH_FS_POSIX_ACL
   1104
   1105struct posix_acl *ceph_get_acl(struct inode *, int, bool);
   1106int ceph_set_acl(struct user_namespace *mnt_userns,
   1107		 struct inode *inode, struct posix_acl *acl, int type);
   1108int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
   1109		       struct ceph_acl_sec_ctx *as_ctx);
   1110void ceph_init_inode_acls(struct inode *inode,
   1111			  struct ceph_acl_sec_ctx *as_ctx);
   1112
   1113static inline void ceph_forget_all_cached_acls(struct inode *inode)
   1114{
   1115       forget_all_cached_acls(inode);
   1116}
   1117
   1118#else
   1119
   1120#define ceph_get_acl NULL
   1121#define ceph_set_acl NULL
   1122
   1123static inline int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
   1124				     struct ceph_acl_sec_ctx *as_ctx)
   1125{
   1126	return 0;
   1127}
   1128static inline void ceph_init_inode_acls(struct inode *inode,
   1129					struct ceph_acl_sec_ctx *as_ctx)
   1130{
   1131}
   1132static inline int ceph_acl_chmod(struct dentry *dentry, struct inode *inode)
   1133{
   1134	return 0;
   1135}
   1136
   1137static inline void ceph_forget_all_cached_acls(struct inode *inode)
   1138{
   1139}
   1140
   1141#endif
   1142
   1143/* caps.c */
   1144extern const char *ceph_cap_string(int c);
   1145extern void ceph_handle_caps(struct ceph_mds_session *session,
   1146			     struct ceph_msg *msg);
   1147extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
   1148				     struct ceph_cap_reservation *ctx);
   1149extern void ceph_add_cap(struct inode *inode,
   1150			 struct ceph_mds_session *session, u64 cap_id,
   1151			 unsigned issued, unsigned wanted,
   1152			 unsigned cap, unsigned seq, u64 realmino, int flags,
   1153			 struct ceph_cap **new_cap);
   1154extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
   1155extern void ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
   1156extern void __ceph_remove_caps(struct ceph_inode_info *ci);
   1157extern void ceph_put_cap(struct ceph_mds_client *mdsc,
   1158			 struct ceph_cap *cap);
   1159extern int ceph_is_any_caps(struct inode *inode);
   1160
   1161extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
   1162extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
   1163		      int datasync);
   1164extern void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
   1165					  struct ceph_mds_session *session);
   1166extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
   1167				    struct ceph_mds_session *session);
   1168void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session,
   1169				   struct ceph_inode_info *ci);
   1170extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
   1171					     int mds);
   1172extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
   1173				bool snap_rwsem_locked);
   1174extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
   1175extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
   1176extern void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had);
   1177extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
   1178					    int had);
   1179extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
   1180				       struct ceph_snap_context *snapc);
   1181extern void __ceph_remove_capsnap(struct inode *inode,
   1182				  struct ceph_cap_snap *capsnap,
   1183				  bool *wake_ci, bool *wake_mdsc);
   1184extern void ceph_remove_capsnap(struct inode *inode,
   1185				struct ceph_cap_snap *capsnap,
   1186				bool *wake_ci, bool *wake_mdsc);
   1187extern void ceph_flush_snaps(struct ceph_inode_info *ci,
   1188			     struct ceph_mds_session **psession);
   1189extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
   1190extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
   1191			    struct ceph_mds_session *session);
   1192extern unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
   1193extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
   1194extern int  ceph_drop_caps_for_unlink(struct inode *inode);
   1195extern int ceph_encode_inode_release(void **p, struct inode *inode,
   1196				     int mds, int drop, int unless, int force);
   1197extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
   1198				      struct inode *dir,
   1199				      int mds, int drop, int unless);
   1200
   1201extern int ceph_get_caps(struct file *filp, int need, int want,
   1202			 loff_t endoff, int *got);
   1203extern int ceph_try_get_caps(struct inode *inode,
   1204			     int need, int want, bool nonblock, int *got);
   1205
   1206/* for counting open files by mode */
   1207extern void ceph_get_fmode(struct ceph_inode_info *ci, int mode, int count);
   1208extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode, int count);
   1209extern void __ceph_touch_fmode(struct ceph_inode_info *ci,
   1210			       struct ceph_mds_client *mdsc, int fmode);
   1211
   1212/* addr.c */
   1213extern const struct address_space_operations ceph_aops;
   1214extern const struct netfs_request_ops ceph_netfs_ops;
   1215extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
   1216extern int ceph_uninline_data(struct file *file);
   1217extern int ceph_pool_perm_check(struct inode *inode, int need);
   1218extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
   1219int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate);
   1220
   1221/* file.c */
   1222extern const struct file_operations ceph_file_fops;
   1223
   1224extern int ceph_renew_caps(struct inode *inode, int fmode);
   1225extern int ceph_open(struct inode *inode, struct file *file);
   1226extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
   1227			    struct file *file, unsigned flags, umode_t mode);
   1228extern int ceph_release(struct inode *inode, struct file *filp);
   1229extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
   1230				  char *data, size_t len);
   1231
   1232/* dir.c */
   1233extern const struct file_operations ceph_dir_fops;
   1234extern const struct file_operations ceph_snapdir_fops;
   1235extern const struct inode_operations ceph_dir_iops;
   1236extern const struct inode_operations ceph_snapdir_iops;
   1237extern const struct dentry_operations ceph_dentry_ops;
   1238
   1239extern loff_t ceph_make_fpos(unsigned high, unsigned off, bool hash_order);
   1240extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry);
   1241extern struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req,
   1242			       struct dentry *dentry);
   1243extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
   1244					 struct dentry *dentry, int err);
   1245
   1246extern void __ceph_dentry_lease_touch(struct ceph_dentry_info *di);
   1247extern void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di);
   1248extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
   1249extern int ceph_trim_dentries(struct ceph_mds_client *mdsc);
   1250extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn);
   1251extern void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl);
   1252
   1253/* ioctl.c */
   1254extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
   1255
   1256/* export.c */
   1257extern const struct export_operations ceph_export_ops;
   1258struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino);
   1259
   1260/* locks.c */
   1261extern __init void ceph_flock_init(void);
   1262extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
   1263extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
   1264extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num);
   1265extern int ceph_encode_locks_to_buffer(struct inode *inode,
   1266				       struct ceph_filelock *flocks,
   1267				       int num_fcntl_locks,
   1268				       int num_flock_locks);
   1269extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
   1270				  struct ceph_pagelist *pagelist,
   1271				  int num_fcntl_locks, int num_flock_locks);
   1272
   1273/* debugfs.c */
   1274extern void ceph_fs_debugfs_init(struct ceph_fs_client *client);
   1275extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
   1276
/* quota.c */

/*
 * Selects which quota limit is of interest.  __ceph_has_quota() uses it
 * to choose between i_max_files, i_max_bytes, or either of the two.
 */
enum quota_get_realm {
	QUOTA_GET_MAX_FILES,	/* only the max_files limit */
	QUOTA_GET_MAX_BYTES,	/* only the max_bytes limit */
	QUOTA_GET_ANY		/* either limit */
};
   1284
   1285static inline bool __ceph_has_quota(struct ceph_inode_info *ci,
   1286				    enum quota_get_realm which)
   1287{
   1288	bool has_quota = false;
   1289
   1290	switch (which) {
   1291	case QUOTA_GET_MAX_BYTES:
   1292		has_quota = !!ci->i_max_bytes;
   1293		break;
   1294	case QUOTA_GET_MAX_FILES:
   1295		has_quota = !!ci->i_max_files;
   1296		break;
   1297	default:
   1298		has_quota = !!(ci->i_max_files || ci->i_max_bytes);
   1299	}
   1300	return has_quota;
   1301}
   1302
   1303extern void ceph_adjust_quota_realms_count(struct inode *inode, bool inc);
   1304
   1305static inline void __ceph_update_quota(struct ceph_inode_info *ci,
   1306				       u64 max_bytes, u64 max_files)
   1307{
   1308	bool had_quota, has_quota;
   1309	had_quota = __ceph_has_quota(ci, QUOTA_GET_ANY);
   1310	ci->i_max_bytes = max_bytes;
   1311	ci->i_max_files = max_files;
   1312	has_quota = __ceph_has_quota(ci, QUOTA_GET_ANY);
   1313
   1314	if (had_quota != has_quota)
   1315		ceph_adjust_quota_realms_count(&ci->netfs.inode, has_quota);
   1316}
   1317
   1318extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
   1319			      struct ceph_mds_session *session,
   1320			      struct ceph_msg *msg);
   1321extern bool ceph_quota_is_max_files_exceeded(struct inode *inode);
   1322extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new);
   1323extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode,
   1324					     loff_t newlen);
   1325extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
   1326						loff_t newlen);
   1327extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
   1328				     struct kstatfs *buf);
   1329extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
   1330
   1331#endif /* _FS_CEPH_SUPER_H */