cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

osdmap.h (9274B)


/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FS_CEPH_OSDMAP_H
#define _FS_CEPH_OSDMAP_H

#include <linux/rbtree.h>
#include <linux/ceph/types.h>
#include <linux/ceph/decode.h>
#include <linux/crush/crush.h>

/*
 * The osd map describes the current membership of the osd cluster and
 * specifies the mapping of objects to placement groups and placement
 * groups to (sets of) osds.  That is, it completely specifies the
 * (desired) distribution of all data objects in the system at some
 * point in time.
 *
 * Each map version is identified by an epoch, which increases monotonically.
 *
 * The map can be updated either via an incremental map (diff) describing
 * the change between two successive epochs, or as a fully encoded map.
 */
struct ceph_pg {
	uint64_t pool;
	uint32_t seed;
};

#define CEPH_SPG_NOSHARD	-1

struct ceph_spg {
	struct ceph_pg pgid;
	s8 shard;
};

int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs);
int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs);

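/*
 * Illustrative sketch, not part of the original header: ceph_pg_compare()
 * imposes a total order on pgids, so an equality check reduces to a zero
 * result.  (A pgid is conventionally rendered as "<pool>.<hex seed>",
 * e.g. pool 1, seed 0x2a -> "1.2a".)  The helper name is made up.
 */
static inline bool example_pg_equal(const struct ceph_pg *lhs,
				    const struct ceph_pg *rhs)
{
	return ceph_pg_compare(lhs, rhs) == 0;
}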
#define CEPH_POOL_FLAG_HASHPSPOOL	(1ULL << 0) /* hash pg seed and pool id
						       together */
#define CEPH_POOL_FLAG_FULL		(1ULL << 1) /* pool is full */
#define CEPH_POOL_FLAG_FULL_QUOTA	(1ULL << 10) /* pool ran out of quota,
							will set FULL too */
#define CEPH_POOL_FLAG_NEARFULL		(1ULL << 11) /* pool is nearfull */

struct ceph_pg_pool_info {
	struct rb_node node;
	s64 id;
	u8 type; /* CEPH_POOL_TYPE_* */
	u8 size;
	u8 min_size;
	u8 crush_ruleset;
	u8 object_hash;
	u32 last_force_request_resend;
	u32 pg_num, pgp_num;
	int pg_num_mask, pgp_num_mask;
	s64 read_tier;
	s64 write_tier; /* wins for read+write ops */
	u64 flags; /* CEPH_POOL_FLAG_* */
	char *name;

	bool was_full;  /* for handle_one_map() */
};

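/*
 * Illustrative sketch, not part of the original header: given a pool
 * info looked up from the osdmap, a caller that wants to fail writes
 * early might test the full/quota flags like this.  The helper name is
 * made up for illustration.
 */
static inline bool example_pool_rejects_writes(const struct ceph_pg_pool_info *pi)
{
	return pi->flags & (CEPH_POOL_FLAG_FULL | CEPH_POOL_FLAG_FULL_QUOTA);
}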
static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool)
{
	switch (pool->type) {
	case CEPH_POOL_TYPE_REP:
		return true;
	case CEPH_POOL_TYPE_EC:
		return false;
	default:
		BUG();
	}
}

struct ceph_object_locator {
	s64 pool;
	struct ceph_string *pool_ns;
};

static inline void ceph_oloc_init(struct ceph_object_locator *oloc)
{
	oloc->pool = -1;
	oloc->pool_ns = NULL;
}

static inline bool ceph_oloc_empty(const struct ceph_object_locator *oloc)
{
	return oloc->pool == -1;
}

void ceph_oloc_copy(struct ceph_object_locator *dest,
		    const struct ceph_object_locator *src);
void ceph_oloc_destroy(struct ceph_object_locator *oloc);

/*
 * 51-char inline_name is long enough for all cephfs and all but one
 * rbd requests: <imgname> in "<imgname>.rbd"/"rbd_id.<imgname>" can be
 * arbitrarily long (~PAGE_SIZE).  It's done once during rbd map; all
 * other rbd requests fit into inline_name.
 *
 * Makes ceph_object_id 64 bytes on 64-bit.
 */
#define CEPH_OID_INLINE_LEN 52

/*
 * Both inline and external buffers have space for a NUL-terminator,
 * which is carried around.  It's not required though - RADOS object
 * names don't have to be NUL-terminated and may contain NULs.
 */
struct ceph_object_id {
	char *name;
	char inline_name[CEPH_OID_INLINE_LEN];
	int name_len;
};

#define __CEPH_OID_INITIALIZER(oid) { .name = (oid).inline_name }

#define CEPH_DEFINE_OID_ONSTACK(oid)				\
	struct ceph_object_id oid = __CEPH_OID_INITIALIZER(oid)

static inline void ceph_oid_init(struct ceph_object_id *oid)
{
	*oid = (struct ceph_object_id) __CEPH_OID_INITIALIZER(*oid);
}

static inline bool ceph_oid_empty(const struct ceph_object_id *oid)
{
	return oid->name == oid->inline_name && !oid->name_len;
}

void ceph_oid_copy(struct ceph_object_id *dest,
		   const struct ceph_object_id *src);
__printf(2, 3)
void ceph_oid_printf(struct ceph_object_id *oid, const char *fmt, ...);
__printf(3, 4)
int ceph_oid_aprintf(struct ceph_object_id *oid, gfp_t gfp,
		     const char *fmt, ...);
void ceph_oid_destroy(struct ceph_object_id *oid);

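/*
 * Illustrative sketch, not part of the original header: a short, fixed
 * name fits into inline_name via ceph_oid_printf(), while an arbitrarily
 * long name should go through ceph_oid_aprintf(), which may allocate an
 * external buffer that is released with ceph_oid_destroy().  The helper
 * name and the object name format below are made up for illustration.
 */
static inline int example_build_oid(struct ceph_object_id *oid,
				    const char *suffix, gfp_t gfp)
{
	ceph_oid_init(oid);
	return ceph_oid_aprintf(oid, gfp, "example_prefix.%s", suffix);
}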
struct workspace_manager {
	struct list_head idle_ws;
	spinlock_t ws_lock;
	/* Number of free workspaces */
	int free_ws;
	/* Total number of allocated workspaces */
	atomic_t total_ws;
	/* Waiters for a free workspace */
	wait_queue_head_t ws_wait;
};

struct ceph_pg_mapping {
	struct rb_node node;
	struct ceph_pg pgid;

	union {
		struct {
			int len;
			int osds[];
		} pg_temp, pg_upmap;
		struct {
			int osd;
		} primary_temp;
		struct {
			int len;
			int from_to[][2];
		} pg_upmap_items;
	};
};

struct ceph_osdmap {
	struct ceph_fsid fsid;
	u32 epoch;
	struct ceph_timespec created, modified;

	u32 flags;         /* CEPH_OSDMAP_* */

	u32 max_osd;       /* size of osd_state, _offload, _addr arrays */
	u32 *osd_state;    /* CEPH_OSD_* */
	u32 *osd_weight;   /* 0 = failed, 0x10000 = 100% normal */
	struct ceph_entity_addr *osd_addr;

	struct rb_root pg_temp;
	struct rb_root primary_temp;

	/* remap (post-CRUSH, pre-up) */
	struct rb_root pg_upmap;	/* PG := raw set */
	struct rb_root pg_upmap_items;	/* from -> to within raw set */

	u32 *osd_primary_affinity;

	struct rb_root pg_pools;
	u32 pool_max;

	/* the CRUSH map specifies the mapping of placement groups to
	 * the list of osds that store+replicate them. */
	struct crush_map *crush;

	struct workspace_manager crush_wsm;
};

static inline bool ceph_osd_exists(struct ceph_osdmap *map, int osd)
{
	return osd >= 0 && osd < map->max_osd &&
	       (map->osd_state[osd] & CEPH_OSD_EXISTS);
}

static inline bool ceph_osd_is_up(struct ceph_osdmap *map, int osd)
{
	return ceph_osd_exists(map, osd) &&
	       (map->osd_state[osd] & CEPH_OSD_UP);
}

static inline bool ceph_osd_is_down(struct ceph_osdmap *map, int osd)
{
	return !ceph_osd_is_up(map, osd);
}

char *ceph_osdmap_state_str(char *str, int len, u32 state);
extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd);

static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
						     int osd)
{
	if (osd >= map->max_osd)
		return NULL;
	return &map->osd_addr[osd];
}

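/*
 * Illustrative sketch, not part of the original header: walking the osd
 * state array with the helpers above, e.g. to count how many osds are
 * currently up.  The helper name is made up for illustration.
 */
static inline int example_count_up_osds(struct ceph_osdmap *map)
{
	int osd, up = 0;

	for (osd = 0; osd < map->max_osd; osd++)
		if (ceph_osd_is_up(map, osd))
			up++;

	return up;
}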
#define CEPH_PGID_ENCODING_LEN		(1 + 8 + 4 + 4)

static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
{
	__u8 version;

	if (!ceph_has_room(p, end, CEPH_PGID_ENCODING_LEN)) {
		pr_warn("incomplete pg encoding\n");
		return -EINVAL;
	}
	version = ceph_decode_8(p);
	if (version > 1) {
		pr_warn("do not understand pg encoding %d > 1\n",
			(int)version);
		return -EINVAL;
	}

	pgid->pool = ceph_decode_64(p);
	pgid->seed = ceph_decode_32(p);
	*p += 4;	/* skip deprecated preferred value */

	return 0;
}

struct ceph_osdmap *ceph_osdmap_alloc(void);
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2);
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
					     struct ceph_osdmap *map);
extern void ceph_osdmap_destroy(struct ceph_osdmap *map);

struct ceph_osds {
	int osds[CEPH_PG_MAX_SIZE];
	int size;
	int primary; /* id, NOT index */
};

static inline void ceph_osds_init(struct ceph_osds *set)
{
	set->size = 0;
	set->primary = -1;
}

void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src);

bool ceph_pg_is_split(const struct ceph_pg *pgid, u32 old_pg_num,
		      u32 new_pg_num);
bool ceph_is_new_interval(const struct ceph_osds *old_acting,
			  const struct ceph_osds *new_acting,
			  const struct ceph_osds *old_up,
			  const struct ceph_osds *new_up,
			  int old_size,
			  int new_size,
			  int old_min_size,
			  int new_min_size,
			  u32 old_pg_num,
			  u32 new_pg_num,
			  bool old_sort_bitwise,
			  bool new_sort_bitwise,
			  bool old_recovery_deletes,
			  bool new_recovery_deletes,
			  const struct ceph_pg *pgid);
bool ceph_osds_changed(const struct ceph_osds *old_acting,
		       const struct ceph_osds *new_acting,
		       bool any_change);

void __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
				 const struct ceph_object_id *oid,
				 const struct ceph_object_locator *oloc,
				 struct ceph_pg *raw_pgid);
int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
			      const struct ceph_object_id *oid,
			      const struct ceph_object_locator *oloc,
			      struct ceph_pg *raw_pgid);

void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
			       struct ceph_pg_pool_info *pi,
			       const struct ceph_pg *raw_pgid,
			       struct ceph_osds *up,
			       struct ceph_osds *acting);
bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
			      struct ceph_pg_pool_info *pi,
			      const struct ceph_pg *raw_pgid,
			      struct ceph_spg *spgid);
int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
			      const struct ceph_pg *raw_pgid);

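/*
 * Illustrative sketch, not part of the original header: the object ->
 * placement group -> osd pipeline described at the top of this file,
 * reduced to finding the acting primary for a single object.  The
 * helper name is made up for illustration.
 */
static inline int example_object_to_acting_primary(struct ceph_osdmap *osdmap,
						   const struct ceph_object_id *oid,
						   const struct ceph_object_locator *oloc)
{
	struct ceph_pg raw_pgid;
	int ret;

	ret = ceph_object_locator_to_pg(osdmap, oid, oloc, &raw_pgid);
	if (ret)
		return ret;

	/* ceph_pg_to_acting_primary() returns an osd id, or -1 if none */
	return ceph_pg_to_acting_primary(osdmap, &raw_pgid);
}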
struct crush_loc {
	char *cl_type_name;
	char *cl_name;
};

struct crush_loc_node {
	struct rb_node cl_node;
	struct crush_loc cl_loc;  /* pointers into cl_data */
	char cl_data[];
};

int ceph_parse_crush_location(char *crush_location, struct rb_root *locs);
int ceph_compare_crush_locs(struct rb_root *locs1, struct rb_root *locs2);
void ceph_clear_crush_locs(struct rb_root *locs);

int ceph_get_crush_locality(struct ceph_osdmap *osdmap, int id,
			    struct rb_root *locs);

extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map,
						    u64 id);
extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id);

#endif