cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

blk_types.h (16382B)


/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Block data types and constants.  Directly include this file only to
 * break include dependency loop.
 */
#ifndef __LINUX_BLK_TYPES_H
#define __LINUX_BLK_TYPES_H

#include <linux/types.h>
#include <linux/bvec.h>
#include <linux/device.h>
#include <linux/ktime.h>

struct bio_set;
struct bio;
struct bio_integrity_payload;
struct page;
struct io_context;
struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);
struct bio_crypt_ctx;

/*
 * The basic unit of block I/O is a sector. It is used in a number of contexts
 * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9
 * bytes. Variables of type sector_t represent an offset or size that is a
 * multiple of 512 bytes. Hence these two constants.
 */
#ifndef SECTOR_SHIFT
#define SECTOR_SHIFT 9
#endif
#ifndef SECTOR_SIZE
#define SECTOR_SIZE (1 << SECTOR_SHIFT)
#endif

#define PAGE_SECTORS_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
#define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
#define SECTOR_MASK		(PAGE_SECTORS - 1)

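/*
 * Illustrative sketch, not part of the upstream header: converting a byte
 * count into a number of 512-byte sectors using the constants above,
 * rounding up so that a partial sector still occupies a whole one. The
 * helper name is made up for illustration only.
 */
static inline sector_t example_bytes_to_sectors(u64 bytes)
{
	return (sector_t)((bytes + SECTOR_SIZE - 1) >> SECTOR_SHIFT);
}
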
struct block_device {
	sector_t		bd_start_sect;
	sector_t		bd_nr_sectors;
	struct disk_stats __percpu *bd_stats;
	unsigned long		bd_stamp;
	bool			bd_read_only;	/* read-only policy */
	dev_t			bd_dev;
	atomic_t		bd_openers;
	struct inode *		bd_inode;	/* will die */
	struct super_block *	bd_super;
	void *			bd_claiming;
	struct device		bd_device;
	void *			bd_holder;
	int			bd_holders;
	bool			bd_write_holder;
	struct kobject		*bd_holder_dir;
	u8			bd_partno;
	spinlock_t		bd_size_lock; /* for bd_inode->i_size updates */
	struct gendisk *	bd_disk;
	struct request_queue *	bd_queue;

	/* The counter of freeze processes */
	int			bd_fsfreeze_count;
	/* Mutex for freeze */
	struct mutex		bd_fsfreeze_mutex;
	struct super_block	*bd_fsfreeze_sb;

	struct partition_meta_info *bd_meta_info;
#ifdef CONFIG_FAIL_MAKE_REQUEST
	bool			bd_make_it_fail;
#endif
} __randomize_layout;

#define bdev_whole(_bdev) \
	((_bdev)->bd_disk->part0)

#define dev_to_bdev(device) \
	container_of((device), struct block_device, bd_device)

#define bdev_kobj(_bdev) \
	(&((_bdev)->bd_device.kobj))

/*
 * Block error status values.  See block/blk-core:blk_errors for the details.
 * Alpha cannot write a byte atomically, so we need to use a 32-bit value.
 */
#if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__)
typedef u32 __bitwise blk_status_t;
typedef u32 blk_short_t;
#else
typedef u8 __bitwise blk_status_t;
typedef u16 blk_short_t;
#endif
#define	BLK_STS_OK 0
#define BLK_STS_NOTSUPP		((__force blk_status_t)1)
#define BLK_STS_TIMEOUT		((__force blk_status_t)2)
#define BLK_STS_NOSPC		((__force blk_status_t)3)
#define BLK_STS_TRANSPORT	((__force blk_status_t)4)
#define BLK_STS_TARGET		((__force blk_status_t)5)
#define BLK_STS_NEXUS		((__force blk_status_t)6)
#define BLK_STS_MEDIUM		((__force blk_status_t)7)
#define BLK_STS_PROTECTION	((__force blk_status_t)8)
#define BLK_STS_RESOURCE	((__force blk_status_t)9)
#define BLK_STS_IOERR		((__force blk_status_t)10)

/* hack for device mapper, don't use elsewhere: */
#define BLK_STS_DM_REQUEUE    ((__force blk_status_t)11)

/*
 * BLK_STS_AGAIN should only be returned if RQF_NOWAIT is set
 * and the bio would block (cf bio_wouldblock_error())
 */
#define BLK_STS_AGAIN		((__force blk_status_t)12)

/*
 * BLK_STS_DEV_RESOURCE is returned from the driver to the block layer if
 * device related resources are unavailable, but the driver can guarantee
 * that the queue will be rerun in the future once resources become
 * available again. This is typically the case for device specific
 * resources that are consumed for IO. If the driver fails allocating these
 * resources, we know that inflight (or pending) IO will free these
 * resources upon completion.
 *
 * This is different from BLK_STS_RESOURCE in that it explicitly references
 * a device specific resource. For resources of wider scope, allocation
 * failure can happen without having pending IO. This means that we can't
 * rely on request completions freeing these resources, as IO may not be in
 * flight. Examples of that are kernel memory allocations, DMA mappings, or
 * any other system wide resources.
 */
#define BLK_STS_DEV_RESOURCE	((__force blk_status_t)13)

/*
 * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone
 * related resources are unavailable, but the driver can guarantee the queue
 * will be rerun in the future once the resources become available again.
 *
 * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references
 * a zone specific resource and IO to a different zone on the same device could
 * still be served. Examples of that are zones that are write-locked, but a read
 * to the same zone could be served.
 */
#define BLK_STS_ZONE_RESOURCE	((__force blk_status_t)14)

/*
 * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion
 * path if the device returns a status indicating that too many zone resources
 * are currently open. The same command should be successful if resubmitted
 * after the number of open zones decreases below the device's limits, which is
 * reported in the request_queue's max_open_zones.
 */
#define BLK_STS_ZONE_OPEN_RESOURCE	((__force blk_status_t)15)

/*
 * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion
 * path if the device returns a status indicating that too many zone resources
 * are currently active. The same command should be successful if resubmitted
 * after the number of active zones decreases below the device's limits, which
 * is reported in the request_queue's max_active_zones.
 */
#define BLK_STS_ZONE_ACTIVE_RESOURCE	((__force blk_status_t)16)

/*
 * BLK_STS_OFFLINE is returned from the driver when the target device is offline
 * or is being taken offline. This could help differentiate the case where a
 * device is intentionally being shut down from a real I/O error.
 */
#define BLK_STS_OFFLINE		((__force blk_status_t)17)

/**
 * blk_path_error - returns true if error may be path related
 * @error: status the request was completed with
 *
 * Description:
 *     This classifies block error status into non-retryable errors and ones
 *     that may be successful if retried on a failover path.
 *
 * Return:
 *     %false - retrying failover path will not help
 *     %true  - may succeed if retried
 */
static inline bool blk_path_error(blk_status_t error)
{
	switch (error) {
	case BLK_STS_NOTSUPP:
	case BLK_STS_NOSPC:
	case BLK_STS_TARGET:
	case BLK_STS_NEXUS:
	case BLK_STS_MEDIUM:
	case BLK_STS_PROTECTION:
		return false;
	}

	/* Anything else could be a path failure, so should be retried */
	return true;
}

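/*
 * Illustrative sketch, not part of the upstream header: a failover-style
 * caller would retry a failed request on another path only when
 * blk_path_error() classifies the status as potentially path related.
 * The helper name is made up for illustration only.
 */
static inline bool example_should_retry_on_other_path(blk_status_t status)
{
	return status != BLK_STS_OK && blk_path_error(status);
}
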
/*
 * From most significant bit:
 * 1 bit: reserved for other usage, see below
 * 12 bits: original size of bio
 * 51 bits: issue time of bio
 */
#define BIO_ISSUE_RES_BITS      1
#define BIO_ISSUE_SIZE_BITS     12
#define BIO_ISSUE_RES_SHIFT     (64 - BIO_ISSUE_RES_BITS)
#define BIO_ISSUE_SIZE_SHIFT    (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS)
#define BIO_ISSUE_TIME_MASK     ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1)
#define BIO_ISSUE_SIZE_MASK     \
	(((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT)
#define BIO_ISSUE_RES_MASK      (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1))

/* Reserved bit for blk-throtl */
#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63)

struct bio_issue {
	u64 value;
};

static inline u64 __bio_issue_time(u64 time)
{
	return time & BIO_ISSUE_TIME_MASK;
}

static inline u64 bio_issue_time(struct bio_issue *issue)
{
	return __bio_issue_time(issue->value);
}

static inline sector_t bio_issue_size(struct bio_issue *issue)
{
	return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT);
}

static inline void bio_issue_init(struct bio_issue *issue,
				       sector_t size)
{
	size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1;
	issue->value = ((issue->value & BIO_ISSUE_RES_MASK) |
			(ktime_get_ns() & BIO_ISSUE_TIME_MASK) |
			((u64)size << BIO_ISSUE_SIZE_SHIFT));
}

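/*
 * Illustrative sketch, not part of the upstream header: how the packed
 * bio_issue fields round-trip. A 4 KiB bio is 8 sectors; after
 * bio_issue_init() the size field reads back as 8, while the timestamp is
 * truncated to the low BIO_ISSUE_SIZE_SHIFT bits. Purely for illustration.
 */
static inline bool example_bio_issue_roundtrip(void)
{
	struct bio_issue issue = { .value = 0 };

	bio_issue_init(&issue, 8);		/* 8 sectors == 4 KiB */
	return bio_issue_size(&issue) == 8;	/* size field survives packing */
}
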
typedef unsigned int blk_qc_t;
#define BLK_QC_T_NONE		-1U

/*
 * main unit of I/O for the block layer and lower layers (ie drivers and
 * stacking drivers)
 */
struct bio {
	struct bio		*bi_next;	/* request queue link */
	struct block_device	*bi_bdev;
	unsigned int		bi_opf;		/* bottom bits REQ_OP, top bits
						 * req_flags.
						 */
	unsigned short		bi_flags;	/* BIO_* below */
	unsigned short		bi_ioprio;
	blk_status_t		bi_status;
	atomic_t		__bi_remaining;

	struct bvec_iter	bi_iter;

	blk_qc_t		bi_cookie;
	bio_end_io_t		*bi_end_io;
	void			*bi_private;
#ifdef CONFIG_BLK_CGROUP
	/*
	 * Represents the association of the css and request_queue for the bio.
	 * If a bio goes direct to device, it will not have a blkg as it will
	 * not have a request_queue associated with it.  The reference is put
	 * on release of the bio.
	 */
	struct blkcg_gq		*bi_blkg;
	struct bio_issue	bi_issue;
#ifdef CONFIG_BLK_CGROUP_IOCOST
	u64			bi_iocost_cost;
#endif
#endif

#ifdef CONFIG_BLK_INLINE_ENCRYPTION
	struct bio_crypt_ctx	*bi_crypt_context;
#endif

	union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
		struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
	};

	unsigned short		bi_vcnt;	/* how many bio_vec's */

	/*
	 * Everything starting with bi_max_vecs will be preserved by bio_reset()
	 */

	unsigned short		bi_max_vecs;	/* max bvl_vecs we can hold */

	atomic_t		__bi_cnt;	/* pin count */

	struct bio_vec		*bi_io_vec;	/* the actual vec list */

	struct bio_set		*bi_pool;

	/*
	 * We can inline a number of vecs at the end of the bio, to avoid
	 * double allocations for a small number of bio_vecs. This member
	 * MUST obviously be kept at the very end of the bio.
	 */
	struct bio_vec		bi_inline_vecs[];
};

#define BIO_RESET_BYTES		offsetof(struct bio, bi_max_vecs)
#define BIO_MAX_SECTORS		(UINT_MAX >> SECTOR_SHIFT)

/*
 * bio flags
 */
enum {
	BIO_NO_PAGE_REF,	/* don't put/release vec pages */
	BIO_CLONED,		/* doesn't own data */
	BIO_BOUNCED,		/* bio is a bounce bio */
	BIO_WORKINGSET,		/* contains userspace workingset pages */
	BIO_QUIET,		/* Make BIO Quiet */
	BIO_CHAIN,		/* chained bio, ->bi_remaining in effect */
	BIO_REFFED,		/* bio has elevated ->bi_cnt */
	BIO_THROTTLED,		/* This bio has already been subjected to
				 * throttling rules. Don't do it again. */
	BIO_TRACE_COMPLETION,	/* bio_endio() should trace the final completion
				 * of this bio. */
	BIO_CGROUP_ACCT,	/* has been accounted to a cgroup */
	BIO_QOS_THROTTLED,	/* bio went through rq_qos throttle path */
	BIO_QOS_MERGED,		/* but went through rq_qos merge path */
	BIO_REMAPPED,
	BIO_ZONE_WRITE_LOCKED,	/* Owns a zoned device zone write lock */
	BIO_FLAG_LAST
};

typedef __u32 __bitwise blk_mq_req_flags_t;

/*
 * Operations and flags common to the bio and request structures.
 * We use 8 bits for encoding the operation, and the remaining 24 for flags.
 *
 * The least significant bit of the operation number indicates the data
 * transfer direction:
 *
 *   - if the least significant bit is set transfers are TO the device
 *   - if the least significant bit is not set transfers are FROM the device
 *
 * If an operation does not transfer data the least significant bit has no
 * meaning.
 */
#define REQ_OP_BITS	8
#define REQ_OP_MASK	((1 << REQ_OP_BITS) - 1)
#define REQ_FLAG_BITS	24

enum req_opf {
	/* read sectors from the device */
	REQ_OP_READ		= 0,
	/* write sectors to the device */
	REQ_OP_WRITE		= 1,
	/* flush the volatile write cache */
	REQ_OP_FLUSH		= 2,
	/* discard sectors */
	REQ_OP_DISCARD		= 3,
	/* securely erase sectors */
	REQ_OP_SECURE_ERASE	= 5,
	/* write the zero filled sector many times */
	REQ_OP_WRITE_ZEROES	= 9,
	/* Open a zone */
	REQ_OP_ZONE_OPEN	= 10,
	/* Close a zone */
	REQ_OP_ZONE_CLOSE	= 11,
	/* Transition a zone to full */
	REQ_OP_ZONE_FINISH	= 12,
	/* write data at the current zone write pointer */
	REQ_OP_ZONE_APPEND	= 13,
	/* reset a zone write pointer */
	REQ_OP_ZONE_RESET	= 15,
	/* reset all zones present on the device */
	REQ_OP_ZONE_RESET_ALL	= 17,

	/* Driver private requests */
	REQ_OP_DRV_IN		= 34,
	REQ_OP_DRV_OUT		= 35,

	REQ_OP_LAST,
};

enum req_flag_bits {
	__REQ_FAILFAST_DEV =	/* no driver retries of device errors */
		REQ_OP_BITS,
	__REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
	__REQ_FAILFAST_DRIVER,	/* no driver retries of driver errors */
	__REQ_SYNC,		/* request is sync (sync write or read) */
	__REQ_META,		/* metadata io request */
	__REQ_PRIO,		/* boost priority in cfq */
	__REQ_NOMERGE,		/* don't touch this for merging */
	__REQ_IDLE,		/* anticipate more IO after this one */
	__REQ_INTEGRITY,	/* I/O includes block integrity payload */
	__REQ_FUA,		/* forced unit access */
	__REQ_PREFLUSH,		/* request for cache flush */
	__REQ_RAHEAD,		/* read ahead, can fail anytime */
	__REQ_BACKGROUND,	/* background IO */
	__REQ_NOWAIT,           /* Don't wait if request will block */
	/*
	 * When a shared kthread needs to issue a bio for a cgroup, doing
	 * so synchronously can lead to priority inversions as the kthread
	 * can be trapped waiting for that cgroup.  CGROUP_PUNT flag makes
	 * submit_bio() punt the actual issuing to a dedicated per-blkcg
	 * work item to avoid such priority inversions.
	 */
	__REQ_CGROUP_PUNT,
	__REQ_POLLED,		/* caller polls for completion using bio_poll */
	__REQ_ALLOC_CACHE,	/* allocate IO from cache if available */
	__REQ_SWAP,		/* swap I/O */
	__REQ_DRV,		/* for driver use */

	/*
	 * Command specific flags, keep last:
	 */
	/* for REQ_OP_WRITE_ZEROES: */
	__REQ_NOUNMAP,		/* do not free blocks when zeroing */

	__REQ_NR_BITS,		/* stops here */
};

#define REQ_FAILFAST_DEV	(1ULL << __REQ_FAILFAST_DEV)
#define REQ_FAILFAST_TRANSPORT	(1ULL << __REQ_FAILFAST_TRANSPORT)
#define REQ_FAILFAST_DRIVER	(1ULL << __REQ_FAILFAST_DRIVER)
#define REQ_SYNC		(1ULL << __REQ_SYNC)
#define REQ_META		(1ULL << __REQ_META)
#define REQ_PRIO		(1ULL << __REQ_PRIO)
#define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
#define REQ_IDLE		(1ULL << __REQ_IDLE)
#define REQ_INTEGRITY		(1ULL << __REQ_INTEGRITY)
#define REQ_FUA			(1ULL << __REQ_FUA)
#define REQ_PREFLUSH		(1ULL << __REQ_PREFLUSH)
#define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
#define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
#define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
#define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)

#define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
#define REQ_POLLED		(1ULL << __REQ_POLLED)
#define REQ_ALLOC_CACHE		(1ULL << __REQ_ALLOC_CACHE)

#define REQ_DRV			(1ULL << __REQ_DRV)
#define REQ_SWAP		(1ULL << __REQ_SWAP)

#define REQ_FAILFAST_MASK \
	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)

#define REQ_NOMERGE_FLAGS \
	(REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)

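/*
 * Illustrative sketch, not part of the upstream header: packing and
 * unpacking a bi_opf-style value per the encoding described above, with the
 * low REQ_OP_BITS carrying the operation and the remaining bits carrying
 * REQ_* flags. The helper names are made up for illustration only.
 */
static inline unsigned int example_opf_pack(enum req_opf op, unsigned int flags)
{
	return (op & REQ_OP_MASK) | (flags & ~REQ_OP_MASK);
}

static inline enum req_opf example_opf_unpack_op(unsigned int opf)
{
	return opf & REQ_OP_MASK;
}
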
enum stat_group {
	STAT_READ,
	STAT_WRITE,
	STAT_DISCARD,
	STAT_FLUSH,

	NR_STAT_GROUPS
};

#define bio_op(bio) \
	((bio)->bi_opf & REQ_OP_MASK)

/* obsolete, don't use in new code */
static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
		unsigned op_flags)
{
	bio->bi_opf = op | op_flags;
}

static inline bool op_is_write(unsigned int op)
{
	return (op & 1);
}

/*
 * Check if the bio or request is one that needs special treatment in the
 * flush state machine.
 */
static inline bool op_is_flush(unsigned int op)
{
	return op & (REQ_FUA | REQ_PREFLUSH);
}

/*
 * Reads are always treated as synchronous, as are requests with the FUA or
 * PREFLUSH flag.  Other operations may be marked as synchronous using the
 * REQ_SYNC flag.
 */
static inline bool op_is_sync(unsigned int op)
{
	return (op & REQ_OP_MASK) == REQ_OP_READ ||
		(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
}

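/*
 * Illustrative sketch, not part of the upstream header: per the comment
 * above, a plain read counts as synchronous on its own, while a write is
 * only synchronous when combined with REQ_SYNC (or FUA/PREFLUSH).
 * The helper name is made up for illustration only.
 */
static inline bool example_sync_classification(void)
{
	return op_is_sync(REQ_OP_READ) &&
	       !op_is_sync(REQ_OP_WRITE) &&
	       op_is_sync(REQ_OP_WRITE | REQ_SYNC);
}
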
static inline bool op_is_discard(unsigned int op)
{
	return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
}

/*
 * Check if a bio or request operation is a zone management operation, with
 * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case
 * due to its different handling in the block layer and device response in
 * case of command failure.
 */
static inline bool op_is_zone_mgmt(enum req_opf op)
{
	switch (op & REQ_OP_MASK) {
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_OPEN:
	case REQ_OP_ZONE_CLOSE:
	case REQ_OP_ZONE_FINISH:
		return true;
	default:
		return false;
	}
}

static inline int op_stat_group(unsigned int op)
{
	if (op_is_discard(op))
		return STAT_DISCARD;
	return op_is_write(op);
}

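/*
 * Illustrative sketch, not part of the upstream header: op_stat_group()
 * relies on STAT_READ == 0 and STAT_WRITE == 1 lining up with the 0/1
 * result of op_is_write(), while discards get their own bucket.
 * The helper name is made up for illustration only.
 */
static inline bool example_stat_group_mapping(void)
{
	return op_stat_group(REQ_OP_READ) == STAT_READ &&
	       op_stat_group(REQ_OP_WRITE) == STAT_WRITE &&
	       op_stat_group(REQ_OP_DISCARD) == STAT_DISCARD;
}
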
struct blk_rq_stat {
	u64 mean;
	u64 min;
	u64 max;
	u32 nr_samples;
	u64 batch;
};

#endif /* __LINUX_BLK_TYPES_H */