cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

dm-zoned-reclaim.c (15170B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 Western Digital Corporation or its affiliates.
 *
 * This file is released under the GPL.
 */

#include "dm-zoned.h"

#include <linux/module.h>

#define	DM_MSG_PREFIX		"zoned reclaim"

struct dmz_reclaim {
	struct dmz_metadata     *metadata;

	struct delayed_work	work;
	struct workqueue_struct *wq;

	struct dm_kcopyd_client	*kc;
	struct dm_kcopyd_throttle kc_throttle;
	int			kc_err;

	int			dev_idx;

	unsigned long		flags;

	/* Last target access time */
	unsigned long		atime;
};

/*
 * Reclaim state flags.
 */
enum {
	DMZ_RECLAIM_KCOPY,
};

/*
 * Period of target BIO inactivity (in jiffies) after which
 * the target is considered idle.
 */
#define DMZ_IDLE_PERIOD			(10UL * HZ)

/*
 * Percentage of unmapped (free) random zones below which reclaim starts
 * even if the target is busy.
 */
#define DMZ_RECLAIM_LOW_UNMAP_ZONES	30

/*
 * Percentage of unmapped (free) random zones above which reclaim will
 * stop if the target is busy.
 */
#define DMZ_RECLAIM_HIGH_UNMAP_ZONES	50

/*
 * Align a sequential zone write pointer to chunk_block.
 */
static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone,
				sector_t block)
{
	struct dmz_metadata *zmd = zrc->metadata;
	struct dmz_dev *dev = zone->dev;
	sector_t wp_block = zone->wp_block;
	unsigned int nr_blocks;
	int ret;

	if (wp_block == block)
		return 0;

	if (wp_block > block)
		return -EIO;

	/*
	 * Zero out the space between the write
	 * pointer and the requested position.
	 */
	nr_blocks = block - wp_block;
	ret = blkdev_issue_zeroout(dev->bdev,
				   dmz_start_sect(zmd, zone) + dmz_blk2sect(wp_block),
				   dmz_blk2sect(nr_blocks), GFP_NOIO, 0);
	if (ret) {
		dmz_dev_err(dev,
			    "Align zone %u wp %llu to %llu (wp+%u) blocks failed %d",
			    zone->id, (unsigned long long)wp_block,
			    (unsigned long long)block, nr_blocks, ret);
		dmz_check_bdev(dev);
		return ret;
	}

	zone->wp_block = block;

	return 0;
}

/*
 * dm_kcopyd_copy end notification.
 */
static void dmz_reclaim_kcopy_end(int read_err, unsigned long write_err,
				  void *context)
{
	struct dmz_reclaim *zrc = context;

	if (read_err || write_err)
		zrc->kc_err = -EIO;
	else
		zrc->kc_err = 0;

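	/*
	 * Clear the in-flight flag and wake up the waiter in
	 * dmz_reclaim_copy(). wake_up_bit() checks the wait queue
	 * locklessly, so a barrier is needed between the atomic
	 * update and the wake-up.
	 */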
	clear_bit_unlock(DMZ_RECLAIM_KCOPY, &zrc->flags);
	smp_mb__after_atomic();
	wake_up_bit(&zrc->flags, DMZ_RECLAIM_KCOPY);
}

/*
 * Copy valid blocks of src_zone into dst_zone.
 */
static int dmz_reclaim_copy(struct dmz_reclaim *zrc,
			    struct dm_zone *src_zone, struct dm_zone *dst_zone)
{
	struct dmz_metadata *zmd = zrc->metadata;
	struct dm_io_region src, dst;
	sector_t block = 0, end_block;
	sector_t nr_blocks;
	sector_t src_zone_block;
	sector_t dst_zone_block;
	unsigned long flags = 0;
	int ret;

	if (dmz_is_seq(src_zone))
		end_block = src_zone->wp_block;
	else
		end_block = dmz_zone_nr_blocks(zmd);
	src_zone_block = dmz_start_block(zmd, src_zone);
	dst_zone_block = dmz_start_block(zmd, dst_zone);

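	/* Writes to a sequential zone must be issued in order. */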
	if (dmz_is_seq(dst_zone))
		flags |= BIT(DM_KCOPYD_WRITE_SEQ);

	while (block < end_block) {
		if (src_zone->dev->flags & DMZ_BDEV_DYING)
			return -EIO;
		if (dst_zone->dev->flags & DMZ_BDEV_DYING)
			return -EIO;

		if (dmz_reclaim_should_terminate(src_zone))
			return -EINTR;

		/* Get a valid region from the source zone */
		ret = dmz_first_valid_block(zmd, src_zone, &block);
		if (ret <= 0)
			return ret;
		nr_blocks = ret;

		/*
		 * If we are writing in a sequential zone, writes must be
		 * sequential, so zero out any hole between writes.
		 */
		if (dmz_is_seq(dst_zone)) {
			ret = dmz_reclaim_align_wp(zrc, dst_zone, block);
			if (ret)
				return ret;
		}

		src.bdev = src_zone->dev->bdev;
		src.sector = dmz_blk2sect(src_zone_block + block);
		src.count = dmz_blk2sect(nr_blocks);

		dst.bdev = dst_zone->dev->bdev;
		dst.sector = dmz_blk2sect(dst_zone_block + block);
		dst.count = src.count;

		/* Copy the valid region */
		set_bit(DMZ_RECLAIM_KCOPY, &zrc->flags);
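		/*
		 * dm_kcopyd_copy() is asynchronous: dmz_reclaim_kcopy_end()
		 * records the result in zrc->kc_err and clears the flag.
		 */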
		dm_kcopyd_copy(zrc->kc, &src, 1, &dst, flags,
			       dmz_reclaim_kcopy_end, zrc);

		/* Wait for copy to complete */
		wait_on_bit_io(&zrc->flags, DMZ_RECLAIM_KCOPY,
			       TASK_UNINTERRUPTIBLE);
		if (zrc->kc_err)
			return zrc->kc_err;

		block += nr_blocks;
		if (dmz_is_seq(dst_zone))
			dst_zone->wp_block = block;
	}

	return 0;
}

/*
 * Move the valid blocks of dzone's buffer zone into dzone (after its
 * write pointer) and free the buffer zone.
 */
static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone)
{
	struct dm_zone *bzone = dzone->bzone;
	sector_t chunk_block = dzone->wp_block;
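	/*
	 * dmz_do_reclaim() only calls this when the buffer zone's valid
	 * blocks all sit at or after the data zone write pointer, so
	 * merging can start at the current write pointer position.
	 */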
	struct dmz_metadata *zmd = zrc->metadata;
	int ret;

	DMDEBUG("(%s/%u): Chunk %u, move buf zone %u (weight %u) to data zone %u (weight %u)",
		dmz_metadata_label(zmd), zrc->dev_idx,
		dzone->chunk, bzone->id, dmz_weight(bzone),
		dzone->id, dmz_weight(dzone));

	/* Copy the buffer zone's valid blocks into the data zone */
	ret = dmz_reclaim_copy(zrc, bzone, dzone);
	if (ret < 0)
		return ret;

	dmz_lock_flush(zmd);

	/* Validate copied blocks */
	ret = dmz_merge_valid_blocks(zmd, bzone, dzone, chunk_block);
	if (ret == 0) {
		/* Free the buffer zone */
		dmz_invalidate_blocks(zmd, bzone, 0, dmz_zone_nr_blocks(zmd));
		dmz_lock_map(zmd);
		dmz_unmap_zone(zmd, bzone);
		dmz_unlock_zone_reclaim(dzone);
		dmz_free_zone(zmd, bzone);
		dmz_unlock_map(zmd);
	}

	dmz_unlock_flush(zmd);

	return ret;
}

/*
 * Merge valid blocks of dzone into its buffer zone and free dzone.
 */
static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
{
	unsigned int chunk = dzone->chunk;
	struct dm_zone *bzone = dzone->bzone;
	struct dmz_metadata *zmd = zrc->metadata;
	int ret = 0;

	DMDEBUG("(%s/%u): Chunk %u, move data zone %u (weight %u) to buf zone %u (weight %u)",
		dmz_metadata_label(zmd), zrc->dev_idx,
		chunk, dzone->id, dmz_weight(dzone),
		bzone->id, dmz_weight(bzone));

	/* Copy the data zone's valid blocks into the buffer zone */
	ret = dmz_reclaim_copy(zrc, dzone, bzone);
	if (ret < 0)
		return ret;

	dmz_lock_flush(zmd);

	/* Validate copied blocks */
	ret = dmz_merge_valid_blocks(zmd, dzone, bzone, 0);
	if (ret == 0) {
		/*
		 * Free the data zone and remap the chunk to
		 * the buffer zone.
		 */
		dmz_invalidate_blocks(zmd, dzone, 0, dmz_zone_nr_blocks(zmd));
		dmz_lock_map(zmd);
		dmz_unmap_zone(zmd, bzone);
		dmz_unmap_zone(zmd, dzone);
		dmz_unlock_zone_reclaim(dzone);
		dmz_free_zone(zmd, dzone);
		dmz_map_zone(zmd, bzone, chunk);
		dmz_unlock_map(zmd);
	}

	dmz_unlock_flush(zmd);

	return ret;
}

/*
 * Move valid blocks of the random data zone dzone into a free sequential zone.
 * Once blocks are moved, remap the zone chunk to the sequential zone.
 */
static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
{
	unsigned int chunk = dzone->chunk;
	struct dm_zone *szone = NULL;
	struct dmz_metadata *zmd = zrc->metadata;
	int ret;
	int alloc_flags = DMZ_ALLOC_SEQ;

	/* Get a free random or sequential zone */
	dmz_lock_map(zmd);
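	/*
	 * Prefer a free sequential zone; if none is available and cache
	 * zones are in use, fall back to a free random zone.
	 */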
again:
	szone = dmz_alloc_zone(zmd, zrc->dev_idx,
			       alloc_flags | DMZ_ALLOC_RECLAIM);
	if (!szone && alloc_flags == DMZ_ALLOC_SEQ && dmz_nr_cache_zones(zmd)) {
		alloc_flags = DMZ_ALLOC_RND;
		goto again;
	}
	dmz_unlock_map(zmd);
	if (!szone)
		return -ENOSPC;

	DMDEBUG("(%s/%u): Chunk %u, move %s zone %u (weight %u) to %s zone %u",
		dmz_metadata_label(zmd), zrc->dev_idx, chunk,
		dmz_is_cache(dzone) ? "cache" : "rnd",
		dzone->id, dmz_weight(dzone),
		dmz_is_rnd(szone) ? "rnd" : "seq", szone->id);

	/* Copy the data zone's valid blocks into the newly allocated zone */
	ret = dmz_reclaim_copy(zrc, dzone, szone);

	dmz_lock_flush(zmd);

	if (ret == 0) {
		/* Validate copied blocks */
		ret = dmz_copy_valid_blocks(zmd, dzone, szone);
	}
	if (ret) {
		/* Free the sequential zone */
		dmz_lock_map(zmd);
		dmz_free_zone(zmd, szone);
		dmz_unlock_map(zmd);
	} else {
		/* Free the data zone and remap the chunk */
		dmz_invalidate_blocks(zmd, dzone, 0, dmz_zone_nr_blocks(zmd));
		dmz_lock_map(zmd);
		dmz_unmap_zone(zmd, dzone);
		dmz_unlock_zone_reclaim(dzone);
		dmz_free_zone(zmd, dzone);
		dmz_map_zone(zmd, szone, chunk);
		dmz_unlock_map(zmd);
	}

	dmz_unlock_flush(zmd);

	return ret;
}

/*
 * Reclaim an empty zone.
 */
static void dmz_reclaim_empty(struct dmz_reclaim *zrc, struct dm_zone *dzone)
{
	struct dmz_metadata *zmd = zrc->metadata;

	dmz_lock_flush(zmd);
	dmz_lock_map(zmd);
	dmz_unmap_zone(zmd, dzone);
	dmz_unlock_zone_reclaim(dzone);
	dmz_free_zone(zmd, dzone);
	dmz_unlock_map(zmd);
	dmz_unlock_flush(zmd);
}

/*
 * Test if the target device is idle.
 */
static inline int dmz_target_idle(struct dmz_reclaim *zrc)
{
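	/* True if the last BIO was seen more than DMZ_IDLE_PERIOD ago. */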
	return time_is_before_jiffies(zrc->atime + DMZ_IDLE_PERIOD);
}

/*
 * Find a candidate zone for reclaim and process it.
 */
static int dmz_do_reclaim(struct dmz_reclaim *zrc)
{
	struct dmz_metadata *zmd = zrc->metadata;
	struct dm_zone *dzone;
	struct dm_zone *rzone;
	unsigned long start;
	int ret;

	/* Get a data zone */
	dzone = dmz_get_zone_for_reclaim(zmd, zrc->dev_idx,
					 dmz_target_idle(zrc));
	if (!dzone) {
		DMDEBUG("(%s/%u): No zone found to reclaim",
			dmz_metadata_label(zmd), zrc->dev_idx);
		return -EBUSY;
	}
	rzone = dzone;

	start = jiffies;
	if (dmz_is_cache(dzone) || dmz_is_rnd(dzone)) {
		if (!dmz_weight(dzone)) {
			/* Empty zone */
			dmz_reclaim_empty(zrc, dzone);
			ret = 0;
		} else {
			/*
			 * Reclaim the random data zone by moving its
			 * valid data blocks to a free sequential zone.
			 */
			ret = dmz_reclaim_rnd_data(zrc, dzone);
		}
	} else {
		struct dm_zone *bzone = dzone->bzone;
		sector_t chunk_block = 0;

		ret = dmz_first_valid_block(zmd, bzone, &chunk_block);
		if (ret < 0)
			goto out;

		if (ret == 0 || chunk_block >= dzone->wp_block) {
			/*
			 * The buffer zone is empty or its valid blocks are
			 * after the data zone write pointer.
			 */
			ret = dmz_reclaim_buf(zrc, dzone);
			rzone = bzone;
		} else {
			/*
			 * Reclaim the data zone by merging it into the
			 * buffer zone so that the buffer zone itself can
			 * be later reclaimed.
			 */
			ret = dmz_reclaim_seq_data(zrc, dzone);
		}
	}
out:
	if (ret) {
		if (ret == -EINTR)
			DMDEBUG("(%s/%u): reclaim zone %u interrupted",
				dmz_metadata_label(zmd), zrc->dev_idx,
				rzone->id);
		else
			DMDEBUG("(%s/%u): Failed to reclaim zone %u, err %d",
				dmz_metadata_label(zmd), zrc->dev_idx,
				rzone->id, ret);
		dmz_unlock_zone_reclaim(dzone);
		return ret;
	}

	ret = dmz_flush_metadata(zrc->metadata);
	if (ret) {
		DMDEBUG("(%s/%u): Metadata flush for zone %u failed, err %d",
			dmz_metadata_label(zmd), zrc->dev_idx, rzone->id, ret);
		return ret;
	}

	DMDEBUG("(%s/%u): Reclaimed zone %u in %u ms",
		dmz_metadata_label(zmd), zrc->dev_idx,
		rzone->id, jiffies_to_msecs(jiffies - start));
	return 0;
}

static unsigned int dmz_reclaim_percentage(struct dmz_reclaim *zrc)
{
	struct dmz_metadata *zmd = zrc->metadata;
	unsigned int nr_cache = dmz_nr_cache_zones(zmd);
	unsigned int nr_unmap, nr_zones;

	if (nr_cache) {
		nr_zones = nr_cache;
		nr_unmap = dmz_nr_unmap_cache_zones(zmd);
	} else {
		nr_zones = dmz_nr_rnd_zones(zmd, zrc->dev_idx);
		nr_unmap = dmz_nr_unmap_rnd_zones(zmd, zrc->dev_idx);
	}
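	/* One or fewer unmapped zones left: report 0% to force reclaim. */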
	if (nr_unmap <= 1)
		return 0;
	return nr_unmap * 100 / nr_zones;
}

/*
 * Test if reclaim is necessary.
 */
static bool dmz_should_reclaim(struct dmz_reclaim *zrc, unsigned int p_unmap)
{
	unsigned int nr_reclaim;

	nr_reclaim = dmz_nr_rnd_zones(zrc->metadata, zrc->dev_idx);

	if (dmz_nr_cache_zones(zrc->metadata)) {
		/*
		 * The first device in a multi-device
		 * setup only contains cache zones, so
		 * never start reclaim there.
		 */
		if (zrc->dev_idx == 0)
			return false;
		nr_reclaim += dmz_nr_cache_zones(zrc->metadata);
	}

	/* Reclaim when idle */
	if (dmz_target_idle(zrc) && nr_reclaim)
		return true;

	/* If there are still plenty of unmapped zones, do not reclaim */
	if (p_unmap >= DMZ_RECLAIM_HIGH_UNMAP_ZONES)
		return false;

	/*
	 * If the percentage of unmapped zones is low,
	 * reclaim even if the target is busy.
	 */
	return p_unmap <= DMZ_RECLAIM_LOW_UNMAP_ZONES;
}

/*
 * Reclaim work function.
 */
static void dmz_reclaim_work(struct work_struct *work)
{
	struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, work.work);
	struct dmz_metadata *zmd = zrc->metadata;
	unsigned int p_unmap;
	int ret;

	if (dmz_dev_is_dying(zmd))
		return;

	p_unmap = dmz_reclaim_percentage(zrc);
	if (!dmz_should_reclaim(zrc, p_unmap)) {
		mod_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
		return;
	}

	/*
	 * We need to start reclaiming random zones: set up zone copy
	 * throttling to go fast if we are very low on free random zones,
	 * and slower if some free random zones remain, to limit the
	 * impact on the user workload as much as possible.
	 */
	if (dmz_target_idle(zrc) || p_unmap < DMZ_RECLAIM_LOW_UNMAP_ZONES / 2) {
		/* Idle or very low percentage: go fast */
		zrc->kc_throttle.throttle = 100;
	} else {
		/* Busy but we still have some random zone: throttle */
		zrc->kc_throttle.throttle = min(75U, 100U - p_unmap / 2);
	}

	DMDEBUG("(%s/%u): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u random)",
		dmz_metadata_label(zmd), zrc->dev_idx,
		zrc->kc_throttle.throttle,
		(dmz_target_idle(zrc) ? "Idle" : "Busy"),
		p_unmap, dmz_nr_unmap_cache_zones(zmd),
		dmz_nr_cache_zones(zmd),
		dmz_nr_unmap_rnd_zones(zmd, zrc->dev_idx),
		dmz_nr_rnd_zones(zmd, zrc->dev_idx));

	ret = dmz_do_reclaim(zrc);
	if (ret && ret != -EINTR) {
		if (!dmz_check_dev(zmd))
			return;
	}

	dmz_schedule_reclaim(zrc);
}

/*
 * Initialize reclaim.
 */
int dmz_ctr_reclaim(struct dmz_metadata *zmd,
		    struct dmz_reclaim **reclaim, int idx)
{
	struct dmz_reclaim *zrc;
	int ret;

	zrc = kzalloc(sizeof(struct dmz_reclaim), GFP_KERNEL);
	if (!zrc)
		return -ENOMEM;

	zrc->metadata = zmd;
	zrc->atime = jiffies;
	zrc->dev_idx = idx;

	/* Reclaim kcopyd client */
	zrc->kc = dm_kcopyd_client_create(&zrc->kc_throttle);
	if (IS_ERR(zrc->kc)) {
		ret = PTR_ERR(zrc->kc);
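		/* Reset to NULL so the error path below skips dm_kcopyd_client_destroy(). */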
		zrc->kc = NULL;
		goto err;
	}

	/* Reclaim work */
	INIT_DELAYED_WORK(&zrc->work, dmz_reclaim_work);
	zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s_%d", WQ_MEM_RECLAIM,
					  dmz_metadata_label(zmd), idx);
	if (!zrc->wq) {
		ret = -ENOMEM;
		goto err;
	}

	*reclaim = zrc;
	queue_delayed_work(zrc->wq, &zrc->work, 0);

	return 0;
err:
	if (zrc->kc)
		dm_kcopyd_client_destroy(zrc->kc);
	kfree(zrc);

	return ret;
}

/*
 * Terminate reclaim.
 */
void dmz_dtr_reclaim(struct dmz_reclaim *zrc)
{
	cancel_delayed_work_sync(&zrc->work);
	destroy_workqueue(zrc->wq);
	dm_kcopyd_client_destroy(zrc->kc);
	kfree(zrc);
}

/*
 * Suspend reclaim.
 */
void dmz_suspend_reclaim(struct dmz_reclaim *zrc)
{
	cancel_delayed_work_sync(&zrc->work);
}

/*
 * Resume reclaim.
 */
void dmz_resume_reclaim(struct dmz_reclaim *zrc)
{
	queue_delayed_work(zrc->wq, &zrc->work, DMZ_IDLE_PERIOD);
}

/*
 * BIO accounting.
 */
void dmz_reclaim_bio_acc(struct dmz_reclaim *zrc)
{
	zrc->atime = jiffies;
}

/*
 * Start reclaim if necessary.
 */
void dmz_schedule_reclaim(struct dmz_reclaim *zrc)
{
	unsigned int p_unmap = dmz_reclaim_percentage(zrc);

	if (dmz_should_reclaim(zrc, p_unmap))
		mod_delayed_work(zrc->wq, &zrc->work, 0);
}