buffered_read.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
buffered_read.c (12943B)
      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/* Network filesystem high-level buffered read support.
      3 *
      4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
      5 * Written by David Howells (dhowells@redhat.com)
      6 */
      7
      8#include <linux/export.h>
      9#include <linux/task_io_accounting_ops.h>
     10#include "internal.h"
     11
     12/*
     13 * Unlock the folios in a read operation.  We need to set PG_fscache on any
     14 * folios we're going to write back before we unlock them.
     15 */
     16void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
     17{
     18	struct netfs_io_subrequest *subreq;
     19	struct folio *folio;
     20	unsigned int iopos, account = 0;
     21	pgoff_t start_page = rreq->start / PAGE_SIZE;
     22	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
     23	bool subreq_failed = false;
     24
     25	XA_STATE(xas, &rreq->mapping->i_pages, start_page);
     26
     27	if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
     28		__clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
     29		list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
     30			__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
     31		}
     32	}
     33
     34	/* Walk through the pagecache and the I/O request lists simultaneously.
     35	 * We may have a mixture of cached and uncached sections and we only
     36	 * really want to write out the uncached sections.  This is slightly
     37	 * complicated by the possibility that we might have huge pages with a
     38	 * mixture inside.
     39	 */
     40	subreq = list_first_entry(&rreq->subrequests,
     41				  struct netfs_io_subrequest, rreq_link);
     42	iopos = 0;
     43	subreq_failed = (subreq->error < 0);
     44
     45	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);
     46
     47	rcu_read_lock();
     48	xas_for_each(&xas, folio, last_page) {
     49		unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
     50		unsigned int pgend = pgpos + folio_size(folio);
     51		bool pg_failed = false;
     52
     53		for (;;) {
     54			if (!subreq) {
     55				pg_failed = true;
     56				break;
     57			}
     58			if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
     59				folio_start_fscache(folio);
     60			pg_failed |= subreq_failed;
     61			if (pgend < iopos + subreq->len)
     62				break;
     63
     64			account += subreq->transferred;
     65			iopos += subreq->len;
     66			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
     67				subreq = list_next_entry(subreq, rreq_link);
     68				subreq_failed = (subreq->error < 0);
     69			} else {
     70				subreq = NULL;
     71				subreq_failed = false;
     72			}
     73			if (pgend == iopos)
     74				break;
     75		}
     76
     77		if (!pg_failed) {
     78			flush_dcache_folio(folio);
     79			folio_mark_uptodate(folio);
     80		}
     81
     82		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
     83			if (folio_index(folio) == rreq->no_unlock_folio &&
     84			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
     85				_debug("no unlock");
     86			else
     87				folio_unlock(folio);
     88		}
     89	}
     90	rcu_read_unlock();
     91
     92	task_io_account_read(account);
     93	if (rreq->netfs_ops->done)
     94		rreq->netfs_ops->done(rreq);
     95}
     96
     97static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
     98					 loff_t *_start, size_t *_len, loff_t i_size)
     99{
    100	struct netfs_cache_resources *cres = &rreq->cache_resources;
    101
    102	if (cres->ops && cres->ops->expand_readahead)
    103		cres->ops->expand_readahead(cres, _start, _len, i_size);
    104}
    105
    106static void netfs_rreq_expand(struct netfs_io_request *rreq,
    107			      struct readahead_control *ractl)
    108{
    109	/* Give the cache a chance to change the request parameters.  The
    110	 * resultant request must contain the original region.
    111	 */
    112	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);
    113
    114	/* Give the netfs a chance to change the request parameters.  The
    115	 * resultant request must contain the original region.
    116	 */
    117	if (rreq->netfs_ops->expand_readahead)
    118		rreq->netfs_ops->expand_readahead(rreq);
    119
    120	/* Expand the request if the cache wants it to start earlier.  Note
    121	 * that the expansion may get further extended if the VM wishes to
    122	 * insert THPs and the preferred start and/or end wind up in the middle
    123	 * of THPs.
    124	 *
    125	 * If this is the case, however, the THP size should be an integer
    126	 * multiple of the cache granule size, so we get a whole number of
    127	 * granules to deal with.
    128	 */
    129	if (rreq->start  != readahead_pos(ractl) ||
    130	    rreq->len != readahead_length(ractl)) {
    131		readahead_expand(ractl, rreq->start, rreq->len);
    132		rreq->start  = readahead_pos(ractl);
    133		rreq->len = readahead_length(ractl);
    134
    135		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
    136				 netfs_read_trace_expanded);
    137	}
    138}
    139
    140/**
    141 * netfs_readahead - Helper to manage a read request
    142 * @ractl: The description of the readahead request
    143 *
    144 * Fulfil a readahead request by drawing data from the cache if possible, or
    145 * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
    146 * requests from different sources will get munged together.  If necessary, the
    147 * readahead window can be expanded in either direction to a more convenient
    148 * alighment for RPC efficiency or to make storage in the cache feasible.
    149 *
    150 * The calling netfs must initialise a netfs context contiguous to the vfs
    151 * inode before calling this.
    152 *
    153 * This is usable whether or not caching is enabled.
    154 */
    155void netfs_readahead(struct readahead_control *ractl)
    156{
    157	struct netfs_io_request *rreq;
    158	struct netfs_inode *ctx = netfs_inode(ractl->mapping->host);
    159	int ret;
    160
    161	_enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
    162
    163	if (readahead_count(ractl) == 0)
    164		return;
    165
    166	rreq = netfs_alloc_request(ractl->mapping, ractl->file,
    167				   readahead_pos(ractl),
    168				   readahead_length(ractl),
    169				   NETFS_READAHEAD);
    170	if (IS_ERR(rreq))
    171		return;
    172
    173	if (ctx->ops->begin_cache_operation) {
    174		ret = ctx->ops->begin_cache_operation(rreq);
    175		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
    176			goto cleanup_free;
    177	}
    178
    179	netfs_stat(&netfs_n_rh_readahead);
    180	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
    181			 netfs_read_trace_readahead);
    182
    183	netfs_rreq_expand(rreq, ractl);
    184
    185	/* Drop the refs on the folios here rather than in the cache or
    186	 * filesystem.  The locks will be dropped in netfs_rreq_unlock().
    187	 */
    188	while (readahead_folio(ractl))
    189		;
    190
    191	netfs_begin_read(rreq, false);
    192	return;
    193
    194cleanup_free:
    195	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
    196	return;
    197}
    198EXPORT_SYMBOL(netfs_readahead);
    199
    200/**
    201 * netfs_read_folio - Helper to manage a read_folio request
    202 * @file: The file to read from
    203 * @folio: The folio to read
    204 *
    205 * Fulfil a read_folio request by drawing data from the cache if
    206 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
    207 * Multiple I/O requests from different sources will get munged together.
    208 *
    209 * The calling netfs must initialise a netfs context contiguous to the vfs
    210 * inode before calling this.
    211 *
    212 * This is usable whether or not caching is enabled.
    213 */
    214int netfs_read_folio(struct file *file, struct folio *folio)
    215{
    216	struct address_space *mapping = folio_file_mapping(folio);
    217	struct netfs_io_request *rreq;
    218	struct netfs_inode *ctx = netfs_inode(mapping->host);
    219	int ret;
    220
    221	_enter("%lx", folio_index(folio));
    222
    223	rreq = netfs_alloc_request(mapping, file,
    224				   folio_file_pos(folio), folio_size(folio),
    225				   NETFS_READPAGE);
    226	if (IS_ERR(rreq)) {
    227		ret = PTR_ERR(rreq);
    228		goto alloc_error;
    229	}
    230
    231	if (ctx->ops->begin_cache_operation) {
    232		ret = ctx->ops->begin_cache_operation(rreq);
    233		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
    234			goto discard;
    235	}
    236
    237	netfs_stat(&netfs_n_rh_readpage);
    238	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
    239	return netfs_begin_read(rreq, true);
    240
    241discard:
    242	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
    243alloc_error:
    244	folio_unlock(folio);
    245	return ret;
    246}
    247EXPORT_SYMBOL(netfs_read_folio);
    248
    249/*
    250 * Prepare a folio for writing without reading first
    251 * @folio: The folio being prepared
    252 * @pos: starting position for the write
    253 * @len: length of write
    254 * @always_fill: T if the folio should always be completely filled/cleared
    255 *
    256 * In some cases, write_begin doesn't need to read at all:
    257 * - full folio write
    258 * - write that lies in a folio that is completely beyond EOF
    259 * - write that covers the folio from start to EOF or beyond it
    260 *
    261 * If any of these criteria are met, then zero out the unwritten parts
    262 * of the folio and return true. Otherwise, return false.
    263 */
    264static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
    265				 bool always_fill)
    266{
    267	struct inode *inode = folio_inode(folio);
    268	loff_t i_size = i_size_read(inode);
    269	size_t offset = offset_in_folio(folio, pos);
    270	size_t plen = folio_size(folio);
    271
    272	if (unlikely(always_fill)) {
    273		if (pos - offset + len <= i_size)
    274			return false; /* Page entirely before EOF */
    275		zero_user_segment(&folio->page, 0, plen);
    276		folio_mark_uptodate(folio);
    277		return true;
    278	}
    279
    280	/* Full folio write */
    281	if (offset == 0 && len >= plen)
    282		return true;
    283
    284	/* Page entirely beyond the end of the file */
    285	if (pos - offset >= i_size)
    286		goto zero_out;
    287
    288	/* Write that covers from the start of the folio to EOF or beyond */
    289	if (offset == 0 && (pos + len) >= i_size)
    290		goto zero_out;
    291
    292	return false;
    293zero_out:
    294	zero_user_segments(&folio->page, 0, offset, offset + len, plen);
    295	return true;
    296}
    297
    298/**
    299 * netfs_write_begin - Helper to prepare for writing
    300 * @ctx: The netfs context
    301 * @file: The file to read from
    302 * @mapping: The mapping to read from
    303 * @pos: File position at which the write will begin
    304 * @len: The length of the write (may extend beyond the end of the folio chosen)
    305 * @_folio: Where to put the resultant folio
    306 * @_fsdata: Place for the netfs to store a cookie
    307 *
    308 * Pre-read data for a write-begin request by drawing data from the cache if
    309 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
    310 * Multiple I/O requests from different sources will get munged together.  If
    311 * necessary, the readahead window can be expanded in either direction to a
    312 * more convenient alighment for RPC efficiency or to make storage in the cache
    313 * feasible.
    314 *
    315 * The calling netfs must provide a table of operations, only one of which,
    316 * issue_op, is mandatory.
    317 *
    318 * The check_write_begin() operation can be provided to check for and flush
    319 * conflicting writes once the folio is grabbed and locked.  It is passed a
    320 * pointer to the fsdata cookie that gets returned to the VM to be passed to
    321 * write_end.  It is permitted to sleep.  It should return 0 if the request
    322 * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
    323 * be regot; or return an error.
    324 *
    325 * The calling netfs must initialise a netfs context contiguous to the vfs
    326 * inode before calling this.
    327 *
    328 * This is usable whether or not caching is enabled.
    329 */
    330int netfs_write_begin(struct netfs_inode *ctx,
    331		      struct file *file, struct address_space *mapping,
    332		      loff_t pos, unsigned int len, struct folio **_folio,
    333		      void **_fsdata)
    334{
    335	struct netfs_io_request *rreq;
    336	struct folio *folio;
    337	unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
    338	pgoff_t index = pos >> PAGE_SHIFT;
    339	int ret;
    340
    341	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
    342
    343retry:
    344	folio = __filemap_get_folio(mapping, index, fgp_flags,
    345				    mapping_gfp_mask(mapping));
    346	if (!folio)
    347		return -ENOMEM;
    348
    349	if (ctx->ops->check_write_begin) {
    350		/* Allow the netfs (eg. ceph) to flush conflicts. */
    351		ret = ctx->ops->check_write_begin(file, pos, len, folio, _fsdata);
    352		if (ret < 0) {
    353			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
    354			if (ret == -EAGAIN)
    355				goto retry;
    356			goto error;
    357		}
    358	}
    359
    360	if (folio_test_uptodate(folio))
    361		goto have_folio;
    362
    363	/* If the page is beyond the EOF, we want to clear it - unless it's
    364	 * within the cache granule containing the EOF, in which case we need
    365	 * to preload the granule.
    366	 */
    367	if (!netfs_is_cache_enabled(ctx) &&
    368	    netfs_skip_folio_read(folio, pos, len, false)) {
    369		netfs_stat(&netfs_n_rh_write_zskip);
    370		goto have_folio_no_wait;
    371	}
    372
    373	rreq = netfs_alloc_request(mapping, file,
    374				   folio_file_pos(folio), folio_size(folio),
    375				   NETFS_READ_FOR_WRITE);
    376	if (IS_ERR(rreq)) {
    377		ret = PTR_ERR(rreq);
    378		goto error;
    379	}
    380	rreq->no_unlock_folio	= folio_index(folio);
    381	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
    382
    383	if (ctx->ops->begin_cache_operation) {
    384		ret = ctx->ops->begin_cache_operation(rreq);
    385		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
    386			goto error_put;
    387	}
    388
    389	netfs_stat(&netfs_n_rh_write_begin);
    390	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
    391
    392	/* Expand the request to meet caching requirements and download
    393	 * preferences.
    394	 */
    395	ractl._nr_pages = folio_nr_pages(folio);
    396	netfs_rreq_expand(rreq, &ractl);
    397
    398	/* We hold the folio locks, so we can drop the references */
    399	folio_get(folio);
    400	while (readahead_folio(&ractl))
    401		;
    402
    403	ret = netfs_begin_read(rreq, true);
    404	if (ret < 0)
    405		goto error;
    406
    407have_folio:
    408	ret = folio_wait_fscache_killable(folio);
    409	if (ret < 0)
    410		goto error;
    411have_folio_no_wait:
    412	*_folio = folio;
    413	_leave(" = 0");
    414	return 0;
    415
    416error_put:
    417	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
    418error:
    419	folio_unlock(folio);
    420	folio_put(folio);
    421	_leave(" = %d", ret);
    422	return ret;
    423}
    424EXPORT_SYMBOL(netfs_write_begin);