cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

read.c (11707B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>

#include "nfs4_fs.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
#include "pnfs.h"
#include "nfstrace.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
static const struct nfs_rw_ops nfs_rw_read_ops;

static struct kmem_cache *nfs_rdata_cachep;

static struct nfs_pgio_header *nfs_readhdr_alloc(void)
{
	struct nfs_pgio_header *p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);

	if (p)
		p->rw_mode = FMODE_READ;
	return p;
}

static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
{
	kmem_cache_free(nfs_rdata_cachep, rhdr);
}

static
int nfs_return_empty_page(struct page *page)
{
	zero_user(page, 0, PAGE_SIZE);
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
			      struct inode *inode, bool force_mds,
			      const struct nfs_pgio_completion_ops *compl_ops)
{
	struct nfs_server *server = NFS_SERVER(inode);
	const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;

#ifdef CONFIG_NFS_V4_1
	if (server->pnfs_curr_ld && !force_mds)
		pg_ops = server->pnfs_curr_ld->pg_read_ops;
#endif
	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
			server->rsize, 0);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
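
/*
 * A minimal usage sketch (it mirrors the sequence nfs_read_folio()
 * below actually follows; illustrative, not a drop-in implementation):
 *
 *	struct nfs_readdesc desc;
 *
 *	desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
 *	nfs_pageio_init_read(&desc.pgio, inode, false,
 *			     &nfs_async_read_completion_ops);
 *	readpage_async_filler(&desc, page);	// queue one page read
 *	nfs_pageio_complete_read(&desc.pgio);	// issue I/O, record stats
 *
 * With CONFIG_NFS_V4_1, a pNFS layout driver may supply its own
 * pg_read_ops; passing force_mds == true bypasses the layout driver
 * and reads through the metadata server instead.
 */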

static void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio)
{
	struct nfs_pgio_mirror *pgm;
	unsigned long npages;

	nfs_pageio_complete(pgio);

	/* It doesn't make sense to do mirrored reads! */
	WARN_ON_ONCE(pgio->pg_mirror_count != 1);

	pgm = &pgio->pg_mirrors[0];
	NFS_I(pgio->pg_inode)->read_io += pgm->pg_bytes_written;
	npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >> PAGE_SHIFT;
	nfs_add_stats(pgio->pg_inode, NFSIOS_READPAGES, npages);
}
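
/*
 * The shift in nfs_pageio_complete_read() rounds up to whole pages,
 * i.e. npages == DIV_ROUND_UP(pg_bytes_written, PAGE_SIZE): with
 * 4 KiB pages, 9000 bytes written is accounted as 3 pages in
 * NFSIOS_READPAGES.
 */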


void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
	struct nfs_pgio_mirror *mirror;

	if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
		pgio->pg_ops->pg_cleanup(pgio);

	pgio->pg_ops = &nfs_pgio_rw_ops;

	/* read path should never have more than one mirror */
	WARN_ON_ONCE(pgio->pg_mirror_count != 1);

	mirror = &pgio->pg_mirrors[0];
	mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);

static void nfs_readpage_release(struct nfs_page *req, int error)
{
	struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
	struct page *page = req->wb_page;

	dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
		(unsigned long long)NFS_FILEID(inode), req->wb_bytes,
		(long long)req_offset(req));

	if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT)
		SetPageError(page);
	if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
		struct address_space *mapping = page_file_mapping(page);

		if (PageUptodate(page))
			nfs_fscache_write_page(inode, page);
		else if (!PageError(page) && !PagePrivate(page))
			generic_error_remove_page(mapping, page);
		unlock_page(page);
	}
	nfs_release_request(req);
}
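
/*
 * nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE) above returns true
 * only once every subrequest in the page group has reached release, so
 * a page that was split across several requests is written to fscache
 * and unlocked exactly once, by the last subrequest to complete.
 */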

struct nfs_readdesc {
	struct nfs_pageio_descriptor pgio;
	struct nfs_open_context *ctx;
};

static void nfs_page_group_set_uptodate(struct nfs_page *req)
{
	if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
		SetPageUptodate(req->wb_page);
}

static void nfs_read_completion(struct nfs_pgio_header *hdr)
{
	unsigned long bytes = 0;
	int error;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
		goto out;
	while (!list_empty(&hdr->pages)) {
		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
		struct page *page = req->wb_page;
		unsigned long start = req->wb_pgbase;
		unsigned long end = req->wb_pgbase + req->wb_bytes;

		if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
			/* note: regions of the page not covered by a
			 * request are zeroed in readpage_async_filler */
			if (bytes > hdr->good_bytes) {
				/* nothing in this request was good, so zero
				 * the full extent of the request */
				zero_user_segment(page, start, end);

			} else if (hdr->good_bytes - bytes < req->wb_bytes) {
				/* part of this request has good bytes, but
				 * not all. zero the bad bytes */
				start += hdr->good_bytes - bytes;
				WARN_ON(start < req->wb_pgbase);
				zero_user_segment(page, start, end);
			}
		}
		error = 0;
		bytes += req->wb_bytes;
		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
			if (bytes <= hdr->good_bytes)
				nfs_page_group_set_uptodate(req);
			else {
				error = hdr->error;
				xchg(&nfs_req_openctx(req)->error, error);
			}
		} else
			nfs_page_group_set_uptodate(req);
		nfs_list_remove_request(req);
		nfs_readpage_release(req, error);
	}
out:
	hdr->release(hdr);
}
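
/*
 * Worked example for the EOF zeroing in nfs_read_completion() above
 * (illustrative numbers): a page covered by two 2048-byte requests,
 * with hdr->good_bytes == 3072.  The first request (bytes 0-2047) is
 * fully good; for the second, good_bytes - bytes == 1024 < wb_bytes,
 * so zero_user_segment() clears bytes 3072-4095 of the page before it
 * is marked uptodate.
 */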

static void nfs_initiate_read(struct nfs_pgio_header *hdr,
			      struct rpc_message *msg,
			      const struct nfs_rpc_ops *rpc_ops,
			      struct rpc_task_setup *task_setup_data, int how)
{
	rpc_ops->read_setup(hdr, msg);
	trace_nfs_initiate_read(hdr);
}

static void
nfs_async_read_error(struct list_head *head, int error)
{
	struct nfs_page	*req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_readpage_release(req, error);
	}
}

static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
	.error_cleanup = nfs_async_read_error,
	.completion = nfs_read_completion,
};

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
static int nfs_readpage_done(struct rpc_task *task,
			     struct nfs_pgio_header *hdr,
			     struct inode *inode)
{
	int status = NFS_PROTO(inode)->read_done(task, hdr);
	if (status != 0)
		return status;

	nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);
	trace_nfs_readpage_done(task, hdr);

	if (task->tk_status == -ESTALE) {
		nfs_set_inode_stale(inode);
		nfs_mark_for_revalidate(inode);
	}
	return 0;
}

static void nfs_readpage_retry(struct rpc_task *task,
			       struct nfs_pgio_header *hdr)
{
	struct nfs_pgio_args *argp = &hdr->args;
	struct nfs_pgio_res  *resp = &hdr->res;

	/* This is a short read! */
	nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD);
	trace_nfs_readpage_short(task, hdr);

	/* Has the server at least made some progress? */
	if (resp->count == 0) {
		nfs_set_pgio_error(hdr, -EIO, argp->offset);
		return;
	}

	/* For non rpc-based layout drivers, retry-through-MDS */
	if (!task->tk_ops) {
		hdr->pnfs_error = -EAGAIN;
		return;
	}

	/* Yes, so retry the read at the end of the hdr */
	hdr->mds_offset += resp->count;
	argp->offset += resp->count;
	argp->pgbase += resp->count;
	argp->count -= resp->count;
	resp->count = 0;
	resp->eof = 0;
	rpc_restart_call_prepare(task);
}
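
/*
 * Example of the short-read restart above (illustrative numbers): a
 * 16384-byte read that returns count == 4096 is restarted with offset
 * and pgbase advanced by 4096 and count reduced to 12288, so the retry
 * resumes where the server stopped instead of re-reading from the
 * start.
 */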

static void nfs_readpage_result(struct rpc_task *task,
				struct nfs_pgio_header *hdr)
{
	if (hdr->res.eof) {
		loff_t pos = hdr->args.offset + hdr->res.count;
		unsigned int new = pos - hdr->io_start;

		if (hdr->good_bytes > new) {
			hdr->good_bytes = new;
			set_bit(NFS_IOHDR_EOF, &hdr->flags);
			clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
		}
	} else if (hdr->res.count < hdr->args.count)
		nfs_readpage_retry(task, hdr);
}
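
/*
 * On EOF the reply may legitimately cover fewer bytes than requested:
 * clamping good_bytes to (offset + count) - io_start and clearing
 * NFS_IOHDR_ERROR lets nfs_read_completion() zero the tail of the
 * last page rather than failing the read.
 */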

static int
readpage_async_filler(struct nfs_readdesc *desc, struct page *page)
{
	struct inode *inode = page_file_mapping(page)->host;
	unsigned int rsize = NFS_SERVER(inode)->rsize;
	struct nfs_page *new;
	unsigned int len, aligned_len;
	int error;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);

	aligned_len = min_t(unsigned int, ALIGN(len, rsize), PAGE_SIZE);

	if (!IS_SYNC(page->mapping->host)) {
		error = nfs_fscache_read_page(page->mapping->host, page);
		if (error == 0)
			goto out_unlock;
	}

	new = nfs_create_request(desc->ctx, page, 0, aligned_len);
	if (IS_ERR(new))
		goto out_error;

	if (len < PAGE_SIZE)
		zero_user_segment(page, len, PAGE_SIZE);
	if (!nfs_pageio_add_request(&desc->pgio, new)) {
		nfs_list_remove_request(new);
		error = desc->pgio.pg_error;
		nfs_readpage_release(new, error);
		goto out;
	}
	return 0;
out_error:
	error = PTR_ERR(new);
out_unlock:
	unlock_page(page);
out:
	return error;
}
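
/*
 * Sizing note for readpage_async_filler() (illustrative numbers): with
 * rsize == 1024 and len == 1500, aligned_len == ALIGN(1500, 1024) ==
 * 2048, capped at PAGE_SIZE.  The tail of the page from len onward is
 * zeroed up front, so a short reply still leaves a fully initialized
 * page.
 */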

/*
 * Read a page over NFS.
 * We read the page synchronously in the following case:
 *  -	The error flag is set for this page. This happens only when a
 *	previous async read operation failed.
 */
int nfs_read_folio(struct file *file, struct folio *folio)
{
	struct page *page = &folio->page;
	struct nfs_readdesc desc;
	struct inode *inode = page_file_mapping(page)->host;
	int ret;

	trace_nfs_aop_readpage(inode, page);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);

	/*
	 * Try to flush any pending writes to the file..
	 *
	 * NOTE! Because we own the page lock, there cannot
	 * be any new pending writes generated at this point
	 * for this page (other pages can be written to).
	 */
	ret = nfs_wb_page(inode, page);
	if (ret)
		goto out_unlock;
	if (PageUptodate(page))
		goto out_unlock;

	ret = -ESTALE;
	if (NFS_STALE(inode))
		goto out_unlock;

	if (file == NULL) {
		ret = -EBADF;
		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (desc.ctx == NULL)
			goto out_unlock;
	} else
		desc.ctx = get_nfs_open_context(nfs_file_open_context(file));

	xchg(&desc.ctx->error, 0);
	nfs_pageio_init_read(&desc.pgio, inode, false,
			     &nfs_async_read_completion_ops);

	ret = readpage_async_filler(&desc, page);
	if (ret)
		goto out;

	nfs_pageio_complete_read(&desc.pgio);
	ret = desc.pgio.pg_error < 0 ? desc.pgio.pg_error : 0;
	if (!ret) {
		ret = wait_on_page_locked_killable(page);
		if (!PageUptodate(page) && !ret)
			ret = xchg(&desc.ctx->error, 0);
	}
out:
	put_nfs_open_context(desc.ctx);
	trace_nfs_aop_readpage_done(inode, page, ret);
	return ret;
out_unlock:
	unlock_page(page);
	trace_nfs_aop_readpage_done(inode, page, ret);
	return ret;
}
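
/*
 * Note: the I/O issued above is asynchronous; nfs_read_folio() blocks
 * in wait_on_page_locked_killable() until nfs_readpage_release() drops
 * the page lock for the last subrequest.  An error stashed in
 * ctx->error by the completion path is claimed with xchg() so it is
 * reported exactly once.
 */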

void nfs_readahead(struct readahead_control *ractl)
{
	unsigned int nr_pages = readahead_count(ractl);
	struct file *file = ractl->file;
	struct nfs_readdesc desc;
	struct inode *inode = ractl->mapping->host;
	struct page *page;
	int ret;

	trace_nfs_aop_readahead(inode, readahead_pos(ractl), nr_pages);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);

	ret = -ESTALE;
	if (NFS_STALE(inode))
		goto out;

	if (file == NULL) {
		ret = -EBADF;
		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (desc.ctx == NULL)
			goto out;
	} else
		desc.ctx = get_nfs_open_context(nfs_file_open_context(file));

	nfs_pageio_init_read(&desc.pgio, inode, false,
			     &nfs_async_read_completion_ops);

	while ((page = readahead_page(ractl)) != NULL) {
		ret = readpage_async_filler(&desc, page);
		put_page(page);
		if (ret)
			break;
	}

	nfs_pageio_complete_read(&desc.pgio);

	put_nfs_open_context(desc.ctx);
out:
	trace_nfs_aop_readahead_done(inode, nr_pages, ret);
}

int __init nfs_init_readpagecache(void)
{
	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
					     sizeof(struct nfs_pgio_header),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_rdata_cachep == NULL)
		return -ENOMEM;

	return 0;
}

void nfs_destroy_readpagecache(void)
{
	kmem_cache_destroy(nfs_rdata_cachep);
}

static const struct nfs_rw_ops nfs_rw_read_ops = {
	.rw_alloc_header	= nfs_readhdr_alloc,
	.rw_free_header		= nfs_readhdr_free,
	.rw_done		= nfs_readpage_done,
	.rw_result		= nfs_readpage_result,
	.rw_initiate		= nfs_initiate_read,
};