cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

write.c (25884B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/* handling of writes to regular files and writing back to the server
      3 *
      4 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
      5 * Written by David Howells (dhowells@redhat.com)
      6 */
      7
      8#include <linux/backing-dev.h>
      9#include <linux/slab.h>
     10#include <linux/fs.h>
     11#include <linux/pagemap.h>
     12#include <linux/writeback.h>
     13#include <linux/pagevec.h>
     14#include <linux/netfs.h>
     15#include "internal.h"
     16
     17static void afs_write_to_cache(struct afs_vnode *vnode, loff_t start, size_t len,
     18			       loff_t i_size, bool caching);
     19
     20#ifdef CONFIG_AFS_FSCACHE
     21/*
     22 * Mark a page as having been made dirty and thus needing writeback.  We also
     23 * need to pin the cache object to write back to.
     24 */
     25bool afs_dirty_folio(struct address_space *mapping, struct folio *folio)
     26{
     27	return fscache_dirty_folio(mapping, folio,
     28				afs_vnode_cache(AFS_FS_I(mapping->host)));
     29}
     30static void afs_folio_start_fscache(bool caching, struct folio *folio)
     31{
     32	if (caching)
     33		folio_start_fscache(folio);
     34}
     35#else
     36static void afs_folio_start_fscache(bool caching, struct folio *folio)
     37{
     38}
     39#endif
     40
     41/*
     42 * prepare to perform part of a write to a page
     43 */
     44int afs_write_begin(struct file *file, struct address_space *mapping,
     45		    loff_t pos, unsigned len,
     46		    struct page **_page, void **fsdata)
     47{
     48	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
     49	struct folio *folio;
     50	unsigned long priv;
     51	unsigned f, from;
     52	unsigned t, to;
     53	pgoff_t index;
     54	int ret;
     55
     56	_enter("{%llx:%llu},%llx,%x",
     57	       vnode->fid.vid, vnode->fid.vnode, pos, len);
     58
     59	/* Prefetch area to be written into the cache if we're caching this
     60	 * file.  We need to do this before we get a lock on the page in case
     61	 * there's more than one writer competing for the same cache block.
     62	 */
     63	ret = netfs_write_begin(&vnode->netfs, file, mapping, pos, len, &folio, fsdata);
     64	if (ret < 0)
     65		return ret;
     66
     67	index = folio_index(folio);
     68	from = pos - index * PAGE_SIZE;
     69	to = from + len;
     70
     71try_again:
     72	/* See if this page is already partially written in a way that we can
     73	 * merge the new write with.
     74	 */
     75	if (folio_test_private(folio)) {
     76		priv = (unsigned long)folio_get_private(folio);
     77		f = afs_folio_dirty_from(folio, priv);
     78		t = afs_folio_dirty_to(folio, priv);
     79		ASSERTCMP(f, <=, t);
     80
     81		if (folio_test_writeback(folio)) {
     82			trace_afs_folio_dirty(vnode, tracepoint_string("alrdy"), folio);
     83			goto flush_conflicting_write;
     84		}
     85		/* If the file is being filled locally, allow inter-write
     86		 * spaces to be merged into writes.  If it's not, only write
     87		 * back what the user gives us.
     88		 */
     89		if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) &&
     90		    (to < f || from > t))
     91			goto flush_conflicting_write;
     92	}
     93
     94	*_page = &folio->page;
     95	_leave(" = 0");
     96	return 0;
     97
     98	/* The previous write and this write aren't adjacent or overlapping, so
     99	 * flush the page out.
    100	 */
    101flush_conflicting_write:
    102	_debug("flush conflict");
    103	ret = folio_write_one(folio);
    104	if (ret < 0)
    105		goto error;
    106
    107	ret = folio_lock_killable(folio);
    108	if (ret < 0)
    109		goto error;
    110	goto try_again;
    111
    112error:
    113	folio_put(folio);
    114	_leave(" = %d", ret);
    115	return ret;
    116}
    117
    118/*
    119 * finalise part of a write to a page
    120 */
    121int afs_write_end(struct file *file, struct address_space *mapping,
    122		  loff_t pos, unsigned len, unsigned copied,
    123		  struct page *subpage, void *fsdata)
    124{
    125	struct folio *folio = page_folio(subpage);
    126	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
    127	unsigned long priv;
    128	unsigned int f, from = offset_in_folio(folio, pos);
    129	unsigned int t, to = from + copied;
    130	loff_t i_size, write_end_pos;
    131
    132	_enter("{%llx:%llu},{%lx}",
    133	       vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
    134
    135	if (!folio_test_uptodate(folio)) {
    136		if (copied < len) {
    137			copied = 0;
    138			goto out;
    139		}
    140
    141		folio_mark_uptodate(folio);
    142	}
    143
    144	if (copied == 0)
    145		goto out;
    146
    147	write_end_pos = pos + copied;
    148
    149	i_size = i_size_read(&vnode->netfs.inode);
    150	if (write_end_pos > i_size) {
    151		write_seqlock(&vnode->cb_lock);
    152		i_size = i_size_read(&vnode->netfs.inode);
    153		if (write_end_pos > i_size)
    154			afs_set_i_size(vnode, write_end_pos);
    155		write_sequnlock(&vnode->cb_lock);
    156		fscache_update_cookie(afs_vnode_cache(vnode), NULL, &write_end_pos);
    157	}
    158
    159	if (folio_test_private(folio)) {
    160		priv = (unsigned long)folio_get_private(folio);
    161		f = afs_folio_dirty_from(folio, priv);
    162		t = afs_folio_dirty_to(folio, priv);
    163		if (from < f)
    164			f = from;
    165		if (to > t)
    166			t = to;
    167		priv = afs_folio_dirty(folio, f, t);
    168		folio_change_private(folio, (void *)priv);
    169		trace_afs_folio_dirty(vnode, tracepoint_string("dirty+"), folio);
    170	} else {
    171		priv = afs_folio_dirty(folio, from, to);
    172		folio_attach_private(folio, (void *)priv);
    173		trace_afs_folio_dirty(vnode, tracepoint_string("dirty"), folio);
    174	}
    175
    176	if (folio_mark_dirty(folio))
    177		_debug("dirtied %lx", folio_index(folio));
    178
    179out:
    180	folio_unlock(folio);
    181	folio_put(folio);
    182	return copied;
    183}
    184
    185/*
    186 * kill all the pages in the given range
    187 */
    188static void afs_kill_pages(struct address_space *mapping,
    189			   loff_t start, loff_t len)
    190{
    191	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
    192	struct folio *folio;
    193	pgoff_t index = start / PAGE_SIZE;
    194	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
    195
    196	_enter("{%llx:%llu},%llx @%llx",
    197	       vnode->fid.vid, vnode->fid.vnode, len, start);
    198
    199	do {
    200		_debug("kill %lx (to %lx)", index, last);
    201
    202		folio = filemap_get_folio(mapping, index);
    203		if (!folio) {
    204			next = index + 1;
    205			continue;
    206		}
    207
    208		next = folio_next_index(folio);
    209
    210		folio_clear_uptodate(folio);
    211		folio_end_writeback(folio);
    212		folio_lock(folio);
    213		generic_error_remove_page(mapping, &folio->page);
    214		folio_unlock(folio);
    215		folio_put(folio);
    216
    217	} while (index = next, index <= last);
    218
    219	_leave("");
    220}
    221
    222/*
    223 * Redirty all the pages in a given range.
    224 */
    225static void afs_redirty_pages(struct writeback_control *wbc,
    226			      struct address_space *mapping,
    227			      loff_t start, loff_t len)
    228{
    229	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
    230	struct folio *folio;
    231	pgoff_t index = start / PAGE_SIZE;
    232	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
    233
    234	_enter("{%llx:%llu},%llx @%llx",
    235	       vnode->fid.vid, vnode->fid.vnode, len, start);
    236
    237	do {
    238		_debug("redirty %llx @%llx", len, start);
    239
    240		folio = filemap_get_folio(mapping, index);
    241		if (!folio) {
    242			next = index + 1;
    243			continue;
    244		}
    245
    246		next = index + folio_nr_pages(folio);
    247		folio_redirty_for_writepage(wbc, folio);
    248		folio_end_writeback(folio);
    249		folio_put(folio);
    250	} while (index = next, index <= last);
    251
    252	_leave("");
    253}
    254
    255/*
    256 * completion of write to server
    257 */
    258static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len)
    259{
    260	struct address_space *mapping = vnode->netfs.inode.i_mapping;
    261	struct folio *folio;
    262	pgoff_t end;
    263
    264	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
    265
    266	_enter("{%llx:%llu},{%x @%llx}",
    267	       vnode->fid.vid, vnode->fid.vnode, len, start);
    268
    269	rcu_read_lock();
    270
    271	end = (start + len - 1) / PAGE_SIZE;
    272	xas_for_each(&xas, folio, end) {
    273		if (!folio_test_writeback(folio)) {
    274			kdebug("bad %x @%llx page %lx %lx",
    275			       len, start, folio_index(folio), end);
    276			ASSERT(folio_test_writeback(folio));
    277		}
    278
    279		trace_afs_folio_dirty(vnode, tracepoint_string("clear"), folio);
    280		folio_detach_private(folio);
    281		folio_end_writeback(folio);
    282	}
    283
    284	rcu_read_unlock();
    285
    286	afs_prune_wb_keys(vnode);
    287	_leave("");
    288}
    289
    290/*
    291 * Find a key to use for the writeback.  We cached the keys used to author the
    292 * writes on the vnode.  *_wbk will contain the last writeback key used or NULL
    293 * and we need to start from there if it's set.
    294 */
    295static int afs_get_writeback_key(struct afs_vnode *vnode,
    296				 struct afs_wb_key **_wbk)
    297{
    298	struct afs_wb_key *wbk = NULL;
    299	struct list_head *p;
    300	int ret = -ENOKEY, ret2;
    301
    302	spin_lock(&vnode->wb_lock);
    303	if (*_wbk)
    304		p = (*_wbk)->vnode_link.next;
    305	else
    306		p = vnode->wb_keys.next;
    307
    308	while (p != &vnode->wb_keys) {
    309		wbk = list_entry(p, struct afs_wb_key, vnode_link);
    310		_debug("wbk %u", key_serial(wbk->key));
    311		ret2 = key_validate(wbk->key);
    312		if (ret2 == 0) {
    313			refcount_inc(&wbk->usage);
    314			_debug("USE WB KEY %u", key_serial(wbk->key));
    315			break;
    316		}
    317
    318		wbk = NULL;
    319		if (ret == -ENOKEY)
    320			ret = ret2;
    321		p = p->next;
    322	}
    323
    324	spin_unlock(&vnode->wb_lock);
    325	if (*_wbk)
    326		afs_put_wb_key(*_wbk);
    327	*_wbk = wbk;
     328	return wbk ? 0 : ret;
    329}
    330
    331static void afs_store_data_success(struct afs_operation *op)
    332{
    333	struct afs_vnode *vnode = op->file[0].vnode;
    334
    335	op->ctime = op->file[0].scb.status.mtime_client;
    336	afs_vnode_commit_status(op, &op->file[0]);
    337	if (op->error == 0) {
    338		if (!op->store.laundering)
    339			afs_pages_written_back(vnode, op->store.pos, op->store.size);
    340		afs_stat_v(vnode, n_stores);
    341		atomic_long_add(op->store.size, &afs_v2net(vnode)->n_store_bytes);
    342	}
    343}
    344
    345static const struct afs_operation_ops afs_store_data_operation = {
    346	.issue_afs_rpc	= afs_fs_store_data,
    347	.issue_yfs_rpc	= yfs_fs_store_data,
    348	.success	= afs_store_data_success,
    349};
    350
    351/*
    352 * write to a file
    353 */
    354static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t pos,
    355			  bool laundering)
    356{
    357	struct afs_operation *op;
    358	struct afs_wb_key *wbk = NULL;
    359	loff_t size = iov_iter_count(iter);
    360	int ret = -ENOKEY;
    361
    362	_enter("%s{%llx:%llu.%u},%llx,%llx",
    363	       vnode->volume->name,
    364	       vnode->fid.vid,
    365	       vnode->fid.vnode,
    366	       vnode->fid.unique,
    367	       size, pos);
    368
    369	ret = afs_get_writeback_key(vnode, &wbk);
    370	if (ret) {
    371		_leave(" = %d [no keys]", ret);
    372		return ret;
    373	}
    374
    375	op = afs_alloc_operation(wbk->key, vnode->volume);
    376	if (IS_ERR(op)) {
    377		afs_put_wb_key(wbk);
    378		return -ENOMEM;
    379	}
    380
    381	afs_op_set_vnode(op, 0, vnode);
    382	op->file[0].dv_delta = 1;
    383	op->file[0].modification = true;
    384	op->store.write_iter = iter;
    385	op->store.pos = pos;
    386	op->store.size = size;
    387	op->store.i_size = max(pos + size, vnode->netfs.remote_i_size);
    388	op->store.laundering = laundering;
    389	op->mtime = vnode->netfs.inode.i_mtime;
    390	op->flags |= AFS_OPERATION_UNINTR;
    391	op->ops = &afs_store_data_operation;
    392
    393try_next_key:
    394	afs_begin_vnode_operation(op);
    395	afs_wait_for_operation(op);
    396
    397	switch (op->error) {
    398	case -EACCES:
    399	case -EPERM:
    400	case -ENOKEY:
    401	case -EKEYEXPIRED:
    402	case -EKEYREJECTED:
    403	case -EKEYREVOKED:
    404		_debug("next");
    405
    406		ret = afs_get_writeback_key(vnode, &wbk);
    407		if (ret == 0) {
    408			key_put(op->key);
    409			op->key = key_get(wbk->key);
    410			goto try_next_key;
    411		}
    412		break;
    413	}
    414
    415	afs_put_wb_key(wbk);
    416	_leave(" = %d", op->error);
    417	return afs_put_operation(op);
    418}
    419
    420/*
    421 * Extend the region to be written back to include subsequent contiguously
    422 * dirty pages if possible, but don't sleep while doing so.
    423 *
    424 * If this page holds new content, then we can include filler zeros in the
    425 * writeback.
    426 */
    427static void afs_extend_writeback(struct address_space *mapping,
    428				 struct afs_vnode *vnode,
    429				 long *_count,
    430				 loff_t start,
    431				 loff_t max_len,
    432				 bool new_content,
    433				 bool caching,
    434				 unsigned int *_len)
    435{
    436	struct pagevec pvec;
    437	struct folio *folio;
    438	unsigned long priv;
    439	unsigned int psize, filler = 0;
    440	unsigned int f, t;
    441	loff_t len = *_len;
    442	pgoff_t index = (start + len) / PAGE_SIZE;
    443	bool stop = true;
    444	unsigned int i;
    445
    446	XA_STATE(xas, &mapping->i_pages, index);
    447	pagevec_init(&pvec);
    448
    449	do {
    450		/* Firstly, we gather up a batch of contiguous dirty pages
    451		 * under the RCU read lock - but we can't clear the dirty flags
    452		 * there if any of those pages are mapped.
    453		 */
    454		rcu_read_lock();
    455
    456		xas_for_each(&xas, folio, ULONG_MAX) {
    457			stop = true;
    458			if (xas_retry(&xas, folio))
    459				continue;
    460			if (xa_is_value(folio))
    461				break;
    462			if (folio_index(folio) != index)
    463				break;
    464
    465			if (!folio_try_get_rcu(folio)) {
    466				xas_reset(&xas);
    467				continue;
    468			}
    469
    470			/* Has the page moved or been split? */
    471			if (unlikely(folio != xas_reload(&xas))) {
    472				folio_put(folio);
    473				break;
    474			}
    475
    476			if (!folio_trylock(folio)) {
    477				folio_put(folio);
    478				break;
    479			}
    480			if (!folio_test_dirty(folio) ||
    481			    folio_test_writeback(folio) ||
    482			    folio_test_fscache(folio)) {
    483				folio_unlock(folio);
    484				folio_put(folio);
    485				break;
    486			}
    487
    488			psize = folio_size(folio);
    489			priv = (unsigned long)folio_get_private(folio);
    490			f = afs_folio_dirty_from(folio, priv);
    491			t = afs_folio_dirty_to(folio, priv);
    492			if (f != 0 && !new_content) {
    493				folio_unlock(folio);
    494				folio_put(folio);
    495				break;
    496			}
    497
    498			len += filler + t;
    499			filler = psize - t;
    500			if (len >= max_len || *_count <= 0)
    501				stop = true;
    502			else if (t == psize || new_content)
    503				stop = false;
    504
    505			index += folio_nr_pages(folio);
    506			if (!pagevec_add(&pvec, &folio->page))
    507				break;
    508			if (stop)
    509				break;
    510		}
    511
    512		if (!stop)
    513			xas_pause(&xas);
    514		rcu_read_unlock();
    515
    516		/* Now, if we obtained any pages, we can shift them to being
    517		 * writable and mark them for caching.
    518		 */
    519		if (!pagevec_count(&pvec))
    520			break;
    521
    522		for (i = 0; i < pagevec_count(&pvec); i++) {
    523			folio = page_folio(pvec.pages[i]);
    524			trace_afs_folio_dirty(vnode, tracepoint_string("store+"), folio);
    525
    526			if (!folio_clear_dirty_for_io(folio))
    527				BUG();
    528			if (folio_start_writeback(folio))
    529				BUG();
    530			afs_folio_start_fscache(caching, folio);
    531
    532			*_count -= folio_nr_pages(folio);
    533			folio_unlock(folio);
    534		}
    535
    536		pagevec_release(&pvec);
    537		cond_resched();
    538	} while (!stop);
    539
    540	*_len = len;
    541}
    542
    543/*
    544 * Synchronously write back the locked page and any subsequent non-locked dirty
    545 * pages.
    546 */
    547static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
    548						struct writeback_control *wbc,
    549						struct folio *folio,
    550						loff_t start, loff_t end)
    551{
    552	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
    553	struct iov_iter iter;
    554	unsigned long priv;
    555	unsigned int offset, to, len, max_len;
    556	loff_t i_size = i_size_read(&vnode->netfs.inode);
    557	bool new_content = test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
    558	bool caching = fscache_cookie_enabled(afs_vnode_cache(vnode));
    559	long count = wbc->nr_to_write;
    560	int ret;
    561
    562	_enter(",%lx,%llx-%llx", folio_index(folio), start, end);
    563
    564	if (folio_start_writeback(folio))
    565		BUG();
    566	afs_folio_start_fscache(caching, folio);
    567
    568	count -= folio_nr_pages(folio);
    569
    570	/* Find all consecutive lockable dirty pages that have contiguous
    571	 * written regions, stopping when we find a page that is not
    572	 * immediately lockable, is not dirty or is missing, or we reach the
    573	 * end of the range.
    574	 */
    575	priv = (unsigned long)folio_get_private(folio);
    576	offset = afs_folio_dirty_from(folio, priv);
    577	to = afs_folio_dirty_to(folio, priv);
    578	trace_afs_folio_dirty(vnode, tracepoint_string("store"), folio);
    579
    580	len = to - offset;
    581	start += offset;
    582	if (start < i_size) {
    583		/* Trim the write to the EOF; the extra data is ignored.  Also
    584		 * put an upper limit on the size of a single storedata op.
    585		 */
    586		max_len = 65536 * 4096;
    587		max_len = min_t(unsigned long long, max_len, end - start + 1);
    588		max_len = min_t(unsigned long long, max_len, i_size - start);
    589
    590		if (len < max_len &&
    591		    (to == folio_size(folio) || new_content))
    592			afs_extend_writeback(mapping, vnode, &count,
    593					     start, max_len, new_content,
    594					     caching, &len);
    595		len = min_t(loff_t, len, max_len);
    596	}
    597
    598	/* We now have a contiguous set of dirty pages, each with writeback
    599	 * set; the first page is still locked at this point, but all the rest
    600	 * have been unlocked.
    601	 */
    602	folio_unlock(folio);
    603
    604	if (start < i_size) {
    605		_debug("write back %x @%llx [%llx]", len, start, i_size);
    606
    607		/* Speculatively write to the cache.  We have to fix this up
    608		 * later if the store fails.
    609		 */
    610		afs_write_to_cache(vnode, start, len, i_size, caching);
    611
    612		iov_iter_xarray(&iter, WRITE, &mapping->i_pages, start, len);
    613		ret = afs_store_data(vnode, &iter, start, false);
    614	} else {
    615		_debug("write discard %x @%llx [%llx]", len, start, i_size);
    616
    617		/* The dirty region was entirely beyond the EOF. */
    618		fscache_clear_page_bits(mapping, start, len, caching);
    619		afs_pages_written_back(vnode, start, len);
    620		ret = 0;
    621	}
    622
    623	switch (ret) {
    624	case 0:
    625		wbc->nr_to_write = count;
    626		ret = len;
    627		break;
    628
    629	default:
    630		pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret);
    631		fallthrough;
    632	case -EACCES:
    633	case -EPERM:
    634	case -ENOKEY:
    635	case -EKEYEXPIRED:
    636	case -EKEYREJECTED:
    637	case -EKEYREVOKED:
    638	case -ENETRESET:
    639		afs_redirty_pages(wbc, mapping, start, len);
    640		mapping_set_error(mapping, ret);
    641		break;
    642
    643	case -EDQUOT:
    644	case -ENOSPC:
    645		afs_redirty_pages(wbc, mapping, start, len);
    646		mapping_set_error(mapping, -ENOSPC);
    647		break;
    648
    649	case -EROFS:
    650	case -EIO:
    651	case -EREMOTEIO:
    652	case -EFBIG:
    653	case -ENOENT:
    654	case -ENOMEDIUM:
    655	case -ENXIO:
    656		trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail);
    657		afs_kill_pages(mapping, start, len);
    658		mapping_set_error(mapping, ret);
    659		break;
    660	}
    661
    662	_leave(" = %d", ret);
    663	return ret;
    664}
    665
    666/*
    667 * write a page back to the server
    668 * - the caller locked the page for us
    669 */
    670int afs_writepage(struct page *subpage, struct writeback_control *wbc)
    671{
    672	struct folio *folio = page_folio(subpage);
    673	ssize_t ret;
    674	loff_t start;
    675
    676	_enter("{%lx},", folio_index(folio));
    677
    678#ifdef CONFIG_AFS_FSCACHE
    679	folio_wait_fscache(folio);
    680#endif
    681
    682	start = folio_index(folio) * PAGE_SIZE;
    683	ret = afs_write_back_from_locked_folio(folio_mapping(folio), wbc,
    684					       folio, start, LLONG_MAX - start);
    685	if (ret < 0) {
    686		_leave(" = %zd", ret);
    687		return ret;
    688	}
    689
    690	_leave(" = 0");
    691	return 0;
    692}
    693
    694/*
    695 * write a region of pages back to the server
    696 */
    697static int afs_writepages_region(struct address_space *mapping,
    698				 struct writeback_control *wbc,
    699				 loff_t start, loff_t end, loff_t *_next)
    700{
    701	struct folio *folio;
    702	struct page *head_page;
    703	ssize_t ret;
    704	int n, skips = 0;
    705
    706	_enter("%llx,%llx,", start, end);
    707
    708	do {
    709		pgoff_t index = start / PAGE_SIZE;
    710
    711		n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE,
    712					     PAGECACHE_TAG_DIRTY, 1, &head_page);
    713		if (!n)
    714			break;
    715
    716		folio = page_folio(head_page);
    717		start = folio_pos(folio); /* May regress with THPs */
    718
    719		_debug("wback %lx", folio_index(folio));
    720
    721		/* At this point we hold neither the i_pages lock nor the
    722		 * page lock: the page may be truncated or invalidated
    723		 * (changing page->mapping to NULL), or even swizzled
    724		 * back from swapper_space to tmpfs file mapping
    725		 */
    726		if (wbc->sync_mode != WB_SYNC_NONE) {
    727			ret = folio_lock_killable(folio);
    728			if (ret < 0) {
    729				folio_put(folio);
    730				return ret;
    731			}
    732		} else {
    733			if (!folio_trylock(folio)) {
    734				folio_put(folio);
    735				return 0;
    736			}
    737		}
    738
    739		if (folio_mapping(folio) != mapping ||
    740		    !folio_test_dirty(folio)) {
    741			start += folio_size(folio);
    742			folio_unlock(folio);
    743			folio_put(folio);
    744			continue;
    745		}
    746
    747		if (folio_test_writeback(folio) ||
    748		    folio_test_fscache(folio)) {
    749			folio_unlock(folio);
    750			if (wbc->sync_mode != WB_SYNC_NONE) {
    751				folio_wait_writeback(folio);
    752#ifdef CONFIG_AFS_FSCACHE
    753				folio_wait_fscache(folio);
    754#endif
    755			} else {
    756				start += folio_size(folio);
    757			}
    758			folio_put(folio);
    759			if (wbc->sync_mode == WB_SYNC_NONE) {
    760				if (skips >= 5 || need_resched())
    761					break;
    762				skips++;
    763			}
    764			continue;
    765		}
    766
    767		if (!folio_clear_dirty_for_io(folio))
    768			BUG();
    769		ret = afs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
    770		folio_put(folio);
    771		if (ret < 0) {
    772			_leave(" = %zd", ret);
    773			return ret;
    774		}
    775
    776		start += ret;
    777
    778		cond_resched();
    779	} while (wbc->nr_to_write > 0);
    780
    781	*_next = start;
    782	_leave(" = 0 [%llx]", *_next);
    783	return 0;
    784}
    785
    786/*
    787 * write some of the pending data back to the server
    788 */
    789int afs_writepages(struct address_space *mapping,
    790		   struct writeback_control *wbc)
    791{
    792	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
    793	loff_t start, next;
    794	int ret;
    795
    796	_enter("");
    797
    798	/* We have to be careful as we can end up racing with setattr()
    799	 * truncating the pagecache since the caller doesn't take a lock here
    800	 * to prevent it.
    801	 */
    802	if (wbc->sync_mode == WB_SYNC_ALL)
    803		down_read(&vnode->validate_lock);
    804	else if (!down_read_trylock(&vnode->validate_lock))
    805		return 0;
    806
    807	if (wbc->range_cyclic) {
    808		start = mapping->writeback_index * PAGE_SIZE;
    809		ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
    810		if (ret == 0) {
    811			mapping->writeback_index = next / PAGE_SIZE;
    812			if (start > 0 && wbc->nr_to_write > 0) {
    813				ret = afs_writepages_region(mapping, wbc, 0,
    814							    start, &next);
    815				if (ret == 0)
    816					mapping->writeback_index =
    817						next / PAGE_SIZE;
    818			}
    819		}
    820	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
    821		ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
    822		if (wbc->nr_to_write > 0 && ret == 0)
    823			mapping->writeback_index = next / PAGE_SIZE;
    824	} else {
    825		ret = afs_writepages_region(mapping, wbc,
    826					    wbc->range_start, wbc->range_end, &next);
    827	}
    828
    829	up_read(&vnode->validate_lock);
    830	_leave(" = %d", ret);
    831	return ret;
    832}
    833
    834/*
    835 * write to an AFS file
    836 */
    837ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
    838{
    839	struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
    840	struct afs_file *af = iocb->ki_filp->private_data;
    841	ssize_t result;
    842	size_t count = iov_iter_count(from);
    843
    844	_enter("{%llx:%llu},{%zu},",
    845	       vnode->fid.vid, vnode->fid.vnode, count);
    846
    847	if (IS_SWAPFILE(&vnode->netfs.inode)) {
    848		printk(KERN_INFO
    849		       "AFS: Attempt to write to active swap file!\n");
    850		return -EBUSY;
    851	}
    852
    853	if (!count)
    854		return 0;
    855
    856	result = afs_validate(vnode, af->key);
    857	if (result < 0)
    858		return result;
    859
    860	result = generic_file_write_iter(iocb, from);
    861
    862	_leave(" = %zd", result);
    863	return result;
    864}
    865
    866/*
    867 * flush any dirty pages for this process, and check for write errors.
    868 * - the return status from this call provides a reliable indication of
    869 *   whether any write errors occurred for this process.
    870 */
    871int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
    872{
    873	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
    874	struct afs_file *af = file->private_data;
    875	int ret;
    876
    877	_enter("{%llx:%llu},{n=%pD},%d",
    878	       vnode->fid.vid, vnode->fid.vnode, file,
    879	       datasync);
    880
    881	ret = afs_validate(vnode, af->key);
    882	if (ret < 0)
    883		return ret;
    884
    885	return file_write_and_wait_range(file, start, end);
    886}
    887
    888/*
    889 * notification that a previously read-only page is about to become writable
    890 * - if it returns an error, the caller will deliver a bus error signal
    891 */
    892vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
    893{
    894	struct folio *folio = page_folio(vmf->page);
    895	struct file *file = vmf->vma->vm_file;
    896	struct inode *inode = file_inode(file);
    897	struct afs_vnode *vnode = AFS_FS_I(inode);
    898	struct afs_file *af = file->private_data;
    899	unsigned long priv;
    900	vm_fault_t ret = VM_FAULT_RETRY;
    901
    902	_enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
    903
    904	afs_validate(vnode, af->key);
    905
    906	sb_start_pagefault(inode->i_sb);
    907
    908	/* Wait for the page to be written to the cache before we allow it to
    909	 * be modified.  We then assume the entire page will need writing back.
    910	 */
    911#ifdef CONFIG_AFS_FSCACHE
    912	if (folio_test_fscache(folio) &&
    913	    folio_wait_fscache_killable(folio) < 0)
    914		goto out;
    915#endif
    916
    917	if (folio_wait_writeback_killable(folio))
    918		goto out;
    919
    920	if (folio_lock_killable(folio) < 0)
    921		goto out;
    922
    923	/* We mustn't change folio->private until writeback is complete as that
    924	 * details the portion of the page we need to write back and we might
    925	 * need to redirty the page if there's a problem.
    926	 */
    927	if (folio_wait_writeback_killable(folio) < 0) {
    928		folio_unlock(folio);
    929		goto out;
    930	}
    931
    932	priv = afs_folio_dirty(folio, 0, folio_size(folio));
    933	priv = afs_folio_dirty_mmapped(priv);
    934	if (folio_test_private(folio)) {
    935		folio_change_private(folio, (void *)priv);
    936		trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite+"), folio);
    937	} else {
    938		folio_attach_private(folio, (void *)priv);
    939		trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite"), folio);
    940	}
    941	file_update_time(file);
    942
    943	ret = VM_FAULT_LOCKED;
    944out:
    945	sb_end_pagefault(inode->i_sb);
    946	return ret;
    947}
    948
    949/*
    950 * Prune the keys cached for writeback.  The caller must hold vnode->wb_lock.
    951 */
    952void afs_prune_wb_keys(struct afs_vnode *vnode)
    953{
    954	LIST_HEAD(graveyard);
    955	struct afs_wb_key *wbk, *tmp;
    956
    957	/* Discard unused keys */
    958	spin_lock(&vnode->wb_lock);
    959
    960	if (!mapping_tagged(&vnode->netfs.inode.i_data, PAGECACHE_TAG_WRITEBACK) &&
    961	    !mapping_tagged(&vnode->netfs.inode.i_data, PAGECACHE_TAG_DIRTY)) {
    962		list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) {
    963			if (refcount_read(&wbk->usage) == 1)
    964				list_move(&wbk->vnode_link, &graveyard);
    965		}
    966	}
    967
    968	spin_unlock(&vnode->wb_lock);
    969
    970	while (!list_empty(&graveyard)) {
    971		wbk = list_entry(graveyard.next, struct afs_wb_key, vnode_link);
    972		list_del(&wbk->vnode_link);
    973		afs_put_wb_key(wbk);
    974	}
    975}
    976
    977/*
    978 * Clean up a page during invalidation.
    979 */
    980int afs_launder_folio(struct folio *folio)
    981{
    982	struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
    983	struct iov_iter iter;
    984	struct bio_vec bv[1];
    985	unsigned long priv;
    986	unsigned int f, t;
    987	int ret = 0;
    988
    989	_enter("{%lx}", folio->index);
    990
    991	priv = (unsigned long)folio_get_private(folio);
    992	if (folio_clear_dirty_for_io(folio)) {
    993		f = 0;
    994		t = folio_size(folio);
    995		if (folio_test_private(folio)) {
    996			f = afs_folio_dirty_from(folio, priv);
    997			t = afs_folio_dirty_to(folio, priv);
    998		}
    999
   1000		bv[0].bv_page = &folio->page;
   1001		bv[0].bv_offset = f;
   1002		bv[0].bv_len = t - f;
   1003		iov_iter_bvec(&iter, WRITE, bv, 1, bv[0].bv_len);
   1004
   1005		trace_afs_folio_dirty(vnode, tracepoint_string("launder"), folio);
   1006		ret = afs_store_data(vnode, &iter, folio_pos(folio) + f, true);
   1007	}
   1008
   1009	trace_afs_folio_dirty(vnode, tracepoint_string("laundered"), folio);
   1010	folio_detach_private(folio);
   1011	folio_wait_fscache(folio);
   1012	return ret;
   1013}
   1014
   1015/*
   1016 * Deal with the completion of writing the data to the cache.
   1017 */
   1018static void afs_write_to_cache_done(void *priv, ssize_t transferred_or_error,
   1019				    bool was_async)
   1020{
   1021	struct afs_vnode *vnode = priv;
   1022
   1023	if (IS_ERR_VALUE(transferred_or_error) &&
   1024	    transferred_or_error != -ENOBUFS)
   1025		afs_invalidate_cache(vnode, 0);
   1026}
   1027
   1028/*
   1029 * Save the write to the cache also.
   1030 */
   1031static void afs_write_to_cache(struct afs_vnode *vnode,
   1032			       loff_t start, size_t len, loff_t i_size,
   1033			       bool caching)
   1034{
   1035	fscache_write_to_cache(afs_vnode_cache(vnode),
   1036			       vnode->netfs.inode.i_mapping, start, len, i_size,
   1037			       afs_write_to_cache_done, vnode, caching);
   1038}
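
The dirty-range helpers used throughout this file (afs_folio_dirty(), afs_folio_dirty_from(), afs_folio_dirty_to(), afs_folio_dirty_mmapped()) are defined in fs/afs/internal.h, not shown on this page. As a rough, simplified sketch of the idea only — not the kernel's actual bit layout, which also carries an "mmapped" flag and accounts for the folio size — the following standalone user-space program packs a per-folio dirty byte range into a single unsigned long (as stored in folio->private) and merges an overlapping write the way afs_write_end() does:

/*
 * Simplified illustration of the per-folio dirty-range bookkeeping:
 * lower half of the word = "from" (inclusive), upper half = "to"
 * (exclusive).  The real encoding lives in fs/afs/internal.h.
 */
#include <assert.h>
#include <limits.h>
#include <stdio.h>

#define PRIV_SHIFT	(sizeof(unsigned long) * CHAR_BIT / 2)
#define PRIV_MASK	((1UL << PRIV_SHIFT) - 1)

static unsigned long pack_dirty(unsigned long from, unsigned long to)
{
	assert(from <= to && to <= PRIV_MASK);
	return (to << PRIV_SHIFT) | from;
}

static unsigned long dirty_from(unsigned long priv)
{
	return priv & PRIV_MASK;
}

static unsigned long dirty_to(unsigned long priv)
{
	return (priv >> PRIV_SHIFT) & PRIV_MASK;
}

int main(void)
{
	/* A first write dirtied bytes [512, 1024); a later write covers
	 * [768, 2048).  Merge the two ranges as afs_write_end() does when
	 * folio_test_private() is true.
	 */
	unsigned long priv = pack_dirty(512, 1024);
	unsigned long f = dirty_from(priv), t = dirty_to(priv);

	if (768 < f)
		f = 768;
	if (2048 > t)
		t = 2048;
	priv = pack_dirty(f, t);

	printf("dirty range now [%lu, %lu)\n", dirty_from(priv), dirty_to(priv));
	return 0;
}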