buffered_read.c (12943B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* Network filesystem high-level buffered read support. 3 * 4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8#include <linux/export.h> 9#include <linux/task_io_accounting_ops.h> 10#include "internal.h" 11 12/* 13 * Unlock the folios in a read operation. We need to set PG_fscache on any 14 * folios we're going to write back before we unlock them. 15 */ 16void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) 17{ 18 struct netfs_io_subrequest *subreq; 19 struct folio *folio; 20 unsigned int iopos, account = 0; 21 pgoff_t start_page = rreq->start / PAGE_SIZE; 22 pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; 23 bool subreq_failed = false; 24 25 XA_STATE(xas, &rreq->mapping->i_pages, start_page); 26 27 if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) { 28 __clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags); 29 list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { 30 __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); 31 } 32 } 33 34 /* Walk through the pagecache and the I/O request lists simultaneously. 35 * We may have a mixture of cached and uncached sections and we only 36 * really want to write out the uncached sections. This is slightly 37 * complicated by the possibility that we might have huge pages with a 38 * mixture inside. 39 */ 40 subreq = list_first_entry(&rreq->subrequests, 41 struct netfs_io_subrequest, rreq_link); 42 iopos = 0; 43 subreq_failed = (subreq->error < 0); 44 45 trace_netfs_rreq(rreq, netfs_rreq_trace_unlock); 46 47 rcu_read_lock(); 48 xas_for_each(&xas, folio, last_page) { 49 unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE; 50 unsigned int pgend = pgpos + folio_size(folio); 51 bool pg_failed = false; 52 53 for (;;) { 54 if (!subreq) { 55 pg_failed = true; 56 break; 57 } 58 if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) 59 folio_start_fscache(folio); 60 pg_failed |= subreq_failed; 61 if (pgend < iopos + subreq->len) 62 break; 63 64 account += subreq->transferred; 65 iopos += subreq->len; 66 if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { 67 subreq = list_next_entry(subreq, rreq_link); 68 subreq_failed = (subreq->error < 0); 69 } else { 70 subreq = NULL; 71 subreq_failed = false; 72 } 73 if (pgend == iopos) 74 break; 75 } 76 77 if (!pg_failed) { 78 flush_dcache_folio(folio); 79 folio_mark_uptodate(folio); 80 } 81 82 if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { 83 if (folio_index(folio) == rreq->no_unlock_folio && 84 test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) 85 _debug("no unlock"); 86 else 87 folio_unlock(folio); 88 } 89 } 90 rcu_read_unlock(); 91 92 task_io_account_read(account); 93 if (rreq->netfs_ops->done) 94 rreq->netfs_ops->done(rreq); 95} 96 97static void netfs_cache_expand_readahead(struct netfs_io_request *rreq, 98 loff_t *_start, size_t *_len, loff_t i_size) 99{ 100 struct netfs_cache_resources *cres = &rreq->cache_resources; 101 102 if (cres->ops && cres->ops->expand_readahead) 103 cres->ops->expand_readahead(cres, _start, _len, i_size); 104} 105 106static void netfs_rreq_expand(struct netfs_io_request *rreq, 107 struct readahead_control *ractl) 108{ 109 /* Give the cache a chance to change the request parameters. The 110 * resultant request must contain the original region. 111 */ 112 netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size); 113 114 /* Give the netfs a chance to change the request parameters. The 115 * resultant request must contain the original region. 116 */ 117 if (rreq->netfs_ops->expand_readahead) 118 rreq->netfs_ops->expand_readahead(rreq); 119 120 /* Expand the request if the cache wants it to start earlier. Note 121 * that the expansion may get further extended if the VM wishes to 122 * insert THPs and the preferred start and/or end wind up in the middle 123 * of THPs. 124 * 125 * If this is the case, however, the THP size should be an integer 126 * multiple of the cache granule size, so we get a whole number of 127 * granules to deal with. 128 */ 129 if (rreq->start != readahead_pos(ractl) || 130 rreq->len != readahead_length(ractl)) { 131 readahead_expand(ractl, rreq->start, rreq->len); 132 rreq->start = readahead_pos(ractl); 133 rreq->len = readahead_length(ractl); 134 135 trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl), 136 netfs_read_trace_expanded); 137 } 138} 139 140/** 141 * netfs_readahead - Helper to manage a read request 142 * @ractl: The description of the readahead request 143 * 144 * Fulfil a readahead request by drawing data from the cache if possible, or 145 * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O 146 * requests from different sources will get munged together. If necessary, the 147 * readahead window can be expanded in either direction to a more convenient 148 * alighment for RPC efficiency or to make storage in the cache feasible. 149 * 150 * The calling netfs must initialise a netfs context contiguous to the vfs 151 * inode before calling this. 152 * 153 * This is usable whether or not caching is enabled. 154 */ 155void netfs_readahead(struct readahead_control *ractl) 156{ 157 struct netfs_io_request *rreq; 158 struct netfs_inode *ctx = netfs_inode(ractl->mapping->host); 159 int ret; 160 161 _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); 162 163 if (readahead_count(ractl) == 0) 164 return; 165 166 rreq = netfs_alloc_request(ractl->mapping, ractl->file, 167 readahead_pos(ractl), 168 readahead_length(ractl), 169 NETFS_READAHEAD); 170 if (IS_ERR(rreq)) 171 return; 172 173 if (ctx->ops->begin_cache_operation) { 174 ret = ctx->ops->begin_cache_operation(rreq); 175 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 176 goto cleanup_free; 177 } 178 179 netfs_stat(&netfs_n_rh_readahead); 180 trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl), 181 netfs_read_trace_readahead); 182 183 netfs_rreq_expand(rreq, ractl); 184 185 /* Drop the refs on the folios here rather than in the cache or 186 * filesystem. The locks will be dropped in netfs_rreq_unlock(). 187 */ 188 while (readahead_folio(ractl)) 189 ; 190 191 netfs_begin_read(rreq, false); 192 return; 193 194cleanup_free: 195 netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); 196 return; 197} 198EXPORT_SYMBOL(netfs_readahead); 199 200/** 201 * netfs_read_folio - Helper to manage a read_folio request 202 * @file: The file to read from 203 * @folio: The folio to read 204 * 205 * Fulfil a read_folio request by drawing data from the cache if 206 * possible, or the netfs if not. Space beyond the EOF is zero-filled. 207 * Multiple I/O requests from different sources will get munged together. 208 * 209 * The calling netfs must initialise a netfs context contiguous to the vfs 210 * inode before calling this. 211 * 212 * This is usable whether or not caching is enabled. 213 */ 214int netfs_read_folio(struct file *file, struct folio *folio) 215{ 216 struct address_space *mapping = folio_file_mapping(folio); 217 struct netfs_io_request *rreq; 218 struct netfs_inode *ctx = netfs_inode(mapping->host); 219 int ret; 220 221 _enter("%lx", folio_index(folio)); 222 223 rreq = netfs_alloc_request(mapping, file, 224 folio_file_pos(folio), folio_size(folio), 225 NETFS_READPAGE); 226 if (IS_ERR(rreq)) { 227 ret = PTR_ERR(rreq); 228 goto alloc_error; 229 } 230 231 if (ctx->ops->begin_cache_operation) { 232 ret = ctx->ops->begin_cache_operation(rreq); 233 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 234 goto discard; 235 } 236 237 netfs_stat(&netfs_n_rh_readpage); 238 trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage); 239 return netfs_begin_read(rreq, true); 240 241discard: 242 netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); 243alloc_error: 244 folio_unlock(folio); 245 return ret; 246} 247EXPORT_SYMBOL(netfs_read_folio); 248 249/* 250 * Prepare a folio for writing without reading first 251 * @folio: The folio being prepared 252 * @pos: starting position for the write 253 * @len: length of write 254 * @always_fill: T if the folio should always be completely filled/cleared 255 * 256 * In some cases, write_begin doesn't need to read at all: 257 * - full folio write 258 * - write that lies in a folio that is completely beyond EOF 259 * - write that covers the folio from start to EOF or beyond it 260 * 261 * If any of these criteria are met, then zero out the unwritten parts 262 * of the folio and return true. Otherwise, return false. 263 */ 264static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len, 265 bool always_fill) 266{ 267 struct inode *inode = folio_inode(folio); 268 loff_t i_size = i_size_read(inode); 269 size_t offset = offset_in_folio(folio, pos); 270 size_t plen = folio_size(folio); 271 272 if (unlikely(always_fill)) { 273 if (pos - offset + len <= i_size) 274 return false; /* Page entirely before EOF */ 275 zero_user_segment(&folio->page, 0, plen); 276 folio_mark_uptodate(folio); 277 return true; 278 } 279 280 /* Full folio write */ 281 if (offset == 0 && len >= plen) 282 return true; 283 284 /* Page entirely beyond the end of the file */ 285 if (pos - offset >= i_size) 286 goto zero_out; 287 288 /* Write that covers from the start of the folio to EOF or beyond */ 289 if (offset == 0 && (pos + len) >= i_size) 290 goto zero_out; 291 292 return false; 293zero_out: 294 zero_user_segments(&folio->page, 0, offset, offset + len, plen); 295 return true; 296} 297 298/** 299 * netfs_write_begin - Helper to prepare for writing 300 * @ctx: The netfs context 301 * @file: The file to read from 302 * @mapping: The mapping to read from 303 * @pos: File position at which the write will begin 304 * @len: The length of the write (may extend beyond the end of the folio chosen) 305 * @_folio: Where to put the resultant folio 306 * @_fsdata: Place for the netfs to store a cookie 307 * 308 * Pre-read data for a write-begin request by drawing data from the cache if 309 * possible, or the netfs if not. Space beyond the EOF is zero-filled. 310 * Multiple I/O requests from different sources will get munged together. If 311 * necessary, the readahead window can be expanded in either direction to a 312 * more convenient alighment for RPC efficiency or to make storage in the cache 313 * feasible. 314 * 315 * The calling netfs must provide a table of operations, only one of which, 316 * issue_op, is mandatory. 317 * 318 * The check_write_begin() operation can be provided to check for and flush 319 * conflicting writes once the folio is grabbed and locked. It is passed a 320 * pointer to the fsdata cookie that gets returned to the VM to be passed to 321 * write_end. It is permitted to sleep. It should return 0 if the request 322 * should go ahead; unlock the folio and return -EAGAIN to cause the folio to 323 * be regot; or return an error. 324 * 325 * The calling netfs must initialise a netfs context contiguous to the vfs 326 * inode before calling this. 327 * 328 * This is usable whether or not caching is enabled. 329 */ 330int netfs_write_begin(struct netfs_inode *ctx, 331 struct file *file, struct address_space *mapping, 332 loff_t pos, unsigned int len, struct folio **_folio, 333 void **_fsdata) 334{ 335 struct netfs_io_request *rreq; 336 struct folio *folio; 337 unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE; 338 pgoff_t index = pos >> PAGE_SHIFT; 339 int ret; 340 341 DEFINE_READAHEAD(ractl, file, NULL, mapping, index); 342 343retry: 344 folio = __filemap_get_folio(mapping, index, fgp_flags, 345 mapping_gfp_mask(mapping)); 346 if (!folio) 347 return -ENOMEM; 348 349 if (ctx->ops->check_write_begin) { 350 /* Allow the netfs (eg. ceph) to flush conflicts. */ 351 ret = ctx->ops->check_write_begin(file, pos, len, folio, _fsdata); 352 if (ret < 0) { 353 trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin); 354 if (ret == -EAGAIN) 355 goto retry; 356 goto error; 357 } 358 } 359 360 if (folio_test_uptodate(folio)) 361 goto have_folio; 362 363 /* If the page is beyond the EOF, we want to clear it - unless it's 364 * within the cache granule containing the EOF, in which case we need 365 * to preload the granule. 366 */ 367 if (!netfs_is_cache_enabled(ctx) && 368 netfs_skip_folio_read(folio, pos, len, false)) { 369 netfs_stat(&netfs_n_rh_write_zskip); 370 goto have_folio_no_wait; 371 } 372 373 rreq = netfs_alloc_request(mapping, file, 374 folio_file_pos(folio), folio_size(folio), 375 NETFS_READ_FOR_WRITE); 376 if (IS_ERR(rreq)) { 377 ret = PTR_ERR(rreq); 378 goto error; 379 } 380 rreq->no_unlock_folio = folio_index(folio); 381 __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); 382 383 if (ctx->ops->begin_cache_operation) { 384 ret = ctx->ops->begin_cache_operation(rreq); 385 if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) 386 goto error_put; 387 } 388 389 netfs_stat(&netfs_n_rh_write_begin); 390 trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin); 391 392 /* Expand the request to meet caching requirements and download 393 * preferences. 394 */ 395 ractl._nr_pages = folio_nr_pages(folio); 396 netfs_rreq_expand(rreq, &ractl); 397 398 /* We hold the folio locks, so we can drop the references */ 399 folio_get(folio); 400 while (readahead_folio(&ractl)) 401 ; 402 403 ret = netfs_begin_read(rreq, true); 404 if (ret < 0) 405 goto error; 406 407have_folio: 408 ret = folio_wait_fscache_killable(folio); 409 if (ret < 0) 410 goto error; 411have_folio_no_wait: 412 *_folio = folio; 413 _leave(" = 0"); 414 return 0; 415 416error_put: 417 netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); 418error: 419 folio_unlock(folio); 420 folio_put(folio); 421 _leave(" = %d", ret); 422 return ret; 423} 424EXPORT_SYMBOL(netfs_write_begin);