xfs_refcount.c (48716B)
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_refcount.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"

struct kmem_cache	*xfs_refcount_intent_cache;

/* Allowable refcount adjustment amounts. */
enum xfs_refc_adjust_op {
	XFS_REFCOUNT_ADJUST_INCREASE	= 1,
	XFS_REFCOUNT_ADJUST_DECREASE	= -1,
	XFS_REFCOUNT_ADJUST_COW_ALLOC	= 0,
	XFS_REFCOUNT_ADJUST_COW_FREE	= -1,
};

STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);
STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);

/*
 * Look up the first record less than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_le(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno,
			XFS_LOOKUP_LE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}

/*
 * Look up the first record greater than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_ge(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno,
			XFS_LOOKUP_GE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Look up the first record equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_eq(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno,
			XFS_LOOKUP_LE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

/* Convert on-disk record to in-core format. */
void
xfs_refcount_btrec_to_irec(
	const union xfs_btree_rec	*rec,
	struct xfs_refcount_irec	*irec)
{
	irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
	irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
	irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
}

/*
 * Get the data from the pointed-to record.
 */
int
xfs_refcount_get_rec(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*stat)
{
	struct xfs_mount		*mp = cur->bc_mp;
	xfs_agnumber_t			agno = cur->bc_ag.pag->pag_agno;
	union xfs_btree_rec		*rec;
	int				error;
	xfs_agblock_t			realstart;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (error || !*stat)
		return error;

	xfs_refcount_btrec_to_irec(rec, irec);

	agno = cur->bc_ag.pag->pag_agno;
	if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
		goto out_bad_rec;

	/* handle special COW-staging state */
	realstart = irec->rc_startblock;
	if (realstart & XFS_REFC_COW_START) {
		if (irec->rc_refcount != 1)
			goto out_bad_rec;
		realstart &= ~XFS_REFC_COW_START;
	} else if (irec->rc_refcount < 2) {
		goto out_bad_rec;
	}

	/* check for valid extent range, including overflow */
	if (!xfs_verify_agbno(mp, agno, realstart))
		goto out_bad_rec;
	if (realstart > realstart + irec->rc_blockcount)
		goto out_bad_rec;
	if (!xfs_verify_agbno(mp, agno, realstart + irec->rc_blockcount - 1))
		goto out_bad_rec;

	if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
		goto out_bad_rec;

	trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
	return 0;

out_bad_rec:
	xfs_warn(mp,
		"Refcount BTree record corruption in AG %d detected!", agno);
	xfs_warn(mp,
		"Start block 0x%x, block count 0x%x, references 0x%x",
		irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
	return -EFSCORRUPTED;
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_update(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec)
{
	union xfs_btree_rec	rec;
	int			error;

	trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
	rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock);
	rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
	rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
	error = xfs_btree_update(cur, &rec);
	if (error)
		trace_xfs_refcount_update_error(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Insert the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
int
xfs_refcount_insert(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*i)
{
	int				error;

	trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
	cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
	cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
	cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
	error = xfs_btree_insert(cur, i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

out_error:
	if (error)
		trace_xfs_refcount_insert_error(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Remove the record referred to by cur, then set the pointer to the spot
 * where the record could be re-inserted, in case we want to increment or
 * decrement the cursor.
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_delete(
	struct xfs_btree_cur	*cur,
	int			*i)
{
	struct xfs_refcount_irec	irec;
	int				found_rec;
	int				error;

	error = xfs_refcount_get_rec(cur, &irec, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec);
	error = xfs_btree_delete(cur, i);
	if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (error)
		goto out_error;
	error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec);
out_error:
	if (error)
		trace_xfs_refcount_delete_error(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Adjusting the Reference Count
 *
 * As stated elsewhere, the reference count btree (refcbt) stores
 * >1 reference counts for extents of physical blocks.  In this
 * operation, we're either raising or lowering the reference count of
 * some subrange stored in the tree:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+---------
 *  2  |   | 3 |  4  | |17|   55   |   10
 * ----+   +---+-----+ +--+--------+---------
 * X axis is physical blocks number;
 * reference counts are the numbers inside the rectangles
 *
 * The first thing we need to do is to ensure that there are no
 * refcount extents crossing either boundary of the range to be
 * adjusted.  For any extent that does cross a boundary, split it into
 * two extents so that we can increment the refcount of one of the
 * pieces later:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+----+----
 *  2  |   | 3 |  2  | |17|   55   | 10 | 10
 * ----+   +---+-----+ +--+--------+----+----
 *
 * For this next step, let's assume that all the physical blocks in
 * the adjustment range are mapped to a file and are therefore in use
 * at least once.  Therefore, we can infer that any gap in the
 * refcount tree within the adjustment range represents a physical
 * extent with refcount == 1:
 *
 *      <------ adjustment range ------>
 * ----+---+---+-----+-+--+--------+----+----
 *  2  |"1"| 3 |  2  |1|17|   55   | 10 | 10
 * ----+---+---+-----+-+--+--------+----+----
 *  ^
 *
 * For each extent that falls within the interval range, figure out
 * which extent is to the left or the right of that extent.  Now we
 * have a left, current, and right extent.  If the new reference count
 * of the center extent enables us to merge left, center, and right
 * into one record covering all three, do so.  If the center extent is
 * at the left end of the range, abuts the left extent, and its new
 * reference count matches the left extent's record, then merge them.
 * If the center extent is at the right end of the range, abuts the
 * right extent, and the reference counts match, merge those.  In the
 * example, we can left merge (assuming an increment operation):
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 3 |  2  |1|17|   55   | 10 | 10
 * --------+---+-----+-+--+--------+----+----
 *  ^
 *
 * For all other extents within the range, adjust the reference count
 * or delete it if the refcount falls below 2.  If we were
 * incrementing, the end result looks like this:
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 4 |  3  |2|18|   56   | 11 | 10
 * --------+---+-----+-+--+--------+----+----
 *
 * The result of a decrement operation looks as such:
 *
 *      <------ adjustment range ------>
 * ----+   +---+       +--+--------+----+----
 *  2  |   | 2 |       |16|   54   |  9 | 10
 * ----+   +---+       +--+--------+----+----
 *      DDDD    111111DD
 *
 * The blocks marked "D" are freed; the blocks marked "1" are only
 * referenced once and therefore the record is removed from the
 * refcount btree.
 */

/* Next block after this extent. */
static inline xfs_agblock_t
xfs_refc_next(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock + rc->rc_blockcount;
}

/*
 * Split a refcount extent that crosses agbno.
 */
STATIC int
xfs_refcount_split_extent(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	bool				*shape_changed)
{
	struct xfs_refcount_irec	rcext, tmp;
	int				found_rec;
	int				error;

	*shape_changed = false;
	error = xfs_refcount_lookup_le(cur, agbno, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
		return 0;

	*shape_changed = true;
	trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			&rcext, agbno);

	/* Establish the right extent. */
	tmp = rcext;
	tmp.rc_startblock = agbno;
	tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
	error = xfs_refcount_update(cur, &tmp);
	if (error)
		goto out_error;

	/* Insert the left extent. */
	tmp = rcext;
	tmp.rc_blockcount = agbno - rcext.rc_startblock;
	error = xfs_refcount_insert(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	return error;

out_error:
	trace_xfs_refcount_split_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

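/*
 * For example, splitting a record (startblock 100, blockcount 20, refcount 3)
 * at agbno 108 rewrites the existing record to cover the right piece
 * (108, 12, 3) and inserts a new record for the left piece (100, 8, 3); the
 * numbers are purely illustrative.
 */
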
/*
 * Merge the left, center, and right extents.
 */
STATIC int
xfs_refcount_merge_center_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*center,
	struct xfs_refcount_irec	*right,
	unsigned long long		extlen,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_center_extents(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, left, center, right);

	/*
	 * Make sure the center and right extents are not in the btree.
	 * If the center extent was synthesized, the first delete call
	 * removes the right extent and we skip the second deletion.
	 * If center and right were in the btree, then the first delete
	 * call removes the center and the second one removes the right
	 * extent.
	 */
	error = xfs_refcount_lookup_ge(cur, center->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	error = xfs_refcount_delete(cur, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (center->rc_refcount > 1) {
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	left->rc_blockcount = extlen;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*aglen = 0;
	return error;

out_error:
	trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Merge with the left extent.
 */
STATIC int
xfs_refcount_merge_left_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			*agbno,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_left_extent(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, left, cleft);

	/* If the extent at agbno (cleft) wasn't synthesized, remove it. */
	if (cleft->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cleft->rc_startblock,
				&found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	left->rc_blockcount += cleft->rc_blockcount;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*agbno += cleft->rc_blockcount;
	*aglen -= cleft->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

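/*
 * An illustrative left merge: with left = (100, 8, 3) and cleft = (108, 4, 2)
 * during an increment, cleft's new refcount matches left, so cleft's record
 * (if it was real) is deleted, left is rewritten as (100, 12, 3), and the
 * caller's agbno/aglen advance past the four merged blocks.
 */
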
/*
 * Merge with the right extent.
 */
STATIC int
xfs_refcount_merge_right_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_right_extent(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, cright, right);

	/*
	 * If the extent ending at agbno+aglen (cright) wasn't synthesized,
	 * remove it.
	 */
	if (cright->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cright->rc_startblock,
				&found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the right extent. */
	error = xfs_refcount_lookup_le(cur, right->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	right->rc_startblock -= cright->rc_blockcount;
	right->rc_blockcount += cright->rc_blockcount;
	error = xfs_refcount_update(cur, right);
	if (error)
		goto out_error;

	*aglen -= cright->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

#define XFS_FIND_RCEXT_SHARED	1
#define XFS_FIND_RCEXT_COW	2
/*
 * Find the left extent and the one after it (cleft).  This function assumes
 * that we've already split any extent crossing agbno.
 */
STATIC int
xfs_refcount_find_left_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	int				flags)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (xfs_refc_next(&tmp) != agbno)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a left extent; retrieve (or invent) the next right one */
	*left = tmp;

	error = xfs_btree_increment(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/* if tmp starts at the end of our range, just use that */
		if (tmp.rc_startblock == agbno)
			*cleft = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the start of the
			 * range we're interested in (refcount == 1) so
			 * synthesize the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cleft->rc_startblock = agbno;
			cleft->rc_blockcount = min(aglen,
					tmp.rc_startblock - agbno);
			cleft->rc_refcount = 1;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cleft->rc_startblock = agbno;
		cleft->rc_blockcount = aglen;
		cleft->rc_refcount = 1;
	}
	trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			left, cleft, agbno);
	return error;

out_error:
	trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Find the right extent and the one before it (cright).  This function
 * assumes that we've already split any extents crossing agbno + aglen.
 */
STATIC int
xfs_refcount_find_right_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	int				flags)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (tmp.rc_startblock != agbno + aglen)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a right extent; retrieve (or invent) the next left one */
	*right = tmp;

	error = xfs_btree_decrement(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/* if tmp ends at the end of our range, just use that */
		if (xfs_refc_next(&tmp) == agbno + aglen)
			*cright = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the end of the
			 * range we're interested in (refcount == 1) so
			 * create the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
			cright->rc_blockcount = right->rc_startblock -
					cright->rc_startblock;
			cright->rc_refcount = 1;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cright->rc_startblock = agbno;
		cright->rc_blockcount = aglen;
		cright->rc_refcount = 1;
	}
	trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			cright, right, agbno + aglen);
	return error;

out_error:
	trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/* Is this extent valid? */
static inline bool
xfs_refc_valid(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock != NULLAGBLOCK;
}

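/*
 * Note that cleft/cright may be synthesized: when the refcount btree has no
 * record at the edge of the adjustment range, the helpers above invent an
 * extent with refcount == 1.  For example, with agbno = 10, aglen = 6 and
 * the next real record starting at block 12, cleft comes back as (10, 2, 1).
 * Synthesized extents exist only in memory and are never deleted from the
 * btree during a merge.
 */
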
/*
 * Try to merge with any extents on the boundaries of the adjustment range.
 */
STATIC int
xfs_refcount_merge_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op adjust,
	int			flags,
	bool			*shape_changed)
{
	struct xfs_refcount_irec	left = {0}, cleft = {0};
	struct xfs_refcount_irec	cright = {0}, right = {0};
	int				error;
	unsigned long long		ulen;
	bool				cequal;

	*shape_changed = false;
	/*
	 * Find the extent just below agbno [left], just above agbno [cleft],
	 * just below (agbno + aglen) [cright], and just above (agbno + aglen)
	 * [right].
	 */
	error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno,
			*aglen, flags);
	if (error)
		return error;
	error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno,
			*aglen, flags);
	if (error)
		return error;

	/* No left or right extent to merge; exit. */
	if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
		return 0;

	cequal = (cleft.rc_startblock == cright.rc_startblock) &&
		 (cleft.rc_blockcount == cright.rc_blockcount);

	/* Try to merge left, cleft, and right.  cleft must == cright. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
			right.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
	    xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    right.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_center_extents(cur, &left, &cleft,
				&right, ulen, aglen);
	}

	/* Try to merge left and cleft. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
				agbno, aglen);
		if (error)
			return error;

		/*
		 * If we just merged left + cleft and cleft == cright,
		 * we no longer have a cright to merge with right.  We're done.
		 */
		if (cequal)
			return 0;
	}

	/* Try to merge cright and right. */
	ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
	if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
	    right.rc_refcount == cright.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_right_extent(cur, &right, &cright,
				aglen);
	}

	return error;
}

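/*
 * Example of the center merge test above, using invented numbers: when
 * incrementing with left = (100, 8, 3), cleft = cright = (108, 4, 2) and
 * right = (112, 6, 3), both neighbors match cleft's refcount plus the
 * adjustment, and the combined length 8 + 4 + 6 = 18 is well below
 * MAXREFCEXTLEN, so the three records collapse into a single (100, 18, 3)
 * record and the middle adjustment loop has nothing left to do.
 */
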
/*
 * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
 * true incorrectly is a shutdown FS; the penalty for guessing false
 * incorrectly is more transaction rolls than might be necessary.
 * Be conservative here.
 */
static bool
xfs_refcount_still_have_space(
	struct xfs_btree_cur		*cur)
{
	unsigned long			overhead;

	/*
	 * Worst case estimate: full splits of the free space and rmap btrees
	 * to handle each of the shape changes to the refcount btree.
	 */
	overhead = xfs_allocfree_block_count(cur->bc_mp,
				cur->bc_ag.refc.shape_changes);
	overhead += cur->bc_mp->m_refc_maxlevels;
	overhead *= cur->bc_mp->m_sb.sb_blocksize;

	/*
	 * Only allow 2 refcount extent updates per transaction if the
	 * refcount continue update "error" has been injected.
	 */
	if (cur->bc_ag.refc.nr_ops > 2 &&
	    XFS_TEST_ERROR(false, cur->bc_mp,
			XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
		return false;

	if (cur->bc_ag.refc.nr_ops == 0)
		return true;
	else if (overhead > cur->bc_tp->t_log_res)
		return false;
	return  cur->bc_tp->t_log_res - overhead >
		cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
}

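/*
 * To make the estimate above concrete with invented numbers: at a 4096-byte
 * block size, a worst-case xfs_allocfree_block_count() of 20 blocks plus a
 * 3-level refcount btree gives overhead = (20 + 3) * 4096 = 94208 bytes.
 * Updates may continue only while the transaction's log reservation
 * (t_log_res) exceeds that overhead by more than
 * nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; the real values depend on the
 * filesystem geometry.
 */
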
/*
 * Adjust the refcounts of middle extents.  At this point we should have
 * split extents that crossed the adjustment range; merged with adjacent
 * extents; and updated agbno/aglen to reflect the merges.  Therefore,
 * all we have to do is update the extents inside [agbno, agbno + aglen].
 */
STATIC int
xfs_refcount_adjust_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;
	xfs_fsblock_t			fsbno;

	/* Merging did all the work already. */
	if (*aglen == 0)
		return 0;

	error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec);
	if (error)
		goto out_error;

	while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
		error = xfs_refcount_get_rec(cur, &ext, &found_rec);
		if (error)
			goto out_error;
		if (!found_rec) {
			ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
			ext.rc_blockcount = 0;
			ext.rc_refcount = 0;
		}

		/*
		 * Deal with a hole in the refcount tree; if a file maps to
		 * these blocks and there's no refcountbt record, pretend that
		 * there is one with refcount == 1.
		 */
		if (ext.rc_startblock != *agbno) {
			tmp.rc_startblock = *agbno;
			tmp.rc_blockcount = min(*aglen,
					ext.rc_startblock - *agbno);
			tmp.rc_refcount = 1 + adj;
			trace_xfs_refcount_modify_extent(cur->bc_mp,
					cur->bc_ag.pag->pag_agno, &tmp);

			/*
			 * Either cover the hole (increment) or
			 * delete the range (decrement).
			 */
			cur->bc_ag.refc.nr_ops++;
			if (tmp.rc_refcount) {
				error = xfs_refcount_insert(cur, &tmp,
						&found_tmp);
				if (error)
					goto out_error;
				if (XFS_IS_CORRUPT(cur->bc_mp,
						   found_tmp != 1)) {
					error = -EFSCORRUPTED;
					goto out_error;
				}
			} else {
				fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
						cur->bc_ag.pag->pag_agno,
						tmp.rc_startblock);
				xfs_free_extent_later(cur->bc_tp, fsbno,
						  tmp.rc_blockcount, NULL);
			}

			(*agbno) += tmp.rc_blockcount;
			(*aglen) -= tmp.rc_blockcount;

			error = xfs_refcount_lookup_ge(cur, *agbno,
					&found_rec);
			if (error)
				goto out_error;
		}

		/* Stop if there's nothing left to modify */
		if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
			break;

		/*
		 * Adjust the reference count and either update the tree
		 * (incr) or free the blocks (decr).
		 */
		if (ext.rc_refcount == MAXREFCOUNT)
			goto skip;
		ext.rc_refcount += adj;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, &ext);
		cur->bc_ag.refc.nr_ops++;
		if (ext.rc_refcount > 1) {
			error = xfs_refcount_update(cur, &ext);
			if (error)
				goto out_error;
		} else if (ext.rc_refcount == 1) {
			error = xfs_refcount_delete(cur, &found_rec);
			if (error)
				goto out_error;
			if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
				error = -EFSCORRUPTED;
				goto out_error;
			}
			goto advloop;
		} else {
			fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
					cur->bc_ag.pag->pag_agno,
					ext.rc_startblock);
			xfs_free_extent_later(cur->bc_tp, fsbno,
					ext.rc_blockcount, NULL);
		}

skip:
		error = xfs_btree_increment(cur, 0, &found_rec);
		if (error)
			goto out_error;

advloop:
		(*agbno) += ext.rc_blockcount;
		(*aglen) -= ext.rc_blockcount;
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/* Adjust the reference count of a range of AG blocks. */
STATIC int
xfs_refcount_adjust(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	xfs_agblock_t		*new_agbno,
	xfs_extlen_t		*new_aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			shape_changes = 0;
	int			error;

	*new_agbno = agbno;
	*new_aglen = aglen;
	if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
		trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.pag->pag_agno,
				agbno, aglen);
	else
		trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.pag->pag_agno,
				agbno, aglen);

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj,
			XFS_FIND_RCEXT_SHARED, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;
	if (shape_changes)
		cur->bc_ag.refc.shape_changes++;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			error, _RET_IP_);
	return error;
}

/* Clean up after calling xfs_refcount_finish_one. */
void
xfs_refcount_finish_one_cleanup(
	struct xfs_trans	*tp,
	struct xfs_btree_cur	*rcur,
	int			error)
{
	struct xfs_buf		*agbp;

	if (rcur == NULL)
		return;
	agbp = rcur->bc_ag.agbp;
	xfs_btree_del_cursor(rcur, error);
	if (error)
		xfs_trans_brelse(tp, agbp);
}

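/*
 * For orientation, the usual lifecycle of a refcount update as seen from
 * this file: __xfs_refcount_add() (below) queues an intent on the
 * transaction's deferred work list; deferred-op processing later calls
 * xfs_refcount_finish_one() for the intent, possibly leaving a
 * new_fsb/new_len remainder to be requeued if the transaction runs low on
 * space; and xfs_refcount_finish_one_cleanup() releases the cursor when the
 * item is finished.
 */
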
/*
 * Process one of the deferred refcount operations.  We pass back the
 * btree cursor to maintain our lock on the btree between calls.
 * This saves time and eliminates a buffer deadlock between the
 * superblock and the AGF because we'll always grab them in the same
 * order.
 */
int
xfs_refcount_finish_one(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount,
	xfs_fsblock_t			*new_fsb,
	xfs_extlen_t			*new_len,
	struct xfs_btree_cur		**pcur)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*rcur;
	struct xfs_buf			*agbp = NULL;
	int				error = 0;
	xfs_agblock_t			bno;
	xfs_agblock_t			new_agbno;
	unsigned long			nr_ops = 0;
	int				shape_changes = 0;
	struct xfs_perag		*pag;

	pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, startblock));
	bno = XFS_FSB_TO_AGBNO(mp, startblock);

	trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock),
			type, XFS_FSB_TO_AGBNO(mp, startblock),
			blockcount);

	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) {
		error = -EIO;
		goto out_drop;
	}

	/*
	 * If we haven't gotten a cursor or the cursor AG doesn't match
	 * the startblock, get one now.
	 */
	rcur = *pcur;
	if (rcur != NULL && rcur->bc_ag.pag != pag) {
		nr_ops = rcur->bc_ag.refc.nr_ops;
		shape_changes = rcur->bc_ag.refc.shape_changes;
		xfs_refcount_finish_one_cleanup(tp, rcur, 0);
		rcur = NULL;
		*pcur = NULL;
	}
	if (rcur == NULL) {
		error = xfs_alloc_read_agf(tp->t_mountp, tp, pag->pag_agno,
				XFS_ALLOC_FLAG_FREEING, &agbp);
		if (error)
			goto out_drop;

		rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
		rcur->bc_ag.refc.nr_ops = nr_ops;
		rcur->bc_ag.refc.shape_changes = shape_changes;
	}
	*pcur = rcur;

	switch (type) {
	case XFS_REFCOUNT_INCREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
				new_len, XFS_REFCOUNT_ADJUST_INCREASE);
		*new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
		break;
	case XFS_REFCOUNT_DECREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
				new_len, XFS_REFCOUNT_ADJUST_DECREASE);
		*new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
		break;
	case XFS_REFCOUNT_ALLOC_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_alloc(rcur, bno, blockcount);
		break;
	case XFS_REFCOUNT_FREE_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_free(rcur, bno, blockcount);
		break;
	default:
		ASSERT(0);
		error = -EFSCORRUPTED;
	}
	if (!error && *new_len > 0)
		trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, type,
				bno, blockcount, new_agbno, *new_len);
out_drop:
	xfs_perag_put(pag);
	return error;
}

/*
 * Record a refcount intent for later processing.
 */
static void
__xfs_refcount_add(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount)
{
	struct xfs_refcount_intent	*ri;

	trace_xfs_refcount_defer(tp->t_mountp,
			XFS_FSB_TO_AGNO(tp->t_mountp, startblock),
			type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
			blockcount);

	ri = kmem_cache_alloc(xfs_refcount_intent_cache,
			GFP_NOFS | __GFP_NOFAIL);
	INIT_LIST_HEAD(&ri->ri_list);
	ri->ri_type = type;
	ri->ri_startblock = startblock;
	ri->ri_blockcount = blockcount;

	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
}

/*
 * Increase the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_increase_extent(
	struct xfs_trans		*tp,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_has_reflink(tp->t_mountp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock,
			PREV->br_blockcount);
}

/*
 * Decrease the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_decrease_extent(
	struct xfs_trans		*tp,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_has_reflink(tp->t_mountp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock,
			PREV->br_blockcount);
}

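/*
 * These two wrappers are the hooks used when the block mapping of a
 * reflinked file changes: remapping an extent into a file typically queues
 * an increase for the blocks it now shares, and unmapping a shared extent
 * queues the matching decrease.  Both are no-ops on filesystems without the
 * reflink feature.
 */
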
/*
 * Given an AG extent, find the lowest-numbered run of shared blocks
 * within that range and return the range in fbno/flen.  If
 * find_end_of_shared is set, return the longest contiguous extent of
 * shared blocks; if not, just return the first extent we find.  If no
 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
 * and 0, respectively.
 */
int
xfs_refcount_find_shared(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	xfs_agblock_t			*fbno,
	xfs_extlen_t			*flen,
	bool				find_end_of_shared)
{
	struct xfs_refcount_irec	tmp;
	int				i;
	int				have;
	int				error;

	trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			agbno, aglen);

	/* By default, skip the whole range */
	*fbno = NULLAGBLOCK;
	*flen = 0;

	/* Try to find a refcount extent that crosses the start */
	error = xfs_refcount_lookup_le(cur, agbno, &have);
	if (error)
		goto out_error;
	if (!have) {
		/* No left extent, look at the next one */
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
	}
	error = xfs_refcount_get_rec(cur, &tmp, &i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	/* If the extent ends before the start, look at the next one */
	if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* If the extent starts after the range we want, bail out */
	if (tmp.rc_startblock >= agbno + aglen)
		goto done;

	/* We found the start of a shared extent! */
	if (tmp.rc_startblock < agbno) {
		tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
		tmp.rc_startblock = agbno;
	}

	*fbno = tmp.rc_startblock;
	*flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
	if (!find_end_of_shared)
		goto done;

	/* Otherwise, find the end of this shared extent */
	while (*fbno + *flen < agbno + aglen) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			break;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (tmp.rc_startblock >= agbno + aglen ||
		    tmp.rc_startblock != *fbno + *flen)
			break;
		*flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
	}

done:
	trace_xfs_refcount_find_shared_result(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, *fbno, *flen);

out_error:
	if (error)
		trace_xfs_refcount_find_shared_error(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

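/*
 * A short worked example of the search above, with invented records
 * (8, 4, 2) and (14, 2, 5): a query for agbno = 10, aglen = 10 finds the
 * record crossing the start, clamps it to the query range and reports
 * fbno = 10, flen = 2.  With find_end_of_shared set the walk still stops
 * there, because the following shared record does not start at block 12.
 */
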
/*
 * Recovering CoW Blocks After a Crash
 *
 * Due to the way that the copy on write mechanism works, there's a window of
 * opportunity in which we can lose track of allocated blocks during a crash.
 * Because CoW uses delayed allocation in the in-core CoW fork, writeback
 * causes blocks to be allocated and stored in the CoW fork.  The blocks are
 * no longer in the free space btree but are not otherwise recorded anywhere
 * until the write completes and the blocks are mapped into the file.  A crash
 * in between allocation and remapping results in the replacement blocks being
 * lost.  This situation is exacerbated by the CoW extent size hint because
 * allocations can hang around for long time.
 *
 * However, there is a place where we can record these allocations before they
 * become mappings -- the reference count btree.  The btree does not record
 * extents with refcount == 1, so we can record allocations with a refcount of
 * 1.  Blocks being used for CoW writeout cannot be shared, so there should be
 * no conflict with shared block records.  These mappings should be created
 * when we allocate blocks to the CoW fork and deleted when they're removed
 * from the CoW fork.
 *
 * Minor nit: records for in-progress CoW allocations and records for shared
 * extents must never be merged, to preserve the property that (except for CoW
 * allocations) there are no refcount btree entries with refcount == 1.  The
 * only time this could potentially happen is when unsharing a block that's
 * adjacent to CoW allocations, so we must be careful to avoid this.
 *
 * At mount time we recover lost CoW allocations by searching the refcount
 * btree for these refcount == 1 mappings.  These represent CoW allocations
 * that were in progress at the time the filesystem went down, so we can free
 * them to get the space back.
 *
 * This mechanism is superior to creating EFIs for unmapped CoW extents for
 * several reasons -- first, EFIs pin the tail of the log and would have to be
 * periodically relogged to avoid filling up the log.  Second, CoW completions
 * will have to file an EFD and create new EFIs for whatever remains in the
 * CoW fork; this partially takes care of (1) but extent-size reservations
 * will have to periodically relog even if there's no writeout in progress.
 * This can happen if the CoW extent size hint is set, which you really want.
 * Third, EFIs cannot currently be automatically relogged into newer
 * transactions to advance the log tail.  Fourth, stuffing the log full of
 * EFIs places an upper bound on the number of CoW allocations that can be
 * held filesystem-wide at any given time.  Recording them in the refcount
 * btree doesn't require us to maintain any state in memory and doesn't pin
 * the log.
 */
/*
 * Adjust the refcounts of CoW allocations.  These allocations are "magic"
 * in that they're not referenced anywhere else in the filesystem, so we
 * stash them in the refcount btree with a refcount of 1 until either file
 * remapping (or CoW cancellation) happens.
 */
STATIC int
xfs_refcount_adjust_cow_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;

	if (aglen == 0)
		return 0;

	/* Find any overlapping refcount records */
	error = xfs_refcount_lookup_ge(cur, agbno, &found_rec);
	if (error)
		goto out_error;
	error = xfs_refcount_get_rec(cur, &ext, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec) {
		ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks +
				XFS_REFC_COW_START;
		ext.rc_blockcount = 0;
		ext.rc_refcount = 0;
	}

	switch (adj) {
	case XFS_REFCOUNT_ADJUST_COW_ALLOC:
		/* Adding a CoW reservation, there should be nothing here. */
		if (XFS_IS_CORRUPT(cur->bc_mp,
				   agbno + aglen > ext.rc_startblock)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		tmp.rc_startblock = agbno;
		tmp.rc_blockcount = aglen;
		tmp.rc_refcount = 1;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, &tmp);

		error = xfs_refcount_insert(cur, &tmp,
				&found_tmp);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	case XFS_REFCOUNT_ADJUST_COW_FREE:
		/* Removing a CoW reservation, there should be one extent. */
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		ext.rc_refcount = 0;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, &ext);
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	default:
		ASSERT(0);
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

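/*
 * CoW staging records are kept apart from shared-extent records by
 * offsetting their start block with XFS_REFC_COW_START (the high bit of the
 * agblock space): a staging record for AG block B is stored at
 * B + XFS_REFC_COW_START with a refcount of exactly 1.  That is why
 * xfs_refcount_get_rec() strips the bit before range-checking the record,
 * and why CoW recovery refuses to run on a filesystem whose AGs are large
 * enough to collide with the flag.
 */
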
/*
 * Add or remove refcount btree entries for CoW reservations.
 */
STATIC int
xfs_refcount_adjust_cow(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			error;

	agbno += XFS_REFC_COW_START;

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
	if (error)
		goto out_error;

	error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
	if (error)
		goto out_error;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
			XFS_FIND_RCEXT_COW, &shape_changed);
	if (error)
		goto out_error;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			error, _RET_IP_);
	return error;
}

/*
 * Record a CoW allocation in the refcount btree.
 */
STATIC int
__xfs_refcount_cow_alloc(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
			agbno, aglen);

	/* Add refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_ALLOC);
}

/*
 * Remove a CoW allocation from the refcount btree.
 */
STATIC int
__xfs_refcount_cow_free(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
			agbno, aglen);

	/* Remove refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_FREE);
}

/* Record a CoW staging extent in the refcount btree. */
void
xfs_refcount_alloc_cow_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	struct xfs_mount		*mp = tp->t_mountp;

	if (!xfs_has_reflink(mp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);

	/* Add rmap entry */
	xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
}

/* Forget a CoW staging event in the refcount btree. */
void
xfs_refcount_free_cow_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	struct xfs_mount		*mp = tp->t_mountp;

	if (!xfs_has_reflink(mp))
		return;

	/* Remove rmap entry */
	xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
	__xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len);
}

struct xfs_refcount_recovery {
	struct list_head		rr_list;
	struct xfs_refcount_irec	rr_rrec;
};

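/*
 * Putting the CoW pieces together: when blocks are allocated to the CoW
 * fork, xfs_refcount_alloc_cow_extent() stages them in the refcount btree
 * (and records an OWN_COW rmap); once the blocks are remapped into the file
 * or the reservation is cancelled, xfs_refcount_free_cow_extent() removes
 * the staging record again.  Anything still staged after a crash is found
 * and freed by the recovery code below.
 */
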
/* Stuff an extent on the recovery list. */
STATIC int
xfs_refcount_recover_extent(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{
	struct list_head		*debris = priv;
	struct xfs_refcount_recovery	*rr;

	if (XFS_IS_CORRUPT(cur->bc_mp,
			   be32_to_cpu(rec->refc.rc_refcount) != 1))
		return -EFSCORRUPTED;

	rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0);
	xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
	list_add_tail(&rr->rr_list, debris);

	return 0;
}

/* Find and remove leftover CoW reservations. */
int
xfs_refcount_recover_cow_leftovers(
	struct xfs_mount		*mp,
	struct xfs_perag		*pag)
{
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*cur;
	struct xfs_buf			*agbp;
	struct xfs_refcount_recovery	*rr, *n;
	struct list_head		debris;
	union xfs_btree_irec		low;
	union xfs_btree_irec		high;
	xfs_fsblock_t			fsb;
	xfs_agblock_t			agbno;
	int				error;

	if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
		return -EOPNOTSUPP;

	INIT_LIST_HEAD(&debris);

	/*
	 * In this first part, we use an empty transaction to gather up
	 * all the leftover CoW extents so that we can subsequently
	 * delete them.  The empty transaction is used to avoid
	 * a buffer lock deadlock if there happens to be a loop in the
	 * refcountbt because we're allowed to re-grab a buffer that is
	 * already attached to our transaction.  When we're done
	 * recording the CoW debris we cancel the (empty) transaction
	 * and everything goes away cleanly.
	 */
	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp);
	if (error)
		goto out_trans;
	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);

	/* Find all the leftover CoW staging extents. */
	memset(&low, 0, sizeof(low));
	memset(&high, 0, sizeof(high));
	low.rc.rc_startblock = XFS_REFC_COW_START;
	high.rc.rc_startblock = -1U;
	error = xfs_btree_query_range(cur, &low, &high,
			xfs_refcount_recover_extent, &debris);
	xfs_btree_del_cursor(cur, error);
	xfs_trans_brelse(tp, agbp);
	xfs_trans_cancel(tp);
	if (error)
		goto out_free;

	/* Now iterate the list to free the leftovers */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		/* Set up transaction. */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
		if (error)
			goto out_free;

		trace_xfs_refcount_recover_extent(mp, pag->pag_agno,
				&rr->rr_rrec);

		/* Free the orphan record */
		agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
		fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno);
		xfs_refcount_free_cow_extent(tp, fsb,
				rr->rr_rrec.rc_blockcount);

		/* Free the block. */
		xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL);

		error = xfs_trans_commit(tp);
		if (error)
			goto out_free;

		list_del(&rr->rr_list);
		kmem_free(rr);
	}

	return error;
out_trans:
	xfs_trans_cancel(tp);
out_free:
	/* Free the leftover list */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		list_del(&rr->rr_list);
		kmem_free(rr);
	}
	return error;
}

/* Is there a record covering a given extent? */
int
xfs_refcount_has_record(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	bool			*exists)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	memset(&low, 0, sizeof(low));
	low.rc.rc_startblock = bno;
	memset(&high, 0xFF, sizeof(high));
	high.rc.rc_startblock = bno + len - 1;

	return xfs_btree_has_record(cur, &low, &high, exists);
}

int __init
xfs_refcount_intent_init_cache(void)
{
	xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent",
			sizeof(struct xfs_refcount_intent),
			0, 0, NULL);

	return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM;
}

void
xfs_refcount_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_refcount_intent_cache);
	xfs_refcount_intent_cache = NULL;
}