extent_map.c (23984B)
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * extent_map.c 4 * 5 * Block/Cluster mapping functions 6 * 7 * Copyright (C) 2004 Oracle. All rights reserved. 8 */ 9 10#include <linux/fs.h> 11#include <linux/init.h> 12#include <linux/slab.h> 13#include <linux/types.h> 14#include <linux/fiemap.h> 15 16#include <cluster/masklog.h> 17 18#include "ocfs2.h" 19 20#include "alloc.h" 21#include "dlmglue.h" 22#include "extent_map.h" 23#include "inode.h" 24#include "super.h" 25#include "symlink.h" 26#include "aops.h" 27#include "ocfs2_trace.h" 28 29#include "buffer_head_io.h" 30 31/* 32 * The extent caching implementation is intentionally trivial. 33 * 34 * We only cache a small number of extents stored directly on the 35 * inode, so linear order operations are acceptable. If we ever want 36 * to increase the size of the extent map, then these algorithms must 37 * get smarter. 38 */ 39 40void ocfs2_extent_map_init(struct inode *inode) 41{ 42 struct ocfs2_inode_info *oi = OCFS2_I(inode); 43 44 oi->ip_extent_map.em_num_items = 0; 45 INIT_LIST_HEAD(&oi->ip_extent_map.em_list); 46} 47 48static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em, 49 unsigned int cpos, 50 struct ocfs2_extent_map_item **ret_emi) 51{ 52 unsigned int range; 53 struct ocfs2_extent_map_item *emi; 54 55 *ret_emi = NULL; 56 57 list_for_each_entry(emi, &em->em_list, ei_list) { 58 range = emi->ei_cpos + emi->ei_clusters; 59 60 if (cpos >= emi->ei_cpos && cpos < range) { 61 list_move(&emi->ei_list, &em->em_list); 62 63 *ret_emi = emi; 64 break; 65 } 66 } 67} 68 69static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos, 70 unsigned int *phys, unsigned int *len, 71 unsigned int *flags) 72{ 73 unsigned int coff; 74 struct ocfs2_inode_info *oi = OCFS2_I(inode); 75 struct ocfs2_extent_map_item *emi; 76 77 spin_lock(&oi->ip_lock); 78 79 __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi); 80 if (emi) { 81 coff = cpos - emi->ei_cpos; 82 *phys = emi->ei_phys + coff; 83 if (len) 84 *len = emi->ei_clusters - coff; 85 if (flags) 86 *flags = emi->ei_flags; 87 } 88 89 spin_unlock(&oi->ip_lock); 90 91 if (emi == NULL) 92 return -ENOENT; 93 94 return 0; 95} 96 97/* 98 * Forget about all clusters equal to or greater than cpos. 99 */ 100void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos) 101{ 102 struct ocfs2_extent_map_item *emi, *n; 103 struct ocfs2_inode_info *oi = OCFS2_I(inode); 104 struct ocfs2_extent_map *em = &oi->ip_extent_map; 105 LIST_HEAD(tmp_list); 106 unsigned int range; 107 108 spin_lock(&oi->ip_lock); 109 list_for_each_entry_safe(emi, n, &em->em_list, ei_list) { 110 if (emi->ei_cpos >= cpos) { 111 /* Full truncate of this record. */ 112 list_move(&emi->ei_list, &tmp_list); 113 BUG_ON(em->em_num_items == 0); 114 em->em_num_items--; 115 continue; 116 } 117 118 range = emi->ei_cpos + emi->ei_clusters; 119 if (range > cpos) { 120 /* Partial truncate */ 121 emi->ei_clusters = cpos - emi->ei_cpos; 122 } 123 } 124 spin_unlock(&oi->ip_lock); 125 126 list_for_each_entry_safe(emi, n, &tmp_list, ei_list) { 127 list_del(&emi->ei_list); 128 kfree(emi); 129 } 130} 131 132/* 133 * Is any part of emi2 contained within emi1 134 */ 135static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1, 136 struct ocfs2_extent_map_item *emi2) 137{ 138 unsigned int range1, range2; 139 140 /* 141 * Check if logical start of emi2 is inside emi1 142 */ 143 range1 = emi1->ei_cpos + emi1->ei_clusters; 144 if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1) 145 return 1; 146 147 /* 148 * Check if logical end of emi2 is inside emi1 149 */ 150 range2 = emi2->ei_cpos + emi2->ei_clusters; 151 if (range2 > emi1->ei_cpos && range2 <= range1) 152 return 1; 153 154 return 0; 155} 156 157static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest, 158 struct ocfs2_extent_map_item *src) 159{ 160 dest->ei_cpos = src->ei_cpos; 161 dest->ei_phys = src->ei_phys; 162 dest->ei_clusters = src->ei_clusters; 163 dest->ei_flags = src->ei_flags; 164} 165 166/* 167 * Try to merge emi with ins. Returns 1 if merge succeeds, zero 168 * otherwise. 169 */ 170static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi, 171 struct ocfs2_extent_map_item *ins) 172{ 173 /* 174 * Handle contiguousness 175 */ 176 if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) && 177 ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) && 178 ins->ei_flags == emi->ei_flags) { 179 emi->ei_clusters += ins->ei_clusters; 180 return 1; 181 } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys && 182 (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos && 183 ins->ei_flags == emi->ei_flags) { 184 emi->ei_phys = ins->ei_phys; 185 emi->ei_cpos = ins->ei_cpos; 186 emi->ei_clusters += ins->ei_clusters; 187 return 1; 188 } 189 190 /* 191 * Overlapping extents - this shouldn't happen unless we've 192 * split an extent to change it's flags. That is exceedingly 193 * rare, so there's no sense in trying to optimize it yet. 194 */ 195 if (ocfs2_ei_is_contained(emi, ins) || 196 ocfs2_ei_is_contained(ins, emi)) { 197 ocfs2_copy_emi_fields(emi, ins); 198 return 1; 199 } 200 201 /* No merge was possible. */ 202 return 0; 203} 204 205/* 206 * In order to reduce complexity on the caller, this insert function 207 * is intentionally liberal in what it will accept. 208 * 209 * The only rule is that the truncate call *must* be used whenever 210 * records have been deleted. This avoids inserting overlapping 211 * records with different physical mappings. 212 */ 213void ocfs2_extent_map_insert_rec(struct inode *inode, 214 struct ocfs2_extent_rec *rec) 215{ 216 struct ocfs2_inode_info *oi = OCFS2_I(inode); 217 struct ocfs2_extent_map *em = &oi->ip_extent_map; 218 struct ocfs2_extent_map_item *emi, *new_emi = NULL; 219 struct ocfs2_extent_map_item ins; 220 221 ins.ei_cpos = le32_to_cpu(rec->e_cpos); 222 ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb, 223 le64_to_cpu(rec->e_blkno)); 224 ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters); 225 ins.ei_flags = rec->e_flags; 226 227search: 228 spin_lock(&oi->ip_lock); 229 230 list_for_each_entry(emi, &em->em_list, ei_list) { 231 if (ocfs2_try_to_merge_extent_map(emi, &ins)) { 232 list_move(&emi->ei_list, &em->em_list); 233 spin_unlock(&oi->ip_lock); 234 goto out; 235 } 236 } 237 238 /* 239 * No item could be merged. 240 * 241 * Either allocate and add a new item, or overwrite the last recently 242 * inserted. 243 */ 244 245 if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) { 246 if (new_emi == NULL) { 247 spin_unlock(&oi->ip_lock); 248 249 new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS); 250 if (new_emi == NULL) 251 goto out; 252 253 goto search; 254 } 255 256 ocfs2_copy_emi_fields(new_emi, &ins); 257 list_add(&new_emi->ei_list, &em->em_list); 258 em->em_num_items++; 259 new_emi = NULL; 260 } else { 261 BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0); 262 emi = list_entry(em->em_list.prev, 263 struct ocfs2_extent_map_item, ei_list); 264 list_move(&emi->ei_list, &em->em_list); 265 ocfs2_copy_emi_fields(emi, &ins); 266 } 267 268 spin_unlock(&oi->ip_lock); 269 270out: 271 kfree(new_emi); 272} 273 274static int ocfs2_last_eb_is_empty(struct inode *inode, 275 struct ocfs2_dinode *di) 276{ 277 int ret, next_free; 278 u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk); 279 struct buffer_head *eb_bh = NULL; 280 struct ocfs2_extent_block *eb; 281 struct ocfs2_extent_list *el; 282 283 ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh); 284 if (ret) { 285 mlog_errno(ret); 286 goto out; 287 } 288 289 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 290 el = &eb->h_list; 291 292 if (el->l_tree_depth) { 293 ocfs2_error(inode->i_sb, 294 "Inode %lu has non zero tree depth in leaf block %llu\n", 295 inode->i_ino, 296 (unsigned long long)eb_bh->b_blocknr); 297 ret = -EROFS; 298 goto out; 299 } 300 301 next_free = le16_to_cpu(el->l_next_free_rec); 302 303 if (next_free == 0 || 304 (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) 305 ret = 1; 306 307out: 308 brelse(eb_bh); 309 return ret; 310} 311 312/* 313 * Return the 1st index within el which contains an extent start 314 * larger than v_cluster. 315 */ 316static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el, 317 u32 v_cluster) 318{ 319 int i; 320 struct ocfs2_extent_rec *rec; 321 322 for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { 323 rec = &el->l_recs[i]; 324 325 if (v_cluster < le32_to_cpu(rec->e_cpos)) 326 break; 327 } 328 329 return i; 330} 331 332/* 333 * Figure out the size of a hole which starts at v_cluster within the given 334 * extent list. 335 * 336 * If there is no more allocation past v_cluster, we return the maximum 337 * cluster size minus v_cluster. 338 * 339 * If we have in-inode extents, then el points to the dinode list and 340 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block 341 * containing el. 342 */ 343int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci, 344 struct ocfs2_extent_list *el, 345 struct buffer_head *eb_bh, 346 u32 v_cluster, 347 u32 *num_clusters) 348{ 349 int ret, i; 350 struct buffer_head *next_eb_bh = NULL; 351 struct ocfs2_extent_block *eb, *next_eb; 352 353 i = ocfs2_search_for_hole_index(el, v_cluster); 354 355 if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) { 356 eb = (struct ocfs2_extent_block *)eb_bh->b_data; 357 358 /* 359 * Check the next leaf for any extents. 360 */ 361 362 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) 363 goto no_more_extents; 364 365 ret = ocfs2_read_extent_block(ci, 366 le64_to_cpu(eb->h_next_leaf_blk), 367 &next_eb_bh); 368 if (ret) { 369 mlog_errno(ret); 370 goto out; 371 } 372 373 next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data; 374 el = &next_eb->h_list; 375 i = ocfs2_search_for_hole_index(el, v_cluster); 376 } 377 378no_more_extents: 379 if (i == le16_to_cpu(el->l_next_free_rec)) { 380 /* 381 * We're at the end of our existing allocation. Just 382 * return the maximum number of clusters we could 383 * possibly allocate. 384 */ 385 *num_clusters = UINT_MAX - v_cluster; 386 } else { 387 *num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster; 388 } 389 390 ret = 0; 391out: 392 brelse(next_eb_bh); 393 return ret; 394} 395 396static int ocfs2_get_clusters_nocache(struct inode *inode, 397 struct buffer_head *di_bh, 398 u32 v_cluster, unsigned int *hole_len, 399 struct ocfs2_extent_rec *ret_rec, 400 unsigned int *is_last) 401{ 402 int i, ret, tree_height, len; 403 struct ocfs2_dinode *di; 404 struct ocfs2_extent_block *eb; 405 struct ocfs2_extent_list *el; 406 struct ocfs2_extent_rec *rec; 407 struct buffer_head *eb_bh = NULL; 408 409 memset(ret_rec, 0, sizeof(*ret_rec)); 410 if (is_last) 411 *is_last = 0; 412 413 di = (struct ocfs2_dinode *) di_bh->b_data; 414 el = &di->id2.i_list; 415 tree_height = le16_to_cpu(el->l_tree_depth); 416 417 if (tree_height > 0) { 418 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster, 419 &eb_bh); 420 if (ret) { 421 mlog_errno(ret); 422 goto out; 423 } 424 425 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 426 el = &eb->h_list; 427 428 if (el->l_tree_depth) { 429 ocfs2_error(inode->i_sb, 430 "Inode %lu has non zero tree depth in leaf block %llu\n", 431 inode->i_ino, 432 (unsigned long long)eb_bh->b_blocknr); 433 ret = -EROFS; 434 goto out; 435 } 436 } 437 438 i = ocfs2_search_extent_list(el, v_cluster); 439 if (i == -1) { 440 /* 441 * Holes can be larger than the maximum size of an 442 * extent, so we return their lengths in a separate 443 * field. 444 */ 445 if (hole_len) { 446 ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode), 447 el, eb_bh, 448 v_cluster, &len); 449 if (ret) { 450 mlog_errno(ret); 451 goto out; 452 } 453 454 *hole_len = len; 455 } 456 goto out_hole; 457 } 458 459 rec = &el->l_recs[i]; 460 461 BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); 462 463 if (!rec->e_blkno) { 464 ocfs2_error(inode->i_sb, 465 "Inode %lu has bad extent record (%u, %u, 0)\n", 466 inode->i_ino, 467 le32_to_cpu(rec->e_cpos), 468 ocfs2_rec_clusters(el, rec)); 469 ret = -EROFS; 470 goto out; 471 } 472 473 *ret_rec = *rec; 474 475 /* 476 * Checking for last extent is potentially expensive - we 477 * might have to look at the next leaf over to see if it's 478 * empty. 479 * 480 * The first two checks are to see whether the caller even 481 * cares for this information, and if the extent is at least 482 * the last in it's list. 483 * 484 * If those hold true, then the extent is last if any of the 485 * additional conditions hold true: 486 * - Extent list is in-inode 487 * - Extent list is right-most 488 * - Extent list is 2nd to rightmost, with empty right-most 489 */ 490 if (is_last) { 491 if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) { 492 if (tree_height == 0) 493 *is_last = 1; 494 else if (eb->h_blkno == di->i_last_eb_blk) 495 *is_last = 1; 496 else if (eb->h_next_leaf_blk == di->i_last_eb_blk) { 497 ret = ocfs2_last_eb_is_empty(inode, di); 498 if (ret < 0) { 499 mlog_errno(ret); 500 goto out; 501 } 502 if (ret == 1) 503 *is_last = 1; 504 } 505 } 506 } 507 508out_hole: 509 ret = 0; 510out: 511 brelse(eb_bh); 512 return ret; 513} 514 515static void ocfs2_relative_extent_offsets(struct super_block *sb, 516 u32 v_cluster, 517 struct ocfs2_extent_rec *rec, 518 u32 *p_cluster, u32 *num_clusters) 519 520{ 521 u32 coff = v_cluster - le32_to_cpu(rec->e_cpos); 522 523 *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno)); 524 *p_cluster = *p_cluster + coff; 525 526 if (num_clusters) 527 *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff; 528} 529 530int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, 531 u32 *p_cluster, u32 *num_clusters, 532 struct ocfs2_extent_list *el, 533 unsigned int *extent_flags) 534{ 535 int ret = 0, i; 536 struct buffer_head *eb_bh = NULL; 537 struct ocfs2_extent_block *eb; 538 struct ocfs2_extent_rec *rec; 539 u32 coff; 540 541 if (el->l_tree_depth) { 542 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster, 543 &eb_bh); 544 if (ret) { 545 mlog_errno(ret); 546 goto out; 547 } 548 549 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 550 el = &eb->h_list; 551 552 if (el->l_tree_depth) { 553 ocfs2_error(inode->i_sb, 554 "Inode %lu has non zero tree depth in xattr leaf block %llu\n", 555 inode->i_ino, 556 (unsigned long long)eb_bh->b_blocknr); 557 ret = -EROFS; 558 goto out; 559 } 560 } 561 562 i = ocfs2_search_extent_list(el, v_cluster); 563 if (i == -1) { 564 ret = -EROFS; 565 mlog_errno(ret); 566 goto out; 567 } else { 568 rec = &el->l_recs[i]; 569 BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); 570 571 if (!rec->e_blkno) { 572 ocfs2_error(inode->i_sb, 573 "Inode %lu has bad extent record (%u, %u, 0) in xattr\n", 574 inode->i_ino, 575 le32_to_cpu(rec->e_cpos), 576 ocfs2_rec_clusters(el, rec)); 577 ret = -EROFS; 578 goto out; 579 } 580 coff = v_cluster - le32_to_cpu(rec->e_cpos); 581 *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, 582 le64_to_cpu(rec->e_blkno)); 583 *p_cluster = *p_cluster + coff; 584 if (num_clusters) 585 *num_clusters = ocfs2_rec_clusters(el, rec) - coff; 586 587 if (extent_flags) 588 *extent_flags = rec->e_flags; 589 } 590out: 591 brelse(eb_bh); 592 return ret; 593} 594 595int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, 596 u32 *p_cluster, u32 *num_clusters, 597 unsigned int *extent_flags) 598{ 599 int ret; 600 unsigned int hole_len, flags = 0; 601 struct buffer_head *di_bh = NULL; 602 struct ocfs2_extent_rec rec; 603 604 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 605 ret = -ERANGE; 606 mlog_errno(ret); 607 goto out; 608 } 609 610 ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, 611 num_clusters, extent_flags); 612 if (ret == 0) 613 goto out; 614 615 ret = ocfs2_read_inode_block(inode, &di_bh); 616 if (ret) { 617 mlog_errno(ret); 618 goto out; 619 } 620 621 ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len, 622 &rec, NULL); 623 if (ret) { 624 mlog_errno(ret); 625 goto out; 626 } 627 628 if (rec.e_blkno == 0ULL) { 629 /* 630 * A hole was found. Return some canned values that 631 * callers can key on. If asked for, num_clusters will 632 * be populated with the size of the hole. 633 */ 634 *p_cluster = 0; 635 if (num_clusters) { 636 *num_clusters = hole_len; 637 } 638 } else { 639 ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec, 640 p_cluster, num_clusters); 641 flags = rec.e_flags; 642 643 ocfs2_extent_map_insert_rec(inode, &rec); 644 } 645 646 if (extent_flags) 647 *extent_flags = flags; 648 649out: 650 brelse(di_bh); 651 return ret; 652} 653 654/* 655 * This expects alloc_sem to be held. The allocation cannot change at 656 * all while the map is in the process of being updated. 657 */ 658int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, 659 u64 *ret_count, unsigned int *extent_flags) 660{ 661 int ret; 662 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 663 u32 cpos, num_clusters, p_cluster; 664 u64 boff = 0; 665 666 cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno); 667 668 ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters, 669 extent_flags); 670 if (ret) { 671 mlog_errno(ret); 672 goto out; 673 } 674 675 /* 676 * p_cluster == 0 indicates a hole. 677 */ 678 if (p_cluster) { 679 boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 680 boff += (v_blkno & (u64)(bpc - 1)); 681 } 682 683 *p_blkno = boff; 684 685 if (ret_count) { 686 *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); 687 *ret_count -= v_blkno & (u64)(bpc - 1); 688 } 689 690out: 691 return ret; 692} 693 694/* 695 * The ocfs2_fiemap_inline() may be a little bit misleading, since 696 * it not only handles the fiemap for inlined files, but also deals 697 * with the fast symlink, cause they have no difference for extent 698 * mapping per se. 699 */ 700static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, 701 struct fiemap_extent_info *fieinfo, 702 u64 map_start) 703{ 704 int ret; 705 unsigned int id_count; 706 struct ocfs2_dinode *di; 707 u64 phys; 708 u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST; 709 struct ocfs2_inode_info *oi = OCFS2_I(inode); 710 711 di = (struct ocfs2_dinode *)di_bh->b_data; 712 if (ocfs2_inode_is_fast_symlink(inode)) 713 id_count = ocfs2_fast_symlink_chars(inode->i_sb); 714 else 715 id_count = le16_to_cpu(di->id2.i_data.id_count); 716 717 if (map_start < id_count) { 718 phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; 719 if (ocfs2_inode_is_fast_symlink(inode)) 720 phys += offsetof(struct ocfs2_dinode, id2.i_symlink); 721 else 722 phys += offsetof(struct ocfs2_dinode, 723 id2.i_data.id_data); 724 725 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, 726 flags); 727 if (ret < 0) 728 return ret; 729 } 730 731 return 0; 732} 733 734int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 735 u64 map_start, u64 map_len) 736{ 737 int ret, is_last; 738 u32 mapping_end, cpos; 739 unsigned int hole_size; 740 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 741 u64 len_bytes, phys_bytes, virt_bytes; 742 struct buffer_head *di_bh = NULL; 743 struct ocfs2_extent_rec rec; 744 745 ret = fiemap_prep(inode, fieinfo, map_start, &map_len, 0); 746 if (ret) 747 return ret; 748 749 ret = ocfs2_inode_lock(inode, &di_bh, 0); 750 if (ret) { 751 mlog_errno(ret); 752 goto out; 753 } 754 755 down_read(&OCFS2_I(inode)->ip_alloc_sem); 756 757 /* 758 * Handle inline-data and fast symlink separately. 759 */ 760 if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) || 761 ocfs2_inode_is_fast_symlink(inode)) { 762 ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); 763 goto out_unlock; 764 } 765 766 cpos = map_start >> osb->s_clustersize_bits; 767 mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, 768 map_start + map_len); 769 is_last = 0; 770 while (cpos < mapping_end && !is_last) { 771 u32 fe_flags; 772 773 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, 774 &hole_size, &rec, &is_last); 775 if (ret) { 776 mlog_errno(ret); 777 goto out_unlock; 778 } 779 780 if (rec.e_blkno == 0ULL) { 781 cpos += hole_size; 782 continue; 783 } 784 785 fe_flags = 0; 786 if (rec.e_flags & OCFS2_EXT_UNWRITTEN) 787 fe_flags |= FIEMAP_EXTENT_UNWRITTEN; 788 if (rec.e_flags & OCFS2_EXT_REFCOUNTED) 789 fe_flags |= FIEMAP_EXTENT_SHARED; 790 if (is_last) 791 fe_flags |= FIEMAP_EXTENT_LAST; 792 len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; 793 phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits; 794 virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits; 795 796 ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes, 797 len_bytes, fe_flags); 798 if (ret) 799 break; 800 801 cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters); 802 } 803 804 if (ret > 0) 805 ret = 0; 806 807out_unlock: 808 brelse(di_bh); 809 810 up_read(&OCFS2_I(inode)->ip_alloc_sem); 811 812 ocfs2_inode_unlock(inode, 0); 813out: 814 815 return ret; 816} 817 818/* Is IO overwriting allocated blocks? */ 819int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh, 820 u64 map_start, u64 map_len) 821{ 822 int ret = 0, is_last; 823 u32 mapping_end, cpos; 824 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 825 struct ocfs2_extent_rec rec; 826 827 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 828 if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len)) 829 return ret; 830 else 831 return -EAGAIN; 832 } 833 834 cpos = map_start >> osb->s_clustersize_bits; 835 mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, 836 map_start + map_len); 837 is_last = 0; 838 while (cpos < mapping_end && !is_last) { 839 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, 840 NULL, &rec, &is_last); 841 if (ret) { 842 mlog_errno(ret); 843 goto out; 844 } 845 846 if (rec.e_blkno == 0ULL) 847 break; 848 849 if (rec.e_flags & OCFS2_EXT_REFCOUNTED) 850 break; 851 852 cpos = le32_to_cpu(rec.e_cpos) + 853 le16_to_cpu(rec.e_leaf_clusters); 854 } 855 856 if (cpos < mapping_end) 857 ret = -EAGAIN; 858out: 859 return ret; 860} 861 862int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence) 863{ 864 struct inode *inode = file->f_mapping->host; 865 int ret; 866 unsigned int is_last = 0, is_data = 0; 867 u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; 868 u32 cpos, cend, clen, hole_size; 869 u64 extoff, extlen; 870 struct buffer_head *di_bh = NULL; 871 struct ocfs2_extent_rec rec; 872 873 BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE); 874 875 ret = ocfs2_inode_lock(inode, &di_bh, 0); 876 if (ret) { 877 mlog_errno(ret); 878 goto out; 879 } 880 881 down_read(&OCFS2_I(inode)->ip_alloc_sem); 882 883 if (*offset >= i_size_read(inode)) { 884 ret = -ENXIO; 885 goto out_unlock; 886 } 887 888 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 889 if (whence == SEEK_HOLE) 890 *offset = i_size_read(inode); 891 goto out_unlock; 892 } 893 894 clen = 0; 895 cpos = *offset >> cs_bits; 896 cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)); 897 898 while (cpos < cend && !is_last) { 899 ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size, 900 &rec, &is_last); 901 if (ret) { 902 mlog_errno(ret); 903 goto out_unlock; 904 } 905 906 extoff = cpos; 907 extoff <<= cs_bits; 908 909 if (rec.e_blkno == 0ULL) { 910 clen = hole_size; 911 is_data = 0; 912 } else { 913 clen = le16_to_cpu(rec.e_leaf_clusters) - 914 (cpos - le32_to_cpu(rec.e_cpos)); 915 is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1; 916 } 917 918 if ((!is_data && whence == SEEK_HOLE) || 919 (is_data && whence == SEEK_DATA)) { 920 if (extoff > *offset) 921 *offset = extoff; 922 goto out_unlock; 923 } 924 925 if (!is_last) 926 cpos += clen; 927 } 928 929 if (whence == SEEK_HOLE) { 930 extoff = cpos; 931 extoff <<= cs_bits; 932 extlen = clen; 933 extlen <<= cs_bits; 934 935 if ((extoff + extlen) > i_size_read(inode)) 936 extlen = i_size_read(inode) - extoff; 937 extoff += extlen; 938 if (extoff > *offset) 939 *offset = extoff; 940 goto out_unlock; 941 } 942 943 ret = -ENXIO; 944 945out_unlock: 946 947 brelse(di_bh); 948 949 up_read(&OCFS2_I(inode)->ip_alloc_sem); 950 951 ocfs2_inode_unlock(inode, 0); 952out: 953 return ret; 954} 955 956int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, 957 struct buffer_head *bhs[], int flags, 958 int (*validate)(struct super_block *sb, 959 struct buffer_head *bh)) 960{ 961 int rc = 0; 962 u64 p_block, p_count; 963 int i, count, done = 0; 964 965 trace_ocfs2_read_virt_blocks( 966 inode, (unsigned long long)v_block, nr, bhs, flags, 967 validate); 968 969 if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >= 970 i_size_read(inode)) { 971 BUG_ON(!(flags & OCFS2_BH_READAHEAD)); 972 goto out; 973 } 974 975 while (done < nr) { 976 down_read(&OCFS2_I(inode)->ip_alloc_sem); 977 rc = ocfs2_extent_map_get_blocks(inode, v_block + done, 978 &p_block, &p_count, NULL); 979 up_read(&OCFS2_I(inode)->ip_alloc_sem); 980 if (rc) { 981 mlog_errno(rc); 982 break; 983 } 984 985 if (!p_block) { 986 rc = -EIO; 987 mlog(ML_ERROR, 988 "Inode #%llu contains a hole at offset %llu\n", 989 (unsigned long long)OCFS2_I(inode)->ip_blkno, 990 (unsigned long long)(v_block + done) << 991 inode->i_sb->s_blocksize_bits); 992 break; 993 } 994 995 count = nr - done; 996 if (p_count < count) 997 count = p_count; 998 999 /* 1000 * If the caller passed us bhs, they should have come 1001 * from a previous readahead call to this function. Thus, 1002 * they should have the right b_blocknr. 1003 */ 1004 for (i = 0; i < count; i++) { 1005 if (!bhs[done + i]) 1006 continue; 1007 BUG_ON(bhs[done + i]->b_blocknr != (p_block + i)); 1008 } 1009 1010 rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count, 1011 bhs + done, flags, validate); 1012 if (rc) { 1013 mlog_errno(rc); 1014 break; 1015 } 1016 done += count; 1017 } 1018 1019out: 1020 return rc; 1021} 1022 1023