cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

extent_map.c (23984B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * extent_map.c
 *
 * Block/Cluster mapping functions
 *
 * Copyright (C) 2004 Oracle.  All rights reserved.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/fiemap.h>

#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "inode.h"
#include "super.h"
#include "symlink.h"
#include "aops.h"
#include "ocfs2_trace.h"

#include "buffer_head_io.h"

/*
 * The extent caching implementation is intentionally trivial.
 *
 * We only cache a small number of extents stored directly on the
 * inode, so linear order operations are acceptable. If we ever want
 * to increase the size of the extent map, then these algorithms must
 * get smarter.
 */

void ocfs2_extent_map_init(struct inode *inode)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	oi->ip_extent_map.em_num_items = 0;
	INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
}

static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
				      unsigned int cpos,
				      struct ocfs2_extent_map_item **ret_emi)
{
	unsigned int range;
	struct ocfs2_extent_map_item *emi;

	*ret_emi = NULL;

	list_for_each_entry(emi, &em->em_list, ei_list) {
		range = emi->ei_cpos + emi->ei_clusters;

		if (cpos >= emi->ei_cpos && cpos < range) {
			list_move(&emi->ei_list, &em->em_list);

			*ret_emi = emi;
			break;
		}
	}
}
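
/*
 * Worked example (hypothetical values): with a cached item
 * {ei_cpos = 8, ei_clusters = 4, ei_phys = 100}, a lookup for
 * cpos 10 hits because 8 <= 10 < 8 + 4, and the caller computes
 * phys = 100 + (10 - 8) = 102 with len = 4 - 2 = 2. The list_move()
 * promotes the hit to the head of em_list, keeping the list in
 * most-recently-used order so the tail is the eviction candidate.
 */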

static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
				   unsigned int *phys, unsigned int *len,
				   unsigned int *flags)
{
	unsigned int coff;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map_item *emi;

	spin_lock(&oi->ip_lock);

	__ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
	if (emi) {
		coff = cpos - emi->ei_cpos;
		*phys = emi->ei_phys + coff;
		if (len)
			*len = emi->ei_clusters - coff;
		if (flags)
			*flags = emi->ei_flags;
	}

	spin_unlock(&oi->ip_lock);

	if (emi == NULL)
		return -ENOENT;

	return 0;
}

/*
 * Forget about all clusters equal to or greater than cpos.
 */
void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
{
	struct ocfs2_extent_map_item *emi, *n;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	LIST_HEAD(tmp_list);
	unsigned int range;

	spin_lock(&oi->ip_lock);
	list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
		if (emi->ei_cpos >= cpos) {
			/* Full truncate of this record. */
			list_move(&emi->ei_list, &tmp_list);
			BUG_ON(em->em_num_items == 0);
			em->em_num_items--;
			continue;
		}

		range = emi->ei_cpos + emi->ei_clusters;
		if (range > cpos) {
			/* Partial truncate */
			emi->ei_clusters = cpos - emi->ei_cpos;
		}
	}
	spin_unlock(&oi->ip_lock);

	list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
		list_del(&emi->ei_list);
		kfree(emi);
	}
}
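
/*
 * Worked example (hypothetical values): truncating at cpos = 10
 * against items {ei_cpos = 12, ei_clusters = 3} and {ei_cpos = 8,
 * ei_clusters = 4} drops the first item entirely and clips the
 * second to ei_clusters = 10 - 8 = 2. Dropped items are collected
 * on tmp_list so that the kfree() calls can happen after ip_lock
 * has been released.
 */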

/*
 * Is any part of emi2 contained within emi1?
 */
static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
				 struct ocfs2_extent_map_item *emi2)
{
	unsigned int range1, range2;

	/*
	 * Check if logical start of emi2 is inside emi1
	 */
	range1 = emi1->ei_cpos + emi1->ei_clusters;
	if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
		return 1;

	/*
	 * Check if logical end of emi2 is inside emi1
	 */
	range2 = emi2->ei_cpos + emi2->ei_clusters;
	if (range2 > emi1->ei_cpos && range2 <= range1)
		return 1;

	return 0;
}
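
/*
 * Worked example (hypothetical values): for emi1 = {ei_cpos = 0,
 * ei_clusters = 10} and emi2 = {ei_cpos = 8, ei_clusters = 4}, the
 * first test fires (0 <= 8 < 10), so the extents overlap even though
 * emi2 ends past emi1. The check is not symmetric - an emi2 that
 * completely surrounds emi1 fails both tests - which is why callers
 * test both directions.
 */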

static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
				  struct ocfs2_extent_map_item *src)
{
	dest->ei_cpos = src->ei_cpos;
	dest->ei_phys = src->ei_phys;
	dest->ei_clusters = src->ei_clusters;
	dest->ei_flags = src->ei_flags;
}

/*
 * Try to merge emi with ins. Returns 1 if merge succeeds, zero
 * otherwise.
 */
static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
					 struct ocfs2_extent_map_item *ins)
{
	/*
	 * Handle contiguousness
	 */
	if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
	    ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
	    ins->ei_flags == emi->ei_flags) {
		emi->ei_clusters += ins->ei_clusters;
		return 1;
	} else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
		   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
		   ins->ei_flags == emi->ei_flags) {
		emi->ei_phys = ins->ei_phys;
		emi->ei_cpos = ins->ei_cpos;
		emi->ei_clusters += ins->ei_clusters;
		return 1;
	}

	/*
	 * Overlapping extents - this shouldn't happen unless we've
	 * split an extent to change its flags. That is exceedingly
	 * rare, so there's no sense in trying to optimize it yet.
	 */
	if (ocfs2_ei_is_contained(emi, ins) ||
	    ocfs2_ei_is_contained(ins, emi)) {
		ocfs2_copy_emi_fields(emi, ins);
		return 1;
	}

	/* No merge was possible. */
	return 0;
}
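
/*
 * Worked example (hypothetical values): emi = {ei_cpos = 0,
 * ei_phys = 100, ei_clusters = 4} and ins = {ei_cpos = 4,
 * ei_phys = 104, ei_clusters = 2} are contiguous both logically and
 * physically with equal flags, so the first branch extends emi to
 * ei_clusters = 6. The second branch handles the mirror case, where
 * ins ends exactly where emi begins.
 */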

/*
 * In order to reduce complexity on the caller, this insert function
 * is intentionally liberal in what it will accept.
 *
 * The only rule is that the truncate call *must* be used whenever
 * records have been deleted. This avoids inserting overlapping
 * records with different physical mappings.
 */
void ocfs2_extent_map_insert_rec(struct inode *inode,
				 struct ocfs2_extent_rec *rec)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_extent_map *em = &oi->ip_extent_map;
	struct ocfs2_extent_map_item *emi, *new_emi = NULL;
	struct ocfs2_extent_map_item ins;

	ins.ei_cpos = le32_to_cpu(rec->e_cpos);
	ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
					       le64_to_cpu(rec->e_blkno));
	ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
	ins.ei_flags = rec->e_flags;

search:
	spin_lock(&oi->ip_lock);

	list_for_each_entry(emi, &em->em_list, ei_list) {
		if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
			list_move(&emi->ei_list, &em->em_list);
			spin_unlock(&oi->ip_lock);
			goto out;
		}
	}

	/*
	 * No item could be merged.
	 *
	 * Either allocate and add a new item, or overwrite the least
	 * recently used one.
	 */

	if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
		if (new_emi == NULL) {
			spin_unlock(&oi->ip_lock);

			new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
			if (new_emi == NULL)
				goto out;

			goto search;
		}

		ocfs2_copy_emi_fields(new_emi, &ins);
		list_add(&new_emi->ei_list, &em->em_list);
		em->em_num_items++;
		new_emi = NULL;
	} else {
		BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
		emi = list_entry(em->em_list.prev,
				 struct ocfs2_extent_map_item, ei_list);
		list_move(&emi->ei_list, &em->em_list);
		ocfs2_copy_emi_fields(emi, &ins);
	}

	spin_unlock(&oi->ip_lock);

out:
	kfree(new_emi);
}
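
/*
 * A note on the retry dance above: kmalloc(GFP_NOFS) may sleep, so
 * the allocation must happen with ip_lock (a spinlock) dropped. The
 * "goto search" then re-runs the merge scan, since the list may have
 * changed while the lock was released; if the retry merges instead
 * of inserting, new_emi stays non-NULL and the final kfree() frees
 * the unused allocation.
 */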

static int ocfs2_last_eb_is_empty(struct inode *inode,
				  struct ocfs2_dinode *di)
{
	int ret, next_free;
	u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;

	ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	eb = (struct ocfs2_extent_block *) eb_bh->b_data;
	el = &eb->h_list;

	if (el->l_tree_depth) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has non zero tree depth in leaf block %llu\n",
			    inode->i_ino,
			    (unsigned long long)eb_bh->b_blocknr);
		ret = -EROFS;
		goto out;
	}

	next_free = le16_to_cpu(el->l_next_free_rec);

	if (next_free == 0 ||
	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
		ret = 1;

out:
	brelse(eb_bh);
	return ret;
}

/*
 * Return the 1st index within el which contains an extent start
 * larger than v_cluster.
 */
static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
				       u32 v_cluster)
{
	int i;
	struct ocfs2_extent_rec *rec;

	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
		rec = &el->l_recs[i];

		if (v_cluster < le32_to_cpu(rec->e_cpos))
			break;
	}

	return i;
}
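
/*
 * Worked example (hypothetical values): for records starting at
 * cpos 0, 16 and 64, a search for v_cluster = 20 returns index 2
 * (the record at cpos 64 is the first whose e_cpos is larger than
 * 20), while a search for v_cluster = 100 runs off the end and
 * returns l_next_free_rec.
 */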

/*
 * Figure out the size of a hole which starts at v_cluster within the given
 * extent list.
 *
 * If there is no more allocation past v_cluster, we return the maximum
 * cluster size minus v_cluster.
 *
 * If we have in-inode extents, then el points to the dinode list and
 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 * containing el.
 */
int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
			       struct ocfs2_extent_list *el,
			       struct buffer_head *eb_bh,
			       u32 v_cluster,
			       u32 *num_clusters)
{
	int ret, i;
	struct buffer_head *next_eb_bh = NULL;
	struct ocfs2_extent_block *eb, *next_eb;

	i = ocfs2_search_for_hole_index(el, v_cluster);

	if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
		eb = (struct ocfs2_extent_block *)eb_bh->b_data;

		/*
		 * Check the next leaf for any extents.
		 */

		if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
			goto no_more_extents;

		ret = ocfs2_read_extent_block(ci,
					      le64_to_cpu(eb->h_next_leaf_blk),
					      &next_eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
		el = &next_eb->h_list;
		i = ocfs2_search_for_hole_index(el, v_cluster);
	}

no_more_extents:
	if (i == le16_to_cpu(el->l_next_free_rec)) {
		/*
		 * We're at the end of our existing allocation. Just
		 * return the maximum number of clusters we could
		 * possibly allocate.
		 */
		*num_clusters = UINT_MAX - v_cluster;
	} else {
		*num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
	}

	ret = 0;
out:
	brelse(next_eb_bh);
	return ret;
}
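
/*
 * Worked example (continuing the hypothetical values above): for a
 * hole at v_cluster = 20 with the next record starting at
 * e_cpos = 64, the hole is 64 - 20 = 44 clusters long. Past the last
 * record of the rightmost leaf, the hole is reported as
 * UINT_MAX - v_cluster, i.e. everything that could ever be
 * allocated from that point on.
 */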

static int ocfs2_get_clusters_nocache(struct inode *inode,
				      struct buffer_head *di_bh,
				      u32 v_cluster, unsigned int *hole_len,
				      struct ocfs2_extent_rec *ret_rec,
				      unsigned int *is_last)
{
	int i, ret, tree_height, len;
	struct ocfs2_dinode *di;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;
	struct buffer_head *eb_bh = NULL;

	memset(ret_rec, 0, sizeof(*ret_rec));
	if (is_last)
		*is_last = 0;

	di = (struct ocfs2_dinode *) di_bh->b_data;
	el = &di->id2.i_list;
	tree_height = le16_to_cpu(el->l_tree_depth);

	if (tree_height > 0) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in leaf block %llu\n",
				    inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		/*
		 * Holes can be larger than the maximum size of an
		 * extent, so we return their lengths in a separate
		 * field.
		 */
		if (hole_len) {
			ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
							 el, eb_bh,
							 v_cluster, &len);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			*hole_len = len;
		}
		goto out_hole;
	}

	rec = &el->l_recs[i];

	BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

	if (!rec->e_blkno) {
		ocfs2_error(inode->i_sb,
			    "Inode %lu has bad extent record (%u, %u, 0)\n",
			    inode->i_ino,
			    le32_to_cpu(rec->e_cpos),
			    ocfs2_rec_clusters(el, rec));
		ret = -EROFS;
		goto out;
	}

	*ret_rec = *rec;

	/*
	 * Checking for last extent is potentially expensive - we
	 * might have to look at the next leaf over to see if it's
	 * empty.
	 *
	 * The first two checks are to see whether the caller even
	 * cares for this information, and if the extent is at least
	 * the last in its list.
	 *
	 * If those hold true, then the extent is last if any of the
	 * additional conditions hold true:
	 *  - Extent list is in-inode
	 *  - Extent list is right-most
	 *  - Extent list is 2nd to rightmost, with empty right-most
	 */
	if (is_last) {
		if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
			if (tree_height == 0)
				*is_last = 1;
			else if (eb->h_blkno == di->i_last_eb_blk)
				*is_last = 1;
			else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
				ret = ocfs2_last_eb_is_empty(inode, di);
				if (ret < 0) {
					mlog_errno(ret);
					goto out;
				}
				if (ret == 1)
					*is_last = 1;
			}
		}
	}

out_hole:
	ret = 0;
out:
	brelse(eb_bh);
	return ret;
}

static void ocfs2_relative_extent_offsets(struct super_block *sb,
					  u32 v_cluster,
					  struct ocfs2_extent_rec *rec,
					  u32 *p_cluster, u32 *num_clusters)
{
	u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);

	*p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
	*p_cluster = *p_cluster + coff;

	if (num_clusters)
		*num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
}
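
/*
 * Worked example (hypothetical values): for rec = {e_cpos = 8,
 * e_leaf_clusters = 4} whose e_blkno converts to physical cluster
 * 100, a query at v_cluster = 10 gives coff = 2, so *p_cluster = 102
 * and *num_clusters = 2 - the mapping is reported relative to the
 * requested cluster, not to the start of the record.
 */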

int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
			     u32 *p_cluster, u32 *num_clusters,
			     struct ocfs2_extent_list *el,
			     unsigned int *extent_flags)
{
	int ret = 0, i;
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_rec *rec;
	u32 coff;

	if (el->l_tree_depth) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has non zero tree depth in xattr leaf block %llu\n",
				    inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	i = ocfs2_search_extent_list(el, v_cluster);
	if (i == -1) {
		ret = -EROFS;
		mlog_errno(ret);
		goto out;
	} else {
		rec = &el->l_recs[i];
		BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

		if (!rec->e_blkno) {
			ocfs2_error(inode->i_sb,
				    "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
				    inode->i_ino,
				    le32_to_cpu(rec->e_cpos),
				    ocfs2_rec_clusters(el, rec));
			ret = -EROFS;
			goto out;
		}
		coff = v_cluster - le32_to_cpu(rec->e_cpos);
		*p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
						    le64_to_cpu(rec->e_blkno));
		*p_cluster = *p_cluster + coff;
		if (num_clusters)
			*num_clusters = ocfs2_rec_clusters(el, rec) - coff;

		if (extent_flags)
			*extent_flags = rec->e_flags;
	}
out:
	brelse(eb_bh);
	return ret;
}

int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
		       u32 *p_cluster, u32 *num_clusters,
		       unsigned int *extent_flags)
{
	int ret;
	unsigned int hole_len, flags = 0;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		ret = -ERANGE;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
				      num_clusters, extent_flags);
	if (ret == 0)
		goto out;

	ret = ocfs2_read_inode_block(inode, &di_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
					 &rec, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (rec.e_blkno == 0ULL) {
		/*
		 * A hole was found. Return some canned values that
		 * callers can key on. If asked for, num_clusters will
		 * be populated with the size of the hole.
		 */
		*p_cluster = 0;
		if (num_clusters) {
			*num_clusters = hole_len;
		}
	} else {
		ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
					      p_cluster, num_clusters);
		flags = rec.e_flags;

		ocfs2_extent_map_insert_rec(inode, &rec);
	}

	if (extent_flags)
		*extent_flags = flags;

out:
	brelse(di_bh);
	return ret;
}
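
/*
 * Note that this is the cached front end to
 * ocfs2_get_clusters_nocache(): a hit in the per-inode extent map
 * returns before the inode block is even read, while on a miss the
 * record looked up on disk is inserted back into the map. Holes are
 * deliberately not cached - only real extents reach
 * ocfs2_extent_map_insert_rec() above.
 */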

/*
 * This expects alloc_sem to be held. The allocation cannot change at
 * all while the map is in the process of being updated.
 */
int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
				u64 *ret_count, unsigned int *extent_flags)
{
	int ret;
	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 cpos, num_clusters, p_cluster;
	u64 boff = 0;

	cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);

	ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
				 extent_flags);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * p_cluster == 0 indicates a hole.
	 */
	if (p_cluster) {
		boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
		boff += (v_blkno & (u64)(bpc - 1));
	}

	*p_blkno = boff;

	if (ret_count) {
		*ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
		*ret_count -= v_blkno & (u64)(bpc - 1);
	}

out:
	return ret;
}
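
/*
 * Worked example (hypothetical geometry): with 4 KB blocks and 32 KB
 * clusters, bpc = 8, so for v_blkno = 21 the low bits (21 & 7) = 5
 * are the block offset within the cluster. If cpos = 21 / 8 = 2 maps
 * to p_cluster = 100, then *p_blkno = 100 * 8 + 5 = 805, and
 * *ret_count is 8 * num_clusters - 5, the blocks remaining in the
 * mapping from v_blkno on.
 */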

/*
 * The name ocfs2_fiemap_inline() may be a little misleading: it
 * handles fiemap not only for inline-data files but also for fast
 * symlinks, because the two are identical as far as extent mapping
 * is concerned.
 */
static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
			       struct fiemap_extent_info *fieinfo,
			       u64 map_start)
{
	int ret;
	unsigned int id_count;
	struct ocfs2_dinode *di;
	u64 phys;
	u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);

	di = (struct ocfs2_dinode *)di_bh->b_data;
	if (ocfs2_inode_is_fast_symlink(inode))
		id_count = ocfs2_fast_symlink_chars(inode->i_sb);
	else
		id_count = le16_to_cpu(di->id2.i_data.id_count);

	if (map_start < id_count) {
		phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
		if (ocfs2_inode_is_fast_symlink(inode))
			phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
		else
			phys += offsetof(struct ocfs2_dinode,
					 id2.i_data.id_data);

		ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
					      flags);
		if (ret < 0)
			return ret;
	}

	return 0;
}

int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 map_start, u64 map_len)
{
	int ret, is_last;
	u32 mapping_end, cpos;
	unsigned int hole_size;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u64 len_bytes, phys_bytes, virt_bytes;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	ret = fiemap_prep(inode, fieinfo, map_start, &map_len, 0);
	if (ret)
		return ret;

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	/*
	 * Handle inline-data and fast symlink separately.
	 */
	if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
	    ocfs2_inode_is_fast_symlink(inode)) {
		ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
		goto out_unlock;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		u32 fe_flags;

		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 &hole_size, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		if (rec.e_blkno == 0ULL) {
			cpos += hole_size;
			continue;
		}

		fe_flags = 0;
		if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
			fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			fe_flags |= FIEMAP_EXTENT_SHARED;
		if (is_last)
			fe_flags |= FIEMAP_EXTENT_LAST;
		len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
		phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
		virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;

		ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
					      len_bytes, fe_flags);
		if (ret)
			break;

		cpos = le32_to_cpu(rec.e_cpos) + le16_to_cpu(rec.e_leaf_clusters);
	}

	if (ret > 0)
		ret = 0;

out_unlock:
	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:
	return ret;
}

/* Is IO overwriting allocated blocks? */
int ocfs2_overwrite_io(struct inode *inode, struct buffer_head *di_bh,
		       u64 map_start, u64 map_len)
{
	int ret = 0, is_last;
	u32 mapping_end, cpos;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_rec rec;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (ocfs2_size_fits_inline_data(di_bh, map_start + map_len))
			return ret;
		else
			return -EAGAIN;
	}

	cpos = map_start >> osb->s_clustersize_bits;
	mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
					       map_start + map_len);
	is_last = 0;
	while (cpos < mapping_end && !is_last) {
		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
						 NULL, &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (rec.e_blkno == 0ULL)
			break;

		if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
			break;

		cpos = le32_to_cpu(rec.e_cpos) +
			le16_to_cpu(rec.e_leaf_clusters);
	}

	if (cpos < mapping_end)
		ret = -EAGAIN;
out:
	return ret;
}

int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	int ret;
	unsigned int is_last = 0, is_data = 0;
	u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	u32 cpos, cend, clen, hole_size;
	u64 extoff, extlen;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_extent_rec rec;

	BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);

	ret = ocfs2_inode_lock(inode, &di_bh, 0);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	down_read(&OCFS2_I(inode)->ip_alloc_sem);

	if (*offset >= i_size_read(inode)) {
		ret = -ENXIO;
		goto out_unlock;
	}

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		if (whence == SEEK_HOLE)
			*offset = i_size_read(inode);
		goto out_unlock;
	}

	clen = 0;
	cpos = *offset >> cs_bits;
	cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));

	while (cpos < cend && !is_last) {
		ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
						 &rec, &is_last);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}

		extoff = cpos;
		extoff <<= cs_bits;

		if (rec.e_blkno == 0ULL) {
			clen = hole_size;
			is_data = 0;
		} else {
			clen = le16_to_cpu(rec.e_leaf_clusters) -
				(cpos - le32_to_cpu(rec.e_cpos));
			is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1;
		}

		if ((!is_data && whence == SEEK_HOLE) ||
		    (is_data && whence == SEEK_DATA)) {
			if (extoff > *offset)
				*offset = extoff;
			goto out_unlock;
		}

		if (!is_last)
			cpos += clen;
	}

	if (whence == SEEK_HOLE) {
		extoff = cpos;
		extoff <<= cs_bits;
		extlen = clen;
		extlen <<= cs_bits;

		if ((extoff + extlen) > i_size_read(inode))
			extlen = i_size_read(inode) - extoff;
		extoff += extlen;
		if (extoff > *offset)
			*offset = extoff;
		goto out_unlock;
	}

	ret = -ENXIO;

out_unlock:
	brelse(di_bh);

	up_read(&OCFS2_I(inode)->ip_alloc_sem);

	ocfs2_inode_unlock(inode, 0);
out:
	return ret;
}

int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
			   struct buffer_head *bhs[], int flags,
			   int (*validate)(struct super_block *sb,
					   struct buffer_head *bh))
{
	int rc = 0;
	u64 p_block, p_count;
	int i, count, done = 0;

	trace_ocfs2_read_virt_blocks(
	     inode, (unsigned long long)v_block, nr, bhs, flags,
	     validate);

	if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
	    i_size_read(inode)) {
		BUG_ON(!(flags & OCFS2_BH_READAHEAD));
		goto out;
	}

	while (done < nr) {
		down_read(&OCFS2_I(inode)->ip_alloc_sem);
		rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
						 &p_block, &p_count, NULL);
		up_read(&OCFS2_I(inode)->ip_alloc_sem);
		if (rc) {
			mlog_errno(rc);
			break;
		}

		if (!p_block) {
			rc = -EIO;
			mlog(ML_ERROR,
			     "Inode #%llu contains a hole at offset %llu\n",
			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
			     (unsigned long long)(v_block + done) <<
			     inode->i_sb->s_blocksize_bits);
			break;
		}

		count = nr - done;
		if (p_count < count)
			count = p_count;

		/*
		 * If the caller passed us bhs, they should have come
		 * from a previous readahead call to this function.  Thus,
		 * they should have the right b_blocknr.
		 */
		for (i = 0; i < count; i++) {
			if (!bhs[done + i])
				continue;
			BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
		}

		rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
				       bhs + done, flags, validate);
		if (rc) {
			mlog_errno(rc);
			break;
		}
		done += count;
	}

out:
	return rc;
}
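
/*
 * One usage pattern the b_blocknr check above anticipates: a caller
 * may invoke this function first with OCFS2_BH_READAHEAD to kick off
 * asynchronous reads, then call it again with flags = 0 and the same
 * bhs[] array to wait for and validate the buffers; re-submitted
 * buffer heads must still map to the same physical blocks.
 */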