cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

xattr.c (197555B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * xattr.c
      4 *
      5 * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
      6 *
      7 * CREDITS:
      8 * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
      9 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
     10 */
     11
     12#include <linux/capability.h>
     13#include <linux/fs.h>
     14#include <linux/types.h>
     15#include <linux/slab.h>
     16#include <linux/highmem.h>
     17#include <linux/pagemap.h>
     18#include <linux/uio.h>
     19#include <linux/sched.h>
     20#include <linux/splice.h>
     21#include <linux/mount.h>
     22#include <linux/writeback.h>
     23#include <linux/falloc.h>
     24#include <linux/sort.h>
     25#include <linux/init.h>
     26#include <linux/module.h>
     27#include <linux/string.h>
     28#include <linux/security.h>
     29
     30#include <cluster/masklog.h>
     31
     32#include "ocfs2.h"
     33#include "alloc.h"
     34#include "blockcheck.h"
     35#include "dlmglue.h"
     36#include "file.h"
     37#include "symlink.h"
     38#include "sysfile.h"
     39#include "inode.h"
     40#include "journal.h"
     41#include "ocfs2_fs.h"
     42#include "suballoc.h"
     43#include "uptodate.h"
     44#include "buffer_head_io.h"
     45#include "super.h"
     46#include "xattr.h"
     47#include "refcounttree.h"
     48#include "acl.h"
     49#include "ocfs2_trace.h"
     50
     51struct ocfs2_xattr_def_value_root {
     52	struct ocfs2_xattr_value_root	xv;
     53	struct ocfs2_extent_rec		er;
     54};
     55
     56struct ocfs2_xattr_bucket {
     57	/* The inode these xattrs are associated with */
     58	struct inode *bu_inode;
     59
     60	/* The actual buffers that make up the bucket */
     61	struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
     62
     63	/* How many blocks make up one bucket for this filesystem */
     64	int bu_blocks;
     65};
     66
     67struct ocfs2_xattr_set_ctxt {
     68	handle_t *handle;
     69	struct ocfs2_alloc_context *meta_ac;
     70	struct ocfs2_alloc_context *data_ac;
     71	struct ocfs2_cached_dealloc_ctxt dealloc;
     72	int set_abort;
     73};
     74
     75#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
     76#define OCFS2_XATTR_INLINE_SIZE	80
     77#define OCFS2_XATTR_HEADER_GAP	4
     78#define OCFS2_XATTR_FREE_IN_IBODY	(OCFS2_MIN_XATTR_INLINE_SIZE \
     79					 - sizeof(struct ocfs2_xattr_header) \
     80					 - OCFS2_XATTR_HEADER_GAP)
     81#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)	((ptr)->i_sb->s_blocksize \
     82					 - sizeof(struct ocfs2_xattr_block) \
     83					 - sizeof(struct ocfs2_xattr_header) \
     84					 - OCFS2_XATTR_HEADER_GAP)
     85
     86static struct ocfs2_xattr_def_value_root def_xv = {
     87	.xv.xr_list.l_count = cpu_to_le16(1),
     88};
     89
     90const struct xattr_handler *ocfs2_xattr_handlers[] = {
     91	&ocfs2_xattr_user_handler,
     92	&posix_acl_access_xattr_handler,
     93	&posix_acl_default_xattr_handler,
     94	&ocfs2_xattr_trusted_handler,
     95	&ocfs2_xattr_security_handler,
     96	NULL
     97};
     98
     99static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
    100	[OCFS2_XATTR_INDEX_USER]	= &ocfs2_xattr_user_handler,
    101	[OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
    102					= &posix_acl_access_xattr_handler,
    103	[OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
    104					= &posix_acl_default_xattr_handler,
    105	[OCFS2_XATTR_INDEX_TRUSTED]	= &ocfs2_xattr_trusted_handler,
    106	[OCFS2_XATTR_INDEX_SECURITY]	= &ocfs2_xattr_security_handler,
    107};
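
/*
 * These handlers back the generic xattr syscalls and are selected by name
 * prefix.  For example, a user-space call such as
 *
 *	setxattr(path, "user.comment", value, value_len, 0);
 *
 * on an ocfs2 file is dispatched to ocfs2_xattr_user_handler, while
 * "security.*" names go to ocfs2_xattr_security_handler.
 */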
    108
    109struct ocfs2_xattr_info {
    110	int		xi_name_index;
    111	const char	*xi_name;
    112	int		xi_name_len;
    113	const void	*xi_value;
    114	size_t		xi_value_len;
    115};
    116
    117struct ocfs2_xattr_search {
    118	struct buffer_head *inode_bh;
    119	/*
     120	 * xattr_bh points to the buffer head of the block that holds the extended
     121	 * attributes; when they live in the inode, xattr_bh equals inode_bh.
    122	 */
    123	struct buffer_head *xattr_bh;
    124	struct ocfs2_xattr_header *header;
    125	struct ocfs2_xattr_bucket *bucket;
    126	void *base;
    127	void *end;
    128	struct ocfs2_xattr_entry *here;
    129	int not_found;
    130};
    131
    132/* Operations on struct ocfs2_xa_entry */
    133struct ocfs2_xa_loc;
    134struct ocfs2_xa_loc_operations {
    135	/*
    136	 * Journal functions
    137	 */
    138	int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc,
    139				  int type);
    140	void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc);
    141
    142	/*
    143	 * Return a pointer to the appropriate buffer in loc->xl_storage
    144	 * at the given offset from loc->xl_header.
    145	 */
    146	void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);
    147
    148	/* Can we reuse the existing entry for the new value? */
    149	int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc,
    150			     struct ocfs2_xattr_info *xi);
    151
    152	/* How much space is needed for the new value? */
    153	int (*xlo_check_space)(struct ocfs2_xa_loc *loc,
    154			       struct ocfs2_xattr_info *xi);
    155
    156	/*
    157	 * Return the offset of the first name+value pair.  This is
    158	 * the start of our downward-filling free space.
    159	 */
    160	int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc);
    161
    162	/*
    163	 * Remove the name+value at this location.  Do whatever is
    164	 * appropriate with the remaining name+value pairs.
    165	 */
    166	void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);
    167
    168	/* Fill xl_entry with a new entry */
    169	void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash);
    170
    171	/* Add name+value storage to an entry */
    172	void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size);
    173
    174	/*
    175	 * Initialize the value buf's access and bh fields for this entry.
    176	 * ocfs2_xa_fill_value_buf() will handle the xv pointer.
    177	 */
    178	void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc,
    179				   struct ocfs2_xattr_value_buf *vb);
    180};
    181
    182/*
    183 * Describes an xattr entry location.  This is a memory structure
    184 * tracking the on-disk structure.
    185 */
    186struct ocfs2_xa_loc {
    187	/* This xattr belongs to this inode */
    188	struct inode *xl_inode;
    189
    190	/* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */
    191	struct ocfs2_xattr_header *xl_header;
    192
    193	/* Bytes from xl_header to the end of the storage */
    194	int xl_size;
    195
    196	/*
    197	 * The ocfs2_xattr_entry this location describes.  If this is
    198	 * NULL, this location describes the on-disk structure where it
    199	 * would have been.
    200	 */
    201	struct ocfs2_xattr_entry *xl_entry;
    202
    203	/*
    204	 * Internal housekeeping
    205	 */
    206
    207	/* Buffer(s) containing this entry */
    208	void *xl_storage;
    209
    210	/* Operations on the storage backing this location */
    211	const struct ocfs2_xa_loc_operations *xl_ops;
    212};
    213
    214/*
    215 * Convenience functions to calculate how much space is needed for a
    216 * given name+value pair
    217 */
    218static int namevalue_size(int name_len, uint64_t value_len)
    219{
    220	if (value_len > OCFS2_XATTR_INLINE_SIZE)
    221		return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
    222	else
    223		return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
    224}
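
/*
 * For example: a 40-byte value fits inline, so the pair takes
 * OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(40) bytes with the value
 * stored right after the name.  A 200-byte value exceeds
 * OCFS2_XATTR_INLINE_SIZE (80), so only an ocfs2_xattr_value_root of
 * OCFS2_XATTR_ROOT_SIZE bytes is stored next to the name and the value
 * itself lives in separately allocated clusters.
 */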
    225
    226static int namevalue_size_xi(struct ocfs2_xattr_info *xi)
    227{
    228	return namevalue_size(xi->xi_name_len, xi->xi_value_len);
    229}
    230
    231static int namevalue_size_xe(struct ocfs2_xattr_entry *xe)
    232{
    233	u64 value_len = le64_to_cpu(xe->xe_value_size);
    234
    235	BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) &&
    236	       ocfs2_xattr_is_local(xe));
    237	return namevalue_size(xe->xe_name_len, value_len);
    238}
    239
    240
    241static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
    242					     struct ocfs2_xattr_header *xh,
    243					     int index,
    244					     int *block_off,
    245					     int *new_offset);
    246
    247static int ocfs2_xattr_block_find(struct inode *inode,
    248				  int name_index,
    249				  const char *name,
    250				  struct ocfs2_xattr_search *xs);
    251static int ocfs2_xattr_index_block_find(struct inode *inode,
    252					struct buffer_head *root_bh,
    253					int name_index,
    254					const char *name,
    255					struct ocfs2_xattr_search *xs);
    256
    257static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
    258					struct buffer_head *blk_bh,
    259					char *buffer,
    260					size_t buffer_size);
    261
    262static int ocfs2_xattr_create_index_block(struct inode *inode,
    263					  struct ocfs2_xattr_search *xs,
    264					  struct ocfs2_xattr_set_ctxt *ctxt);
    265
    266static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
    267					     struct ocfs2_xattr_info *xi,
    268					     struct ocfs2_xattr_search *xs,
    269					     struct ocfs2_xattr_set_ctxt *ctxt);
    270
    271typedef int (xattr_tree_rec_func)(struct inode *inode,
    272				  struct buffer_head *root_bh,
    273				  u64 blkno, u32 cpos, u32 len, void *para);
    274static int ocfs2_iterate_xattr_index_block(struct inode *inode,
    275					   struct buffer_head *root_bh,
    276					   xattr_tree_rec_func *rec_func,
    277					   void *para);
    278static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
    279					struct ocfs2_xattr_bucket *bucket,
    280					void *para);
    281static int ocfs2_rm_xattr_cluster(struct inode *inode,
    282				  struct buffer_head *root_bh,
    283				  u64 blkno,
    284				  u32 cpos,
    285				  u32 len,
    286				  void *para);
    287
    288static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
    289				  u64 src_blk, u64 last_blk, u64 to_blk,
    290				  unsigned int start_bucket,
    291				  u32 *first_hash);
    292static int ocfs2_prepare_refcount_xattr(struct inode *inode,
    293					struct ocfs2_dinode *di,
    294					struct ocfs2_xattr_info *xi,
    295					struct ocfs2_xattr_search *xis,
    296					struct ocfs2_xattr_search *xbs,
    297					struct ocfs2_refcount_tree **ref_tree,
    298					int *meta_need,
    299					int *credits);
    300static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
    301					   struct ocfs2_xattr_bucket *bucket,
    302					   int offset,
    303					   struct ocfs2_xattr_value_root **xv,
    304					   struct buffer_head **bh);
    305
    306static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
    307{
    308	return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
    309}
    310
    311static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
    312{
    313	return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
    314}
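
/*
 * For example, assuming the usual 4 KB OCFS2_XATTR_BUCKET_SIZE: with a
 * 512-byte block size a bucket spans 8 blocks, and a 64 KB cluster holds
 * 16 buckets.
 */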
    315
    316#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
    317#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
    318#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
    319
    320static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
    321{
    322	struct ocfs2_xattr_bucket *bucket;
    323	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
    324
    325	BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
    326
    327	bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
    328	if (bucket) {
    329		bucket->bu_inode = inode;
    330		bucket->bu_blocks = blks;
    331	}
    332
    333	return bucket;
    334}
    335
    336static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
    337{
    338	int i;
    339
    340	for (i = 0; i < bucket->bu_blocks; i++) {
    341		brelse(bucket->bu_bhs[i]);
    342		bucket->bu_bhs[i] = NULL;
    343	}
    344}
    345
    346static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
    347{
    348	if (bucket) {
    349		ocfs2_xattr_bucket_relse(bucket);
    350		bucket->bu_inode = NULL;
    351		kfree(bucket);
    352	}
    353}
    354
    355/*
    356 * A bucket that has never been written to disk doesn't need to be
    357 * read.  We just need the buffer_heads.  Don't call this for
    358 * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
    359 * them fully.
    360 */
    361static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
    362				   u64 xb_blkno, int new)
    363{
    364	int i, rc = 0;
    365
    366	for (i = 0; i < bucket->bu_blocks; i++) {
    367		bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
    368					      xb_blkno + i);
    369		if (!bucket->bu_bhs[i]) {
    370			rc = -ENOMEM;
    371			mlog_errno(rc);
    372			break;
    373		}
    374
    375		if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
    376					   bucket->bu_bhs[i])) {
    377			if (new)
    378				ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
    379							      bucket->bu_bhs[i]);
    380			else {
    381				set_buffer_uptodate(bucket->bu_bhs[i]);
    382				ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
    383							  bucket->bu_bhs[i]);
    384			}
    385		}
    386	}
    387
    388	if (rc)
    389		ocfs2_xattr_bucket_relse(bucket);
    390	return rc;
    391}
    392
    393/* Read the xattr bucket at xb_blkno */
    394static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
    395				   u64 xb_blkno)
    396{
    397	int rc;
    398
    399	rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
    400			       bucket->bu_blocks, bucket->bu_bhs, 0,
    401			       NULL);
    402	if (!rc) {
    403		spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
    404		rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
    405						 bucket->bu_bhs,
    406						 bucket->bu_blocks,
    407						 &bucket_xh(bucket)->xh_check);
    408		spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
    409		if (rc)
    410			mlog_errno(rc);
    411	}
    412
    413	if (rc)
    414		ocfs2_xattr_bucket_relse(bucket);
    415	return rc;
    416}
    417
    418static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
    419					     struct ocfs2_xattr_bucket *bucket,
    420					     int type)
    421{
    422	int i, rc = 0;
    423
    424	for (i = 0; i < bucket->bu_blocks; i++) {
    425		rc = ocfs2_journal_access(handle,
    426					  INODE_CACHE(bucket->bu_inode),
    427					  bucket->bu_bhs[i], type);
    428		if (rc) {
    429			mlog_errno(rc);
    430			break;
    431		}
    432	}
    433
    434	return rc;
    435}
    436
    437static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
    438					     struct ocfs2_xattr_bucket *bucket)
    439{
    440	int i;
    441
    442	spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
    443	ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
    444				   bucket->bu_bhs, bucket->bu_blocks,
    445				   &bucket_xh(bucket)->xh_check);
    446	spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
    447
    448	for (i = 0; i < bucket->bu_blocks; i++)
    449		ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
    450}
    451
    452static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
    453					 struct ocfs2_xattr_bucket *src)
    454{
    455	int i;
    456	int blocksize = src->bu_inode->i_sb->s_blocksize;
    457
    458	BUG_ON(dest->bu_blocks != src->bu_blocks);
    459	BUG_ON(dest->bu_inode != src->bu_inode);
    460
    461	for (i = 0; i < src->bu_blocks; i++) {
    462		memcpy(bucket_block(dest, i), bucket_block(src, i),
    463		       blocksize);
    464	}
    465}
    466
    467static int ocfs2_validate_xattr_block(struct super_block *sb,
    468				      struct buffer_head *bh)
    469{
    470	int rc;
    471	struct ocfs2_xattr_block *xb =
    472		(struct ocfs2_xattr_block *)bh->b_data;
    473
    474	trace_ocfs2_validate_xattr_block((unsigned long long)bh->b_blocknr);
    475
    476	BUG_ON(!buffer_uptodate(bh));
    477
    478	/*
    479	 * If the ecc fails, we return the error but otherwise
    480	 * leave the filesystem running.  We know any error is
    481	 * local to this block.
    482	 */
    483	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
    484	if (rc)
    485		return rc;
    486
    487	/*
    488	 * Errors after here are fatal
    489	 */
    490
    491	if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
    492		return ocfs2_error(sb,
    493				   "Extended attribute block #%llu has bad signature %.*s\n",
    494				   (unsigned long long)bh->b_blocknr, 7,
    495				   xb->xb_signature);
    496	}
    497
    498	if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
    499		return ocfs2_error(sb,
    500				   "Extended attribute block #%llu has an invalid xb_blkno of %llu\n",
    501				   (unsigned long long)bh->b_blocknr,
    502				   (unsigned long long)le64_to_cpu(xb->xb_blkno));
    503	}
    504
    505	if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
    506		return ocfs2_error(sb,
    507				   "Extended attribute block #%llu has an invalid xb_fs_generation of #%u\n",
    508				   (unsigned long long)bh->b_blocknr,
    509				   le32_to_cpu(xb->xb_fs_generation));
    510	}
    511
    512	return 0;
    513}
    514
    515static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
    516				  struct buffer_head **bh)
    517{
    518	int rc;
    519	struct buffer_head *tmp = *bh;
    520
    521	rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
    522			      ocfs2_validate_xattr_block);
    523
    524	/* If ocfs2_read_block() got us a new bh, pass it up. */
    525	if (!rc && !*bh)
    526		*bh = tmp;
    527
    528	return rc;
    529}
    530
    531static inline const char *ocfs2_xattr_prefix(int name_index)
    532{
    533	const struct xattr_handler *handler = NULL;
    534
    535	if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
    536		handler = ocfs2_xattr_handler_map[name_index];
    537	return handler ? xattr_prefix(handler) : NULL;
    538}
    539
    540static u32 ocfs2_xattr_name_hash(struct inode *inode,
    541				 const char *name,
    542				 int name_len)
    543{
    544	/* Get hash value of uuid from super block */
    545	u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
    546	int i;
    547
    548	/* hash extended attribute name */
    549	for (i = 0; i < name_len; i++) {
    550		hash = (hash << OCFS2_HASH_SHIFT) ^
    551		       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
    552		       *name++;
    553	}
    554
    555	return hash;
    556}
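
/*
 * A minimal user-space sketch of the rotate-and-xor hash above.  The seed
 * and shift are stand-ins: the kernel seeds with the superblock's
 * uuid_hash and shifts by OCFS2_HASH_SHIFT from ocfs2_fs.h.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_HASH_SHIFT	5		/* stand-in for OCFS2_HASH_SHIFT */
#define DEMO_UUID_HASH	0x12345678u	/* stand-in for osb->uuid_hash */

static uint32_t demo_xattr_name_hash(uint32_t hash, const char *name,
				     int name_len)
{
	int i;

	for (i = 0; i < name_len; i++) {
		hash = (hash << DEMO_HASH_SHIFT) ^
		       (hash >> (8 * sizeof(hash) - DEMO_HASH_SHIFT)) ^
		       *name++;
	}

	return hash;
}

int main(void)
{
	const char *name = "comment";

	printf("hash(\"%s\") = 0x%08x\n", name,
	       (unsigned)demo_xattr_name_hash(DEMO_UUID_HASH, name, 7));
	return 0;
}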
    557
    558static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
    559{
    560	return namevalue_size(name_len, value_len) +
    561		sizeof(struct ocfs2_xattr_entry);
    562}
    563
    564static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi)
    565{
    566	return namevalue_size_xi(xi) +
    567		sizeof(struct ocfs2_xattr_entry);
    568}
    569
    570static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe)
    571{
    572	return namevalue_size_xe(xe) +
    573		sizeof(struct ocfs2_xattr_entry);
    574}
    575
    576int ocfs2_calc_security_init(struct inode *dir,
    577			     struct ocfs2_security_xattr_info *si,
    578			     int *want_clusters,
    579			     int *xattr_credits,
    580			     struct ocfs2_alloc_context **xattr_ac)
    581{
    582	int ret = 0;
    583	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
    584	int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
    585						 si->value_len);
    586
    587	/*
     588	 * The maximum space a security xattr can take inline is
     589	 * 256 (name) + 80 (value) + 16 (entry) = 352 bytes,
     590	 * so reserving one metadata block for it is enough.
    591	 */
    592	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
    593	    s_size > OCFS2_XATTR_FREE_IN_IBODY) {
    594		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
    595		if (ret) {
    596			mlog_errno(ret);
    597			return ret;
    598		}
    599		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
    600	}
    601
     602	/* reserve clusters for an xattr value that will be stored in the B-tree */
    603	if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
    604		int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
    605							    si->value_len);
    606
    607		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
    608							   new_clusters);
    609		*want_clusters += new_clusters;
    610	}
    611	return ret;
    612}
    613
    614int ocfs2_calc_xattr_init(struct inode *dir,
    615			  struct buffer_head *dir_bh,
    616			  umode_t mode,
    617			  struct ocfs2_security_xattr_info *si,
    618			  int *want_clusters,
    619			  int *xattr_credits,
    620			  int *want_meta)
    621{
    622	int ret = 0;
    623	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
    624	int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
    625
    626	if (si->enable)
    627		s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
    628						     si->value_len);
    629
    630	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
    631		down_read(&OCFS2_I(dir)->ip_xattr_sem);
    632		acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
    633					OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
    634					"", NULL, 0);
    635		up_read(&OCFS2_I(dir)->ip_xattr_sem);
    636		if (acl_len > 0) {
    637			a_size = ocfs2_xattr_entry_real_size(0, acl_len);
    638			if (S_ISDIR(mode))
    639				a_size <<= 1;
    640		} else if (acl_len != 0 && acl_len != -ENODATA) {
    641			ret = acl_len;
    642			mlog_errno(ret);
    643			return ret;
    644		}
    645	}
    646
    647	if (!(s_size + a_size))
    648		return ret;
    649
    650	/*
     651	 * The maximum space a security xattr can take inline is
     652	 * 256 (name) + 80 (value) + 16 (entry) = 352 bytes.
     653	 * The maximum space the ACL xattrs can take inline is
     654	 * (80 (value) + 16 (entry)) * 2 (for a directory) = 192 bytes.
     655	 * When blocksize = 512 we may need to reserve one more cluster
     656	 * for an xattr bucket; otherwise reserving one metadata block
     657	 * for them is enough.
    658	 * If this is a new directory with inline data,
    659	 * we choose to reserve the entire inline area for
    660	 * directory contents and force an external xattr block.
    661	 */
    662	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
    663	    (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
    664	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
    665		*want_meta = *want_meta + 1;
    666		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
    667	}
    668
    669	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
    670	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
    671		*want_clusters += 1;
    672		*xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
    673	}
    674
    675	/*
     676	 * reserve credits and clusters for xattrs that have a large value
     677	 * and must be stored outside the inode
    678	 */
    679	if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
    680		new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
    681							si->value_len);
    682		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
    683							   new_clusters);
    684		*want_clusters += new_clusters;
    685	}
    686	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
    687	    acl_len > OCFS2_XATTR_INLINE_SIZE) {
     688	/* a directory has two types of ACLs: DEFAULT and ACCESS */
    689		new_clusters = (S_ISDIR(mode) ? 2 : 1) *
    690				ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
    691		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
    692							   new_clusters);
    693		*want_clusters += new_clusters;
    694	}
    695
    696	return ret;
    697}
    698
    699static int ocfs2_xattr_extend_allocation(struct inode *inode,
    700					 u32 clusters_to_add,
    701					 struct ocfs2_xattr_value_buf *vb,
    702					 struct ocfs2_xattr_set_ctxt *ctxt)
    703{
    704	int status = 0, credits;
    705	handle_t *handle = ctxt->handle;
    706	enum ocfs2_alloc_restarted why;
    707	u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
    708	struct ocfs2_extent_tree et;
    709
    710	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
    711
    712	while (clusters_to_add) {
    713		trace_ocfs2_xattr_extend_allocation(clusters_to_add);
    714
    715		status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
    716				       OCFS2_JOURNAL_ACCESS_WRITE);
    717		if (status < 0) {
    718			mlog_errno(status);
    719			break;
    720		}
    721
    722		prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
    723		status = ocfs2_add_clusters_in_btree(handle,
    724						     &et,
    725						     &logical_start,
    726						     clusters_to_add,
    727						     0,
    728						     ctxt->data_ac,
    729						     ctxt->meta_ac,
    730						     &why);
    731		if ((status < 0) && (status != -EAGAIN)) {
    732			if (status != -ENOSPC)
    733				mlog_errno(status);
    734			break;
    735		}
    736
    737		ocfs2_journal_dirty(handle, vb->vb_bh);
    738
    739		clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
    740					 prev_clusters;
    741
    742		if (why != RESTART_NONE && clusters_to_add) {
    743			/*
    744			 * We can only fail in case the alloc file doesn't give
    745			 * up enough clusters.
    746			 */
    747			BUG_ON(why == RESTART_META);
    748
    749			credits = ocfs2_calc_extend_credits(inode->i_sb,
    750							    &vb->vb_xv->xr_list);
    751			status = ocfs2_extend_trans(handle, credits);
    752			if (status < 0) {
    753				status = -ENOMEM;
    754				mlog_errno(status);
    755				break;
    756			}
    757		}
    758	}
    759
    760	return status;
    761}
    762
    763static int __ocfs2_remove_xattr_range(struct inode *inode,
    764				      struct ocfs2_xattr_value_buf *vb,
    765				      u32 cpos, u32 phys_cpos, u32 len,
    766				      unsigned int ext_flags,
    767				      struct ocfs2_xattr_set_ctxt *ctxt)
    768{
    769	int ret;
    770	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
    771	handle_t *handle = ctxt->handle;
    772	struct ocfs2_extent_tree et;
    773
    774	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
    775
    776	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
    777			    OCFS2_JOURNAL_ACCESS_WRITE);
    778	if (ret) {
    779		mlog_errno(ret);
    780		goto out;
    781	}
    782
    783	ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
    784				  &ctxt->dealloc);
    785	if (ret) {
    786		mlog_errno(ret);
    787		goto out;
    788	}
    789
    790	le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
    791	ocfs2_journal_dirty(handle, vb->vb_bh);
    792
    793	if (ext_flags & OCFS2_EXT_REFCOUNTED)
    794		ret = ocfs2_decrease_refcount(inode, handle,
    795					ocfs2_blocks_to_clusters(inode->i_sb,
    796								 phys_blkno),
    797					len, ctxt->meta_ac, &ctxt->dealloc, 1);
    798	else
    799		ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
    800						  phys_blkno, len);
    801	if (ret)
    802		mlog_errno(ret);
    803
    804out:
    805	return ret;
    806}
    807
    808static int ocfs2_xattr_shrink_size(struct inode *inode,
    809				   u32 old_clusters,
    810				   u32 new_clusters,
    811				   struct ocfs2_xattr_value_buf *vb,
    812				   struct ocfs2_xattr_set_ctxt *ctxt)
    813{
    814	int ret = 0;
    815	unsigned int ext_flags;
    816	u32 trunc_len, cpos, phys_cpos, alloc_size;
    817	u64 block;
    818
    819	if (old_clusters <= new_clusters)
    820		return 0;
    821
    822	cpos = new_clusters;
    823	trunc_len = old_clusters - new_clusters;
    824	while (trunc_len) {
    825		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
    826					       &alloc_size,
    827					       &vb->vb_xv->xr_list, &ext_flags);
    828		if (ret) {
    829			mlog_errno(ret);
    830			goto out;
    831		}
    832
    833		if (alloc_size > trunc_len)
    834			alloc_size = trunc_len;
    835
    836		ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
    837						 phys_cpos, alloc_size,
    838						 ext_flags, ctxt);
    839		if (ret) {
    840			mlog_errno(ret);
    841			goto out;
    842		}
    843
    844		block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
    845		ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
    846						       block, alloc_size);
    847		cpos += alloc_size;
    848		trunc_len -= alloc_size;
    849	}
    850
    851out:
    852	return ret;
    853}
    854
    855static int ocfs2_xattr_value_truncate(struct inode *inode,
    856				      struct ocfs2_xattr_value_buf *vb,
    857				      int len,
    858				      struct ocfs2_xattr_set_ctxt *ctxt)
    859{
    860	int ret;
    861	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
    862	u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
    863
    864	if (new_clusters == old_clusters)
    865		return 0;
    866
    867	if (new_clusters > old_clusters)
    868		ret = ocfs2_xattr_extend_allocation(inode,
    869						    new_clusters - old_clusters,
    870						    vb, ctxt);
    871	else
    872		ret = ocfs2_xattr_shrink_size(inode,
    873					      old_clusters, new_clusters,
    874					      vb, ctxt);
    875
    876	return ret;
    877}
    878
    879static int ocfs2_xattr_list_entry(struct super_block *sb,
    880				  char *buffer, size_t size,
    881				  size_t *result, int type,
    882				  const char *name, int name_len)
    883{
    884	char *p = buffer + *result;
    885	const char *prefix;
    886	int prefix_len;
    887	int total_len;
    888
    889	switch(type) {
    890	case OCFS2_XATTR_INDEX_USER:
    891		if (OCFS2_SB(sb)->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
    892			return 0;
    893		break;
    894
    895	case OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS:
    896	case OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT:
    897		if (!(sb->s_flags & SB_POSIXACL))
    898			return 0;
    899		break;
    900
    901	case OCFS2_XATTR_INDEX_TRUSTED:
    902		if (!capable(CAP_SYS_ADMIN))
    903			return 0;
    904		break;
    905	}
    906
    907	prefix = ocfs2_xattr_prefix(type);
    908	if (!prefix)
    909		return 0;
    910	prefix_len = strlen(prefix);
    911	total_len = prefix_len + name_len + 1;
    912	*result += total_len;
    913
    914	/* we are just looking for how big our buffer needs to be */
    915	if (!size)
    916		return 0;
    917
    918	if (*result > size)
    919		return -ERANGE;
    920
    921	memcpy(p, prefix, prefix_len);
    922	memcpy(p + prefix_len, name, name_len);
    923	p[prefix_len + name_len] = '\0';
    924
    925	return 0;
    926}
    927
    928static int ocfs2_xattr_list_entries(struct inode *inode,
    929				    struct ocfs2_xattr_header *header,
    930				    char *buffer, size_t buffer_size)
    931{
    932	size_t result = 0;
    933	int i, type, ret;
    934	const char *name;
    935
    936	for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
    937		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
    938		type = ocfs2_xattr_get_type(entry);
    939		name = (const char *)header +
    940			le16_to_cpu(entry->xe_name_offset);
    941
    942		ret = ocfs2_xattr_list_entry(inode->i_sb,
    943					     buffer, buffer_size,
    944					     &result, type, name,
    945					     entry->xe_name_len);
    946		if (ret)
    947			return ret;
    948	}
    949
    950	return result;
    951}
    952
    953int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
    954					 struct ocfs2_dinode *di)
    955{
    956	struct ocfs2_xattr_header *xh;
    957	int i;
    958
    959	xh = (struct ocfs2_xattr_header *)
    960		 ((void *)di + inode->i_sb->s_blocksize -
    961		 le16_to_cpu(di->i_xattr_inline_size));
    962
    963	for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
    964		if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
    965			return 1;
    966
    967	return 0;
    968}
    969
    970static int ocfs2_xattr_ibody_list(struct inode *inode,
    971				  struct ocfs2_dinode *di,
    972				  char *buffer,
    973				  size_t buffer_size)
    974{
    975	struct ocfs2_xattr_header *header = NULL;
    976	struct ocfs2_inode_info *oi = OCFS2_I(inode);
    977	int ret = 0;
    978
    979	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
    980		return ret;
    981
    982	header = (struct ocfs2_xattr_header *)
    983		 ((void *)di + inode->i_sb->s_blocksize -
    984		 le16_to_cpu(di->i_xattr_inline_size));
    985
    986	ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
    987
    988	return ret;
    989}
    990
    991static int ocfs2_xattr_block_list(struct inode *inode,
    992				  struct ocfs2_dinode *di,
    993				  char *buffer,
    994				  size_t buffer_size)
    995{
    996	struct buffer_head *blk_bh = NULL;
    997	struct ocfs2_xattr_block *xb;
    998	int ret = 0;
    999
   1000	if (!di->i_xattr_loc)
   1001		return ret;
   1002
   1003	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
   1004				     &blk_bh);
   1005	if (ret < 0) {
   1006		mlog_errno(ret);
   1007		return ret;
   1008	}
   1009
   1010	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
   1011	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
   1012		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
   1013		ret = ocfs2_xattr_list_entries(inode, header,
   1014					       buffer, buffer_size);
   1015	} else
   1016		ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
   1017						   buffer, buffer_size);
   1018
   1019	brelse(blk_bh);
   1020
   1021	return ret;
   1022}
   1023
   1024ssize_t ocfs2_listxattr(struct dentry *dentry,
   1025			char *buffer,
   1026			size_t size)
   1027{
   1028	int ret = 0, i_ret = 0, b_ret = 0;
   1029	struct buffer_head *di_bh = NULL;
   1030	struct ocfs2_dinode *di = NULL;
   1031	struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry));
   1032
   1033	if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
   1034		return -EOPNOTSUPP;
   1035
   1036	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
   1037		return ret;
   1038
   1039	ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0);
   1040	if (ret < 0) {
   1041		mlog_errno(ret);
   1042		return ret;
   1043	}
   1044
   1045	di = (struct ocfs2_dinode *)di_bh->b_data;
   1046
   1047	down_read(&oi->ip_xattr_sem);
   1048	i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size);
   1049	if (i_ret < 0)
   1050		b_ret = 0;
   1051	else {
   1052		if (buffer) {
   1053			buffer += i_ret;
   1054			size -= i_ret;
   1055		}
   1056		b_ret = ocfs2_xattr_block_list(d_inode(dentry), di,
   1057					       buffer, size);
   1058		if (b_ret < 0)
   1059			i_ret = 0;
   1060	}
   1061	up_read(&oi->ip_xattr_sem);
   1062	ocfs2_inode_unlock(d_inode(dentry), 0);
   1063
   1064	brelse(di_bh);
   1065
   1066	return i_ret + b_ret;
   1067}
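
/*
 * User-space view of the list built above: listxattr(2) returns the
 * prefixed names back to back, each terminated by '\0', exactly as
 * ocfs2_xattr_list_entry() lays them out.  A minimal sketch; the path is
 * hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "/mnt/ocfs2/file";	/* hypothetical mount point */
	ssize_t len = listxattr(path, NULL, 0);	/* size-only query */
	char *buf, *p;

	if (len <= 0)
		return 1;
	buf = malloc(len);
	if (!buf)
		return 1;
	len = listxattr(path, buf, len);
	for (p = buf; len > 0 && p < buf + len; p += strlen(p) + 1)
		printf("%s\n", p);	/* e.g. "user.comment", "security.selinux" */
	free(buf);
	return 0;
}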
   1068
   1069static int ocfs2_xattr_find_entry(int name_index,
   1070				  const char *name,
   1071				  struct ocfs2_xattr_search *xs)
   1072{
   1073	struct ocfs2_xattr_entry *entry;
   1074	size_t name_len;
   1075	int i, cmp = 1;
   1076
   1077	if (name == NULL)
   1078		return -EINVAL;
   1079
   1080	name_len = strlen(name);
   1081	entry = xs->here;
   1082	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
   1083		cmp = name_index - ocfs2_xattr_get_type(entry);
   1084		if (!cmp)
   1085			cmp = name_len - entry->xe_name_len;
   1086		if (!cmp)
   1087			cmp = memcmp(name, (xs->base +
   1088				     le16_to_cpu(entry->xe_name_offset)),
   1089				     name_len);
   1090		if (cmp == 0)
   1091			break;
   1092		entry += 1;
   1093	}
   1094	xs->here = entry;
   1095
   1096	return cmp ? -ENODATA : 0;
   1097}
   1098
   1099static int ocfs2_xattr_get_value_outside(struct inode *inode,
   1100					 struct ocfs2_xattr_value_root *xv,
   1101					 void *buffer,
   1102					 size_t len)
   1103{
   1104	u32 cpos, p_cluster, num_clusters, bpc, clusters;
   1105	u64 blkno;
   1106	int i, ret = 0;
   1107	size_t cplen, blocksize;
   1108	struct buffer_head *bh = NULL;
   1109	struct ocfs2_extent_list *el;
   1110
   1111	el = &xv->xr_list;
   1112	clusters = le32_to_cpu(xv->xr_clusters);
   1113	bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
   1114	blocksize = inode->i_sb->s_blocksize;
   1115
   1116	cpos = 0;
   1117	while (cpos < clusters) {
   1118		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
   1119					       &num_clusters, el, NULL);
   1120		if (ret) {
   1121			mlog_errno(ret);
   1122			goto out;
   1123		}
   1124
   1125		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
   1126		/* Copy ocfs2_xattr_value */
   1127		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
   1128			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
   1129					       &bh, NULL);
   1130			if (ret) {
   1131				mlog_errno(ret);
   1132				goto out;
   1133			}
   1134
   1135			cplen = len >= blocksize ? blocksize : len;
   1136			memcpy(buffer, bh->b_data, cplen);
   1137			len -= cplen;
   1138			buffer += cplen;
   1139
   1140			brelse(bh);
   1141			bh = NULL;
   1142			if (len == 0)
   1143				break;
   1144		}
   1145		cpos += num_clusters;
   1146	}
   1147out:
   1148	return ret;
   1149}
   1150
   1151static int ocfs2_xattr_ibody_get(struct inode *inode,
   1152				 int name_index,
   1153				 const char *name,
   1154				 void *buffer,
   1155				 size_t buffer_size,
   1156				 struct ocfs2_xattr_search *xs)
   1157{
   1158	struct ocfs2_inode_info *oi = OCFS2_I(inode);
   1159	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
   1160	struct ocfs2_xattr_value_root *xv;
   1161	size_t size;
   1162	int ret = 0;
   1163
   1164	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
   1165		return -ENODATA;
   1166
   1167	xs->end = (void *)di + inode->i_sb->s_blocksize;
   1168	xs->header = (struct ocfs2_xattr_header *)
   1169			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
   1170	xs->base = (void *)xs->header;
   1171	xs->here = xs->header->xh_entries;
   1172
   1173	ret = ocfs2_xattr_find_entry(name_index, name, xs);
   1174	if (ret)
   1175		return ret;
   1176	size = le64_to_cpu(xs->here->xe_value_size);
   1177	if (buffer) {
   1178		if (size > buffer_size)
   1179			return -ERANGE;
   1180		if (ocfs2_xattr_is_local(xs->here)) {
   1181			memcpy(buffer, (void *)xs->base +
   1182			       le16_to_cpu(xs->here->xe_name_offset) +
   1183			       OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
   1184		} else {
   1185			xv = (struct ocfs2_xattr_value_root *)
   1186				(xs->base + le16_to_cpu(
   1187				 xs->here->xe_name_offset) +
   1188				OCFS2_XATTR_SIZE(xs->here->xe_name_len));
   1189			ret = ocfs2_xattr_get_value_outside(inode, xv,
   1190							    buffer, size);
   1191			if (ret < 0) {
   1192				mlog_errno(ret);
   1193				return ret;
   1194			}
   1195		}
   1196	}
   1197
   1198	return size;
   1199}
   1200
   1201static int ocfs2_xattr_block_get(struct inode *inode,
   1202				 int name_index,
   1203				 const char *name,
   1204				 void *buffer,
   1205				 size_t buffer_size,
   1206				 struct ocfs2_xattr_search *xs)
   1207{
   1208	struct ocfs2_xattr_block *xb;
   1209	struct ocfs2_xattr_value_root *xv;
   1210	size_t size;
   1211	int ret = -ENODATA, name_offset, name_len, i;
   1212	int block_off;
   1213
   1214	xs->bucket = ocfs2_xattr_bucket_new(inode);
   1215	if (!xs->bucket) {
   1216		ret = -ENOMEM;
   1217		mlog_errno(ret);
   1218		goto cleanup;
   1219	}
   1220
   1221	ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
   1222	if (ret) {
   1223		mlog_errno(ret);
   1224		goto cleanup;
   1225	}
   1226
   1227	if (xs->not_found) {
   1228		ret = -ENODATA;
   1229		goto cleanup;
   1230	}
   1231
   1232	xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
   1233	size = le64_to_cpu(xs->here->xe_value_size);
   1234	if (buffer) {
   1235		ret = -ERANGE;
   1236		if (size > buffer_size)
   1237			goto cleanup;
   1238
   1239		name_offset = le16_to_cpu(xs->here->xe_name_offset);
   1240		name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
   1241		i = xs->here - xs->header->xh_entries;
   1242
   1243		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
   1244			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
   1245								bucket_xh(xs->bucket),
   1246								i,
   1247								&block_off,
   1248								&name_offset);
   1249			if (ret) {
   1250				mlog_errno(ret);
   1251				goto cleanup;
   1252			}
   1253			xs->base = bucket_block(xs->bucket, block_off);
   1254		}
   1255		if (ocfs2_xattr_is_local(xs->here)) {
   1256			memcpy(buffer, (void *)xs->base +
   1257			       name_offset + name_len, size);
   1258		} else {
   1259			xv = (struct ocfs2_xattr_value_root *)
   1260				(xs->base + name_offset + name_len);
   1261			ret = ocfs2_xattr_get_value_outside(inode, xv,
   1262							    buffer, size);
   1263			if (ret < 0) {
   1264				mlog_errno(ret);
   1265				goto cleanup;
   1266			}
   1267		}
   1268	}
   1269	ret = size;
   1270cleanup:
   1271	ocfs2_xattr_bucket_free(xs->bucket);
   1272
   1273	brelse(xs->xattr_bh);
   1274	xs->xattr_bh = NULL;
   1275	return ret;
   1276}
   1277
   1278int ocfs2_xattr_get_nolock(struct inode *inode,
   1279			   struct buffer_head *di_bh,
   1280			   int name_index,
   1281			   const char *name,
   1282			   void *buffer,
   1283			   size_t buffer_size)
   1284{
   1285	int ret;
   1286	struct ocfs2_dinode *di = NULL;
   1287	struct ocfs2_inode_info *oi = OCFS2_I(inode);
   1288	struct ocfs2_xattr_search xis = {
   1289		.not_found = -ENODATA,
   1290	};
   1291	struct ocfs2_xattr_search xbs = {
   1292		.not_found = -ENODATA,
   1293	};
   1294
   1295	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
   1296		return -EOPNOTSUPP;
   1297
   1298	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
   1299		return -ENODATA;
   1300
   1301	xis.inode_bh = xbs.inode_bh = di_bh;
   1302	di = (struct ocfs2_dinode *)di_bh->b_data;
   1303
   1304	ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
   1305				    buffer_size, &xis);
   1306	if (ret == -ENODATA && di->i_xattr_loc)
   1307		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
   1308					    buffer_size, &xbs);
   1309
   1310	return ret;
   1311}
   1312
   1313/* ocfs2_xattr_get()
   1314 *
   1315 * Copy an extended attribute into the buffer provided.
    1316 * Pass a NULL buffer to compute the size of the buffer required.
   1317 */
   1318static int ocfs2_xattr_get(struct inode *inode,
   1319			   int name_index,
   1320			   const char *name,
   1321			   void *buffer,
   1322			   size_t buffer_size)
   1323{
   1324	int ret, had_lock;
   1325	struct buffer_head *di_bh = NULL;
   1326	struct ocfs2_lock_holder oh;
   1327
   1328	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
   1329	if (had_lock < 0) {
   1330		mlog_errno(had_lock);
   1331		return had_lock;
   1332	}
   1333	down_read(&OCFS2_I(inode)->ip_xattr_sem);
   1334	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
   1335				     name, buffer, buffer_size);
   1336	up_read(&OCFS2_I(inode)->ip_xattr_sem);
   1337
   1338	ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
   1339
   1340	brelse(di_bh);
   1341
   1342	return ret;
   1343}
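
/*
 * The NULL-buffer convention above mirrors what user space sees: getxattr(2)
 * called with size 0 returns the value length, and a second call fetches the
 * value.  A minimal sketch with a hypothetical path.
 */
#include <stdlib.h>
#include <sys/xattr.h>

static void *demo_get_xattr(const char *path, const char *name, ssize_t *lenp)
{
	ssize_t len = getxattr(path, name, NULL, 0);	/* size query */
	void *buf;

	*lenp = len;
	if (len < 0)
		return NULL;
	buf = malloc(len);
	if (buf && getxattr(path, name, buf, len) != len) {
		free(buf);
		buf = NULL;
	}
	return buf;
}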
   1344
   1345static int __ocfs2_xattr_set_value_outside(struct inode *inode,
   1346					   handle_t *handle,
   1347					   struct ocfs2_xattr_value_buf *vb,
   1348					   const void *value,
   1349					   int value_len)
   1350{
   1351	int ret = 0, i, cp_len;
   1352	u16 blocksize = inode->i_sb->s_blocksize;
   1353	u32 p_cluster, num_clusters;
   1354	u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
   1355	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
   1356	u64 blkno;
   1357	struct buffer_head *bh = NULL;
   1358	unsigned int ext_flags;
   1359	struct ocfs2_xattr_value_root *xv = vb->vb_xv;
   1360
   1361	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
   1362
   1363	while (cpos < clusters) {
   1364		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
   1365					       &num_clusters, &xv->xr_list,
   1366					       &ext_flags);
   1367		if (ret) {
   1368			mlog_errno(ret);
   1369			goto out;
   1370		}
   1371
   1372		BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
   1373
   1374		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
   1375
   1376		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
   1377			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
   1378					       &bh, NULL);
   1379			if (ret) {
   1380				mlog_errno(ret);
   1381				goto out;
   1382			}
   1383
   1384			ret = ocfs2_journal_access(handle,
   1385						   INODE_CACHE(inode),
   1386						   bh,
   1387						   OCFS2_JOURNAL_ACCESS_WRITE);
   1388			if (ret < 0) {
   1389				mlog_errno(ret);
   1390				goto out;
   1391			}
   1392
   1393			cp_len = value_len > blocksize ? blocksize : value_len;
   1394			memcpy(bh->b_data, value, cp_len);
   1395			value_len -= cp_len;
   1396			value += cp_len;
   1397			if (cp_len < blocksize)
   1398				memset(bh->b_data + cp_len, 0,
   1399				       blocksize - cp_len);
   1400
   1401			ocfs2_journal_dirty(handle, bh);
   1402			brelse(bh);
   1403			bh = NULL;
   1404
   1405			/*
   1406			 * XXX: do we need to empty all the following
   1407			 * blocks in this cluster?
   1408			 */
   1409			if (!value_len)
   1410				break;
   1411		}
   1412		cpos += num_clusters;
   1413	}
   1414out:
   1415	brelse(bh);
   1416
   1417	return ret;
   1418}
   1419
   1420static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
   1421				       int num_entries)
   1422{
   1423	int free_space;
   1424
   1425	if (!needed_space)
   1426		return 0;
   1427
   1428	free_space = free_start -
   1429		sizeof(struct ocfs2_xattr_header) -
   1430		(num_entries * sizeof(struct ocfs2_xattr_entry)) -
   1431		OCFS2_XATTR_HEADER_GAP;
   1432	if (free_space < 0)
   1433		return -EIO;
   1434	if (free_space < needed_space)
   1435		return -ENOSPC;
   1436
   1437	return 0;
   1438}
   1439
   1440static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
   1441				   int type)
   1442{
   1443	return loc->xl_ops->xlo_journal_access(handle, loc, type);
   1444}
   1445
   1446static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
   1447{
   1448	loc->xl_ops->xlo_journal_dirty(handle, loc);
   1449}
   1450
   1451/* Give a pointer into the storage for the given offset */
   1452static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
   1453{
   1454	BUG_ON(offset >= loc->xl_size);
   1455	return loc->xl_ops->xlo_offset_pointer(loc, offset);
   1456}
   1457
   1458/*
   1459 * Wipe the name+value pair and allow the storage to reclaim it.  This
   1460 * must be followed by either removal of the entry or a call to
   1461 * ocfs2_xa_add_namevalue().
   1462 */
   1463static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
   1464{
   1465	loc->xl_ops->xlo_wipe_namevalue(loc);
   1466}
   1467
   1468/*
   1469 * Find lowest offset to a name+value pair.  This is the start of our
   1470 * downward-growing free space.
   1471 */
   1472static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
   1473{
   1474	return loc->xl_ops->xlo_get_free_start(loc);
   1475}
   1476
   1477/* Can we reuse loc->xl_entry for xi? */
   1478static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
   1479				    struct ocfs2_xattr_info *xi)
   1480{
   1481	return loc->xl_ops->xlo_can_reuse(loc, xi);
   1482}
   1483
   1484/* How much free space is needed to set the new value */
   1485static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
   1486				struct ocfs2_xattr_info *xi)
   1487{
   1488	return loc->xl_ops->xlo_check_space(loc, xi);
   1489}
   1490
   1491static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
   1492{
   1493	loc->xl_ops->xlo_add_entry(loc, name_hash);
   1494	loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
   1495	/*
   1496	 * We can't leave the new entry's xe_name_offset at zero or
   1497	 * add_namevalue() will go nuts.  We set it to the size of our
   1498	 * storage so that it can never be less than any other entry.
   1499	 */
   1500	loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
   1501}
   1502
   1503static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
   1504				   struct ocfs2_xattr_info *xi)
   1505{
   1506	int size = namevalue_size_xi(xi);
   1507	int nameval_offset;
   1508	char *nameval_buf;
   1509
   1510	loc->xl_ops->xlo_add_namevalue(loc, size);
   1511	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
   1512	loc->xl_entry->xe_name_len = xi->xi_name_len;
   1513	ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
   1514	ocfs2_xattr_set_local(loc->xl_entry,
   1515			      xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);
   1516
   1517	nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
   1518	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
   1519	memset(nameval_buf, 0, size);
   1520	memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
   1521}
   1522
   1523static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
   1524				    struct ocfs2_xattr_value_buf *vb)
   1525{
   1526	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
   1527	int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
   1528
   1529	/* Value bufs are for value trees */
   1530	BUG_ON(ocfs2_xattr_is_local(loc->xl_entry));
   1531	BUG_ON(namevalue_size_xe(loc->xl_entry) !=
   1532	       (name_size + OCFS2_XATTR_ROOT_SIZE));
   1533
   1534	loc->xl_ops->xlo_fill_value_buf(loc, vb);
   1535	vb->vb_xv =
   1536		(struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
   1537							nameval_offset +
   1538							name_size);
   1539}
   1540
   1541static int ocfs2_xa_block_journal_access(handle_t *handle,
   1542					 struct ocfs2_xa_loc *loc, int type)
   1543{
   1544	struct buffer_head *bh = loc->xl_storage;
   1545	ocfs2_journal_access_func access;
   1546
   1547	if (loc->xl_size == (bh->b_size -
   1548			     offsetof(struct ocfs2_xattr_block,
   1549				      xb_attrs.xb_header)))
   1550		access = ocfs2_journal_access_xb;
   1551	else
   1552		access = ocfs2_journal_access_di;
   1553	return access(handle, INODE_CACHE(loc->xl_inode), bh, type);
   1554}
   1555
   1556static void ocfs2_xa_block_journal_dirty(handle_t *handle,
   1557					 struct ocfs2_xa_loc *loc)
   1558{
   1559	struct buffer_head *bh = loc->xl_storage;
   1560
   1561	ocfs2_journal_dirty(handle, bh);
   1562}
   1563
   1564static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
   1565					   int offset)
   1566{
   1567	return (char *)loc->xl_header + offset;
   1568}
   1569
   1570static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
   1571				    struct ocfs2_xattr_info *xi)
   1572{
   1573	/*
   1574	 * Block storage is strict.  If the sizes aren't exact, we will
   1575	 * remove the old one and reinsert the new.
   1576	 */
   1577	return namevalue_size_xe(loc->xl_entry) ==
   1578		namevalue_size_xi(xi);
   1579}
   1580
   1581static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
   1582{
   1583	struct ocfs2_xattr_header *xh = loc->xl_header;
   1584	int i, count = le16_to_cpu(xh->xh_count);
   1585	int offset, free_start = loc->xl_size;
   1586
   1587	for (i = 0; i < count; i++) {
   1588		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
   1589		if (offset < free_start)
   1590			free_start = offset;
   1591	}
   1592
   1593	return free_start;
   1594}
   1595
   1596static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
   1597				      struct ocfs2_xattr_info *xi)
   1598{
   1599	int count = le16_to_cpu(loc->xl_header->xh_count);
   1600	int free_start = ocfs2_xa_get_free_start(loc);
   1601	int needed_space = ocfs2_xi_entry_usage(xi);
   1602
   1603	/*
   1604	 * Block storage will reclaim the original entry before inserting
   1605	 * the new value, so we only need the difference.  If the new
   1606	 * entry is smaller than the old one, we don't need anything.
   1607	 */
   1608	if (loc->xl_entry) {
   1609		/* Don't need space if we're reusing! */
   1610		if (ocfs2_xa_can_reuse_entry(loc, xi))
   1611			needed_space = 0;
   1612		else
   1613			needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
   1614	}
   1615	if (needed_space < 0)
   1616		needed_space = 0;
   1617	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
   1618}
   1619
   1620/*
   1621 * Block storage for xattrs keeps the name+value pairs compacted.  When
   1622 * we remove one, we have to shift any that preceded it towards the end.
   1623 */
   1624static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
   1625{
   1626	int i, offset;
   1627	int namevalue_offset, first_namevalue_offset, namevalue_size;
   1628	struct ocfs2_xattr_entry *entry = loc->xl_entry;
   1629	struct ocfs2_xattr_header *xh = loc->xl_header;
   1630	int count = le16_to_cpu(xh->xh_count);
   1631
   1632	namevalue_offset = le16_to_cpu(entry->xe_name_offset);
   1633	namevalue_size = namevalue_size_xe(entry);
   1634	first_namevalue_offset = ocfs2_xa_get_free_start(loc);
   1635
   1636	/* Shift the name+value pairs */
   1637	memmove((char *)xh + first_namevalue_offset + namevalue_size,
   1638		(char *)xh + first_namevalue_offset,
   1639		namevalue_offset - first_namevalue_offset);
   1640	memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
   1641
   1642	/* Now tell xh->xh_entries about it */
   1643	for (i = 0; i < count; i++) {
   1644		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
   1645		if (offset <= namevalue_offset)
   1646			le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
   1647				     namevalue_size);
   1648	}
   1649
   1650	/*
   1651	 * Note that we don't update xh_free_start or xh_name_value_len
   1652	 * because they're not used in block-stored xattrs.
   1653	 */
   1654}
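
/*
 * Worked example of the compaction above: with 64-byte pairs A at
 * [4032, 4096), B at [3968, 4032) and C at [3904, 3968), wiping B
 * memmove()s C up to [3968, 4032), zeroes [3904, 3968), and bumps the
 * xe_name_offset of every entry at or below B's old offset by 64, so
 * the downward-filling free space now ends at 3968.
 */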
   1655
   1656static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
   1657{
   1658	int count = le16_to_cpu(loc->xl_header->xh_count);
   1659	loc->xl_entry = &(loc->xl_header->xh_entries[count]);
   1660	le16_add_cpu(&loc->xl_header->xh_count, 1);
   1661	memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
   1662}
   1663
   1664static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
   1665{
   1666	int free_start = ocfs2_xa_get_free_start(loc);
   1667
   1668	loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
   1669}
   1670
   1671static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
   1672					  struct ocfs2_xattr_value_buf *vb)
   1673{
   1674	struct buffer_head *bh = loc->xl_storage;
   1675
   1676	if (loc->xl_size == (bh->b_size -
   1677			     offsetof(struct ocfs2_xattr_block,
   1678				      xb_attrs.xb_header)))
   1679		vb->vb_access = ocfs2_journal_access_xb;
   1680	else
   1681		vb->vb_access = ocfs2_journal_access_di;
   1682	vb->vb_bh = bh;
   1683}
   1684
   1685/*
   1686 * Operations for xattrs stored in blocks.  This includes inline inode
   1687 * storage and unindexed ocfs2_xattr_blocks.
   1688 */
   1689static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
   1690	.xlo_journal_access	= ocfs2_xa_block_journal_access,
   1691	.xlo_journal_dirty	= ocfs2_xa_block_journal_dirty,
   1692	.xlo_offset_pointer	= ocfs2_xa_block_offset_pointer,
   1693	.xlo_check_space	= ocfs2_xa_block_check_space,
   1694	.xlo_can_reuse		= ocfs2_xa_block_can_reuse,
   1695	.xlo_get_free_start	= ocfs2_xa_block_get_free_start,
   1696	.xlo_wipe_namevalue	= ocfs2_xa_block_wipe_namevalue,
   1697	.xlo_add_entry		= ocfs2_xa_block_add_entry,
   1698	.xlo_add_namevalue	= ocfs2_xa_block_add_namevalue,
   1699	.xlo_fill_value_buf	= ocfs2_xa_block_fill_value_buf,
   1700};
   1701
   1702static int ocfs2_xa_bucket_journal_access(handle_t *handle,
   1703					  struct ocfs2_xa_loc *loc, int type)
   1704{
   1705	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
   1706
   1707	return ocfs2_xattr_bucket_journal_access(handle, bucket, type);
   1708}
   1709
   1710static void ocfs2_xa_bucket_journal_dirty(handle_t *handle,
   1711					  struct ocfs2_xa_loc *loc)
   1712{
   1713	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
   1714
   1715	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
   1716}
   1717
   1718static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
   1719					    int offset)
   1720{
   1721	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
   1722	int block, block_offset;
   1723
   1724	/* The header is at the front of the bucket */
   1725	block = offset >> loc->xl_inode->i_sb->s_blocksize_bits;
   1726	block_offset = offset % loc->xl_inode->i_sb->s_blocksize;
   1727
   1728	return bucket_block(bucket, block) + block_offset;
   1729}
   1730
   1731static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
   1732				     struct ocfs2_xattr_info *xi)
   1733{
   1734	return namevalue_size_xe(loc->xl_entry) >=
   1735		namevalue_size_xi(xi);
   1736}
   1737
   1738static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
   1739{
   1740	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
   1741	return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
   1742}
   1743
   1744static int ocfs2_bucket_align_free_start(struct super_block *sb,
   1745					 int free_start, int size)
   1746{
   1747	/*
   1748	 * We need to make sure that the name+value pair fits within
   1749	 * one block.
   1750	 */
   1751	if (((free_start - size) >> sb->s_blocksize_bits) !=
   1752	    ((free_start - 1) >> sb->s_blocksize_bits))
   1753		free_start -= free_start % sb->s_blocksize;
   1754
   1755	return free_start;
   1756}
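        /*
         * Editor's note -- an illustrative example of the alignment above,
         * with example values that are not from the original source:
         *
         *   With a 4096-byte block size, free_start == 4196 and size == 200,
         *   the new pair would span bytes 3996..4195 and straddle blocks 0
         *   and 1.  free_start is therefore rounded down to 4096, the pair
         *   lands at 3896..4095 entirely inside block 0, and the 100 bytes
         *   from 4096 to 4195 stay unused until the bucket is defragmented.
         */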
   1757
   1758static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
   1759				       struct ocfs2_xattr_info *xi)
   1760{
   1761	int rc;
   1762	int count = le16_to_cpu(loc->xl_header->xh_count);
   1763	int free_start = ocfs2_xa_get_free_start(loc);
   1764	int needed_space = ocfs2_xi_entry_usage(xi);
   1765	int size = namevalue_size_xi(xi);
   1766	struct super_block *sb = loc->xl_inode->i_sb;
   1767
   1768	/*
   1769	 * Bucket storage does not reclaim name+value pairs it cannot
   1770	 * reuse.  They live as holes until the bucket fills, and then
   1771	 * the bucket is defragmented.  However, the bucket can reclaim
   1772	 * the ocfs2_xattr_entry.
   1773	 */
   1774	if (loc->xl_entry) {
   1775		/* Don't need space if we're reusing! */
   1776		if (ocfs2_xa_can_reuse_entry(loc, xi))
   1777			needed_space = 0;
   1778		else
   1779			needed_space -= sizeof(struct ocfs2_xattr_entry);
   1780	}
   1781	BUG_ON(needed_space < 0);
   1782
   1783	if (free_start < size) {
   1784		if (needed_space)
   1785			return -ENOSPC;
   1786	} else {
   1787		/*
   1788		 * First we check if it would fit in the first place.
   1789		 * Below, we align the free start to a block.  This may
   1790		 * slide us below the minimum gap.  By checking unaligned
   1791		 * first, we avoid that error.
   1792		 */
   1793		rc = ocfs2_xa_check_space_helper(needed_space, free_start,
   1794						 count);
   1795		if (rc)
   1796			return rc;
   1797		free_start = ocfs2_bucket_align_free_start(sb, free_start,
   1798							   size);
   1799	}
   1800	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
   1801}
   1802
   1803static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
   1804{
   1805	le16_add_cpu(&loc->xl_header->xh_name_value_len,
   1806		     -namevalue_size_xe(loc->xl_entry));
   1807}
   1808
   1809static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
   1810{
   1811	struct ocfs2_xattr_header *xh = loc->xl_header;
   1812	int count = le16_to_cpu(xh->xh_count);
   1813	int low = 0, high = count - 1, tmp;
   1814	struct ocfs2_xattr_entry *tmp_xe;
   1815
   1816	/*
   1817	 * We keep buckets sorted by name_hash, so we need to find
   1818	 * our insert place.
   1819	 */
   1820	while (low <= high && count) {
   1821		tmp = (low + high) / 2;
   1822		tmp_xe = &xh->xh_entries[tmp];
   1823
   1824		if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
   1825			low = tmp + 1;
   1826		else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
   1827			high = tmp - 1;
   1828		else {
   1829			low = tmp;
   1830			break;
   1831		}
   1832	}
   1833
   1834	if (low != count)
   1835		memmove(&xh->xh_entries[low + 1],
   1836			&xh->xh_entries[low],
   1837			((count - low) * sizeof(struct ocfs2_xattr_entry)));
   1838
   1839	le16_add_cpu(&xh->xh_count, 1);
   1840	loc->xl_entry = &xh->xh_entries[low];
   1841	memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
   1842}
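        /*
         * Editor's note: the binary search above locates the slot that keeps
         * xh_entries sorted by xe_name_hash, and the memmove() opens that
         * slot.  If an entry with the same hash is found, the new entry is
         * inserted immediately in front of it; if the hash is larger than all
         * existing ones, low ends up equal to count and the entry is simply
         * appended.
         */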
   1843
   1844static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
   1845{
   1846	int free_start = ocfs2_xa_get_free_start(loc);
   1847	struct ocfs2_xattr_header *xh = loc->xl_header;
   1848	struct super_block *sb = loc->xl_inode->i_sb;
   1849	int nameval_offset;
   1850
   1851	free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
   1852	nameval_offset = free_start - size;
   1853	loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
   1854	xh->xh_free_start = cpu_to_le16(nameval_offset);
   1855	le16_add_cpu(&xh->xh_name_value_len, size);
   1856
   1857}
   1858
   1859static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
   1860					   struct ocfs2_xattr_value_buf *vb)
   1861{
   1862	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
   1863	struct super_block *sb = loc->xl_inode->i_sb;
   1864	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
   1865	int size = namevalue_size_xe(loc->xl_entry);
   1866	int block_offset = nameval_offset >> sb->s_blocksize_bits;
   1867
   1868	/* Values are not allowed to straddle block boundaries */
   1869	BUG_ON(block_offset !=
   1870	       ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
   1871	/* We expect the bucket to be filled in */
   1872	BUG_ON(!bucket->bu_bhs[block_offset]);
   1873
   1874	vb->vb_access = ocfs2_journal_access;
   1875	vb->vb_bh = bucket->bu_bhs[block_offset];
   1876}
   1877
   1878/* Operations for xattrs stored in buckets. */
   1879static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
   1880	.xlo_journal_access	= ocfs2_xa_bucket_journal_access,
   1881	.xlo_journal_dirty	= ocfs2_xa_bucket_journal_dirty,
   1882	.xlo_offset_pointer	= ocfs2_xa_bucket_offset_pointer,
   1883	.xlo_check_space	= ocfs2_xa_bucket_check_space,
   1884	.xlo_can_reuse		= ocfs2_xa_bucket_can_reuse,
   1885	.xlo_get_free_start	= ocfs2_xa_bucket_get_free_start,
   1886	.xlo_wipe_namevalue	= ocfs2_xa_bucket_wipe_namevalue,
   1887	.xlo_add_entry		= ocfs2_xa_bucket_add_entry,
   1888	.xlo_add_namevalue	= ocfs2_xa_bucket_add_namevalue,
   1889	.xlo_fill_value_buf	= ocfs2_xa_bucket_fill_value_buf,
   1890};
   1891
   1892static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc)
   1893{
   1894	struct ocfs2_xattr_value_buf vb;
   1895
   1896	if (ocfs2_xattr_is_local(loc->xl_entry))
   1897		return 0;
   1898
   1899	ocfs2_xa_fill_value_buf(loc, &vb);
   1900	return le32_to_cpu(vb.vb_xv->xr_clusters);
   1901}
   1902
   1903static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
   1904				   struct ocfs2_xattr_set_ctxt *ctxt)
   1905{
   1906	int trunc_rc, access_rc;
   1907	struct ocfs2_xattr_value_buf vb;
   1908
   1909	ocfs2_xa_fill_value_buf(loc, &vb);
   1910	trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes,
   1911					      ctxt);
   1912
   1913	/*
   1914	 * The caller of ocfs2_xa_value_truncate() has already called
    1915	 * ocfs2_xa_journal_access on the loc.  However, the truncate code
   1916	 * calls ocfs2_extend_trans().  This may commit the previous
   1917	 * transaction and open a new one.  If this is a bucket, truncate
   1918	 * could leave only vb->vb_bh set up for journaling.  Meanwhile,
   1919	 * the caller is expecting to dirty the entire bucket.  So we must
   1920	 * reset the journal work.  We do this even if truncate has failed,
   1921	 * as it could have failed after committing the extend.
   1922	 */
   1923	access_rc = ocfs2_xa_journal_access(ctxt->handle, loc,
   1924					    OCFS2_JOURNAL_ACCESS_WRITE);
   1925
   1926	/* Errors in truncate take precedence */
   1927	return trunc_rc ? trunc_rc : access_rc;
   1928}
   1929
   1930static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
   1931{
   1932	int index, count;
   1933	struct ocfs2_xattr_header *xh = loc->xl_header;
   1934	struct ocfs2_xattr_entry *entry = loc->xl_entry;
   1935
   1936	ocfs2_xa_wipe_namevalue(loc);
   1937	loc->xl_entry = NULL;
   1938
   1939	le16_add_cpu(&xh->xh_count, -1);
   1940	count = le16_to_cpu(xh->xh_count);
   1941
   1942	/*
   1943	 * Only zero out the entry if there are more remaining.  This is
   1944	 * important for an empty bucket, as it keeps track of the
   1945	 * bucket's hash value.  It doesn't hurt empty block storage.
   1946	 */
   1947	if (count) {
   1948		index = ((char *)entry - (char *)&xh->xh_entries) /
   1949			sizeof(struct ocfs2_xattr_entry);
   1950		memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
   1951			(count - index) * sizeof(struct ocfs2_xattr_entry));
   1952		memset(&xh->xh_entries[count], 0,
   1953		       sizeof(struct ocfs2_xattr_entry));
   1954	}
   1955}
   1956
   1957/*
   1958 * If we have a problem adjusting the size of an external value during
   1959 * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr
   1960 * in an intermediate state.  For example, the value may be partially
   1961 * truncated.
   1962 *
   1963 * If the value tree hasn't changed, the extend/truncate went nowhere.
   1964 * We have nothing to do.  The caller can treat it as a straight error.
   1965 *
   1966 * If the value tree got partially truncated, we now have a corrupted
   1967 * extended attribute.  We're going to wipe its entry and leak the
   1968 * clusters.  Better to leak some storage than leave a corrupt entry.
   1969 *
   1970 * If the value tree grew, it obviously didn't grow enough for the
   1971 * new entry.  We're not going to try and reclaim those clusters either.
   1972 * If there was already an external value there (orig_clusters != 0),
   1973 * the new clusters are attached safely and we can just leave the old
   1974 * value in place.  If there was no external value there, we remove
   1975 * the entry.
   1976 *
   1977 * This way, the xattr block we store in the journal will be consistent.
   1978 * If the size change broke because of the journal, no changes will hit
   1979 * disk anyway.
   1980 */
   1981static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc,
   1982					    const char *what,
   1983					    unsigned int orig_clusters)
   1984{
   1985	unsigned int new_clusters = ocfs2_xa_value_clusters(loc);
   1986	char *nameval_buf = ocfs2_xa_offset_pointer(loc,
   1987				le16_to_cpu(loc->xl_entry->xe_name_offset));
   1988
   1989	if (new_clusters < orig_clusters) {
   1990		mlog(ML_ERROR,
   1991		     "Partial truncate while %s xattr %.*s.  Leaking "
   1992		     "%u clusters and removing the entry\n",
   1993		     what, loc->xl_entry->xe_name_len, nameval_buf,
   1994		     orig_clusters - new_clusters);
   1995		ocfs2_xa_remove_entry(loc);
   1996	} else if (!orig_clusters) {
   1997		mlog(ML_ERROR,
   1998		     "Unable to allocate an external value for xattr "
   1999		     "%.*s safely.  Leaking %u clusters and removing the "
   2000		     "entry\n",
   2001		     loc->xl_entry->xe_name_len, nameval_buf,
   2002		     new_clusters - orig_clusters);
   2003		ocfs2_xa_remove_entry(loc);
   2004	} else if (new_clusters > orig_clusters)
   2005		mlog(ML_ERROR,
   2006		     "Unable to grow xattr %.*s safely.  %u new clusters "
   2007		     "have been added, but the value will not be "
   2008		     "modified\n",
   2009		     loc->xl_entry->xe_name_len, nameval_buf,
   2010		     new_clusters - orig_clusters);
   2011}
   2012
   2013static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
   2014			   struct ocfs2_xattr_set_ctxt *ctxt)
   2015{
   2016	int rc = 0;
   2017	unsigned int orig_clusters;
   2018
   2019	if (!ocfs2_xattr_is_local(loc->xl_entry)) {
   2020		orig_clusters = ocfs2_xa_value_clusters(loc);
   2021		rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
   2022		if (rc) {
   2023			mlog_errno(rc);
   2024			/*
   2025			 * Since this is remove, we can return 0 if
   2026			 * ocfs2_xa_cleanup_value_truncate() is going to
   2027			 * wipe the entry anyway.  So we check the
   2028			 * cluster count as well.
   2029			 */
   2030			if (orig_clusters != ocfs2_xa_value_clusters(loc))
   2031				rc = 0;
   2032			ocfs2_xa_cleanup_value_truncate(loc, "removing",
   2033							orig_clusters);
   2034			if (rc)
   2035				goto out;
   2036		}
   2037	}
   2038
   2039	ocfs2_xa_remove_entry(loc);
   2040
   2041out:
   2042	return rc;
   2043}
   2044
   2045static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc)
   2046{
   2047	int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
   2048	char *nameval_buf;
   2049
   2050	nameval_buf = ocfs2_xa_offset_pointer(loc,
   2051				le16_to_cpu(loc->xl_entry->xe_name_offset));
   2052	memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE);
   2053}
   2054
   2055/*
   2056 * Take an existing entry and make it ready for the new value.  This
   2057 * won't allocate space, but it may free space.  It should be ready for
   2058 * ocfs2_xa_prepare_entry() to finish the work.
   2059 */
   2060static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
   2061				struct ocfs2_xattr_info *xi,
   2062				struct ocfs2_xattr_set_ctxt *ctxt)
   2063{
   2064	int rc = 0;
   2065	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
   2066	unsigned int orig_clusters;
   2067	char *nameval_buf;
   2068	int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
   2069	int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;
   2070
   2071	BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
   2072	       name_size);
   2073
   2074	nameval_buf = ocfs2_xa_offset_pointer(loc,
   2075				le16_to_cpu(loc->xl_entry->xe_name_offset));
   2076	if (xe_local) {
   2077		memset(nameval_buf + name_size, 0,
   2078		       namevalue_size_xe(loc->xl_entry) - name_size);
   2079		if (!xi_local)
   2080			ocfs2_xa_install_value_root(loc);
   2081	} else {
   2082		orig_clusters = ocfs2_xa_value_clusters(loc);
   2083		if (xi_local) {
   2084			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
   2085			if (rc < 0)
   2086				mlog_errno(rc);
   2087			else
   2088				memset(nameval_buf + name_size, 0,
   2089				       namevalue_size_xe(loc->xl_entry) -
   2090				       name_size);
   2091		} else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
   2092			   xi->xi_value_len) {
   2093			rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
   2094						     ctxt);
   2095			if (rc < 0)
   2096				mlog_errno(rc);
   2097		}
   2098
   2099		if (rc) {
   2100			ocfs2_xa_cleanup_value_truncate(loc, "reusing",
   2101							orig_clusters);
   2102			goto out;
   2103		}
   2104	}
   2105
   2106	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
   2107	ocfs2_xattr_set_local(loc->xl_entry, xi_local);
   2108
   2109out:
   2110	return rc;
   2111}
   2112
   2113/*
   2114 * Prepares loc->xl_entry to receive the new xattr.  This includes
   2115 * properly setting up the name+value pair region.  If loc->xl_entry
   2116 * already exists, it will take care of modifying it appropriately.
   2117 *
   2118 * Note that this modifies the data.  You did journal_access already,
   2119 * right?
   2120 */
   2121static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
   2122				  struct ocfs2_xattr_info *xi,
   2123				  u32 name_hash,
   2124				  struct ocfs2_xattr_set_ctxt *ctxt)
   2125{
   2126	int rc = 0;
   2127	unsigned int orig_clusters;
   2128	__le64 orig_value_size = 0;
   2129
   2130	rc = ocfs2_xa_check_space(loc, xi);
   2131	if (rc)
   2132		goto out;
   2133
   2134	if (loc->xl_entry) {
   2135		if (ocfs2_xa_can_reuse_entry(loc, xi)) {
   2136			orig_value_size = loc->xl_entry->xe_value_size;
   2137			rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
   2138			if (rc)
   2139				goto out;
   2140			goto alloc_value;
   2141		}
   2142
   2143		if (!ocfs2_xattr_is_local(loc->xl_entry)) {
   2144			orig_clusters = ocfs2_xa_value_clusters(loc);
   2145			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
   2146			if (rc) {
   2147				mlog_errno(rc);
   2148				ocfs2_xa_cleanup_value_truncate(loc,
   2149								"overwriting",
   2150								orig_clusters);
   2151				goto out;
   2152			}
   2153		}
   2154		ocfs2_xa_wipe_namevalue(loc);
   2155	} else
   2156		ocfs2_xa_add_entry(loc, name_hash);
   2157
   2158	/*
   2159	 * If we get here, we have a blank entry.  Fill it.  We grow our
   2160	 * name+value pair back from the end.
   2161	 */
   2162	ocfs2_xa_add_namevalue(loc, xi);
   2163	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
   2164		ocfs2_xa_install_value_root(loc);
   2165
   2166alloc_value:
   2167	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
   2168		orig_clusters = ocfs2_xa_value_clusters(loc);
   2169		rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
   2170		if (rc < 0) {
   2171			ctxt->set_abort = 1;
   2172			ocfs2_xa_cleanup_value_truncate(loc, "growing",
   2173							orig_clusters);
   2174			/*
   2175			 * If we were growing an existing value,
   2176			 * ocfs2_xa_cleanup_value_truncate() won't remove
   2177			 * the entry. We need to restore the original value
   2178			 * size.
   2179			 */
   2180			if (loc->xl_entry) {
   2181				BUG_ON(!orig_value_size);
   2182				loc->xl_entry->xe_value_size = orig_value_size;
   2183			}
   2184			mlog_errno(rc);
   2185		}
   2186	}
   2187
   2188out:
   2189	return rc;
   2190}
   2191
   2192/*
   2193 * Store the value portion of the name+value pair.  This will skip
   2194 * values that are stored externally.  Their tree roots were set up
   2195 * by ocfs2_xa_prepare_entry().
   2196 */
   2197static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
   2198				struct ocfs2_xattr_info *xi,
   2199				struct ocfs2_xattr_set_ctxt *ctxt)
   2200{
   2201	int rc = 0;
   2202	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
   2203	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
   2204	char *nameval_buf;
   2205	struct ocfs2_xattr_value_buf vb;
   2206
   2207	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
   2208	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
   2209		ocfs2_xa_fill_value_buf(loc, &vb);
   2210		rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
   2211						     ctxt->handle, &vb,
   2212						     xi->xi_value,
   2213						     xi->xi_value_len);
   2214	} else
   2215		memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);
   2216
   2217	return rc;
   2218}
   2219
   2220static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
   2221			struct ocfs2_xattr_info *xi,
   2222			struct ocfs2_xattr_set_ctxt *ctxt)
   2223{
   2224	int ret;
   2225	u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
   2226					      xi->xi_name_len);
   2227
   2228	ret = ocfs2_xa_journal_access(ctxt->handle, loc,
   2229				      OCFS2_JOURNAL_ACCESS_WRITE);
   2230	if (ret) {
   2231		mlog_errno(ret);
   2232		goto out;
   2233	}
   2234
   2235	/*
   2236	 * From here on out, everything is going to modify the buffer a
   2237	 * little.  Errors are going to leave the xattr header in a
   2238	 * sane state.  Thus, even with errors we dirty the sucker.
   2239	 */
   2240
   2241	/* Don't worry, we are never called with !xi_value and !xl_entry */
   2242	if (!xi->xi_value) {
   2243		ret = ocfs2_xa_remove(loc, ctxt);
   2244		goto out_dirty;
   2245	}
   2246
   2247	ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
   2248	if (ret) {
   2249		if (ret != -ENOSPC)
   2250			mlog_errno(ret);
   2251		goto out_dirty;
   2252	}
   2253
   2254	ret = ocfs2_xa_store_value(loc, xi, ctxt);
   2255	if (ret)
   2256		mlog_errno(ret);
   2257
   2258out_dirty:
   2259	ocfs2_xa_journal_dirty(ctxt->handle, loc);
   2260
   2261out:
   2262	return ret;
   2263}
   2264
   2265static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
   2266				     struct inode *inode,
   2267				     struct buffer_head *bh,
   2268				     struct ocfs2_xattr_entry *entry)
   2269{
   2270	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
   2271
   2272	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL));
   2273
   2274	loc->xl_inode = inode;
   2275	loc->xl_ops = &ocfs2_xa_block_loc_ops;
   2276	loc->xl_storage = bh;
   2277	loc->xl_entry = entry;
   2278	loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
   2279	loc->xl_header =
   2280		(struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
   2281					      loc->xl_size);
   2282}
   2283
   2284static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
   2285					  struct inode *inode,
   2286					  struct buffer_head *bh,
   2287					  struct ocfs2_xattr_entry *entry)
   2288{
   2289	struct ocfs2_xattr_block *xb =
   2290		(struct ocfs2_xattr_block *)bh->b_data;
   2291
   2292	BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
   2293
   2294	loc->xl_inode = inode;
   2295	loc->xl_ops = &ocfs2_xa_block_loc_ops;
   2296	loc->xl_storage = bh;
   2297	loc->xl_header = &(xb->xb_attrs.xb_header);
   2298	loc->xl_entry = entry;
   2299	loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
   2300					     xb_attrs.xb_header);
   2301}
   2302
   2303static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
   2304					   struct ocfs2_xattr_bucket *bucket,
   2305					   struct ocfs2_xattr_entry *entry)
   2306{
   2307	loc->xl_inode = bucket->bu_inode;
   2308	loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
   2309	loc->xl_storage = bucket;
   2310	loc->xl_header = bucket_xh(bucket);
   2311	loc->xl_entry = entry;
   2312	loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
   2313}
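        /*
         * Editor's note: the three initializers above plug the generic
         * xa_loc machinery into the three storage formats.  Inline-inode
         * xattrs and unindexed xattr blocks both use ocfs2_xa_block_loc_ops;
         * only indexed buckets use ocfs2_xa_bucket_loc_ops.  xl_size is the
         * space available for the header plus name+value pairs in each case:
         * i_xattr_inline_size, the block minus the ocfs2_xattr_block
         * envelope, or OCFS2_XATTR_BUCKET_SIZE.
         */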
   2314
   2315/*
    2316 * When removing an xattr whose value is stored outside and refcounted, we
    2317 * may have to split the refcount tree, so we need the allocators.
   2318 */
   2319static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
   2320					struct ocfs2_xattr_value_root *xv,
   2321					struct ocfs2_caching_info *ref_ci,
   2322					struct buffer_head *ref_root_bh,
   2323					struct ocfs2_alloc_context **meta_ac,
   2324					int *ref_credits)
   2325{
   2326	int ret, meta_add = 0;
   2327	u32 p_cluster, num_clusters;
   2328	unsigned int ext_flags;
   2329
   2330	*ref_credits = 0;
   2331	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
   2332				       &num_clusters,
   2333				       &xv->xr_list,
   2334				       &ext_flags);
   2335	if (ret) {
   2336		mlog_errno(ret);
   2337		goto out;
   2338	}
   2339
   2340	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
   2341		goto out;
   2342
   2343	ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
   2344						 ref_root_bh, xv,
   2345						 &meta_add, ref_credits);
   2346	if (ret) {
   2347		mlog_errno(ret);
   2348		goto out;
   2349	}
   2350
   2351	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
   2352						meta_add, meta_ac);
   2353	if (ret)
   2354		mlog_errno(ret);
   2355
   2356out:
   2357	return ret;
   2358}
   2359
    2360static int ocfs2_remove_value_outside(struct inode *inode,
   2361				      struct ocfs2_xattr_value_buf *vb,
   2362				      struct ocfs2_xattr_header *header,
   2363				      struct ocfs2_caching_info *ref_ci,
   2364				      struct buffer_head *ref_root_bh)
   2365{
   2366	int ret = 0, i, ref_credits;
   2367	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   2368	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
   2369	void *val;
   2370
   2371	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
   2372
   2373	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
   2374		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
   2375
   2376		if (ocfs2_xattr_is_local(entry))
   2377			continue;
   2378
   2379		val = (void *)header +
   2380			le16_to_cpu(entry->xe_name_offset);
   2381		vb->vb_xv = (struct ocfs2_xattr_value_root *)
   2382			(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
   2383
   2384		ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
   2385							 ref_ci, ref_root_bh,
   2386							 &ctxt.meta_ac,
   2387							 &ref_credits);
   2388
   2389		ctxt.handle = ocfs2_start_trans(osb, ref_credits +
   2390					ocfs2_remove_extent_credits(osb->sb));
   2391		if (IS_ERR(ctxt.handle)) {
   2392			ret = PTR_ERR(ctxt.handle);
   2393			mlog_errno(ret);
   2394			break;
   2395		}
   2396
   2397		ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
   2398
   2399		ocfs2_commit_trans(osb, ctxt.handle);
   2400		if (ctxt.meta_ac) {
   2401			ocfs2_free_alloc_context(ctxt.meta_ac);
   2402			ctxt.meta_ac = NULL;
   2403		}
   2404
   2405		if (ret < 0) {
   2406			mlog_errno(ret);
   2407			break;
   2408		}
   2409
   2410	}
   2411
   2412	if (ctxt.meta_ac)
   2413		ocfs2_free_alloc_context(ctxt.meta_ac);
   2414	ocfs2_schedule_truncate_log_flush(osb, 1);
   2415	ocfs2_run_deallocs(osb, &ctxt.dealloc);
   2416	return ret;
   2417}
   2418
   2419static int ocfs2_xattr_ibody_remove(struct inode *inode,
   2420				    struct buffer_head *di_bh,
   2421				    struct ocfs2_caching_info *ref_ci,
   2422				    struct buffer_head *ref_root_bh)
   2423{
   2424
   2425	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
   2426	struct ocfs2_xattr_header *header;
   2427	int ret;
   2428	struct ocfs2_xattr_value_buf vb = {
   2429		.vb_bh = di_bh,
   2430		.vb_access = ocfs2_journal_access_di,
   2431	};
   2432
   2433	header = (struct ocfs2_xattr_header *)
   2434		 ((void *)di + inode->i_sb->s_blocksize -
   2435		 le16_to_cpu(di->i_xattr_inline_size));
   2436
   2437	ret = ocfs2_remove_value_outside(inode, &vb, header,
   2438					 ref_ci, ref_root_bh);
   2439
   2440	return ret;
   2441}
   2442
   2443struct ocfs2_rm_xattr_bucket_para {
   2444	struct ocfs2_caching_info *ref_ci;
   2445	struct buffer_head *ref_root_bh;
   2446};
   2447
   2448static int ocfs2_xattr_block_remove(struct inode *inode,
   2449				    struct buffer_head *blk_bh,
   2450				    struct ocfs2_caching_info *ref_ci,
   2451				    struct buffer_head *ref_root_bh)
   2452{
   2453	struct ocfs2_xattr_block *xb;
   2454	int ret = 0;
   2455	struct ocfs2_xattr_value_buf vb = {
   2456		.vb_bh = blk_bh,
   2457		.vb_access = ocfs2_journal_access_xb,
   2458	};
   2459	struct ocfs2_rm_xattr_bucket_para args = {
   2460		.ref_ci = ref_ci,
   2461		.ref_root_bh = ref_root_bh,
   2462	};
   2463
   2464	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
   2465	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
   2466		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
   2467		ret = ocfs2_remove_value_outside(inode, &vb, header,
   2468						 ref_ci, ref_root_bh);
   2469	} else
   2470		ret = ocfs2_iterate_xattr_index_block(inode,
   2471						blk_bh,
   2472						ocfs2_rm_xattr_cluster,
   2473						&args);
   2474
   2475	return ret;
   2476}
   2477
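        /*
         * Editor's note: the helper below frees one external xattr block.
         * It first strips any outside values the block still references,
         * then returns the block itself to the extent allocator it was
         * claimed from, identified by xb_suballoc_slot and xb_suballoc_bit,
         * with the group taken from xb_suballoc_loc (or computed from the
         * block number when xb_suballoc_loc is not set).
         */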
   2478static int ocfs2_xattr_free_block(struct inode *inode,
   2479				  u64 block,
   2480				  struct ocfs2_caching_info *ref_ci,
   2481				  struct buffer_head *ref_root_bh)
   2482{
   2483	struct inode *xb_alloc_inode;
   2484	struct buffer_head *xb_alloc_bh = NULL;
   2485	struct buffer_head *blk_bh = NULL;
   2486	struct ocfs2_xattr_block *xb;
   2487	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   2488	handle_t *handle;
   2489	int ret = 0;
   2490	u64 blk, bg_blkno;
   2491	u16 bit;
   2492
   2493	ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
   2494	if (ret < 0) {
   2495		mlog_errno(ret);
   2496		goto out;
   2497	}
   2498
   2499	ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
   2500	if (ret < 0) {
   2501		mlog_errno(ret);
   2502		goto out;
   2503	}
   2504
   2505	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
   2506	blk = le64_to_cpu(xb->xb_blkno);
   2507	bit = le16_to_cpu(xb->xb_suballoc_bit);
   2508	if (xb->xb_suballoc_loc)
   2509		bg_blkno = le64_to_cpu(xb->xb_suballoc_loc);
   2510	else
   2511		bg_blkno = ocfs2_which_suballoc_group(blk, bit);
   2512
   2513	xb_alloc_inode = ocfs2_get_system_file_inode(osb,
   2514				EXTENT_ALLOC_SYSTEM_INODE,
   2515				le16_to_cpu(xb->xb_suballoc_slot));
   2516	if (!xb_alloc_inode) {
   2517		ret = -ENOMEM;
   2518		mlog_errno(ret);
   2519		goto out;
   2520	}
   2521	inode_lock(xb_alloc_inode);
   2522
   2523	ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
   2524	if (ret < 0) {
   2525		mlog_errno(ret);
   2526		goto out_mutex;
   2527	}
   2528
   2529	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
   2530	if (IS_ERR(handle)) {
   2531		ret = PTR_ERR(handle);
   2532		mlog_errno(ret);
   2533		goto out_unlock;
   2534	}
   2535
   2536	ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
   2537				       bit, bg_blkno, 1);
   2538	if (ret < 0)
   2539		mlog_errno(ret);
   2540
   2541	ocfs2_commit_trans(osb, handle);
   2542out_unlock:
   2543	ocfs2_inode_unlock(xb_alloc_inode, 1);
   2544	brelse(xb_alloc_bh);
   2545out_mutex:
   2546	inode_unlock(xb_alloc_inode);
   2547	iput(xb_alloc_inode);
   2548out:
   2549	brelse(blk_bh);
   2550	return ret;
   2551}
   2552
   2553/*
   2554 * ocfs2_xattr_remove()
   2555 *
   2556 * Free extended attribute resources associated with this inode.
   2557 */
   2558int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
   2559{
   2560	struct ocfs2_inode_info *oi = OCFS2_I(inode);
   2561	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
   2562	struct ocfs2_refcount_tree *ref_tree = NULL;
   2563	struct buffer_head *ref_root_bh = NULL;
   2564	struct ocfs2_caching_info *ref_ci = NULL;
   2565	handle_t *handle;
   2566	int ret;
   2567
   2568	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
   2569		return 0;
   2570
   2571	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
   2572		return 0;
   2573
   2574	if (ocfs2_is_refcount_inode(inode)) {
   2575		ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
   2576					       le64_to_cpu(di->i_refcount_loc),
   2577					       1, &ref_tree, &ref_root_bh);
   2578		if (ret) {
   2579			mlog_errno(ret);
   2580			goto out;
   2581		}
   2582		ref_ci = &ref_tree->rf_ci;
   2583
   2584	}
   2585
   2586	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
   2587		ret = ocfs2_xattr_ibody_remove(inode, di_bh,
   2588					       ref_ci, ref_root_bh);
   2589		if (ret < 0) {
   2590			mlog_errno(ret);
   2591			goto out;
   2592		}
   2593	}
   2594
   2595	if (di->i_xattr_loc) {
   2596		ret = ocfs2_xattr_free_block(inode,
   2597					     le64_to_cpu(di->i_xattr_loc),
   2598					     ref_ci, ref_root_bh);
   2599		if (ret < 0) {
   2600			mlog_errno(ret);
   2601			goto out;
   2602		}
   2603	}
   2604
   2605	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
   2606				   OCFS2_INODE_UPDATE_CREDITS);
   2607	if (IS_ERR(handle)) {
   2608		ret = PTR_ERR(handle);
   2609		mlog_errno(ret);
   2610		goto out;
   2611	}
   2612	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
   2613				      OCFS2_JOURNAL_ACCESS_WRITE);
   2614	if (ret) {
   2615		mlog_errno(ret);
   2616		goto out_commit;
   2617	}
   2618
   2619	di->i_xattr_loc = 0;
   2620
   2621	spin_lock(&oi->ip_lock);
   2622	oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
   2623	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
   2624	spin_unlock(&oi->ip_lock);
   2625	ocfs2_update_inode_fsync_trans(handle, inode, 0);
   2626
   2627	ocfs2_journal_dirty(handle, di_bh);
   2628out_commit:
   2629	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
   2630out:
   2631	if (ref_tree)
   2632		ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
   2633	brelse(ref_root_bh);
   2634	return ret;
   2635}
   2636
   2637static int ocfs2_xattr_has_space_inline(struct inode *inode,
   2638					struct ocfs2_dinode *di)
   2639{
   2640	struct ocfs2_inode_info *oi = OCFS2_I(inode);
   2641	unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
   2642	int free;
   2643
   2644	if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
   2645		return 0;
   2646
   2647	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
   2648		struct ocfs2_inline_data *idata = &di->id2.i_data;
   2649		free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
   2650	} else if (ocfs2_inode_is_fast_symlink(inode)) {
   2651		free = ocfs2_fast_symlink_chars(inode->i_sb) -
   2652			le64_to_cpu(di->i_size);
   2653	} else {
   2654		struct ocfs2_extent_list *el = &di->id2.i_list;
   2655		free = (le16_to_cpu(el->l_count) -
   2656			le16_to_cpu(el->l_next_free_rec)) *
   2657			sizeof(struct ocfs2_extent_rec);
   2658	}
   2659	if (free >= xattrsize)
   2660		return 1;
   2661
   2662	return 0;
   2663}
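        /*
         * Editor's note: "free" above is whatever part of the dinode body the
         * file is not using -- spare inline-data bytes, spare fast-symlink
         * bytes, or unused extent record slots.  If at least
         * s_xattr_inline_size bytes are spare, ocfs2_xattr_ibody_init() below
         * can carve the inline xattr area out of that region (for inline data
         * it shrinks id_count, for extent-list inodes it shrinks el->l_count;
         * fast symlinks need no adjustment).
         */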
   2664
   2665/*
   2666 * ocfs2_xattr_ibody_find()
   2667 *
   2668 * Find extended attribute in inode block and
   2669 * fill search info into struct ocfs2_xattr_search.
   2670 */
   2671static int ocfs2_xattr_ibody_find(struct inode *inode,
   2672				  int name_index,
   2673				  const char *name,
   2674				  struct ocfs2_xattr_search *xs)
   2675{
   2676	struct ocfs2_inode_info *oi = OCFS2_I(inode);
   2677	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
   2678	int ret;
   2679	int has_space = 0;
   2680
   2681	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
   2682		return 0;
   2683
   2684	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
   2685		down_read(&oi->ip_alloc_sem);
   2686		has_space = ocfs2_xattr_has_space_inline(inode, di);
   2687		up_read(&oi->ip_alloc_sem);
   2688		if (!has_space)
   2689			return 0;
   2690	}
   2691
   2692	xs->xattr_bh = xs->inode_bh;
   2693	xs->end = (void *)di + inode->i_sb->s_blocksize;
   2694	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
   2695		xs->header = (struct ocfs2_xattr_header *)
   2696			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
   2697	else
   2698		xs->header = (struct ocfs2_xattr_header *)
   2699			(xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
   2700	xs->base = (void *)xs->header;
   2701	xs->here = xs->header->xh_entries;
   2702
   2703	/* Find the named attribute. */
   2704	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
   2705		ret = ocfs2_xattr_find_entry(name_index, name, xs);
   2706		if (ret && ret != -ENODATA)
   2707			return ret;
   2708		xs->not_found = ret;
   2709	}
   2710
   2711	return 0;
   2712}
   2713
   2714static int ocfs2_xattr_ibody_init(struct inode *inode,
   2715				  struct buffer_head *di_bh,
   2716				  struct ocfs2_xattr_set_ctxt *ctxt)
   2717{
   2718	int ret;
   2719	struct ocfs2_inode_info *oi = OCFS2_I(inode);
   2720	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
   2721	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   2722	unsigned int xattrsize = osb->s_xattr_inline_size;
   2723
   2724	if (!ocfs2_xattr_has_space_inline(inode, di)) {
   2725		ret = -ENOSPC;
   2726		goto out;
   2727	}
   2728
   2729	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh,
   2730				      OCFS2_JOURNAL_ACCESS_WRITE);
   2731	if (ret) {
   2732		mlog_errno(ret);
   2733		goto out;
   2734	}
   2735
   2736	/*
   2737	 * Adjust extent record count or inline data size
   2738	 * to reserve space for extended attribute.
   2739	 */
   2740	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
   2741		struct ocfs2_inline_data *idata = &di->id2.i_data;
   2742		le16_add_cpu(&idata->id_count, -xattrsize);
   2743	} else if (!(ocfs2_inode_is_fast_symlink(inode))) {
   2744		struct ocfs2_extent_list *el = &di->id2.i_list;
   2745		le16_add_cpu(&el->l_count, -(xattrsize /
   2746					     sizeof(struct ocfs2_extent_rec)));
   2747	}
   2748	di->i_xattr_inline_size = cpu_to_le16(xattrsize);
   2749
   2750	spin_lock(&oi->ip_lock);
   2751	oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL;
   2752	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
   2753	spin_unlock(&oi->ip_lock);
   2754
   2755	ocfs2_journal_dirty(ctxt->handle, di_bh);
   2756
   2757out:
   2758	return ret;
   2759}
   2760
   2761/*
   2762 * ocfs2_xattr_ibody_set()
   2763 *
    2764 * Set, replace or remove an extended attribute in the inode block.
   2765 *
   2766 */
   2767static int ocfs2_xattr_ibody_set(struct inode *inode,
   2768				 struct ocfs2_xattr_info *xi,
   2769				 struct ocfs2_xattr_search *xs,
   2770				 struct ocfs2_xattr_set_ctxt *ctxt)
   2771{
   2772	int ret;
   2773	struct ocfs2_inode_info *oi = OCFS2_I(inode);
   2774	struct ocfs2_xa_loc loc;
   2775
   2776	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
   2777		return -ENOSPC;
   2778
   2779	down_write(&oi->ip_alloc_sem);
   2780	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
   2781		ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt);
   2782		if (ret) {
   2783			if (ret != -ENOSPC)
   2784				mlog_errno(ret);
   2785			goto out;
   2786		}
   2787	}
   2788
   2789	ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
   2790				 xs->not_found ? NULL : xs->here);
   2791	ret = ocfs2_xa_set(&loc, xi, ctxt);
   2792	if (ret) {
   2793		if (ret != -ENOSPC)
   2794			mlog_errno(ret);
   2795		goto out;
   2796	}
   2797	xs->here = loc.xl_entry;
   2798
   2799out:
   2800	up_write(&oi->ip_alloc_sem);
   2801
   2802	return ret;
   2803}
   2804
   2805/*
   2806 * ocfs2_xattr_block_find()
   2807 *
   2808 * Find extended attribute in external block and
   2809 * fill search info into struct ocfs2_xattr_search.
   2810 */
   2811static int ocfs2_xattr_block_find(struct inode *inode,
   2812				  int name_index,
   2813				  const char *name,
   2814				  struct ocfs2_xattr_search *xs)
   2815{
   2816	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
   2817	struct buffer_head *blk_bh = NULL;
   2818	struct ocfs2_xattr_block *xb;
   2819	int ret = 0;
   2820
   2821	if (!di->i_xattr_loc)
   2822		return ret;
   2823
   2824	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
   2825				     &blk_bh);
   2826	if (ret < 0) {
   2827		mlog_errno(ret);
   2828		return ret;
   2829	}
   2830
   2831	xs->xattr_bh = blk_bh;
   2832	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
   2833
   2834	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
   2835		xs->header = &xb->xb_attrs.xb_header;
   2836		xs->base = (void *)xs->header;
   2837		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
   2838		xs->here = xs->header->xh_entries;
   2839
   2840		ret = ocfs2_xattr_find_entry(name_index, name, xs);
   2841	} else
   2842		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
   2843						   name_index,
   2844						   name, xs);
   2845
   2846	if (ret && ret != -ENODATA) {
   2847		xs->xattr_bh = NULL;
   2848		goto cleanup;
   2849	}
   2850	xs->not_found = ret;
   2851	return 0;
   2852cleanup:
   2853	brelse(blk_bh);
   2854
   2855	return ret;
   2856}
   2857
   2858static int ocfs2_create_xattr_block(struct inode *inode,
   2859				    struct buffer_head *inode_bh,
   2860				    struct ocfs2_xattr_set_ctxt *ctxt,
   2861				    int indexed,
   2862				    struct buffer_head **ret_bh)
   2863{
   2864	int ret;
   2865	u16 suballoc_bit_start;
   2866	u32 num_got;
   2867	u64 suballoc_loc, first_blkno;
   2868	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
   2869	struct buffer_head *new_bh = NULL;
   2870	struct ocfs2_xattr_block *xblk;
   2871
   2872	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
   2873				      inode_bh, OCFS2_JOURNAL_ACCESS_CREATE);
   2874	if (ret < 0) {
   2875		mlog_errno(ret);
   2876		goto end;
   2877	}
   2878
   2879	ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
   2880				   &suballoc_loc, &suballoc_bit_start,
   2881				   &num_got, &first_blkno);
   2882	if (ret < 0) {
   2883		mlog_errno(ret);
   2884		goto end;
   2885	}
   2886
   2887	new_bh = sb_getblk(inode->i_sb, first_blkno);
   2888	if (!new_bh) {
   2889		ret = -ENOMEM;
   2890		mlog_errno(ret);
   2891		goto end;
   2892	}
   2893
   2894	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
   2895
   2896	ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
   2897				      new_bh,
   2898				      OCFS2_JOURNAL_ACCESS_CREATE);
   2899	if (ret < 0) {
   2900		mlog_errno(ret);
   2901		goto end;
   2902	}
   2903
   2904	/* Initialize ocfs2_xattr_block */
   2905	xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
   2906	memset(xblk, 0, inode->i_sb->s_blocksize);
   2907	strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
   2908	xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
   2909	xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
   2910	xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
   2911	xblk->xb_fs_generation =
   2912		cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
   2913	xblk->xb_blkno = cpu_to_le64(first_blkno);
   2914	if (indexed) {
   2915		struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
   2916		xr->xt_clusters = cpu_to_le32(1);
   2917		xr->xt_last_eb_blk = 0;
   2918		xr->xt_list.l_tree_depth = 0;
   2919		xr->xt_list.l_count = cpu_to_le16(
   2920					ocfs2_xattr_recs_per_xb(inode->i_sb));
   2921		xr->xt_list.l_next_free_rec = cpu_to_le16(1);
   2922		xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
   2923	}
   2924	ocfs2_journal_dirty(ctxt->handle, new_bh);
   2925
   2926	/* Add it to the inode */
   2927	di->i_xattr_loc = cpu_to_le64(first_blkno);
   2928
   2929	spin_lock(&OCFS2_I(inode)->ip_lock);
   2930	OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
   2931	di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
   2932	spin_unlock(&OCFS2_I(inode)->ip_lock);
   2933
   2934	ocfs2_journal_dirty(ctxt->handle, inode_bh);
   2935
   2936	*ret_bh = new_bh;
   2937	new_bh = NULL;
   2938
   2939end:
   2940	brelse(new_bh);
   2941	return ret;
   2942}
   2943
   2944/*
   2945 * ocfs2_xattr_block_set()
   2946 *
    2947 * Set, replace or remove an extended attribute in an external block.
   2948 *
   2949 */
   2950static int ocfs2_xattr_block_set(struct inode *inode,
   2951				 struct ocfs2_xattr_info *xi,
   2952				 struct ocfs2_xattr_search *xs,
   2953				 struct ocfs2_xattr_set_ctxt *ctxt)
   2954{
   2955	struct buffer_head *new_bh = NULL;
   2956	struct ocfs2_xattr_block *xblk = NULL;
   2957	int ret;
   2958	struct ocfs2_xa_loc loc;
   2959
   2960	if (!xs->xattr_bh) {
   2961		ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
   2962					       0, &new_bh);
   2963		if (ret) {
   2964			mlog_errno(ret);
   2965			goto end;
   2966		}
   2967
   2968		xs->xattr_bh = new_bh;
   2969		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
   2970		xs->header = &xblk->xb_attrs.xb_header;
   2971		xs->base = (void *)xs->header;
   2972		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
   2973		xs->here = xs->header->xh_entries;
   2974	} else
   2975		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
   2976
   2977	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
   2978		ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
   2979					      xs->not_found ? NULL : xs->here);
   2980
   2981		ret = ocfs2_xa_set(&loc, xi, ctxt);
   2982		if (!ret)
   2983			xs->here = loc.xl_entry;
   2984		else if ((ret != -ENOSPC) || ctxt->set_abort)
   2985			goto end;
   2986		else {
   2987			ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
   2988			if (ret)
   2989				goto end;
   2990		}
   2991	}
   2992
   2993	if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
   2994		ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
   2995
   2996end:
   2997	return ret;
   2998}
   2999
   3000/* Check whether the new xattr can be inserted into the inode. */
   3001static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
   3002				       struct ocfs2_xattr_info *xi,
   3003				       struct ocfs2_xattr_search *xs)
   3004{
   3005	struct ocfs2_xattr_entry *last;
   3006	int free, i;
   3007	size_t min_offs = xs->end - xs->base;
   3008
   3009	if (!xs->header)
   3010		return 0;
   3011
   3012	last = xs->header->xh_entries;
   3013
   3014	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
   3015		size_t offs = le16_to_cpu(last->xe_name_offset);
   3016		if (offs < min_offs)
   3017			min_offs = offs;
   3018		last += 1;
   3019	}
   3020
   3021	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
   3022	if (free < 0)
   3023		return 0;
   3024
   3025	BUG_ON(!xs->not_found);
   3026
   3027	if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
   3028		return 1;
   3029
   3030	return 0;
   3031}
   3032
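        /*
         * Editor's note: the function below estimates the worst-case
         * resources an xattr set may consume -- data clusters for an outside
         * value, metadata blocks for growing extent trees, and journal
         * credits -- so the caller can reserve allocators and size the
         * transaction up front (see ocfs2_init_xattr_set_ctxt() further
         * down).  Any of the three output pointers may be NULL if the caller
         * only needs some of the numbers.
         */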
   3033static int ocfs2_calc_xattr_set_need(struct inode *inode,
   3034				     struct ocfs2_dinode *di,
   3035				     struct ocfs2_xattr_info *xi,
   3036				     struct ocfs2_xattr_search *xis,
   3037				     struct ocfs2_xattr_search *xbs,
   3038				     int *clusters_need,
   3039				     int *meta_need,
   3040				     int *credits_need)
   3041{
   3042	int ret = 0, old_in_xb = 0;
   3043	int clusters_add = 0, meta_add = 0, credits = 0;
   3044	struct buffer_head *bh = NULL;
   3045	struct ocfs2_xattr_block *xb = NULL;
   3046	struct ocfs2_xattr_entry *xe = NULL;
   3047	struct ocfs2_xattr_value_root *xv = NULL;
   3048	char *base = NULL;
   3049	int name_offset, name_len = 0;
   3050	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
   3051						    xi->xi_value_len);
   3052	u64 value_size;
   3053
   3054	/*
   3055	 * Calculate the clusters we need to write.
   3056	 * No matter whether we replace an old one or add a new one,
   3057	 * we need this for writing.
   3058	 */
   3059	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
   3060		credits += new_clusters *
   3061			   ocfs2_clusters_to_blocks(inode->i_sb, 1);
   3062
   3063	if (xis->not_found && xbs->not_found) {
   3064		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
   3065
   3066		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
   3067			clusters_add += new_clusters;
   3068			credits += ocfs2_calc_extend_credits(inode->i_sb,
   3069							&def_xv.xv.xr_list);
   3070		}
   3071
   3072		goto meta_guess;
   3073	}
   3074
   3075	if (!xis->not_found) {
   3076		xe = xis->here;
   3077		name_offset = le16_to_cpu(xe->xe_name_offset);
   3078		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
   3079		base = xis->base;
   3080		credits += OCFS2_INODE_UPDATE_CREDITS;
   3081	} else {
   3082		int i, block_off = 0;
   3083		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
   3084		xe = xbs->here;
   3085		name_offset = le16_to_cpu(xe->xe_name_offset);
   3086		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
   3087		i = xbs->here - xbs->header->xh_entries;
   3088		old_in_xb = 1;
   3089
   3090		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
   3091			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
   3092							bucket_xh(xbs->bucket),
   3093							i, &block_off,
   3094							&name_offset);
   3095			base = bucket_block(xbs->bucket, block_off);
   3096			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
   3097		} else {
   3098			base = xbs->base;
   3099			credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
   3100		}
   3101	}
   3102
   3103	/*
    3104	 * Deleting an xattr doesn't need metadata or cluster allocation,
    3105	 * so just calculate the credits and return.
   3106	 *
   3107	 * The credits for removing the value tree will be extended
   3108	 * by ocfs2_remove_extent itself.
   3109	 */
   3110	if (!xi->xi_value) {
   3111		if (!ocfs2_xattr_is_local(xe))
   3112			credits += ocfs2_remove_extent_credits(inode->i_sb);
   3113
   3114		goto out;
   3115	}
   3116
   3117	/* do cluster allocation guess first. */
   3118	value_size = le64_to_cpu(xe->xe_value_size);
   3119
   3120	if (old_in_xb) {
   3121		/*
    3122	 * In xattr set, we always try to set the xe in the inode first,
    3123	 * so if it can be inserted into the inode successfully, the old
    3124	 * copy will be removed from the xattr block, and this xattr
    3125	 * will then live in the inode as a new in-inode xattr.
   3126		 */
   3127		if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
   3128			clusters_add += new_clusters;
   3129			credits += ocfs2_remove_extent_credits(inode->i_sb) +
   3130				    OCFS2_INODE_UPDATE_CREDITS;
   3131			if (!ocfs2_xattr_is_local(xe))
   3132				credits += ocfs2_calc_extend_credits(
   3133							inode->i_sb,
   3134							&def_xv.xv.xr_list);
   3135			goto out;
   3136		}
   3137	}
   3138
   3139	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
   3140		/* the new values will be stored outside. */
   3141		u32 old_clusters = 0;
   3142
   3143		if (!ocfs2_xattr_is_local(xe)) {
   3144			old_clusters =	ocfs2_clusters_for_bytes(inode->i_sb,
   3145								 value_size);
   3146			xv = (struct ocfs2_xattr_value_root *)
   3147			     (base + name_offset + name_len);
   3148			value_size = OCFS2_XATTR_ROOT_SIZE;
   3149		} else
   3150			xv = &def_xv.xv;
   3151
   3152		if (old_clusters >= new_clusters) {
   3153			credits += ocfs2_remove_extent_credits(inode->i_sb);
   3154			goto out;
   3155		} else {
   3156			meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
   3157			clusters_add += new_clusters - old_clusters;
   3158			credits += ocfs2_calc_extend_credits(inode->i_sb,
   3159							     &xv->xr_list);
   3160			if (value_size >= OCFS2_XATTR_ROOT_SIZE)
   3161				goto out;
   3162		}
   3163	} else {
   3164		/*
   3165		 * Now the new value will be stored inside. So if the new
   3166		 * value is smaller than the size of value root or the old
   3167		 * value, we don't need any allocation, otherwise we have
   3168		 * to guess metadata allocation.
   3169		 */
   3170		if ((ocfs2_xattr_is_local(xe) &&
   3171		     (value_size >= xi->xi_value_len)) ||
   3172		    (!ocfs2_xattr_is_local(xe) &&
   3173		     OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
   3174			goto out;
   3175	}
   3176
   3177meta_guess:
   3178	/* calculate metadata allocation. */
   3179	if (di->i_xattr_loc) {
   3180		if (!xbs->xattr_bh) {
   3181			ret = ocfs2_read_xattr_block(inode,
   3182						     le64_to_cpu(di->i_xattr_loc),
   3183						     &bh);
   3184			if (ret) {
   3185				mlog_errno(ret);
   3186				goto out;
   3187			}
   3188
   3189			xb = (struct ocfs2_xattr_block *)bh->b_data;
   3190		} else
   3191			xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
   3192
   3193		/*
    3194		 * If there is already an xattr tree, good, we can calculate
    3195		 * credits as for other b-trees.  Otherwise we may end up
    3196		 * creating a tree; the credit calculation is borrowed from
    3197		 * ocfs2_calc_extend_credits() with root_el = NULL.  The new
    3198		 * tree will be cluster based, so no metadata is needed.
   3199		 */
   3200		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
   3201			struct ocfs2_extent_list *el =
   3202				 &xb->xb_attrs.xb_root.xt_list;
   3203			meta_add += ocfs2_extend_meta_needed(el);
   3204			credits += ocfs2_calc_extend_credits(inode->i_sb,
   3205							     el);
   3206		} else
   3207			credits += OCFS2_SUBALLOC_ALLOC + 1;
   3208
   3209		/*
    3210		 * This cluster will be used either for a new bucket or for a
    3211		 * new xattr block.
   3212		 * If the cluster size is the same as the bucket size, one
   3213		 * more is needed since we may need to extend the bucket
   3214		 * also.
   3215		 */
   3216		clusters_add += 1;
   3217		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
   3218		if (OCFS2_XATTR_BUCKET_SIZE ==
   3219			OCFS2_SB(inode->i_sb)->s_clustersize) {
   3220			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
   3221			clusters_add += 1;
   3222		}
   3223	} else {
   3224		credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
   3225		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
   3226			struct ocfs2_extent_list *el = &def_xv.xv.xr_list;
   3227			meta_add += ocfs2_extend_meta_needed(el);
   3228			credits += ocfs2_calc_extend_credits(inode->i_sb,
   3229							     el);
   3230		} else {
   3231			meta_add += 1;
   3232		}
   3233	}
   3234out:
   3235	if (clusters_need)
   3236		*clusters_need = clusters_add;
   3237	if (meta_need)
   3238		*meta_need = meta_add;
   3239	if (credits_need)
   3240		*credits_need = credits;
   3241	brelse(bh);
   3242	return ret;
   3243}
   3244
   3245static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
   3246				     struct ocfs2_dinode *di,
   3247				     struct ocfs2_xattr_info *xi,
   3248				     struct ocfs2_xattr_search *xis,
   3249				     struct ocfs2_xattr_search *xbs,
   3250				     struct ocfs2_xattr_set_ctxt *ctxt,
   3251				     int extra_meta,
   3252				     int *credits)
   3253{
   3254	int clusters_add, meta_add, ret;
   3255	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   3256
   3257	memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
   3258
   3259	ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
   3260
   3261	ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
   3262					&clusters_add, &meta_add, credits);
   3263	if (ret) {
   3264		mlog_errno(ret);
   3265		return ret;
   3266	}
   3267
   3268	meta_add += extra_meta;
   3269	trace_ocfs2_init_xattr_set_ctxt(xi->xi_name, meta_add,
   3270					clusters_add, *credits);
   3271
   3272	if (meta_add) {
   3273		ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
   3274							&ctxt->meta_ac);
   3275		if (ret) {
   3276			mlog_errno(ret);
   3277			goto out;
   3278		}
   3279	}
   3280
   3281	if (clusters_add) {
   3282		ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
   3283		if (ret)
   3284			mlog_errno(ret);
   3285	}
   3286out:
   3287	if (ret) {
   3288		if (ctxt->meta_ac) {
   3289			ocfs2_free_alloc_context(ctxt->meta_ac);
   3290			ctxt->meta_ac = NULL;
   3291		}
   3292
   3293		/*
    3294		 * We cannot have an error and a non-NULL ctxt->data_ac.
   3295		 */
   3296	}
   3297
   3298	return ret;
   3299}
   3300
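        /*
         * Editor's note on the flow below: when a value is supplied, the
         * inode body is always tried first.  If that succeeds and a stale
         * copy exists in the external block, the stale copy is deleted; if
         * the inode body returns -ENOSPC, the attribute is set in the
         * external block instead and any stale in-inode copy is deleted.
         * The follow-up operations recalculate credits with
         * ocfs2_calc_xattr_set_need() and extend the running transaction as
         * needed, and on success the inode ctime is refreshed in the same
         * transaction.
         */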
   3301static int __ocfs2_xattr_set_handle(struct inode *inode,
   3302				    struct ocfs2_dinode *di,
   3303				    struct ocfs2_xattr_info *xi,
   3304				    struct ocfs2_xattr_search *xis,
   3305				    struct ocfs2_xattr_search *xbs,
   3306				    struct ocfs2_xattr_set_ctxt *ctxt)
   3307{
   3308	int ret = 0, credits, old_found;
   3309
   3310	if (!xi->xi_value) {
   3311		/* Remove existing extended attribute */
   3312		if (!xis->not_found)
   3313			ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
   3314		else if (!xbs->not_found)
   3315			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
   3316	} else {
    3317		/* We always try to set the extended attribute in the inode first */
   3318		ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
   3319		if (!ret && !xbs->not_found) {
   3320			/*
    3321			 * If that succeeds and the extended attribute also exists
    3322			 * in an external block, then we will remove it from there.
   3323			 */
   3324			xi->xi_value = NULL;
   3325			xi->xi_value_len = 0;
   3326
   3327			old_found = xis->not_found;
   3328			xis->not_found = -ENODATA;
   3329			ret = ocfs2_calc_xattr_set_need(inode,
   3330							di,
   3331							xi,
   3332							xis,
   3333							xbs,
   3334							NULL,
   3335							NULL,
   3336							&credits);
   3337			xis->not_found = old_found;
   3338			if (ret) {
   3339				mlog_errno(ret);
   3340				goto out;
   3341			}
   3342
   3343			ret = ocfs2_extend_trans(ctxt->handle, credits);
   3344			if (ret) {
   3345				mlog_errno(ret);
   3346				goto out;
   3347			}
   3348			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
   3349		} else if ((ret == -ENOSPC) && !ctxt->set_abort) {
   3350			if (di->i_xattr_loc && !xbs->xattr_bh) {
   3351				ret = ocfs2_xattr_block_find(inode,
   3352							     xi->xi_name_index,
   3353							     xi->xi_name, xbs);
   3354				if (ret)
   3355					goto out;
   3356
   3357				old_found = xis->not_found;
   3358				xis->not_found = -ENODATA;
   3359				ret = ocfs2_calc_xattr_set_need(inode,
   3360								di,
   3361								xi,
   3362								xis,
   3363								xbs,
   3364								NULL,
   3365								NULL,
   3366								&credits);
   3367				xis->not_found = old_found;
   3368				if (ret) {
   3369					mlog_errno(ret);
   3370					goto out;
   3371				}
   3372
   3373				ret = ocfs2_extend_trans(ctxt->handle, credits);
   3374				if (ret) {
   3375					mlog_errno(ret);
   3376					goto out;
   3377				}
   3378			}
   3379			/*
    3380			 * If there is no space in the inode, set the extended
    3381			 * attribute in an external block.
   3382			 */
   3383			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
   3384			if (ret)
   3385				goto out;
   3386			if (!xis->not_found) {
   3387				/*
    3388				 * If that succeeded and the extended attribute
    3389				 * also exists in the inode, remove it from there.
   3390				 */
   3391				xi->xi_value = NULL;
   3392				xi->xi_value_len = 0;
   3393				xbs->not_found = -ENODATA;
   3394				ret = ocfs2_calc_xattr_set_need(inode,
   3395								di,
   3396								xi,
   3397								xis,
   3398								xbs,
   3399								NULL,
   3400								NULL,
   3401								&credits);
   3402				if (ret) {
   3403					mlog_errno(ret);
   3404					goto out;
   3405				}
   3406
   3407				ret = ocfs2_extend_trans(ctxt->handle, credits);
   3408				if (ret) {
   3409					mlog_errno(ret);
   3410					goto out;
   3411				}
   3412				ret = ocfs2_xattr_ibody_set(inode, xi,
   3413							    xis, ctxt);
   3414			}
   3415		}
   3416	}
   3417
   3418	if (!ret) {
   3419		/* Update inode ctime. */
   3420		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
   3421					      xis->inode_bh,
   3422					      OCFS2_JOURNAL_ACCESS_WRITE);
   3423		if (ret) {
   3424			mlog_errno(ret);
   3425			goto out;
   3426		}
   3427
   3428		inode->i_ctime = current_time(inode);
   3429		di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
   3430		di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
   3431		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
   3432	}
   3433out:
   3434	return ret;
   3435}
   3436
   3437/*
    3438 * This function is only called during inode creation to
    3439 * initialize the security/acl xattrs of the new inode.
    3440 * All transaction credits have been reserved in mknod.
   3441 */
   3442int ocfs2_xattr_set_handle(handle_t *handle,
   3443			   struct inode *inode,
   3444			   struct buffer_head *di_bh,
   3445			   int name_index,
   3446			   const char *name,
   3447			   const void *value,
   3448			   size_t value_len,
   3449			   int flags,
   3450			   struct ocfs2_alloc_context *meta_ac,
   3451			   struct ocfs2_alloc_context *data_ac)
   3452{
   3453	struct ocfs2_dinode *di;
   3454	int ret;
   3455
   3456	struct ocfs2_xattr_info xi = {
   3457		.xi_name_index = name_index,
   3458		.xi_name = name,
   3459		.xi_name_len = strlen(name),
   3460		.xi_value = value,
   3461		.xi_value_len = value_len,
   3462	};
   3463
   3464	struct ocfs2_xattr_search xis = {
   3465		.not_found = -ENODATA,
   3466	};
   3467
   3468	struct ocfs2_xattr_search xbs = {
   3469		.not_found = -ENODATA,
   3470	};
   3471
   3472	struct ocfs2_xattr_set_ctxt ctxt = {
   3473		.handle = handle,
   3474		.meta_ac = meta_ac,
   3475		.data_ac = data_ac,
   3476	};
   3477
   3478	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
   3479		return -EOPNOTSUPP;
   3480
   3481	/*
    3482	 * In the extreme case we may need an xattr bucket when the
    3483	 * block size is too small.  The credits for the bucket have
    3484	 * already been reserved in mknod.
   3485	 */
   3486	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
   3487		xbs.bucket = ocfs2_xattr_bucket_new(inode);
   3488		if (!xbs.bucket) {
   3489			mlog_errno(-ENOMEM);
   3490			return -ENOMEM;
   3491		}
   3492	}
   3493
   3494	xis.inode_bh = xbs.inode_bh = di_bh;
   3495	di = (struct ocfs2_dinode *)di_bh->b_data;
   3496
   3497	down_write(&OCFS2_I(inode)->ip_xattr_sem);
   3498
   3499	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
   3500	if (ret)
   3501		goto cleanup;
   3502	if (xis.not_found) {
   3503		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
   3504		if (ret)
   3505			goto cleanup;
   3506	}
   3507
   3508	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
   3509
   3510cleanup:
   3511	up_write(&OCFS2_I(inode)->ip_xattr_sem);
   3512	brelse(xbs.xattr_bh);
   3513	ocfs2_xattr_bucket_free(xbs.bucket);
   3514
   3515	return ret;
   3516}
   3517
   3518/*
   3519 * ocfs2_xattr_set()
   3520 *
   3521 * Set, replace or remove an extended attribute for this inode.
   3522 * value is NULL to remove an existing extended attribute, else either
   3523 * create or replace an extended attribute.
   3524 */
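        /*
         * Illustrative only (the attribute name and value below are
         * hypothetical, not taken from this file): a minimal sketch of how a
         * caller might use this entry point for a "user." attribute.
         *
         *	err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, "foo",
         *			      "bar", 3, 0);
         *
         * Passing a NULL value with value_len 0 removes the attribute;
         * XATTR_CREATE makes the call fail with -EEXIST if the attribute
         * already exists, and XATTR_REPLACE fails with -ENODATA if it does
         * not.
         */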
   3525int ocfs2_xattr_set(struct inode *inode,
   3526		    int name_index,
   3527		    const char *name,
   3528		    const void *value,
   3529		    size_t value_len,
   3530		    int flags)
   3531{
   3532	struct buffer_head *di_bh = NULL;
   3533	struct ocfs2_dinode *di;
   3534	int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
   3535	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   3536	struct inode *tl_inode = osb->osb_tl_inode;
   3537	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
   3538	struct ocfs2_refcount_tree *ref_tree = NULL;
   3539	struct ocfs2_lock_holder oh;
   3540
   3541	struct ocfs2_xattr_info xi = {
   3542		.xi_name_index = name_index,
   3543		.xi_name = name,
   3544		.xi_name_len = strlen(name),
   3545		.xi_value = value,
   3546		.xi_value_len = value_len,
   3547	};
   3548
   3549	struct ocfs2_xattr_search xis = {
   3550		.not_found = -ENODATA,
   3551	};
   3552
   3553	struct ocfs2_xattr_search xbs = {
   3554		.not_found = -ENODATA,
   3555	};
   3556
   3557	if (!ocfs2_supports_xattr(osb))
   3558		return -EOPNOTSUPP;
   3559
   3560	/*
   3561	 * Only xbs will be used on indexed trees.  xis doesn't need a
   3562	 * bucket.
   3563	 */
   3564	xbs.bucket = ocfs2_xattr_bucket_new(inode);
   3565	if (!xbs.bucket) {
   3566		mlog_errno(-ENOMEM);
   3567		return -ENOMEM;
   3568	}
   3569
   3570	had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
   3571	if (had_lock < 0) {
   3572		ret = had_lock;
   3573		mlog_errno(ret);
   3574		goto cleanup_nolock;
   3575	}
   3576	xis.inode_bh = xbs.inode_bh = di_bh;
   3577	di = (struct ocfs2_dinode *)di_bh->b_data;
   3578
   3579	down_write(&OCFS2_I(inode)->ip_xattr_sem);
   3580	/*
    3581	 * Scan the inode and the external block for an extended
    3582	 * attribute of the same name and collect search information.
   3583	 */
   3584	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
   3585	if (ret)
   3586		goto cleanup;
   3587	if (xis.not_found) {
   3588		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
   3589		if (ret)
   3590			goto cleanup;
   3591	}
   3592
   3593	if (xis.not_found && xbs.not_found) {
   3594		ret = -ENODATA;
   3595		if (flags & XATTR_REPLACE)
   3596			goto cleanup;
   3597		ret = 0;
   3598		if (!value)
   3599			goto cleanup;
   3600	} else {
   3601		ret = -EEXIST;
   3602		if (flags & XATTR_CREATE)
   3603			goto cleanup;
   3604	}
   3605
   3606	/* Check whether the value is refcounted and do some preparation. */
   3607	if (ocfs2_is_refcount_inode(inode) &&
   3608	    (!xis.not_found || !xbs.not_found)) {
   3609		ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
   3610						   &xis, &xbs, &ref_tree,
   3611						   &ref_meta, &ref_credits);
   3612		if (ret) {
   3613			mlog_errno(ret);
   3614			goto cleanup;
   3615		}
   3616	}
   3617
   3618	inode_lock(tl_inode);
   3619
   3620	if (ocfs2_truncate_log_needs_flush(osb)) {
   3621		ret = __ocfs2_flush_truncate_log(osb);
   3622		if (ret < 0) {
   3623			inode_unlock(tl_inode);
   3624			mlog_errno(ret);
   3625			goto cleanup;
   3626		}
   3627	}
   3628	inode_unlock(tl_inode);
   3629
   3630	ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
   3631					&xbs, &ctxt, ref_meta, &credits);
   3632	if (ret) {
   3633		mlog_errno(ret);
   3634		goto cleanup;
   3635	}
   3636
    3637	/* We need to update the inode's ctime field, so add credits for it. */
   3638	credits += OCFS2_INODE_UPDATE_CREDITS;
   3639	ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
   3640	if (IS_ERR(ctxt.handle)) {
   3641		ret = PTR_ERR(ctxt.handle);
   3642		mlog_errno(ret);
   3643		goto out_free_ac;
   3644	}
   3645
   3646	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
   3647	ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0);
   3648
   3649	ocfs2_commit_trans(osb, ctxt.handle);
   3650
   3651out_free_ac:
   3652	if (ctxt.data_ac)
   3653		ocfs2_free_alloc_context(ctxt.data_ac);
   3654	if (ctxt.meta_ac)
   3655		ocfs2_free_alloc_context(ctxt.meta_ac);
   3656	if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
   3657		ocfs2_schedule_truncate_log_flush(osb, 1);
   3658	ocfs2_run_deallocs(osb, &ctxt.dealloc);
   3659
   3660cleanup:
   3661	if (ref_tree)
   3662		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
   3663	up_write(&OCFS2_I(inode)->ip_xattr_sem);
   3664	if (!value && !ret) {
   3665		ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
   3666		if (ret)
   3667			mlog_errno(ret);
   3668	}
   3669	ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
   3670cleanup_nolock:
   3671	brelse(di_bh);
   3672	brelse(xbs.xattr_bh);
   3673	ocfs2_xattr_bucket_free(xbs.bucket);
   3674
   3675	return ret;
   3676}
   3677
   3678/*
    3679 * Find the xattr extent rec which may contain name_hash.
    3680 * e_cpos will be set to the first name hash of that xattr rec.
    3681 * el must be the ocfs2_xattr_block.xb_attrs.xb_root.xt_list.
   3682 */
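        /*
         * A worked example of the scan below (made-up numbers): if the leaf
         * extent list holds records whose e_cpos values are 0, 100 and 300,
         * a name_hash of 250 selects the record starting at e_cpos 100,
         * because the records are walked from the last one backwards and the
         * first record with e_cpos <= name_hash wins.
         */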
   3683static int ocfs2_xattr_get_rec(struct inode *inode,
   3684			       u32 name_hash,
   3685			       u64 *p_blkno,
   3686			       u32 *e_cpos,
   3687			       u32 *num_clusters,
   3688			       struct ocfs2_extent_list *el)
   3689{
   3690	int ret = 0, i;
   3691	struct buffer_head *eb_bh = NULL;
   3692	struct ocfs2_extent_block *eb;
   3693	struct ocfs2_extent_rec *rec = NULL;
   3694	u64 e_blkno = 0;
   3695
   3696	if (el->l_tree_depth) {
   3697		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
   3698				      &eb_bh);
   3699		if (ret) {
   3700			mlog_errno(ret);
   3701			goto out;
   3702		}
   3703
   3704		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
   3705		el = &eb->h_list;
   3706
   3707		if (el->l_tree_depth) {
   3708			ret = ocfs2_error(inode->i_sb,
   3709					  "Inode %lu has non zero tree depth in xattr tree block %llu\n",
   3710					  inode->i_ino,
   3711					  (unsigned long long)eb_bh->b_blocknr);
   3712			goto out;
   3713		}
   3714	}
   3715
   3716	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
   3717		rec = &el->l_recs[i];
   3718
   3719		if (le32_to_cpu(rec->e_cpos) <= name_hash) {
   3720			e_blkno = le64_to_cpu(rec->e_blkno);
   3721			break;
   3722		}
   3723	}
   3724
   3725	if (!e_blkno) {
   3726		ret = ocfs2_error(inode->i_sb, "Inode %lu has bad extent record (%u, %u, 0) in xattr\n",
   3727				  inode->i_ino,
   3728				  le32_to_cpu(rec->e_cpos),
   3729				  ocfs2_rec_clusters(el, rec));
   3730		goto out;
   3731	}
   3732
   3733	*p_blkno = le64_to_cpu(rec->e_blkno);
   3734	*num_clusters = le16_to_cpu(rec->e_leaf_clusters);
   3735	if (e_cpos)
   3736		*e_cpos = le32_to_cpu(rec->e_cpos);
   3737out:
   3738	brelse(eb_bh);
   3739	return ret;
   3740}
   3741
   3742typedef int (xattr_bucket_func)(struct inode *inode,
   3743				struct ocfs2_xattr_bucket *bucket,
   3744				void *para);
   3745
   3746static int ocfs2_find_xe_in_bucket(struct inode *inode,
   3747				   struct ocfs2_xattr_bucket *bucket,
   3748				   int name_index,
   3749				   const char *name,
   3750				   u32 name_hash,
   3751				   u16 *xe_index,
   3752				   int *found)
   3753{
   3754	int i, ret = 0, cmp = 1, block_off, new_offset;
   3755	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
   3756	size_t name_len = strlen(name);
   3757	struct ocfs2_xattr_entry *xe = NULL;
   3758	char *xe_name;
   3759
   3760	/*
   3761	 * We don't use binary search in the bucket because there
   3762	 * may be multiple entries with the same name hash.
   3763	 */
   3764	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
   3765		xe = &xh->xh_entries[i];
   3766
   3767		if (name_hash > le32_to_cpu(xe->xe_name_hash))
   3768			continue;
   3769		else if (name_hash < le32_to_cpu(xe->xe_name_hash))
   3770			break;
   3771
   3772		cmp = name_index - ocfs2_xattr_get_type(xe);
   3773		if (!cmp)
   3774			cmp = name_len - xe->xe_name_len;
   3775		if (cmp)
   3776			continue;
   3777
   3778		ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
   3779							xh,
   3780							i,
   3781							&block_off,
   3782							&new_offset);
   3783		if (ret) {
   3784			mlog_errno(ret);
   3785			break;
   3786		}
   3787
   3788
   3789		xe_name = bucket_block(bucket, block_off) + new_offset;
   3790		if (!memcmp(name, xe_name, name_len)) {
   3791			*xe_index = i;
   3792			*found = 1;
   3793			ret = 0;
   3794			break;
   3795		}
   3796	}
   3797
   3798	return ret;
   3799}
   3800
   3801/*
   3802 * Find the specified xattr entry in a series of buckets.
    3803 * The series starts at p_blkno and spans num_clusters clusters.
    3804 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
    3805 * the number of valid buckets.
    3806 *
    3807 * Return the bucket this xattr should reside in.  If the xattr's
    3808 * hash falls in the gap between two buckets, return the lower bucket.
   3809 */
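        /*
         * Binary-search sketch (illustrative numbers): with three buckets
         * whose first entry hashes are 10, 40 and 90, a search for name_hash
         * 55 probes the middle bucket first.  55 is not below 40, so that
         * bucket's blkno is remembered in lower_blkno; if its last entry
         * hashes to, say, 50, the search moves right, the bucket starting at
         * 90 is rejected by the first-entry check, and the loop ends with
         * lower_blkno still pointing at the middle bucket - which is where
         * the xattr would be found or inserted.
         */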
   3810static int ocfs2_xattr_bucket_find(struct inode *inode,
   3811				   int name_index,
   3812				   const char *name,
   3813				   u32 name_hash,
   3814				   u64 p_blkno,
   3815				   u32 first_hash,
   3816				   u32 num_clusters,
   3817				   struct ocfs2_xattr_search *xs)
   3818{
   3819	int ret, found = 0;
   3820	struct ocfs2_xattr_header *xh = NULL;
   3821	struct ocfs2_xattr_entry *xe = NULL;
   3822	u16 index = 0;
   3823	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
   3824	int low_bucket = 0, bucket, high_bucket;
   3825	struct ocfs2_xattr_bucket *search;
   3826	u64 blkno, lower_blkno = 0;
   3827
   3828	search = ocfs2_xattr_bucket_new(inode);
   3829	if (!search) {
   3830		ret = -ENOMEM;
   3831		mlog_errno(ret);
   3832		goto out;
   3833	}
   3834
   3835	ret = ocfs2_read_xattr_bucket(search, p_blkno);
   3836	if (ret) {
   3837		mlog_errno(ret);
   3838		goto out;
   3839	}
   3840
   3841	xh = bucket_xh(search);
   3842	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
   3843	while (low_bucket <= high_bucket) {
   3844		ocfs2_xattr_bucket_relse(search);
   3845
   3846		bucket = (low_bucket + high_bucket) / 2;
   3847		blkno = p_blkno + bucket * blk_per_bucket;
   3848		ret = ocfs2_read_xattr_bucket(search, blkno);
   3849		if (ret) {
   3850			mlog_errno(ret);
   3851			goto out;
   3852		}
   3853
   3854		xh = bucket_xh(search);
   3855		xe = &xh->xh_entries[0];
   3856		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
   3857			high_bucket = bucket - 1;
   3858			continue;
   3859		}
   3860
   3861		/*
    3862		 * Check whether the hash of the last entry in our
    3863		 * bucket is larger than the one we are searching for.
    3864		 * For an empty bucket, the last entry is also the first.
   3865		 */
   3866		if (xh->xh_count)
   3867			xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
   3868
    3869		/* Record lower_blkno, which may be the insertion place. */
   3870		lower_blkno = blkno;
   3871
   3872		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
   3873			low_bucket = bucket + 1;
   3874			continue;
   3875		}
   3876
    3877		/* The xattr we are looking for resides in this bucket, if it exists. */
   3878		ret = ocfs2_find_xe_in_bucket(inode, search,
   3879					      name_index, name, name_hash,
   3880					      &index, &found);
   3881		if (ret) {
   3882			mlog_errno(ret);
   3883			goto out;
   3884		}
   3885		break;
   3886	}
   3887
   3888	/*
   3889	 * Record the bucket we have found.
    3890	 * When the xattr's hash value falls in the gap between two buckets,
    3891	 * we always pick the previous (lower) bucket.
   3892	 */
   3893	if (!lower_blkno)
   3894		lower_blkno = p_blkno;
   3895
   3896	/* This should be in cache - we just read it during the search */
   3897	ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
   3898	if (ret) {
   3899		mlog_errno(ret);
   3900		goto out;
   3901	}
   3902
   3903	xs->header = bucket_xh(xs->bucket);
   3904	xs->base = bucket_block(xs->bucket, 0);
   3905	xs->end = xs->base + inode->i_sb->s_blocksize;
   3906
   3907	if (found) {
   3908		xs->here = &xs->header->xh_entries[index];
   3909		trace_ocfs2_xattr_bucket_find(OCFS2_I(inode)->ip_blkno,
   3910			name, name_index, name_hash,
   3911			(unsigned long long)bucket_blkno(xs->bucket),
   3912			index);
   3913	} else
   3914		ret = -ENODATA;
   3915
   3916out:
   3917	ocfs2_xattr_bucket_free(search);
   3918	return ret;
   3919}
   3920
   3921static int ocfs2_xattr_index_block_find(struct inode *inode,
   3922					struct buffer_head *root_bh,
   3923					int name_index,
   3924					const char *name,
   3925					struct ocfs2_xattr_search *xs)
   3926{
   3927	int ret;
   3928	struct ocfs2_xattr_block *xb =
   3929			(struct ocfs2_xattr_block *)root_bh->b_data;
   3930	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
   3931	struct ocfs2_extent_list *el = &xb_root->xt_list;
   3932	u64 p_blkno = 0;
   3933	u32 first_hash, num_clusters = 0;
   3934	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
   3935
   3936	if (le16_to_cpu(el->l_next_free_rec) == 0)
   3937		return -ENODATA;
   3938
   3939	trace_ocfs2_xattr_index_block_find(OCFS2_I(inode)->ip_blkno,
   3940					name, name_index, name_hash,
   3941					(unsigned long long)root_bh->b_blocknr,
   3942					-1);
   3943
   3944	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
   3945				  &num_clusters, el);
   3946	if (ret) {
   3947		mlog_errno(ret);
   3948		goto out;
   3949	}
   3950
   3951	BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
   3952
   3953	trace_ocfs2_xattr_index_block_find_rec(OCFS2_I(inode)->ip_blkno,
   3954					name, name_index, first_hash,
   3955					(unsigned long long)p_blkno,
   3956					num_clusters);
   3957
   3958	ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
   3959				      p_blkno, first_hash, num_clusters, xs);
   3960
   3961out:
   3962	return ret;
   3963}
   3964
   3965static int ocfs2_iterate_xattr_buckets(struct inode *inode,
   3966				       u64 blkno,
   3967				       u32 clusters,
   3968				       xattr_bucket_func *func,
   3969				       void *para)
   3970{
   3971	int i, ret = 0;
   3972	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
   3973	u32 num_buckets = clusters * bpc;
   3974	struct ocfs2_xattr_bucket *bucket;
   3975
   3976	bucket = ocfs2_xattr_bucket_new(inode);
   3977	if (!bucket) {
   3978		mlog_errno(-ENOMEM);
   3979		return -ENOMEM;
   3980	}
   3981
   3982	trace_ocfs2_iterate_xattr_buckets(
   3983		(unsigned long long)OCFS2_I(inode)->ip_blkno,
   3984		(unsigned long long)blkno, clusters);
   3985
   3986	for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
   3987		ret = ocfs2_read_xattr_bucket(bucket, blkno);
   3988		if (ret) {
   3989			mlog_errno(ret);
   3990			break;
   3991		}
   3992
   3993		/*
    3994		 * The real number of buckets in this series of blocks is
    3995		 * stored in the first bucket.
   3996		 */
   3997		if (i == 0)
   3998			num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
   3999
   4000		trace_ocfs2_iterate_xattr_bucket((unsigned long long)blkno,
   4001		     le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
   4002		if (func) {
   4003			ret = func(inode, bucket, para);
   4004			if (ret && ret != -ERANGE)
   4005				mlog_errno(ret);
   4006			/* Fall through to bucket_relse() */
   4007		}
   4008
   4009		ocfs2_xattr_bucket_relse(bucket);
   4010		if (ret)
   4011			break;
   4012	}
   4013
   4014	ocfs2_xattr_bucket_free(bucket);
   4015	return ret;
   4016}
   4017
   4018struct ocfs2_xattr_tree_list {
   4019	char *buffer;
   4020	size_t buffer_size;
   4021	size_t result;
   4022};
   4023
   4024static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
   4025					     struct ocfs2_xattr_header *xh,
   4026					     int index,
   4027					     int *block_off,
   4028					     int *new_offset)
   4029{
   4030	u16 name_offset;
   4031
   4032	if (index < 0 || index >= le16_to_cpu(xh->xh_count))
   4033		return -EINVAL;
   4034
   4035	name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
   4036
   4037	*block_off = name_offset >> sb->s_blocksize_bits;
   4038	*new_offset = name_offset % sb->s_blocksize;
   4039
   4040	return 0;
   4041}
   4042
   4043static int ocfs2_list_xattr_bucket(struct inode *inode,
   4044				   struct ocfs2_xattr_bucket *bucket,
   4045				   void *para)
   4046{
   4047	int ret = 0, type;
   4048	struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
   4049	int i, block_off, new_offset;
   4050	const char *name;
   4051
   4052	for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
   4053		struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
   4054		type = ocfs2_xattr_get_type(entry);
   4055
   4056		ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
   4057							bucket_xh(bucket),
   4058							i,
   4059							&block_off,
   4060							&new_offset);
   4061		if (ret)
   4062			break;
   4063
   4064		name = (const char *)bucket_block(bucket, block_off) +
   4065			new_offset;
   4066		ret = ocfs2_xattr_list_entry(inode->i_sb,
   4067					     xl->buffer,
   4068					     xl->buffer_size,
   4069					     &xl->result,
   4070					     type, name,
   4071					     entry->xe_name_len);
   4072		if (ret)
   4073			break;
   4074	}
   4075
   4076	return ret;
   4077}
   4078
   4079static int ocfs2_iterate_xattr_index_block(struct inode *inode,
   4080					   struct buffer_head *blk_bh,
   4081					   xattr_tree_rec_func *rec_func,
   4082					   void *para)
   4083{
   4084	struct ocfs2_xattr_block *xb =
   4085			(struct ocfs2_xattr_block *)blk_bh->b_data;
   4086	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
   4087	int ret = 0;
   4088	u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
   4089	u64 p_blkno = 0;
   4090
   4091	if (!el->l_next_free_rec || !rec_func)
   4092		return 0;
   4093
   4094	while (name_hash > 0) {
   4095		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
   4096					  &e_cpos, &num_clusters, el);
   4097		if (ret) {
   4098			mlog_errno(ret);
   4099			break;
   4100		}
   4101
   4102		ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
   4103			       num_clusters, para);
   4104		if (ret) {
   4105			if (ret != -ERANGE)
   4106				mlog_errno(ret);
   4107			break;
   4108		}
   4109
   4110		if (e_cpos == 0)
   4111			break;
   4112
   4113		name_hash = e_cpos - 1;
   4114	}
   4115
   4116	return ret;
   4117
   4118}
   4119
   4120static int ocfs2_list_xattr_tree_rec(struct inode *inode,
   4121				     struct buffer_head *root_bh,
   4122				     u64 blkno, u32 cpos, u32 len, void *para)
   4123{
   4124	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
   4125					   ocfs2_list_xattr_bucket, para);
   4126}
   4127
   4128static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
   4129					     struct buffer_head *blk_bh,
   4130					     char *buffer,
   4131					     size_t buffer_size)
   4132{
   4133	int ret;
   4134	struct ocfs2_xattr_tree_list xl = {
   4135		.buffer = buffer,
   4136		.buffer_size = buffer_size,
   4137		.result = 0,
   4138	};
   4139
   4140	ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
   4141					      ocfs2_list_xattr_tree_rec, &xl);
   4142	if (ret) {
   4143		mlog_errno(ret);
   4144		goto out;
   4145	}
   4146
   4147	ret = xl.result;
   4148out:
   4149	return ret;
   4150}
   4151
   4152static int cmp_xe(const void *a, const void *b)
   4153{
   4154	const struct ocfs2_xattr_entry *l = a, *r = b;
   4155	u32 l_hash = le32_to_cpu(l->xe_name_hash);
   4156	u32 r_hash = le32_to_cpu(r->xe_name_hash);
   4157
   4158	if (l_hash > r_hash)
   4159		return 1;
   4160	if (l_hash < r_hash)
   4161		return -1;
   4162	return 0;
   4163}
   4164
   4165static void swap_xe(void *a, void *b, int size)
   4166{
   4167	struct ocfs2_xattr_entry *l = a, *r = b, tmp;
   4168
   4169	tmp = *l;
   4170	memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
   4171	memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
   4172}
   4173
   4174/*
    4175 * When the ocfs2_xattr_block fills up, a new bucket is created
    4176 * and all the xattr entries are moved into it.
   4177 * The header goes at the start of the bucket, and the names+values are
   4178 * filled from the end.  This is why *target starts as the last buffer.
   4179 * Note: we need to sort the entries since they are not saved in order
   4180 * in the ocfs2_xattr_block.
   4181 */
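        /*
         * Offset bookkeeping, spelled out: an entry whose xe_name_offset was
         * X (relative to the header embedded in the xattr block) has its
         * name/value bytes copied into the last block of the bucket at the
         * same in-block position, i.e. at bucket offset
         *
         *	X + offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header)
         *	  + OCFS2_XATTR_BUCKET_SIZE - blocksize
         *
         * which is exactly the adjustment applied to each xe_name_offset in
         * the code below.
         */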
   4182static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
   4183					   struct buffer_head *xb_bh,
   4184					   struct ocfs2_xattr_bucket *bucket)
   4185{
   4186	int i, blocksize = inode->i_sb->s_blocksize;
   4187	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
   4188	u16 offset, size, off_change;
   4189	struct ocfs2_xattr_entry *xe;
   4190	struct ocfs2_xattr_block *xb =
   4191				(struct ocfs2_xattr_block *)xb_bh->b_data;
   4192	struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
   4193	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
   4194	u16 count = le16_to_cpu(xb_xh->xh_count);
   4195	char *src = xb_bh->b_data;
   4196	char *target = bucket_block(bucket, blks - 1);
   4197
   4198	trace_ocfs2_cp_xattr_block_to_bucket_begin(
   4199				(unsigned long long)xb_bh->b_blocknr,
   4200				(unsigned long long)bucket_blkno(bucket));
   4201
   4202	for (i = 0; i < blks; i++)
   4203		memset(bucket_block(bucket, i), 0, blocksize);
   4204
   4205	/*
   4206	 * Since the xe_name_offset is based on ocfs2_xattr_header,
    4207	 * there is an offset change corresponding to the change of
   4208	 * ocfs2_xattr_header's position.
   4209	 */
   4210	off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
   4211	xe = &xb_xh->xh_entries[count - 1];
   4212	offset = le16_to_cpu(xe->xe_name_offset) + off_change;
   4213	size = blocksize - offset;
   4214
   4215	/* copy all the names and values. */
   4216	memcpy(target + offset, src + offset, size);
   4217
   4218	/* Init new header now. */
   4219	xh->xh_count = xb_xh->xh_count;
   4220	xh->xh_num_buckets = cpu_to_le16(1);
   4221	xh->xh_name_value_len = cpu_to_le16(size);
   4222	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
   4223
   4224	/* copy all the entries. */
   4225	target = bucket_block(bucket, 0);
   4226	offset = offsetof(struct ocfs2_xattr_header, xh_entries);
   4227	size = count * sizeof(struct ocfs2_xattr_entry);
   4228	memcpy(target + offset, (char *)xb_xh + offset, size);
   4229
   4230	/* Change the xe offset for all the xe because of the move. */
   4231	off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
   4232		 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
   4233	for (i = 0; i < count; i++)
   4234		le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
   4235
   4236	trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change);
   4237
   4238	sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
   4239	     cmp_xe, swap_xe);
   4240}
   4241
   4242/*
    4243 * After we move xattrs from the block to the index b-tree, we have
    4244 * to update ocfs2_xattr_search to point at the new xe and base.
    4245 *
    4246 * When the entry is in the xattr block, xattr_bh indicates where it is
    4247 * stored.  When the entry is in the index b-tree, "bucket" indicates
    4248 * the real place of the xattr.
   4249 */
   4250static void ocfs2_xattr_update_xattr_search(struct inode *inode,
   4251					    struct ocfs2_xattr_search *xs,
   4252					    struct buffer_head *old_bh)
   4253{
   4254	char *buf = old_bh->b_data;
   4255	struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
   4256	struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
   4257	int i;
   4258
   4259	xs->header = bucket_xh(xs->bucket);
   4260	xs->base = bucket_block(xs->bucket, 0);
   4261	xs->end = xs->base + inode->i_sb->s_blocksize;
   4262
   4263	if (xs->not_found)
   4264		return;
   4265
   4266	i = xs->here - old_xh->xh_entries;
   4267	xs->here = &xs->header->xh_entries[i];
   4268}
   4269
   4270static int ocfs2_xattr_create_index_block(struct inode *inode,
   4271					  struct ocfs2_xattr_search *xs,
   4272					  struct ocfs2_xattr_set_ctxt *ctxt)
   4273{
   4274	int ret;
   4275	u32 bit_off, len;
   4276	u64 blkno;
   4277	handle_t *handle = ctxt->handle;
   4278	struct ocfs2_inode_info *oi = OCFS2_I(inode);
   4279	struct buffer_head *xb_bh = xs->xattr_bh;
   4280	struct ocfs2_xattr_block *xb =
   4281			(struct ocfs2_xattr_block *)xb_bh->b_data;
   4282	struct ocfs2_xattr_tree_root *xr;
   4283	u16 xb_flags = le16_to_cpu(xb->xb_flags);
   4284
   4285	trace_ocfs2_xattr_create_index_block_begin(
   4286				(unsigned long long)xb_bh->b_blocknr);
   4287
   4288	BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
   4289	BUG_ON(!xs->bucket);
   4290
   4291	/*
   4292	 * XXX:
   4293	 * We can use this lock for now, and maybe move to a dedicated mutex
   4294	 * if performance becomes a problem later.
   4295	 */
   4296	down_write(&oi->ip_alloc_sem);
   4297
   4298	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
   4299				      OCFS2_JOURNAL_ACCESS_WRITE);
   4300	if (ret) {
   4301		mlog_errno(ret);
   4302		goto out;
   4303	}
   4304
   4305	ret = __ocfs2_claim_clusters(handle, ctxt->data_ac,
   4306				     1, 1, &bit_off, &len);
   4307	if (ret) {
   4308		mlog_errno(ret);
   4309		goto out;
   4310	}
   4311
   4312	/*
    4313	 * The bucket may span several blocks, and we will only touch
    4314	 * the first and the last block of the whole bucket (one for the
    4315	 * entries and one for the data).
   4316	 */
   4317	blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
   4318
   4319	trace_ocfs2_xattr_create_index_block((unsigned long long)blkno);
   4320
   4321	ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1);
   4322	if (ret) {
   4323		mlog_errno(ret);
   4324		goto out;
   4325	}
   4326
   4327	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
   4328						OCFS2_JOURNAL_ACCESS_CREATE);
   4329	if (ret) {
   4330		mlog_errno(ret);
   4331		goto out;
   4332	}
   4333
   4334	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
   4335	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
   4336
   4337	ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
   4338
   4339	/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
   4340	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
   4341	       offsetof(struct ocfs2_xattr_block, xb_attrs));
   4342
   4343	xr = &xb->xb_attrs.xb_root;
   4344	xr->xt_clusters = cpu_to_le32(1);
   4345	xr->xt_last_eb_blk = 0;
   4346	xr->xt_list.l_tree_depth = 0;
   4347	xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
   4348	xr->xt_list.l_next_free_rec = cpu_to_le16(1);
   4349
   4350	xr->xt_list.l_recs[0].e_cpos = 0;
   4351	xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
   4352	xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
   4353
   4354	xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
   4355
   4356	ocfs2_journal_dirty(handle, xb_bh);
   4357
   4358out:
   4359	up_write(&oi->ip_alloc_sem);
   4360
   4361	return ret;
   4362}
   4363
   4364static int cmp_xe_offset(const void *a, const void *b)
   4365{
   4366	const struct ocfs2_xattr_entry *l = a, *r = b;
   4367	u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
   4368	u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
   4369
   4370	if (l_name_offset < r_name_offset)
   4371		return 1;
   4372	if (l_name_offset > r_name_offset)
   4373		return -1;
   4374	return 0;
   4375}
   4376
   4377/*
    4378 * Defragment an xattr bucket if we find that it has holes
    4379 * between name/value pairs.
    4380 * We move all the name/value pairs to the end of the bucket
    4381 * so that we free up some space for insertion.
   4382 */
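        /*
         * Packing sketch (illustrative layout): if the bucket holds three
         * name/value regions at offsets 1000, 2000 and 3000 with holes in
         * between, they are moved largest-offset-first so that they end up
         * flush against OCFS2_XATTR_BUCKET_SIZE; the gap between
         * xh_free_start and the first packed region is then zeroed and
         * becomes one contiguous free area.
         */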
   4383static int ocfs2_defrag_xattr_bucket(struct inode *inode,
   4384				     handle_t *handle,
   4385				     struct ocfs2_xattr_bucket *bucket)
   4386{
   4387	int ret, i;
   4388	size_t end, offset, len;
   4389	struct ocfs2_xattr_header *xh;
   4390	char *entries, *buf, *bucket_buf = NULL;
   4391	u64 blkno = bucket_blkno(bucket);
   4392	u16 xh_free_start;
   4393	size_t blocksize = inode->i_sb->s_blocksize;
   4394	struct ocfs2_xattr_entry *xe;
   4395
   4396	/*
    4397	 * In order to make the operation more efficient and generic,
    4398	 * we copy all the blocks into one contiguous buffer and do the
    4399	 * defragmentation there, so that if anything goes wrong we do
    4400	 * not touch the real blocks.
   4401	 */
   4402	bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
   4403	if (!bucket_buf) {
   4404		ret = -EIO;
   4405		goto out;
   4406	}
   4407
   4408	buf = bucket_buf;
   4409	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
   4410		memcpy(buf, bucket_block(bucket, i), blocksize);
   4411
   4412	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
   4413						OCFS2_JOURNAL_ACCESS_WRITE);
   4414	if (ret < 0) {
   4415		mlog_errno(ret);
   4416		goto out;
   4417	}
   4418
   4419	xh = (struct ocfs2_xattr_header *)bucket_buf;
   4420	entries = (char *)xh->xh_entries;
   4421	xh_free_start = le16_to_cpu(xh->xh_free_start);
   4422
   4423	trace_ocfs2_defrag_xattr_bucket(
   4424	     (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
   4425	     xh_free_start, le16_to_cpu(xh->xh_name_value_len));
   4426
   4427	/*
    4428	 * Sort all the entries by their offset.
    4429	 * The largest will be first, so that we can
    4430	 * move them to the end one by one.
   4431	 */
   4432	sort(entries, le16_to_cpu(xh->xh_count),
   4433	     sizeof(struct ocfs2_xattr_entry),
   4434	     cmp_xe_offset, swap_xe);
   4435
   4436	/* Move all name/values to the end of the bucket. */
   4437	xe = xh->xh_entries;
   4438	end = OCFS2_XATTR_BUCKET_SIZE;
   4439	for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
   4440		offset = le16_to_cpu(xe->xe_name_offset);
   4441		len = namevalue_size_xe(xe);
   4442
   4443		/*
    4444		 * We must make sure that the name/value pair
    4445		 * exists within a single block.  So adjust end to
   4446		 * the previous block end if needed.
   4447		 */
   4448		if (((end - len) / blocksize !=
   4449			(end - 1) / blocksize))
   4450			end = end - end % blocksize;
   4451
   4452		if (end > offset + len) {
   4453			memmove(bucket_buf + end - len,
   4454				bucket_buf + offset, len);
   4455			xe->xe_name_offset = cpu_to_le16(end - len);
   4456		}
   4457
   4458		mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
   4459				"bucket %llu\n", (unsigned long long)blkno);
   4460
   4461		end -= len;
   4462	}
   4463
   4464	mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
   4465			"bucket %llu\n", (unsigned long long)blkno);
   4466
   4467	if (xh_free_start == end)
   4468		goto out;
   4469
   4470	memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
   4471	xh->xh_free_start = cpu_to_le16(end);
   4472
   4473	/* sort the entries by their name_hash. */
   4474	sort(entries, le16_to_cpu(xh->xh_count),
   4475	     sizeof(struct ocfs2_xattr_entry),
   4476	     cmp_xe, swap_xe);
   4477
   4478	buf = bucket_buf;
   4479	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
   4480		memcpy(bucket_block(bucket, i), buf, blocksize);
   4481	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
   4482
   4483out:
   4484	kfree(bucket_buf);
   4485	return ret;
   4486}
   4487
   4488/*
   4489 * prev_blkno points to the start of an existing extent.  new_blkno
   4490 * points to a newly allocated extent.  Because we know each of our
    4491 * clusters contains more than one bucket, we can easily split one cluster
   4492 * at a bucket boundary.  So we take the last cluster of the existing
   4493 * extent and split it down the middle.  We move the last half of the
   4494 * buckets in the last cluster of the existing extent over to the new
   4495 * extent.
   4496 *
   4497 * first_bh is the buffer at prev_blkno so we can update the existing
    4498 * extent's bucket count.  header_bh is the bucket where we were hoping
   4499 * to insert our xattr.  If the bucket move places the target in the new
   4500 * extent, we'll update first_bh and header_bh after modifying the old
   4501 * extent.
   4502 *
   4503 * first_hash will be set as the 1st xe's name_hash in the new extent.
   4504 */
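        /*
         * Sketch (illustrative, assuming 4 buckets per cluster): if the last
         * cluster of the old extent holds buckets B0-B3, to_move is 2, so B2
         * and B3 are copied to the start of the new extent.  If the target
         * bucket was B2 or B3, "first" and "target" are re-read from the new
         * extent so the caller keeps pointing at the right buckets.
         */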
   4505static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
   4506					       handle_t *handle,
   4507					       struct ocfs2_xattr_bucket *first,
   4508					       struct ocfs2_xattr_bucket *target,
   4509					       u64 new_blkno,
   4510					       u32 num_clusters,
   4511					       u32 *first_hash)
   4512{
   4513	int ret;
   4514	struct super_block *sb = inode->i_sb;
   4515	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
   4516	int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
   4517	int to_move = num_buckets / 2;
   4518	u64 src_blkno;
   4519	u64 last_cluster_blkno = bucket_blkno(first) +
   4520		((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
   4521
   4522	BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
   4523	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
   4524
   4525	trace_ocfs2_mv_xattr_bucket_cross_cluster(
   4526				(unsigned long long)last_cluster_blkno,
   4527				(unsigned long long)new_blkno);
   4528
   4529	ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
   4530				     last_cluster_blkno, new_blkno,
   4531				     to_move, first_hash);
   4532	if (ret) {
   4533		mlog_errno(ret);
   4534		goto out;
   4535	}
   4536
   4537	/* This is the first bucket that got moved */
   4538	src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
   4539
   4540	/*
   4541	 * If the target bucket was part of the moved buckets, we need to
   4542	 * update first and target.
   4543	 */
   4544	if (bucket_blkno(target) >= src_blkno) {
   4545		/* Find the block for the new target bucket */
   4546		src_blkno = new_blkno +
   4547			(bucket_blkno(target) - src_blkno);
   4548
   4549		ocfs2_xattr_bucket_relse(first);
   4550		ocfs2_xattr_bucket_relse(target);
   4551
   4552		/*
   4553		 * These shouldn't fail - the buffers are in the
   4554		 * journal from ocfs2_cp_xattr_bucket().
   4555		 */
   4556		ret = ocfs2_read_xattr_bucket(first, new_blkno);
   4557		if (ret) {
   4558			mlog_errno(ret);
   4559			goto out;
   4560		}
   4561		ret = ocfs2_read_xattr_bucket(target, src_blkno);
   4562		if (ret)
   4563			mlog_errno(ret);
   4564
   4565	}
   4566
   4567out:
   4568	return ret;
   4569}
   4570
   4571/*
   4572 * Find the suitable pos when we divide a bucket into 2.
   4573 * We have to make sure the xattrs with the same hash value exist
   4574 * in the same bucket.
   4575 *
   4576 * If this ocfs2_xattr_header covers more than one hash value, find a
   4577 * place where the hash value changes.  Try to find the most even split.
   4578 * The most common case is that all entries have different hash values,
   4579 * and the first check we make will find a place to split.
   4580 */
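        /*
         * Worked example (made-up hashes): for entries hashed 5, 5, 7, 9 the
         * middle index is 2; at delta 0 the pair (entries[1], entries[2])
         * already differs (5 vs 7), so the split point returned is 2 and the
         * second bucket would start with the entries hashed 7 and 9.  If
         * every entry hashed to the same value, count itself is returned and
         * the caller creates an empty new bucket instead.
         */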
   4581static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
   4582{
   4583	struct ocfs2_xattr_entry *entries = xh->xh_entries;
   4584	int count = le16_to_cpu(xh->xh_count);
   4585	int delta, middle = count / 2;
   4586
   4587	/*
   4588	 * We start at the middle.  Each step gets farther away in both
   4589	 * directions.  We therefore hit the change in hash value
   4590	 * nearest to the middle.  Note that this loop does not execute for
   4591	 * count < 2.
   4592	 */
   4593	for (delta = 0; delta < middle; delta++) {
   4594		/* Let's check delta earlier than middle */
   4595		if (cmp_xe(&entries[middle - delta - 1],
   4596			   &entries[middle - delta]))
   4597			return middle - delta;
   4598
   4599		/* For even counts, don't walk off the end */
   4600		if ((middle + delta + 1) == count)
   4601			continue;
   4602
   4603		/* Now try delta past middle */
   4604		if (cmp_xe(&entries[middle + delta],
   4605			   &entries[middle + delta + 1]))
   4606			return middle + delta + 1;
   4607	}
   4608
   4609	/* Every entry had the same hash */
   4610	return count;
   4611}
   4612
   4613/*
    4614 * Move some xattrs from the old bucket (blk) to the new bucket (new_blk).
   4615 * first_hash will record the 1st hash of the new bucket.
   4616 *
   4617 * Normally half of the xattrs will be moved.  But we have to make
   4618 * sure that the xattrs with the same hash value are stored in the
   4619 * same bucket. If all the xattrs in this bucket have the same hash
   4620 * value, the new bucket will be initialized as an empty one and the
   4621 * first_hash will be initialized as (hash_value+1).
   4622 */
   4623static int ocfs2_divide_xattr_bucket(struct inode *inode,
   4624				    handle_t *handle,
   4625				    u64 blk,
   4626				    u64 new_blk,
   4627				    u32 *first_hash,
   4628				    int new_bucket_head)
   4629{
   4630	int ret, i;
   4631	int count, start, len, name_value_len = 0, name_offset = 0;
   4632	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
   4633	struct ocfs2_xattr_header *xh;
   4634	struct ocfs2_xattr_entry *xe;
   4635	int blocksize = inode->i_sb->s_blocksize;
   4636
   4637	trace_ocfs2_divide_xattr_bucket_begin((unsigned long long)blk,
   4638					      (unsigned long long)new_blk);
   4639
   4640	s_bucket = ocfs2_xattr_bucket_new(inode);
   4641	t_bucket = ocfs2_xattr_bucket_new(inode);
   4642	if (!s_bucket || !t_bucket) {
   4643		ret = -ENOMEM;
   4644		mlog_errno(ret);
   4645		goto out;
   4646	}
   4647
   4648	ret = ocfs2_read_xattr_bucket(s_bucket, blk);
   4649	if (ret) {
   4650		mlog_errno(ret);
   4651		goto out;
   4652	}
   4653
   4654	ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
   4655						OCFS2_JOURNAL_ACCESS_WRITE);
   4656	if (ret) {
   4657		mlog_errno(ret);
   4658		goto out;
   4659	}
   4660
   4661	/*
   4662	 * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
   4663	 * there's no need to read it.
   4664	 */
   4665	ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head);
   4666	if (ret) {
   4667		mlog_errno(ret);
   4668		goto out;
   4669	}
   4670
   4671	/*
   4672	 * Hey, if we're overwriting t_bucket, what difference does
   4673	 * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
   4674	 * same part of ocfs2_cp_xattr_bucket().
   4675	 */
   4676	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
   4677						new_bucket_head ?
   4678						OCFS2_JOURNAL_ACCESS_CREATE :
   4679						OCFS2_JOURNAL_ACCESS_WRITE);
   4680	if (ret) {
   4681		mlog_errno(ret);
   4682		goto out;
   4683	}
   4684
   4685	xh = bucket_xh(s_bucket);
   4686	count = le16_to_cpu(xh->xh_count);
   4687	start = ocfs2_xattr_find_divide_pos(xh);
   4688
   4689	if (start == count) {
   4690		xe = &xh->xh_entries[start-1];
   4691
   4692		/*
    4693		 * Initialize a new empty bucket here.
   4694		 * The hash value is set as one larger than
   4695		 * that of the last entry in the previous bucket.
   4696		 */
   4697		for (i = 0; i < t_bucket->bu_blocks; i++)
   4698			memset(bucket_block(t_bucket, i), 0, blocksize);
   4699
   4700		xh = bucket_xh(t_bucket);
   4701		xh->xh_free_start = cpu_to_le16(blocksize);
   4702		xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
   4703		le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
   4704
   4705		goto set_num_buckets;
   4706	}
   4707
    4708	/* Copy the whole source bucket into the new one first. */
   4709	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
   4710
   4711	/* update the new bucket. */
   4712	xh = bucket_xh(t_bucket);
   4713
   4714	/*
   4715	 * Calculate the total name/value len and xh_free_start for
   4716	 * the old bucket first.
   4717	 */
   4718	name_offset = OCFS2_XATTR_BUCKET_SIZE;
   4719	name_value_len = 0;
   4720	for (i = 0; i < start; i++) {
   4721		xe = &xh->xh_entries[i];
   4722		name_value_len += namevalue_size_xe(xe);
   4723		if (le16_to_cpu(xe->xe_name_offset) < name_offset)
   4724			name_offset = le16_to_cpu(xe->xe_name_offset);
   4725	}
   4726
   4727	/*
   4728	 * Now begin the modification to the new bucket.
   4729	 *
    4730	 * In the new bucket, we just move the xattr entries to the beginning
    4731	 * and don't touch the names/values.  So there will be some holes in the
   4732	 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
   4733	 * called.
   4734	 */
   4735	xe = &xh->xh_entries[start];
   4736	len = sizeof(struct ocfs2_xattr_entry) * (count - start);
   4737	trace_ocfs2_divide_xattr_bucket_move(len,
   4738			(int)((char *)xe - (char *)xh),
   4739			(int)((char *)xh->xh_entries - (char *)xh));
   4740	memmove((char *)xh->xh_entries, (char *)xe, len);
   4741	xe = &xh->xh_entries[count - start];
   4742	len = sizeof(struct ocfs2_xattr_entry) * start;
   4743	memset((char *)xe, 0, len);
   4744
   4745	le16_add_cpu(&xh->xh_count, -start);
   4746	le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
   4747
   4748	/* Calculate xh_free_start for the new bucket. */
   4749	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
   4750	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
   4751		xe = &xh->xh_entries[i];
   4752		if (le16_to_cpu(xe->xe_name_offset) <
   4753		    le16_to_cpu(xh->xh_free_start))
   4754			xh->xh_free_start = xe->xe_name_offset;
   4755	}
   4756
   4757set_num_buckets:
   4758	/* set xh->xh_num_buckets for the new xh. */
   4759	if (new_bucket_head)
   4760		xh->xh_num_buckets = cpu_to_le16(1);
   4761	else
   4762		xh->xh_num_buckets = 0;
   4763
   4764	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
   4765
   4766	/* store the first_hash of the new bucket. */
   4767	if (first_hash)
   4768		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
   4769
   4770	/*
   4771	 * Now only update the 1st block of the old bucket.  If we
   4772	 * just added a new empty bucket, there is no need to modify
   4773	 * it.
   4774	 */
   4775	if (start == count)
   4776		goto out;
   4777
   4778	xh = bucket_xh(s_bucket);
   4779	memset(&xh->xh_entries[start], 0,
   4780	       sizeof(struct ocfs2_xattr_entry) * (count - start));
   4781	xh->xh_count = cpu_to_le16(start);
   4782	xh->xh_free_start = cpu_to_le16(name_offset);
   4783	xh->xh_name_value_len = cpu_to_le16(name_value_len);
   4784
   4785	ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
   4786
   4787out:
   4788	ocfs2_xattr_bucket_free(s_bucket);
   4789	ocfs2_xattr_bucket_free(t_bucket);
   4790
   4791	return ret;
   4792}
   4793
   4794/*
    4795 * Copy the xattrs from one bucket to another.
   4796 *
   4797 * The caller must make sure that the journal transaction
   4798 * has enough space for journaling.
   4799 */
   4800static int ocfs2_cp_xattr_bucket(struct inode *inode,
   4801				 handle_t *handle,
   4802				 u64 s_blkno,
   4803				 u64 t_blkno,
   4804				 int t_is_new)
   4805{
   4806	int ret;
   4807	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
   4808
   4809	BUG_ON(s_blkno == t_blkno);
   4810
   4811	trace_ocfs2_cp_xattr_bucket((unsigned long long)s_blkno,
   4812				    (unsigned long long)t_blkno,
   4813				    t_is_new);
   4814
   4815	s_bucket = ocfs2_xattr_bucket_new(inode);
   4816	t_bucket = ocfs2_xattr_bucket_new(inode);
   4817	if (!s_bucket || !t_bucket) {
   4818		ret = -ENOMEM;
   4819		mlog_errno(ret);
   4820		goto out;
   4821	}
   4822
   4823	ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
   4824	if (ret)
   4825		goto out;
   4826
   4827	/*
   4828	 * Even if !t_is_new, we're overwriting t_bucket.  Thus,
   4829	 * there's no need to read it.
   4830	 */
   4831	ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new);
   4832	if (ret)
   4833		goto out;
   4834
   4835	/*
   4836	 * Hey, if we're overwriting t_bucket, what difference does
   4837	 * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
   4838	 * cluster to fill, we came here from
   4839	 * ocfs2_mv_xattr_buckets(), and it is really new -
   4840	 * ACCESS_CREATE is required.  But we also might have moved data
   4841	 * out of t_bucket before extending back into it.
   4842	 * ocfs2_add_new_xattr_bucket() can do this - its call to
   4843	 * ocfs2_add_new_xattr_cluster() may have created a new extent
   4844	 * and copied out the end of the old extent.  Then it re-extends
   4845	 * the old extent back to create space for new xattrs.  That's
   4846	 * how we get here, and the bucket isn't really new.
   4847	 */
   4848	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
   4849						t_is_new ?
   4850						OCFS2_JOURNAL_ACCESS_CREATE :
   4851						OCFS2_JOURNAL_ACCESS_WRITE);
   4852	if (ret)
   4853		goto out;
   4854
   4855	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
   4856	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
   4857
   4858out:
   4859	ocfs2_xattr_bucket_free(t_bucket);
   4860	ocfs2_xattr_bucket_free(s_bucket);
   4861
   4862	return ret;
   4863}
   4864
   4865/*
   4866 * src_blk points to the start of an existing extent.  last_blk points to
    4867 * the last cluster in that extent.  to_blk points to a newly allocated
   4868 * extent.  We copy the buckets from the cluster at last_blk to the new
   4869 * extent.  If start_bucket is non-zero, we skip that many buckets before
   4870 * we start copying.  The new extent's xh_num_buckets gets set to the
   4871 * number of buckets we copied.  The old extent's xh_num_buckets shrinks
   4872 * by the same amount.
   4873 */
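        /*
         * Numeric sketch (assuming 16 buckets per cluster): with start_bucket
         * equal to 8, the loop below skips the first 8 buckets of the cluster
         * at last_blk and copies the remaining 8 to to_blk; the old extent's
         * xh_num_buckets then drops by 8 and the new extent's first bucket
         * reports 8.
         */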
   4874static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
   4875				  u64 src_blk, u64 last_blk, u64 to_blk,
   4876				  unsigned int start_bucket,
   4877				  u32 *first_hash)
   4878{
   4879	int i, ret, credits;
   4880	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   4881	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
   4882	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
   4883	struct ocfs2_xattr_bucket *old_first, *new_first;
   4884
   4885	trace_ocfs2_mv_xattr_buckets((unsigned long long)last_blk,
   4886				     (unsigned long long)to_blk);
   4887
   4888	BUG_ON(start_bucket >= num_buckets);
   4889	if (start_bucket) {
   4890		num_buckets -= start_bucket;
   4891		last_blk += (start_bucket * blks_per_bucket);
   4892	}
   4893
   4894	/* The first bucket of the original extent */
   4895	old_first = ocfs2_xattr_bucket_new(inode);
   4896	/* The first bucket of the new extent */
   4897	new_first = ocfs2_xattr_bucket_new(inode);
   4898	if (!old_first || !new_first) {
   4899		ret = -ENOMEM;
   4900		mlog_errno(ret);
   4901		goto out;
   4902	}
   4903
   4904	ret = ocfs2_read_xattr_bucket(old_first, src_blk);
   4905	if (ret) {
   4906		mlog_errno(ret);
   4907		goto out;
   4908	}
   4909
   4910	/*
   4911	 * We need to update the first bucket of the old extent and all
   4912	 * the buckets going to the new extent.
   4913	 */
   4914	credits = ((num_buckets + 1) * blks_per_bucket);
   4915	ret = ocfs2_extend_trans(handle, credits);
   4916	if (ret) {
   4917		mlog_errno(ret);
   4918		goto out;
   4919	}
   4920
   4921	ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
   4922						OCFS2_JOURNAL_ACCESS_WRITE);
   4923	if (ret) {
   4924		mlog_errno(ret);
   4925		goto out;
   4926	}
   4927
   4928	for (i = 0; i < num_buckets; i++) {
   4929		ret = ocfs2_cp_xattr_bucket(inode, handle,
   4930					    last_blk + (i * blks_per_bucket),
   4931					    to_blk + (i * blks_per_bucket),
   4932					    1);
   4933		if (ret) {
   4934			mlog_errno(ret);
   4935			goto out;
   4936		}
   4937	}
   4938
   4939	/*
   4940	 * Get the new bucket ready before we dirty anything
   4941	 * (This actually shouldn't fail, because we already dirtied
   4942	 * it once in ocfs2_cp_xattr_bucket()).
   4943	 */
   4944	ret = ocfs2_read_xattr_bucket(new_first, to_blk);
   4945	if (ret) {
   4946		mlog_errno(ret);
   4947		goto out;
   4948	}
   4949	ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
   4950						OCFS2_JOURNAL_ACCESS_WRITE);
   4951	if (ret) {
   4952		mlog_errno(ret);
   4953		goto out;
   4954	}
   4955
   4956	/* Now update the headers */
   4957	le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
   4958	ocfs2_xattr_bucket_journal_dirty(handle, old_first);
   4959
   4960	bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
   4961	ocfs2_xattr_bucket_journal_dirty(handle, new_first);
   4962
   4963	if (first_hash)
   4964		*first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
   4965
   4966out:
   4967	ocfs2_xattr_bucket_free(new_first);
   4968	ocfs2_xattr_bucket_free(old_first);
   4969	return ret;
   4970}
   4971
   4972/*
   4973 * Move some xattrs in this cluster to the new cluster.
   4974 * This function should only be called when bucket size == cluster size.
   4975 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
   4976 */
   4977static int ocfs2_divide_xattr_cluster(struct inode *inode,
   4978				      handle_t *handle,
   4979				      u64 prev_blk,
   4980				      u64 new_blk,
   4981				      u32 *first_hash)
   4982{
   4983	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
   4984	int ret, credits = 2 * blk_per_bucket;
   4985
   4986	BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
   4987
   4988	ret = ocfs2_extend_trans(handle, credits);
   4989	if (ret) {
   4990		mlog_errno(ret);
   4991		return ret;
   4992	}
   4993
    4994	/* Move half of the xattrs in prev_blk to the new bucket. */
   4995	return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
   4996					  new_blk, first_hash, 1);
   4997}
   4998
   4999/*
    5000 * Move some xattrs from the old cluster to the new one, since the two
    5001 * clusters are not contiguous in the ocfs2 xattr tree.
   5002 *
   5003 * new_blk starts a new separate cluster, and we will move some xattrs from
   5004 * prev_blk to it. v_start will be set as the first name hash value in this
    5005 * new cluster so that it can be used as e_cpos during tree insertion and
    5006 * doesn't collide with our original b-tree operations.  first_bh and header_bh
   5007 * will also be updated since they will be used in ocfs2_extend_xattr_bucket
   5008 * to extend the insert bucket.
   5009 *
    5010 * The problem is: how many xattrs should we move to the new one, and when
    5011 * should we update first_bh and header_bh?
   5012 * 1. If cluster size > bucket size, that means the previous cluster has more
    5013 *    than one bucket, so just move half of the buckets into the new cluster and
   5014 *    update the first_bh and header_bh if the insert bucket has been moved
   5015 *    to the new cluster.
   5016 * 2. If cluster_size == bucket_size:
   5017 *    a) If the previous extent rec has more than one cluster and the insert
   5018 *       place isn't in the last cluster, copy the entire last cluster to the
    5019 *       new one. This time, we don't need to update the first_bh and header_bh
   5020 *       since they will not be moved into the new cluster.
   5021 *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
   5022 *       the new one. And we set the extend flag to zero if the insert place is
    5023 *       moved into the newly allocated cluster, since no extend is needed.
   5024 */
   5025static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
   5026					    handle_t *handle,
   5027					    struct ocfs2_xattr_bucket *first,
   5028					    struct ocfs2_xattr_bucket *target,
   5029					    u64 new_blk,
   5030					    u32 prev_clusters,
   5031					    u32 *v_start,
   5032					    int *extend)
   5033{
   5034	int ret;
   5035
   5036	trace_ocfs2_adjust_xattr_cross_cluster(
   5037			(unsigned long long)bucket_blkno(first),
   5038			(unsigned long long)new_blk, prev_clusters);
   5039
   5040	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
   5041		ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
   5042							  handle,
   5043							  first, target,
   5044							  new_blk,
   5045							  prev_clusters,
   5046							  v_start);
   5047		if (ret)
   5048			mlog_errno(ret);
   5049	} else {
   5050		/* The start of the last cluster in the first extent */
   5051		u64 last_blk = bucket_blkno(first) +
   5052			((prev_clusters - 1) *
   5053			 ocfs2_clusters_to_blocks(inode->i_sb, 1));
   5054
   5055		if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
   5056			ret = ocfs2_mv_xattr_buckets(inode, handle,
   5057						     bucket_blkno(first),
   5058						     last_blk, new_blk, 0,
   5059						     v_start);
   5060			if (ret)
   5061				mlog_errno(ret);
   5062		} else {
   5063			ret = ocfs2_divide_xattr_cluster(inode, handle,
   5064							 last_blk, new_blk,
   5065							 v_start);
   5066			if (ret)
   5067				mlog_errno(ret);
   5068
   5069			if ((bucket_blkno(target) == last_blk) && extend)
   5070				*extend = 0;
   5071		}
   5072	}
   5073
   5074	return ret;
   5075}
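/*
 * Editor's illustration -- a hypothetical, unused sketch, not part of the
 * ocfs2 code: the three-way policy described above reduced to its bare
 * decision logic.  All names here are invented; only the conditions mirror
 * ocfs2_adjust_xattr_cross_cluster().
 */
enum xattr_move_policy {
	MOVE_HALF_THE_BUCKETS,	/* cluster size > bucket size */
	COPY_LAST_CLUSTER,	/* bucket == cluster, insert not in last cluster */
	SPLIT_LAST_CLUSTER,	/* bucket == cluster, insert in last cluster */
};

static inline enum xattr_move_policy choose_move_policy(int buckets_per_cluster,
							u32 prev_clusters,
							u64 target_blk,
							u64 last_cluster_blk)
{
	if (buckets_per_cluster > 1)
		return MOVE_HALF_THE_BUCKETS;
	if (prev_clusters > 1 && target_blk != last_cluster_blk)
		return COPY_LAST_CLUSTER;
	/* splitting the last cluster may also clear *extend in the real code */
	return SPLIT_LAST_CLUSTER;
}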
   5076
   5077/*
   5078 * Add a new cluster for xattr storage.
   5079 *
   5080 * If the new cluster is contiguous with the previous one, it will be
   5081 * appended to the same extent record, and num_clusters will be updated.
   5082 * If not, we will insert a new extent for it and move some xattrs in
   5083 * the last cluster into the new allocated one.
   5084 * We also need to limit the maximum size of a btree leaf, otherwise we'll
   5085 * lose the benefits of hashing because we'll have to search large leaves.
    5086 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE (or the
    5087 * clustersize, if that is bigger).
   5088 *
   5089 * first_bh is the first block of the previous extent rec and header_bh
    5090 * indicates the bucket into which we will insert the new xattrs. Both will be
    5091 * updated when header_bh is moved into the new cluster.
   5092 */
   5093static int ocfs2_add_new_xattr_cluster(struct inode *inode,
   5094				       struct buffer_head *root_bh,
   5095				       struct ocfs2_xattr_bucket *first,
   5096				       struct ocfs2_xattr_bucket *target,
   5097				       u32 *num_clusters,
   5098				       u32 prev_cpos,
   5099				       int *extend,
   5100				       struct ocfs2_xattr_set_ctxt *ctxt)
   5101{
   5102	int ret;
   5103	u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
   5104	u32 prev_clusters = *num_clusters;
   5105	u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
   5106	u64 block;
   5107	handle_t *handle = ctxt->handle;
   5108	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   5109	struct ocfs2_extent_tree et;
   5110
   5111	trace_ocfs2_add_new_xattr_cluster_begin(
   5112		(unsigned long long)OCFS2_I(inode)->ip_blkno,
   5113		(unsigned long long)bucket_blkno(first),
   5114		prev_cpos, prev_clusters);
   5115
   5116	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
   5117
   5118	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
   5119				      OCFS2_JOURNAL_ACCESS_WRITE);
   5120	if (ret < 0) {
   5121		mlog_errno(ret);
   5122		goto leave;
   5123	}
   5124
   5125	ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1,
   5126				     clusters_to_add, &bit_off, &num_bits);
   5127	if (ret < 0) {
   5128		if (ret != -ENOSPC)
   5129			mlog_errno(ret);
   5130		goto leave;
   5131	}
   5132
   5133	BUG_ON(num_bits > clusters_to_add);
   5134
   5135	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
   5136	trace_ocfs2_add_new_xattr_cluster((unsigned long long)block, num_bits);
   5137
   5138	if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
   5139	    (prev_clusters + num_bits) << osb->s_clustersize_bits <=
   5140	     OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
   5141		/*
   5142		 * If this cluster is contiguous with the old one and
    5143		 * adding this new cluster does not push us past the limit of
    5144		 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, we will let it be
   5145		 * initialized and used like other buckets in the previous
   5146		 * cluster.
   5147		 * So add it as a contiguous one. The caller will handle
   5148		 * its init process.
   5149		 */
   5150		v_start = prev_cpos + prev_clusters;
   5151		*num_clusters = prev_clusters + num_bits;
   5152	} else {
   5153		ret = ocfs2_adjust_xattr_cross_cluster(inode,
   5154						       handle,
   5155						       first,
   5156						       target,
   5157						       block,
   5158						       prev_clusters,
   5159						       &v_start,
   5160						       extend);
   5161		if (ret) {
   5162			mlog_errno(ret);
   5163			goto leave;
   5164		}
   5165	}
   5166
   5167	trace_ocfs2_add_new_xattr_cluster_insert((unsigned long long)block,
   5168						 v_start, num_bits);
   5169	ret = ocfs2_insert_extent(handle, &et, v_start, block,
   5170				  num_bits, 0, ctxt->meta_ac);
   5171	if (ret < 0) {
   5172		mlog_errno(ret);
   5173		goto leave;
   5174	}
   5175
   5176	ocfs2_journal_dirty(handle, root_bh);
   5177
   5178leave:
   5179	return ret;
   5180}
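/*
 * Editor's illustration -- a hypothetical, unused helper, not part of the
 * ocfs2 code: the test above that decides whether a freshly claimed cluster
 * can simply be appended to the previous extent record instead of starting
 * a new leaf.
 */
static inline int xattr_cluster_is_appendable(u64 first_blkno, u32 prev_clusters,
					      u16 blocks_per_cluster,
					      u64 new_blkno, u32 new_clusters,
					      int clustersize_bits)
{
	/* physically contiguous with the end of the previous extent ... */
	int contiguous = first_blkno + (u64)prev_clusters * blocks_per_cluster ==
			 new_blkno;
	/* ... and the grown leaf still fits in OCFS2_MAX_XATTR_TREE_LEAF_SIZE */
	int small_enough = ((prev_clusters + new_clusters) << clustersize_bits) <=
			   OCFS2_MAX_XATTR_TREE_LEAF_SIZE;

	return contiguous && small_enough;
}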
   5181
   5182/*
   5183 * We are given an extent.  'first' is the bucket at the very front of
   5184 * the extent.  The extent has space for an additional bucket past
   5185 * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
   5186 * of the target bucket.  We wish to shift every bucket past the target
   5187 * down one, filling in that additional space.  When we get back to the
   5188 * target, we split the target between itself and the now-empty bucket
    5189 * at target+1 (aka, target_blk + blk_per_bucket).
   5190 */
   5191static int ocfs2_extend_xattr_bucket(struct inode *inode,
   5192				     handle_t *handle,
   5193				     struct ocfs2_xattr_bucket *first,
   5194				     u64 target_blk,
   5195				     u32 num_clusters)
   5196{
   5197	int ret, credits;
   5198	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   5199	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
   5200	u64 end_blk;
   5201	u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
   5202
   5203	trace_ocfs2_extend_xattr_bucket((unsigned long long)target_blk,
   5204					(unsigned long long)bucket_blkno(first),
   5205					num_clusters, new_bucket);
   5206
   5207	/* The extent must have room for an additional bucket */
   5208	BUG_ON(new_bucket >=
   5209	       (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
   5210
   5211	/* end_blk points to the last existing bucket */
   5212	end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
   5213
   5214	/*
   5215	 * end_blk is the start of the last existing bucket.
   5216	 * Thus, (end_blk - target_blk) covers the target bucket and
   5217	 * every bucket after it up to, but not including, the last
   5218	 * existing bucket.  Then we add the last existing bucket, the
   5219	 * new bucket, and the first bucket (3 * blk_per_bucket).
   5220	 */
   5221	credits = (end_blk - target_blk) + (3 * blk_per_bucket);
   5222	ret = ocfs2_extend_trans(handle, credits);
   5223	if (ret) {
   5224		mlog_errno(ret);
   5225		goto out;
   5226	}
   5227
   5228	ret = ocfs2_xattr_bucket_journal_access(handle, first,
   5229						OCFS2_JOURNAL_ACCESS_WRITE);
   5230	if (ret) {
   5231		mlog_errno(ret);
   5232		goto out;
   5233	}
   5234
   5235	while (end_blk != target_blk) {
   5236		ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
   5237					    end_blk + blk_per_bucket, 0);
   5238		if (ret)
   5239			goto out;
   5240		end_blk -= blk_per_bucket;
   5241	}
   5242
    5243	/* Move half of the xattrs in target_blk to the next bucket. */
   5244	ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
   5245					target_blk + blk_per_bucket, NULL, 0);
   5246
   5247	le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
   5248	ocfs2_xattr_bucket_journal_dirty(handle, first);
   5249
   5250out:
   5251	return ret;
   5252}
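/*
 * Editor's illustration -- a hypothetical, unused helper, not part of the
 * ocfs2 code: the journal-credit estimate used above.  For example, with
 * 64K buckets on 4K blocks (blk_per_bucket = 16) and two buckets sitting
 * between the target and the last existing bucket, end_blk - target_blk is
 * 32 blocks, so the extend costs 32 + 3 * 16 = 80 credits.
 */
static inline u32 xattr_bucket_shift_credits(u64 end_blk, u64 target_blk,
					     u16 blk_per_bucket)
{
	/*
	 * Every block from the target up to (but not including) the last
	 * existing bucket, plus the last existing bucket, the new bucket
	 * and the first bucket.
	 */
	return (u32)(end_blk - target_blk) + 3 * blk_per_bucket;
}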
   5253
   5254/*
   5255 * Add new xattr bucket in an extent record and adjust the buckets
   5256 * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
   5257 * bucket we want to insert into.
   5258 *
   5259 * In the easy case, we will move all the buckets after target down by
   5260 * one. Half of target's xattrs will be moved to the next bucket.
   5261 *
    5262 * If the current cluster is full, we'll allocate a new one.  This may not
   5263 * be contiguous.  The underlying calls will make sure that there is
   5264 * space for the insert, shifting buckets around if necessary.
   5265 * 'target' may be moved by those calls.
   5266 */
   5267static int ocfs2_add_new_xattr_bucket(struct inode *inode,
   5268				      struct buffer_head *xb_bh,
   5269				      struct ocfs2_xattr_bucket *target,
   5270				      struct ocfs2_xattr_set_ctxt *ctxt)
   5271{
   5272	struct ocfs2_xattr_block *xb =
   5273			(struct ocfs2_xattr_block *)xb_bh->b_data;
   5274	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
   5275	struct ocfs2_extent_list *el = &xb_root->xt_list;
   5276	u32 name_hash =
   5277		le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
   5278	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   5279	int ret, num_buckets, extend = 1;
   5280	u64 p_blkno;
   5281	u32 e_cpos, num_clusters;
   5282	/* The bucket at the front of the extent */
   5283	struct ocfs2_xattr_bucket *first;
   5284
   5285	trace_ocfs2_add_new_xattr_bucket(
   5286				(unsigned long long)bucket_blkno(target));
   5287
   5288	/* The first bucket of the original extent */
   5289	first = ocfs2_xattr_bucket_new(inode);
   5290	if (!first) {
   5291		ret = -ENOMEM;
   5292		mlog_errno(ret);
   5293		goto out;
   5294	}
   5295
   5296	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
   5297				  &num_clusters, el);
   5298	if (ret) {
   5299		mlog_errno(ret);
   5300		goto out;
   5301	}
   5302
   5303	ret = ocfs2_read_xattr_bucket(first, p_blkno);
   5304	if (ret) {
   5305		mlog_errno(ret);
   5306		goto out;
   5307	}
   5308
   5309	num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
   5310	if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
   5311		/*
   5312		 * This can move first+target if the target bucket moves
   5313		 * to the new extent.
   5314		 */
   5315		ret = ocfs2_add_new_xattr_cluster(inode,
   5316						  xb_bh,
   5317						  first,
   5318						  target,
   5319						  &num_clusters,
   5320						  e_cpos,
   5321						  &extend,
   5322						  ctxt);
   5323		if (ret) {
   5324			mlog_errno(ret);
   5325			goto out;
   5326		}
   5327	}
   5328
   5329	if (extend) {
   5330		ret = ocfs2_extend_xattr_bucket(inode,
   5331						ctxt->handle,
   5332						first,
   5333						bucket_blkno(target),
   5334						num_clusters);
   5335		if (ret)
   5336			mlog_errno(ret);
   5337	}
   5338
   5339out:
   5340	ocfs2_xattr_bucket_free(first);
   5341
   5342	return ret;
   5343}
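/*
 * Editor's illustration -- a hypothetical, unused helper, not part of the
 * ocfs2 code: the test above that decides whether the extent must first
 * grow by a cluster.  Only when every bucket slot of the extent is already
 * in use do we allocate a new cluster; otherwise the buckets behind the
 * target are simply shifted down by one.
 */
static inline int xattr_extent_needs_new_cluster(int buckets_per_cluster,
						 u32 num_clusters,
						 u16 used_buckets)
{
	return used_buckets == buckets_per_cluster * num_clusters;
}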
   5344
   5345/*
   5346 * Truncate the specified xe_off entry in xattr bucket.
    5347 * The bucket to operate on is passed in directly and len is the new value length.
   5348 * Both the ocfs2_xattr_value_root and the entry will be updated here.
   5349 *
    5350 * The whole bucket is journaled and dirtied, not just the block holding the value root.
   5351 */
   5352static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
   5353					     struct ocfs2_xattr_bucket *bucket,
   5354					     int xe_off,
   5355					     int len,
   5356					     struct ocfs2_xattr_set_ctxt *ctxt)
   5357{
   5358	int ret, offset;
   5359	u64 value_blk;
   5360	struct ocfs2_xattr_entry *xe;
   5361	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
   5362	size_t blocksize = inode->i_sb->s_blocksize;
   5363	struct ocfs2_xattr_value_buf vb = {
   5364		.vb_access = ocfs2_journal_access,
   5365	};
   5366
   5367	xe = &xh->xh_entries[xe_off];
   5368
   5369	BUG_ON(!xe || ocfs2_xattr_is_local(xe));
   5370
   5371	offset = le16_to_cpu(xe->xe_name_offset) +
   5372		 OCFS2_XATTR_SIZE(xe->xe_name_len);
   5373
   5374	value_blk = offset / blocksize;
   5375
    5376	/* We don't allow the ocfs2_xattr_value to be stored in a different block. */
   5377	BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
   5378
   5379	vb.vb_bh = bucket->bu_bhs[value_blk];
   5380	BUG_ON(!vb.vb_bh);
   5381
   5382	vb.vb_xv = (struct ocfs2_xattr_value_root *)
   5383		(vb.vb_bh->b_data + offset % blocksize);
   5384
   5385	/*
   5386	 * From here on out we have to dirty the bucket.  The generic
   5387	 * value calls only modify one of the bucket's bhs, but we need
   5388	 * to send the bucket at once.  So if they error, they *could* have
   5389	 * modified something.  We have to assume they did, and dirty
   5390	 * the whole bucket.  This leaves us in a consistent state.
   5391	 */
   5392	trace_ocfs2_xattr_bucket_value_truncate(
   5393			(unsigned long long)bucket_blkno(bucket), xe_off, len);
   5394	ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
   5395	if (ret) {
   5396		mlog_errno(ret);
   5397		goto out;
   5398	}
   5399
   5400	ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
   5401						OCFS2_JOURNAL_ACCESS_WRITE);
   5402	if (ret) {
   5403		mlog_errno(ret);
   5404		goto out;
   5405	}
   5406
   5407	xe->xe_value_size = cpu_to_le64(len);
   5408
   5409	ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
   5410
   5411out:
   5412	return ret;
   5413}
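/*
 * Editor's illustration -- a hypothetical, unused helper, not part of the
 * ocfs2 code: how a byte offset inside a bucket is resolved to one of the
 * bucket's blocks, as done above for the value root.  With 4K blocks,
 * offset 9000 lands in bucket block 2 at in-block offset 808.
 */
static inline void xattr_bucket_offset_to_block(size_t offset, size_t blocksize,
						u64 *blk_index, size_t *off_in_blk)
{
	*blk_index = offset / blocksize;	/* which bu_bhs[] entry to use */
	*off_in_blk = offset % blocksize;	/* where inside that block */
}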
   5414
   5415static int ocfs2_rm_xattr_cluster(struct inode *inode,
   5416				  struct buffer_head *root_bh,
   5417				  u64 blkno,
   5418				  u32 cpos,
   5419				  u32 len,
   5420				  void *para)
   5421{
   5422	int ret;
   5423	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   5424	struct inode *tl_inode = osb->osb_tl_inode;
   5425	handle_t *handle;
   5426	struct ocfs2_xattr_block *xb =
   5427			(struct ocfs2_xattr_block *)root_bh->b_data;
   5428	struct ocfs2_alloc_context *meta_ac = NULL;
   5429	struct ocfs2_cached_dealloc_ctxt dealloc;
   5430	struct ocfs2_extent_tree et;
   5431
   5432	ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
   5433					  ocfs2_delete_xattr_in_bucket, para);
   5434	if (ret) {
   5435		mlog_errno(ret);
   5436		return ret;
   5437	}
   5438
   5439	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
   5440
   5441	ocfs2_init_dealloc_ctxt(&dealloc);
   5442
   5443	trace_ocfs2_rm_xattr_cluster(
   5444			(unsigned long long)OCFS2_I(inode)->ip_blkno,
   5445			(unsigned long long)blkno, cpos, len);
   5446
   5447	ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
   5448					       len);
   5449
   5450	ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
   5451	if (ret) {
   5452		mlog_errno(ret);
   5453		return ret;
   5454	}
   5455
   5456	inode_lock(tl_inode);
   5457
   5458	if (ocfs2_truncate_log_needs_flush(osb)) {
   5459		ret = __ocfs2_flush_truncate_log(osb);
   5460		if (ret < 0) {
   5461			mlog_errno(ret);
   5462			goto out;
   5463		}
   5464	}
   5465
   5466	handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
   5467	if (IS_ERR(handle)) {
    5468		ret = PTR_ERR(handle);
   5469		mlog_errno(ret);
   5470		goto out;
   5471	}
   5472
   5473	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
   5474				      OCFS2_JOURNAL_ACCESS_WRITE);
   5475	if (ret) {
   5476		mlog_errno(ret);
   5477		goto out_commit;
   5478	}
   5479
   5480	ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
   5481				  &dealloc);
   5482	if (ret) {
   5483		mlog_errno(ret);
   5484		goto out_commit;
   5485	}
   5486
   5487	le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
   5488	ocfs2_journal_dirty(handle, root_bh);
   5489
   5490	ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
   5491	if (ret)
   5492		mlog_errno(ret);
   5493	ocfs2_update_inode_fsync_trans(handle, inode, 0);
   5494
   5495out_commit:
   5496	ocfs2_commit_trans(osb, handle);
   5497out:
   5498	ocfs2_schedule_truncate_log_flush(osb, 1);
   5499
   5500	inode_unlock(tl_inode);
   5501
   5502	if (meta_ac)
   5503		ocfs2_free_alloc_context(meta_ac);
   5504
   5505	ocfs2_run_deallocs(osb, &dealloc);
   5506
   5507	return ret;
   5508}
   5509
   5510/*
    5511 * Check whether the xattr bucket is filled up with entries of the same hash value.
    5512 * If we want to insert an xattr with that same hash, return -ENOSPC.
    5513 * If we want to insert an xattr with a different hash value, go ahead
    5514 * and ocfs2_divide_xattr_bucket will handle this.
   5515 */
   5516static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
   5517					      struct ocfs2_xattr_bucket *bucket,
   5518					      const char *name)
   5519{
   5520	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
   5521	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
   5522
   5523	if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
   5524		return 0;
   5525
   5526	if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
   5527	    xh->xh_entries[0].xe_name_hash) {
    5528		mlog(ML_ERROR, "Too many hash collisions in xattr bucket %llu, "
   5529		     "hash = %u\n",
   5530		     (unsigned long long)bucket_blkno(bucket),
   5531		     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
   5532		return -ENOSPC;
   5533	}
   5534
   5535	return 0;
   5536}
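/*
 * Editor's illustration -- a hypothetical, unused helper, not part of the
 * ocfs2 code: the collision rule above on plain hash values.  A bucket can
 * only be split on a name-hash boundary, so if the first and last entries
 * already share the hash we want to insert, the bucket is effectively full
 * for that hash and the insert has to fail with -ENOSPC.
 */
static inline int xattr_bucket_full_for_hash(u32 first_hash, u32 last_hash,
					     u32 new_hash)
{
	if (new_hash != first_hash)
		return 0;	/* a different hash: splitting can make room */
	return last_hash == first_hash;	/* one hash fills the whole bucket */
}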
   5537
   5538/*
   5539 * Try to set the entry in the current bucket.  If we fail, the caller
   5540 * will handle getting us another bucket.
   5541 */
   5542static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
   5543					struct ocfs2_xattr_info *xi,
   5544					struct ocfs2_xattr_search *xs,
   5545					struct ocfs2_xattr_set_ctxt *ctxt)
   5546{
   5547	int ret;
   5548	struct ocfs2_xa_loc loc;
   5549
   5550	trace_ocfs2_xattr_set_entry_bucket(xi->xi_name);
   5551
   5552	ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
   5553				       xs->not_found ? NULL : xs->here);
   5554	ret = ocfs2_xa_set(&loc, xi, ctxt);
   5555	if (!ret) {
   5556		xs->here = loc.xl_entry;
   5557		goto out;
   5558	}
   5559	if (ret != -ENOSPC) {
   5560		mlog_errno(ret);
   5561		goto out;
   5562	}
   5563
   5564	/* Ok, we need space.  Let's try defragmenting the bucket. */
   5565	ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
   5566					xs->bucket);
   5567	if (ret) {
   5568		mlog_errno(ret);
   5569		goto out;
   5570	}
   5571
   5572	ret = ocfs2_xa_set(&loc, xi, ctxt);
   5573	if (!ret) {
   5574		xs->here = loc.xl_entry;
   5575		goto out;
   5576	}
   5577	if (ret != -ENOSPC)
   5578		mlog_errno(ret);
   5579
   5580
   5581out:
   5582	return ret;
   5583}
   5584
   5585static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
   5586					     struct ocfs2_xattr_info *xi,
   5587					     struct ocfs2_xattr_search *xs,
   5588					     struct ocfs2_xattr_set_ctxt *ctxt)
   5589{
   5590	int ret;
   5591
   5592	trace_ocfs2_xattr_set_entry_index_block(xi->xi_name);
   5593
   5594	ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
   5595	if (!ret)
   5596		goto out;
   5597	if (ret != -ENOSPC) {
   5598		mlog_errno(ret);
   5599		goto out;
   5600	}
   5601
   5602	/* Ack, need more space.  Let's try to get another bucket! */
   5603
   5604	/*
    5605	 * We do not allow for overlapping ranges between buckets, so
    5606	 * the maximum number of collisions we will allow is one
    5607	 * bucket's worth; check here whether we need to add a new
    5608	 * bucket for the insert.
   5609	 */
   5610	ret = ocfs2_check_xattr_bucket_collision(inode,
   5611						 xs->bucket,
   5612						 xi->xi_name);
   5613	if (ret) {
   5614		mlog_errno(ret);
   5615		goto out;
   5616	}
   5617
   5618	ret = ocfs2_add_new_xattr_bucket(inode,
   5619					 xs->xattr_bh,
   5620					 xs->bucket,
   5621					 ctxt);
   5622	if (ret) {
   5623		mlog_errno(ret);
   5624		goto out;
   5625	}
   5626
   5627	/*
   5628	 * ocfs2_add_new_xattr_bucket() will have updated
   5629	 * xs->bucket if it moved, but it will not have updated
   5630	 * any of the other search fields.  Thus, we drop it and
   5631	 * re-search.  Everything should be cached, so it'll be
   5632	 * quick.
   5633	 */
   5634	ocfs2_xattr_bucket_relse(xs->bucket);
   5635	ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
   5636					   xi->xi_name_index,
   5637					   xi->xi_name, xs);
   5638	if (ret && ret != -ENODATA)
   5639		goto out;
   5640	xs->not_found = ret;
   5641
   5642	/* Ok, we have a new bucket, let's try again */
   5643	ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
   5644	if (ret && (ret != -ENOSPC))
   5645		mlog_errno(ret);
   5646
   5647out:
   5648	return ret;
   5649}
   5650
   5651static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
   5652					struct ocfs2_xattr_bucket *bucket,
   5653					void *para)
   5654{
   5655	int ret = 0, ref_credits;
   5656	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
   5657	u16 i;
   5658	struct ocfs2_xattr_entry *xe;
   5659	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   5660	struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
   5661	int credits = ocfs2_remove_extent_credits(osb->sb) +
   5662		ocfs2_blocks_per_xattr_bucket(inode->i_sb);
   5663	struct ocfs2_xattr_value_root *xv;
   5664	struct ocfs2_rm_xattr_bucket_para *args =
   5665			(struct ocfs2_rm_xattr_bucket_para *)para;
   5666
   5667	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
   5668
   5669	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
   5670		xe = &xh->xh_entries[i];
   5671		if (ocfs2_xattr_is_local(xe))
   5672			continue;
   5673
   5674		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
   5675						      i, &xv, NULL);
   5676		if (ret) {
   5677			mlog_errno(ret);
   5678			break;
   5679		}
   5680
   5681		ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
   5682							 args->ref_ci,
   5683							 args->ref_root_bh,
   5684							 &ctxt.meta_ac,
   5685							 &ref_credits);
   5686
   5687		ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
   5688		if (IS_ERR(ctxt.handle)) {
   5689			ret = PTR_ERR(ctxt.handle);
   5690			mlog_errno(ret);
   5691			break;
   5692		}
   5693
   5694		ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
   5695							i, 0, &ctxt);
   5696
   5697		ocfs2_commit_trans(osb, ctxt.handle);
   5698		if (ctxt.meta_ac) {
   5699			ocfs2_free_alloc_context(ctxt.meta_ac);
   5700			ctxt.meta_ac = NULL;
   5701		}
   5702		if (ret) {
   5703			mlog_errno(ret);
   5704			break;
   5705		}
   5706	}
   5707
   5708	if (ctxt.meta_ac)
   5709		ocfs2_free_alloc_context(ctxt.meta_ac);
   5710	ocfs2_schedule_truncate_log_flush(osb, 1);
   5711	ocfs2_run_deallocs(osb, &ctxt.dealloc);
   5712	return ret;
   5713}
   5714
   5715/*
    5716 * Whenever we modify an xattr value root in the bucket (e.g., CoW
    5717 * or changing an extent record flag), we need to recalculate
    5718 * the metaecc for the whole bucket. That is done here.
   5719 *
   5720 * Note:
    5721 * The caller has to supply the extra credits needed for this.
   5722 */
   5723static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
   5724					    handle_t *handle,
   5725					    void *para)
   5726{
   5727	int ret;
   5728	struct ocfs2_xattr_bucket *bucket =
   5729			(struct ocfs2_xattr_bucket *)para;
   5730
   5731	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
   5732						OCFS2_JOURNAL_ACCESS_WRITE);
   5733	if (ret) {
   5734		mlog_errno(ret);
   5735		return ret;
   5736	}
   5737
   5738	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
   5739
   5740	return 0;
   5741}
   5742
   5743/*
   5744 * Special action we need if the xattr value is refcounted.
   5745 *
   5746 * 1. If the xattr is refcounted, lock the tree.
   5747 * 2. CoW the xattr if we are setting the new value and the value
   5748 *    will be stored outside.
    5749 * 3. Otherwise, decrease_refcount will work for us, so just
    5750 *    lock the refcount tree and calculate the meta and credits.
    5751 *
    5752 * We have to do CoW before ocfs2_init_xattr_set_ctxt since
    5753 * currently CoW runs as its own complete transaction, while this
    5754 * function would also lock the allocators and could deadlock. So we
    5755 * CoW the whole xattr value up front.
   5756 */
   5757static int ocfs2_prepare_refcount_xattr(struct inode *inode,
   5758					struct ocfs2_dinode *di,
   5759					struct ocfs2_xattr_info *xi,
   5760					struct ocfs2_xattr_search *xis,
   5761					struct ocfs2_xattr_search *xbs,
   5762					struct ocfs2_refcount_tree **ref_tree,
   5763					int *meta_add,
   5764					int *credits)
   5765{
   5766	int ret = 0;
   5767	struct ocfs2_xattr_block *xb;
   5768	struct ocfs2_xattr_entry *xe;
   5769	char *base;
   5770	u32 p_cluster, num_clusters;
   5771	unsigned int ext_flags;
   5772	int name_offset, name_len;
   5773	struct ocfs2_xattr_value_buf vb;
   5774	struct ocfs2_xattr_bucket *bucket = NULL;
   5775	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   5776	struct ocfs2_post_refcount refcount;
   5777	struct ocfs2_post_refcount *p = NULL;
   5778	struct buffer_head *ref_root_bh = NULL;
   5779
   5780	if (!xis->not_found) {
   5781		xe = xis->here;
   5782		name_offset = le16_to_cpu(xe->xe_name_offset);
   5783		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
   5784		base = xis->base;
   5785		vb.vb_bh = xis->inode_bh;
   5786		vb.vb_access = ocfs2_journal_access_di;
   5787	} else {
   5788		int i, block_off = 0;
   5789		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
   5790		xe = xbs->here;
   5791		name_offset = le16_to_cpu(xe->xe_name_offset);
   5792		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
   5793		i = xbs->here - xbs->header->xh_entries;
   5794
   5795		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
   5796			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
   5797							bucket_xh(xbs->bucket),
   5798							i, &block_off,
   5799							&name_offset);
   5800			if (ret) {
   5801				mlog_errno(ret);
   5802				goto out;
   5803			}
   5804			base = bucket_block(xbs->bucket, block_off);
   5805			vb.vb_bh = xbs->bucket->bu_bhs[block_off];
   5806			vb.vb_access = ocfs2_journal_access;
   5807
   5808			if (ocfs2_meta_ecc(osb)) {
    5809				/* Create parameters for ocfs2_post_refcount. */
   5810				bucket = xbs->bucket;
   5811				refcount.credits = bucket->bu_blocks;
   5812				refcount.para = bucket;
   5813				refcount.func =
   5814					ocfs2_xattr_bucket_post_refcount;
   5815				p = &refcount;
   5816			}
   5817		} else {
   5818			base = xbs->base;
   5819			vb.vb_bh = xbs->xattr_bh;
   5820			vb.vb_access = ocfs2_journal_access_xb;
   5821		}
   5822	}
   5823
   5824	if (ocfs2_xattr_is_local(xe))
   5825		goto out;
   5826
   5827	vb.vb_xv = (struct ocfs2_xattr_value_root *)
   5828				(base + name_offset + name_len);
   5829
   5830	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
   5831				       &num_clusters, &vb.vb_xv->xr_list,
   5832				       &ext_flags);
   5833	if (ret) {
   5834		mlog_errno(ret);
   5835		goto out;
   5836	}
   5837
   5838	/*
   5839	 * We just need to check the 1st extent record, since we always
    5840	 * CoW the whole xattr. So there shouldn't be an xattr with
   5841	 * some REFCOUNT extent recs after the 1st one.
   5842	 */
   5843	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
   5844		goto out;
   5845
   5846	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
   5847				       1, ref_tree, &ref_root_bh);
   5848	if (ret) {
   5849		mlog_errno(ret);
   5850		goto out;
   5851	}
   5852
   5853	/*
    5854	 * If we are deleting the xattr or the new value will be stored inside,
    5855	 * leave it there; the xattr truncate process will remove it
    5856	 * for us (it still needs the refcount tree lock and the meta credits).
    5857	 * The worst case is that every cluster truncate splits the
    5858	 * refcount tree and turns the original extent into 3, so we will need
    5859	 * at most 2 extra extent recs per cluster.
   5860	 */
   5861	if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {
   5862
   5863		ret = ocfs2_refcounted_xattr_delete_need(inode,
   5864							 &(*ref_tree)->rf_ci,
   5865							 ref_root_bh, vb.vb_xv,
   5866							 meta_add, credits);
   5867		if (ret)
   5868			mlog_errno(ret);
   5869		goto out;
   5870	}
   5871
   5872	ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
   5873				       *ref_tree, ref_root_bh, 0,
   5874				       le32_to_cpu(vb.vb_xv->xr_clusters), p);
   5875	if (ret)
   5876		mlog_errno(ret);
   5877
   5878out:
   5879	brelse(ref_root_bh);
   5880	return ret;
   5881}
   5882
   5883/*
    5884 * Add the REFCOUNTED flag to all the extent recs in the ocfs2_xattr_value_root.
    5885 * The physical clusters will be added to the refcount tree.
   5886 */
   5887static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
   5888				struct ocfs2_xattr_value_root *xv,
   5889				struct ocfs2_extent_tree *value_et,
   5890				struct ocfs2_caching_info *ref_ci,
   5891				struct buffer_head *ref_root_bh,
   5892				struct ocfs2_cached_dealloc_ctxt *dealloc,
   5893				struct ocfs2_post_refcount *refcount)
   5894{
   5895	int ret = 0;
   5896	u32 clusters = le32_to_cpu(xv->xr_clusters);
   5897	u32 cpos, p_cluster, num_clusters;
   5898	struct ocfs2_extent_list *el = &xv->xr_list;
   5899	unsigned int ext_flags;
   5900
   5901	cpos = 0;
   5902	while (cpos < clusters) {
   5903		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
   5904					       &num_clusters, el, &ext_flags);
   5905		if (ret) {
   5906			mlog_errno(ret);
   5907			break;
   5908		}
   5909
   5910		cpos += num_clusters;
   5911		if ((ext_flags & OCFS2_EXT_REFCOUNTED))
   5912			continue;
   5913
   5914		BUG_ON(!p_cluster);
   5915
   5916		ret = ocfs2_add_refcount_flag(inode, value_et,
   5917					      ref_ci, ref_root_bh,
   5918					      cpos - num_clusters,
   5919					      p_cluster, num_clusters,
   5920					      dealloc, refcount);
   5921		if (ret) {
   5922			mlog_errno(ret);
   5923			break;
   5924		}
   5925	}
   5926
   5927	return ret;
   5928}
   5929
   5930/*
   5931 * Given a normal ocfs2_xattr_header, refcount all the entries which
    5932 * have their value stored outside.
    5933 * Used for xattrs stored in the inode and in an ocfs2_xattr_block.
   5934 */
   5935static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
   5936				struct ocfs2_xattr_value_buf *vb,
   5937				struct ocfs2_xattr_header *header,
   5938				struct ocfs2_caching_info *ref_ci,
   5939				struct buffer_head *ref_root_bh,
   5940				struct ocfs2_cached_dealloc_ctxt *dealloc)
   5941{
   5942
   5943	struct ocfs2_xattr_entry *xe;
   5944	struct ocfs2_xattr_value_root *xv;
   5945	struct ocfs2_extent_tree et;
   5946	int i, ret = 0;
   5947
   5948	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
   5949		xe = &header->xh_entries[i];
   5950
   5951		if (ocfs2_xattr_is_local(xe))
   5952			continue;
   5953
   5954		xv = (struct ocfs2_xattr_value_root *)((void *)header +
   5955			le16_to_cpu(xe->xe_name_offset) +
   5956			OCFS2_XATTR_SIZE(xe->xe_name_len));
   5957
   5958		vb->vb_xv = xv;
   5959		ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
   5960
   5961		ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
   5962							ref_ci, ref_root_bh,
   5963							dealloc, NULL);
   5964		if (ret) {
   5965			mlog_errno(ret);
   5966			break;
   5967		}
   5968	}
   5969
   5970	return ret;
   5971}
   5972
   5973static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
   5974				struct buffer_head *fe_bh,
   5975				struct ocfs2_caching_info *ref_ci,
   5976				struct buffer_head *ref_root_bh,
   5977				struct ocfs2_cached_dealloc_ctxt *dealloc)
   5978{
   5979	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
   5980	struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
   5981				(fe_bh->b_data + inode->i_sb->s_blocksize -
   5982				le16_to_cpu(di->i_xattr_inline_size));
   5983	struct ocfs2_xattr_value_buf vb = {
   5984		.vb_bh = fe_bh,
   5985		.vb_access = ocfs2_journal_access_di,
   5986	};
   5987
   5988	return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
   5989						  ref_ci, ref_root_bh, dealloc);
   5990}
   5991
   5992struct ocfs2_xattr_tree_value_refcount_para {
   5993	struct ocfs2_caching_info *ref_ci;
   5994	struct buffer_head *ref_root_bh;
   5995	struct ocfs2_cached_dealloc_ctxt *dealloc;
   5996};
   5997
   5998static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
   5999					   struct ocfs2_xattr_bucket *bucket,
   6000					   int offset,
   6001					   struct ocfs2_xattr_value_root **xv,
   6002					   struct buffer_head **bh)
   6003{
   6004	int ret, block_off, name_offset;
   6005	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
   6006	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
   6007	void *base;
   6008
   6009	ret = ocfs2_xattr_bucket_get_name_value(sb,
   6010						bucket_xh(bucket),
   6011						offset,
   6012						&block_off,
   6013						&name_offset);
   6014	if (ret) {
   6015		mlog_errno(ret);
   6016		goto out;
   6017	}
   6018
   6019	base = bucket_block(bucket, block_off);
   6020
   6021	*xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
   6022			 OCFS2_XATTR_SIZE(xe->xe_name_len));
   6023
   6024	if (bh)
   6025		*bh = bucket->bu_bhs[block_off];
   6026out:
   6027	return ret;
   6028}
   6029
   6030/*
   6031 * For a given xattr bucket, refcount all the entries which
    6032 * have their value stored outside.
   6033 */
   6034static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
   6035					     struct ocfs2_xattr_bucket *bucket,
   6036					     void *para)
   6037{
   6038	int i, ret = 0;
   6039	struct ocfs2_extent_tree et;
   6040	struct ocfs2_xattr_tree_value_refcount_para *ref =
   6041			(struct ocfs2_xattr_tree_value_refcount_para *)para;
   6042	struct ocfs2_xattr_header *xh =
   6043			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
   6044	struct ocfs2_xattr_entry *xe;
   6045	struct ocfs2_xattr_value_buf vb = {
   6046		.vb_access = ocfs2_journal_access,
   6047	};
   6048	struct ocfs2_post_refcount refcount = {
   6049		.credits = bucket->bu_blocks,
   6050		.para = bucket,
   6051		.func = ocfs2_xattr_bucket_post_refcount,
   6052	};
   6053	struct ocfs2_post_refcount *p = NULL;
   6054
   6055	/* We only need post_refcount if we support metaecc. */
   6056	if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
   6057		p = &refcount;
   6058
   6059	trace_ocfs2_xattr_bucket_value_refcount(
   6060				(unsigned long long)bucket_blkno(bucket),
   6061				le16_to_cpu(xh->xh_count));
   6062	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
   6063		xe = &xh->xh_entries[i];
   6064
   6065		if (ocfs2_xattr_is_local(xe))
   6066			continue;
   6067
   6068		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
   6069						      &vb.vb_xv, &vb.vb_bh);
   6070		if (ret) {
   6071			mlog_errno(ret);
   6072			break;
   6073		}
   6074
   6075		ocfs2_init_xattr_value_extent_tree(&et,
   6076						   INODE_CACHE(inode), &vb);
   6077
   6078		ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
   6079							&et, ref->ref_ci,
   6080							ref->ref_root_bh,
   6081							ref->dealloc, p);
   6082		if (ret) {
   6083			mlog_errno(ret);
   6084			break;
   6085		}
   6086	}
   6087
   6088	return ret;
   6089
   6090}
   6091
   6092static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
   6093				     struct buffer_head *root_bh,
   6094				     u64 blkno, u32 cpos, u32 len, void *para)
   6095{
   6096	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
   6097					   ocfs2_xattr_bucket_value_refcount,
   6098					   para);
   6099}
   6100
   6101static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
   6102				struct buffer_head *blk_bh,
   6103				struct ocfs2_caching_info *ref_ci,
   6104				struct buffer_head *ref_root_bh,
   6105				struct ocfs2_cached_dealloc_ctxt *dealloc)
   6106{
   6107	int ret = 0;
   6108	struct ocfs2_xattr_block *xb =
   6109				(struct ocfs2_xattr_block *)blk_bh->b_data;
   6110
   6111	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
   6112		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
   6113		struct ocfs2_xattr_value_buf vb = {
   6114			.vb_bh = blk_bh,
   6115			.vb_access = ocfs2_journal_access_xb,
   6116		};
   6117
   6118		ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
   6119							 ref_ci, ref_root_bh,
   6120							 dealloc);
   6121	} else {
   6122		struct ocfs2_xattr_tree_value_refcount_para para = {
   6123			.ref_ci = ref_ci,
   6124			.ref_root_bh = ref_root_bh,
   6125			.dealloc = dealloc,
   6126		};
   6127
   6128		ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
   6129						ocfs2_refcount_xattr_tree_rec,
   6130						&para);
   6131	}
   6132
   6133	return ret;
   6134}
   6135
   6136int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
   6137				     struct buffer_head *fe_bh,
   6138				     struct ocfs2_caching_info *ref_ci,
   6139				     struct buffer_head *ref_root_bh,
   6140				     struct ocfs2_cached_dealloc_ctxt *dealloc)
   6141{
   6142	int ret = 0;
   6143	struct ocfs2_inode_info *oi = OCFS2_I(inode);
   6144	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
   6145	struct buffer_head *blk_bh = NULL;
   6146
   6147	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
   6148		ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
   6149							 ref_ci, ref_root_bh,
   6150							 dealloc);
   6151		if (ret) {
   6152			mlog_errno(ret);
   6153			goto out;
   6154		}
   6155	}
   6156
   6157	if (!di->i_xattr_loc)
   6158		goto out;
   6159
   6160	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
   6161				     &blk_bh);
   6162	if (ret < 0) {
   6163		mlog_errno(ret);
   6164		goto out;
   6165	}
   6166
   6167	ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
   6168						ref_root_bh, dealloc);
   6169	if (ret)
   6170		mlog_errno(ret);
   6171
   6172	brelse(blk_bh);
   6173out:
   6174
   6175	return ret;
   6176}
   6177
   6178typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);
   6179/*
   6180 * Store the information we need in xattr reflink.
   6181 * old_bh and new_bh are inode bh for the old and new inode.
   6182 */
   6183struct ocfs2_xattr_reflink {
   6184	struct inode *old_inode;
   6185	struct inode *new_inode;
   6186	struct buffer_head *old_bh;
   6187	struct buffer_head *new_bh;
   6188	struct ocfs2_caching_info *ref_ci;
   6189	struct buffer_head *ref_root_bh;
   6190	struct ocfs2_cached_dealloc_ctxt *dealloc;
   6191	should_xattr_reflinked *xattr_reflinked;
   6192};
   6193
   6194/*
    6195 * Given an xattr header and xe offset,
    6196 * return the proper xv and the corresponding bh.
    6197 * Xattrs in the inode, block and xattr tree have different implementations.
   6198 */
   6199typedef int (get_xattr_value_root)(struct super_block *sb,
   6200				   struct buffer_head *bh,
   6201				   struct ocfs2_xattr_header *xh,
   6202				   int offset,
   6203				   struct ocfs2_xattr_value_root **xv,
   6204				   struct buffer_head **ret_bh,
   6205				   void *para);
   6206
   6207/*
   6208 * Calculate all the xattr value root metadata stored in this xattr header and
    6209 * the credits we need if we create them from scratch.
   6210 * We use get_xattr_value_root so that all types of xattr container can use it.
   6211 */
   6212static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
   6213					     struct buffer_head *bh,
   6214					     struct ocfs2_xattr_header *xh,
   6215					     int *metas, int *credits,
   6216					     int *num_recs,
   6217					     get_xattr_value_root *func,
   6218					     void *para)
   6219{
   6220	int i, ret = 0;
   6221	struct ocfs2_xattr_value_root *xv;
   6222	struct ocfs2_xattr_entry *xe;
   6223
   6224	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
   6225		xe = &xh->xh_entries[i];
   6226		if (ocfs2_xattr_is_local(xe))
   6227			continue;
   6228
   6229		ret = func(sb, bh, xh, i, &xv, NULL, para);
   6230		if (ret) {
   6231			mlog_errno(ret);
   6232			break;
   6233		}
   6234
   6235		*metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
   6236			  le16_to_cpu(xv->xr_list.l_next_free_rec);
   6237
   6238		*credits += ocfs2_calc_extend_credits(sb,
   6239						&def_xv.xv.xr_list);
   6240
   6241		/*
    6242		 * If the value is a tree with depth > 0, we don't descend
    6243		 * into the extent blocks, so just estimate a maximum record count.
   6244		 */
   6245		if (!xv->xr_list.l_tree_depth)
   6246			*num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
   6247		else
   6248			*num_recs += ocfs2_clusters_for_bytes(sb,
   6249							      XATTR_SIZE_MAX);
   6250	}
   6251
   6252	return ret;
   6253}
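/*
 * Editor's illustration -- a hypothetical, unused helper, not part of the
 * ocfs2 code: the per-value estimate accumulated above.  A value tree of
 * depth d with n extent records in its root list budgets d * n metadata
 * blocks; the records are counted exactly when the root list is a leaf
 * (d == 0) and pessimistically (enough clusters for XATTR_SIZE_MAX)
 * otherwise.
 */
static inline void xattr_estimate_value_metas(u16 tree_depth, u16 next_free_rec,
					      u32 recs_for_max_value,
					      int *metas, int *num_recs)
{
	*metas += tree_depth * next_free_rec;
	if (!tree_depth)
		*num_recs += next_free_rec;
	else
		*num_recs += recs_for_max_value;
}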
   6254
   6255/* Used by xattr inode and block to return the right xv and buffer_head. */
   6256static int ocfs2_get_xattr_value_root(struct super_block *sb,
   6257				      struct buffer_head *bh,
   6258				      struct ocfs2_xattr_header *xh,
   6259				      int offset,
   6260				      struct ocfs2_xattr_value_root **xv,
   6261				      struct buffer_head **ret_bh,
   6262				      void *para)
   6263{
   6264	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
   6265
   6266	*xv = (struct ocfs2_xattr_value_root *)((void *)xh +
   6267		le16_to_cpu(xe->xe_name_offset) +
   6268		OCFS2_XATTR_SIZE(xe->xe_name_len));
   6269
   6270	if (ret_bh)
   6271		*ret_bh = bh;
   6272
   6273	return 0;
   6274}
   6275
   6276/*
    6277 * Lock the meta_ac and calculate how many credits we need to reflink xattrs.
   6278 * It is only used for inline xattr and xattr block.
   6279 */
   6280static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
   6281					struct ocfs2_xattr_header *xh,
   6282					struct buffer_head *ref_root_bh,
   6283					int *credits,
   6284					struct ocfs2_alloc_context **meta_ac)
   6285{
   6286	int ret, meta_add = 0, num_recs = 0;
   6287	struct ocfs2_refcount_block *rb =
   6288			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
   6289
   6290	*credits = 0;
   6291
   6292	ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
   6293						&meta_add, credits, &num_recs,
   6294						ocfs2_get_xattr_value_root,
   6295						NULL);
   6296	if (ret) {
   6297		mlog_errno(ret);
   6298		goto out;
   6299	}
   6300
   6301	/*
    6302	 * We need to add/modify num_recs in the refcount tree, so just calculate
    6303	 * an approximate number we need for the refcount tree change.
    6304	 * Sometimes we need to split the tree, and after a split, half of the recs
    6305	 * will be moved to the new block, so a new block can only provide
    6306	 * half the number of recs. So we multiply the number of new blocks by 2.
   6307	 */
   6308	num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
   6309	meta_add += num_recs;
   6310	*credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
   6311	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
   6312		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
   6313			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
   6314	else
   6315		*credits += 1;
   6316
   6317	ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
   6318	if (ret)
   6319		mlog_errno(ret);
   6320
   6321out:
   6322	return ret;
   6323}
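/*
 * Editor's illustration -- a hypothetical, unused helper, not part of the
 * ocfs2 code: the refcount-tree sizing rule above.  A refcount block holds
 * recs_per_rb records, but a split leaves every new block only half full,
 * so the estimated number of new blocks is doubled.  E.g. 300 records with
 * recs_per_rb = 100 budget 6 new refcount blocks rather than 3.
 */
static inline int xattr_refcount_blocks_for_recs(int num_recs, int recs_per_rb)
{
	return num_recs / recs_per_rb * 2;
}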
   6324
   6325/*
   6326 * Given a xattr header, reflink all the xattrs in this container.
   6327 * It can be used for inode, block and bucket.
   6328 *
   6329 * NOTE:
    6330 * Before we call this function, the caller has already memcpy'd the xattrs
    6331 * from old_xh to new_xh.
   6332 *
   6333 * If args.xattr_reflinked is set, call it to decide whether the xe should
   6334 * be reflinked or not. If not, remove it from the new xattr header.
   6335 */
   6336static int ocfs2_reflink_xattr_header(handle_t *handle,
   6337				      struct ocfs2_xattr_reflink *args,
   6338				      struct buffer_head *old_bh,
   6339				      struct ocfs2_xattr_header *xh,
   6340				      struct buffer_head *new_bh,
   6341				      struct ocfs2_xattr_header *new_xh,
   6342				      struct ocfs2_xattr_value_buf *vb,
   6343				      struct ocfs2_alloc_context *meta_ac,
   6344				      get_xattr_value_root *func,
   6345				      void *para)
   6346{
   6347	int ret = 0, i, j;
   6348	struct super_block *sb = args->old_inode->i_sb;
   6349	struct buffer_head *value_bh;
   6350	struct ocfs2_xattr_entry *xe, *last;
   6351	struct ocfs2_xattr_value_root *xv, *new_xv;
   6352	struct ocfs2_extent_tree data_et;
   6353	u32 clusters, cpos, p_cluster, num_clusters;
   6354	unsigned int ext_flags = 0;
   6355
   6356	trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr,
   6357					 le16_to_cpu(xh->xh_count));
   6358
   6359	last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
   6360	for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
   6361		xe = &xh->xh_entries[i];
   6362
   6363		if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
   6364			xe = &new_xh->xh_entries[j];
   6365
   6366			le16_add_cpu(&new_xh->xh_count, -1);
   6367			if (new_xh->xh_count) {
   6368				memmove(xe, xe + 1,
   6369					(void *)last - (void *)xe);
   6370				memset(last, 0,
   6371				       sizeof(struct ocfs2_xattr_entry));
   6372			}
   6373
   6374			/*
    6375			 * We don't want j to advance in the next round since
    6376			 * the next entry has already been moved into slot j.
   6377			 */
   6378			j--;
   6379			continue;
   6380		}
   6381
   6382		if (ocfs2_xattr_is_local(xe))
   6383			continue;
   6384
   6385		ret = func(sb, old_bh, xh, i, &xv, NULL, para);
   6386		if (ret) {
   6387			mlog_errno(ret);
   6388			break;
   6389		}
   6390
   6391		ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
   6392		if (ret) {
   6393			mlog_errno(ret);
   6394			break;
   6395		}
   6396
   6397		/*
   6398		 * For the xattr which has l_tree_depth = 0, all the extent
    6399		 * recs have already been copied to the new xh with the
    6400		 * appropriate OCFS2_EXT_REFCOUNTED flag; we just need to
    6401		 * increase the refcount in the refcount tree.
   6402		 *
   6403		 * For the xattr which has l_tree_depth > 0, we need
   6404		 * to initialize it to the empty default value root,
   6405		 * and then insert the extents one by one.
   6406		 */
   6407		if (xv->xr_list.l_tree_depth) {
   6408			memcpy(new_xv, &def_xv, OCFS2_XATTR_ROOT_SIZE);
   6409			vb->vb_xv = new_xv;
   6410			vb->vb_bh = value_bh;
   6411			ocfs2_init_xattr_value_extent_tree(&data_et,
   6412					INODE_CACHE(args->new_inode), vb);
   6413		}
   6414
   6415		clusters = le32_to_cpu(xv->xr_clusters);
   6416		cpos = 0;
   6417		while (cpos < clusters) {
   6418			ret = ocfs2_xattr_get_clusters(args->old_inode,
   6419						       cpos,
   6420						       &p_cluster,
   6421						       &num_clusters,
   6422						       &xv->xr_list,
   6423						       &ext_flags);
   6424			if (ret) {
   6425				mlog_errno(ret);
   6426				goto out;
   6427			}
   6428
   6429			BUG_ON(!p_cluster);
   6430
   6431			if (xv->xr_list.l_tree_depth) {
   6432				ret = ocfs2_insert_extent(handle,
   6433						&data_et, cpos,
   6434						ocfs2_clusters_to_blocks(
   6435							args->old_inode->i_sb,
   6436							p_cluster),
   6437						num_clusters, ext_flags,
   6438						meta_ac);
   6439				if (ret) {
   6440					mlog_errno(ret);
   6441					goto out;
   6442				}
   6443			}
   6444
   6445			ret = ocfs2_increase_refcount(handle, args->ref_ci,
   6446						      args->ref_root_bh,
   6447						      p_cluster, num_clusters,
   6448						      meta_ac, args->dealloc);
   6449			if (ret) {
   6450				mlog_errno(ret);
   6451				goto out;
   6452			}
   6453
   6454			cpos += num_clusters;
   6455		}
   6456	}
   6457
   6458out:
   6459	return ret;
   6460}
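/*
 * Editor's illustration -- a hypothetical, standalone model, not part of
 * the ocfs2 code: the filtering pass above drops non-reflinked entries from
 * the destination header by memmove-ing the tail down and re-examining the
 * same slot (the j-- in the loop).  The classic two-index compaction below
 * has the same effect on a plain array of name hashes.
 */
static inline u16 compact_kept_entries(u32 *dst_hashes, u16 count,
				       int (*keep)(u32 hash))
{
	u16 i, j;

	for (i = 0, j = 0; i < count; i++) {
		if (!keep(dst_hashes[i]))
			continue;	/* dropped: do not advance the write index */
		dst_hashes[j++] = dst_hashes[i];
	}
	return j;	/* the new xh_count */
}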
   6461
   6462static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
   6463{
   6464	int ret = 0, credits = 0;
   6465	handle_t *handle;
   6466	struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
   6467	struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
   6468	int inline_size = le16_to_cpu(di->i_xattr_inline_size);
   6469	int header_off = osb->sb->s_blocksize - inline_size;
   6470	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
   6471					(args->old_bh->b_data + header_off);
   6472	struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
   6473					(args->new_bh->b_data + header_off);
   6474	struct ocfs2_alloc_context *meta_ac = NULL;
   6475	struct ocfs2_inode_info *new_oi;
   6476	struct ocfs2_dinode *new_di;
   6477	struct ocfs2_xattr_value_buf vb = {
   6478		.vb_bh = args->new_bh,
   6479		.vb_access = ocfs2_journal_access_di,
   6480	};
   6481
   6482	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
   6483						  &credits, &meta_ac);
   6484	if (ret) {
   6485		mlog_errno(ret);
   6486		goto out;
   6487	}
   6488
   6489	handle = ocfs2_start_trans(osb, credits);
   6490	if (IS_ERR(handle)) {
   6491		ret = PTR_ERR(handle);
   6492		mlog_errno(ret);
   6493		goto out;
   6494	}
   6495
   6496	ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
   6497				      args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
   6498	if (ret) {
   6499		mlog_errno(ret);
   6500		goto out_commit;
   6501	}
   6502
   6503	memcpy(args->new_bh->b_data + header_off,
   6504	       args->old_bh->b_data + header_off, inline_size);
   6505
   6506	new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
   6507	new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
   6508
   6509	ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
   6510					 args->new_bh, new_xh, &vb, meta_ac,
   6511					 ocfs2_get_xattr_value_root, NULL);
   6512	if (ret) {
   6513		mlog_errno(ret);
   6514		goto out_commit;
   6515	}
   6516
   6517	new_oi = OCFS2_I(args->new_inode);
   6518	/*
    6519	 * Adjust the extent record count to reserve space for extended attributes.
    6520	 * The inline data count has already been adjusted in ocfs2_duplicate_inline_data().
   6521	 */
   6522	if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
   6523	    !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
   6524		struct ocfs2_extent_list *el = &new_di->id2.i_list;
   6525		le16_add_cpu(&el->l_count, -(inline_size /
   6526					sizeof(struct ocfs2_extent_rec)));
   6527	}
   6528	spin_lock(&new_oi->ip_lock);
   6529	new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
   6530	new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
   6531	spin_unlock(&new_oi->ip_lock);
   6532
   6533	ocfs2_journal_dirty(handle, args->new_bh);
   6534
   6535out_commit:
   6536	ocfs2_commit_trans(osb, handle);
   6537
   6538out:
   6539	if (meta_ac)
   6540		ocfs2_free_alloc_context(meta_ac);
   6541	return ret;
   6542}
   6543
   6544static int ocfs2_create_empty_xattr_block(struct inode *inode,
   6545					  struct buffer_head *fe_bh,
   6546					  struct buffer_head **ret_bh,
   6547					  int indexed)
   6548{
   6549	int ret;
   6550	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   6551	struct ocfs2_xattr_set_ctxt ctxt;
   6552
   6553	memset(&ctxt, 0, sizeof(ctxt));
   6554	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac);
   6555	if (ret < 0) {
   6556		mlog_errno(ret);
   6557		return ret;
   6558	}
   6559
   6560	ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
   6561	if (IS_ERR(ctxt.handle)) {
   6562		ret = PTR_ERR(ctxt.handle);
   6563		mlog_errno(ret);
   6564		goto out;
   6565	}
   6566
   6567	trace_ocfs2_create_empty_xattr_block(
   6568				(unsigned long long)fe_bh->b_blocknr, indexed);
   6569	ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed,
   6570				       ret_bh);
   6571	if (ret)
   6572		mlog_errno(ret);
   6573
   6574	ocfs2_commit_trans(osb, ctxt.handle);
   6575out:
   6576	ocfs2_free_alloc_context(ctxt.meta_ac);
   6577	return ret;
   6578}
   6579
   6580static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
   6581				     struct buffer_head *blk_bh,
   6582				     struct buffer_head *new_blk_bh)
   6583{
   6584	int ret = 0, credits = 0;
   6585	handle_t *handle;
   6586	struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
   6587	struct ocfs2_dinode *new_di;
   6588	struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
   6589	int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
   6590	struct ocfs2_xattr_block *xb =
   6591			(struct ocfs2_xattr_block *)blk_bh->b_data;
   6592	struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
   6593	struct ocfs2_xattr_block *new_xb =
   6594			(struct ocfs2_xattr_block *)new_blk_bh->b_data;
   6595	struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
   6596	struct ocfs2_alloc_context *meta_ac;
   6597	struct ocfs2_xattr_value_buf vb = {
   6598		.vb_bh = new_blk_bh,
   6599		.vb_access = ocfs2_journal_access_xb,
   6600	};
   6601
   6602	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
   6603						  &credits, &meta_ac);
   6604	if (ret) {
   6605		mlog_errno(ret);
   6606		return ret;
   6607	}
   6608
    6609	/* One more credit in case we need to set the xattr flags in the new inode. */
   6610	handle = ocfs2_start_trans(osb, credits + 1);
   6611	if (IS_ERR(handle)) {
   6612		ret = PTR_ERR(handle);
   6613		mlog_errno(ret);
   6614		goto out;
   6615	}
   6616
   6617	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
   6618		ret = ocfs2_journal_access_di(handle,
   6619					      INODE_CACHE(args->new_inode),
   6620					      args->new_bh,
   6621					      OCFS2_JOURNAL_ACCESS_WRITE);
   6622		if (ret) {
   6623			mlog_errno(ret);
   6624			goto out_commit;
   6625		}
   6626	}
   6627
   6628	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
   6629				      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
   6630	if (ret) {
   6631		mlog_errno(ret);
   6632		goto out_commit;
   6633	}
   6634
   6635	memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
   6636	       osb->sb->s_blocksize - header_off);
   6637
   6638	ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
   6639					 new_blk_bh, new_xh, &vb, meta_ac,
   6640					 ocfs2_get_xattr_value_root, NULL);
   6641	if (ret) {
   6642		mlog_errno(ret);
   6643		goto out_commit;
   6644	}
   6645
   6646	ocfs2_journal_dirty(handle, new_blk_bh);
   6647
   6648	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
   6649		new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
   6650		spin_lock(&new_oi->ip_lock);
   6651		new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
   6652		new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
   6653		spin_unlock(&new_oi->ip_lock);
   6654
   6655		ocfs2_journal_dirty(handle, args->new_bh);
   6656	}
   6657
   6658out_commit:
   6659	ocfs2_commit_trans(osb, handle);
   6660
   6661out:
   6662	ocfs2_free_alloc_context(meta_ac);
   6663	return ret;
   6664}
   6665
   6666struct ocfs2_reflink_xattr_tree_args {
   6667	struct ocfs2_xattr_reflink *reflink;
   6668	struct buffer_head *old_blk_bh;
   6669	struct buffer_head *new_blk_bh;
   6670	struct ocfs2_xattr_bucket *old_bucket;
   6671	struct ocfs2_xattr_bucket *new_bucket;
   6672};
   6673
   6674/*
   6675 * NOTE:
    6676 * We have to handle the case that both the old bucket and the new bucket
    6677 * call this function to get the right ret_bh.
    6678 * So the caller must give us the right bh.
   6679 */
   6680static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
   6681					struct buffer_head *bh,
   6682					struct ocfs2_xattr_header *xh,
   6683					int offset,
   6684					struct ocfs2_xattr_value_root **xv,
   6685					struct buffer_head **ret_bh,
   6686					void *para)
   6687{
   6688	struct ocfs2_reflink_xattr_tree_args *args =
   6689			(struct ocfs2_reflink_xattr_tree_args *)para;
   6690	struct ocfs2_xattr_bucket *bucket;
   6691
   6692	if (bh == args->old_bucket->bu_bhs[0])
   6693		bucket = args->old_bucket;
   6694	else
   6695		bucket = args->new_bucket;
   6696
   6697	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
   6698					       xv, ret_bh);
   6699}
   6700
   6701struct ocfs2_value_tree_metas {
   6702	int num_metas;
   6703	int credits;
   6704	int num_recs;
   6705};
   6706
   6707static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
   6708					struct buffer_head *bh,
   6709					struct ocfs2_xattr_header *xh,
   6710					int offset,
   6711					struct ocfs2_xattr_value_root **xv,
   6712					struct buffer_head **ret_bh,
   6713					void *para)
   6714{
   6715	struct ocfs2_xattr_bucket *bucket =
   6716				(struct ocfs2_xattr_bucket *)para;
   6717
   6718	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
   6719					       xv, ret_bh);
   6720}
   6721
   6722static int ocfs2_calc_value_tree_metas(struct inode *inode,
   6723				      struct ocfs2_xattr_bucket *bucket,
   6724				      void *para)
   6725{
   6726	struct ocfs2_value_tree_metas *metas =
   6727			(struct ocfs2_value_tree_metas *)para;
   6728	struct ocfs2_xattr_header *xh =
   6729			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
   6730
   6731	/* Add the credits for this bucket first. */
   6732	metas->credits += bucket->bu_blocks;
   6733	return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
   6734					xh, &metas->num_metas,
   6735					&metas->credits, &metas->num_recs,
   6736					ocfs2_value_tree_metas_in_bucket,
   6737					bucket);
   6738}
   6739
    6740/*
    6741 * Given an xattr extent rec starting at blkno and spanning len clusters,
    6742 * iterate over all its buckets, calculate how much metadata is needed to
    6743 * reflink all the ocfs2_xattr_value_roots, and reserve the allocators accordingly.
    6744 */
   6745static int ocfs2_lock_reflink_xattr_rec_allocators(
   6746				struct ocfs2_reflink_xattr_tree_args *args,
   6747				struct ocfs2_extent_tree *xt_et,
   6748				u64 blkno, u32 len, int *credits,
   6749				struct ocfs2_alloc_context **meta_ac,
   6750				struct ocfs2_alloc_context **data_ac)
   6751{
   6752	int ret, num_free_extents;
   6753	struct ocfs2_value_tree_metas metas;
   6754	struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
   6755	struct ocfs2_refcount_block *rb;
   6756
   6757	memset(&metas, 0, sizeof(metas));
   6758
   6759	ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
   6760					  ocfs2_calc_value_tree_metas, &metas);
   6761	if (ret) {
   6762		mlog_errno(ret);
   6763		goto out;
   6764	}
   6765
   6766	*credits = metas.credits;
   6767
    6768	/*
    6769	 * Calculate what we need for the refcount tree change.
    6770	 *
    6771	 * We will add or modify num_recs records in the refcount tree, so
    6772	 * estimate the metadata needed for that change.  Sometimes a block
    6773	 * must be split, and after a split half of the records move to the
    6774	 * new block, so a new block effectively provides only half the
    6775	 * usual number of records; to cover this we double the number of
    6776	 * new blocks.  Finally, we add credits for modifying the refcount
    6777	 * blocks that already exist.
    6778	 */
   6779	rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
   6780	metas.num_recs =
   6781		(metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
   6782		 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
   6783	metas.num_metas += metas.num_recs;
   6784	*credits += metas.num_recs +
   6785		    metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
   6786	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
   6787		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
   6788			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
   6789	else
   6790		*credits += 1;
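
	/*
	 * Worked example (hypothetical numbers, for illustration only):
	 * if the buckets referenced 100 value extent records and
	 * ocfs2_refcount_recs_per_rb() returned 84, the rounding above
	 * would budget DIV_ROUND_UP(100, 84) * 2 = 4 new refcount blocks,
	 * i.e. 4 extra metadata blocks plus 4 * (1 +
	 * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS) journal credits, on top of
	 * the per-level credits added just above for an existing tree.
	 */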
   6791
    6792	/* Account for the xattr tree change. */
   6793	num_free_extents = ocfs2_num_free_extents(xt_et);
   6794	if (num_free_extents < 0) {
   6795		ret = num_free_extents;
   6796		mlog_errno(ret);
   6797		goto out;
   6798	}
   6799
   6800	if (num_free_extents < len)
   6801		metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
   6802
   6803	*credits += ocfs2_calc_extend_credits(osb->sb,
   6804					      xt_et->et_root_el);
   6805
   6806	if (metas.num_metas) {
   6807		ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
   6808							meta_ac);
   6809		if (ret) {
   6810			mlog_errno(ret);
   6811			goto out;
   6812		}
   6813	}
   6814
   6815	if (len) {
   6816		ret = ocfs2_reserve_clusters(osb, len, data_ac);
   6817		if (ret)
   6818			mlog_errno(ret);
   6819	}
   6820out:
   6821	if (ret) {
   6822		if (*meta_ac) {
   6823			ocfs2_free_alloc_context(*meta_ac);
   6824			*meta_ac = NULL;
   6825		}
   6826	}
   6827
   6828	return ret;
   6829}
   6830
   6831static int ocfs2_reflink_xattr_bucket(handle_t *handle,
   6832				u64 blkno, u64 new_blkno, u32 clusters,
   6833				u32 *cpos, int num_buckets,
   6834				struct ocfs2_alloc_context *meta_ac,
   6835				struct ocfs2_alloc_context *data_ac,
   6836				struct ocfs2_reflink_xattr_tree_args *args)
   6837{
   6838	int i, j, ret = 0;
   6839	struct super_block *sb = args->reflink->old_inode->i_sb;
   6840	int bpb = args->old_bucket->bu_blocks;
   6841	struct ocfs2_xattr_value_buf vb = {
   6842		.vb_access = ocfs2_journal_access,
   6843	};
   6844
   6845	for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
   6846		ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
   6847		if (ret) {
   6848			mlog_errno(ret);
   6849			break;
   6850		}
   6851
   6852		ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1);
   6853		if (ret) {
   6854			mlog_errno(ret);
   6855			break;
   6856		}
   6857
   6858		ret = ocfs2_xattr_bucket_journal_access(handle,
   6859						args->new_bucket,
   6860						OCFS2_JOURNAL_ACCESS_CREATE);
   6861		if (ret) {
   6862			mlog_errno(ret);
   6863			break;
   6864		}
   6865
   6866		for (j = 0; j < bpb; j++)
   6867			memcpy(bucket_block(args->new_bucket, j),
   6868			       bucket_block(args->old_bucket, j),
   6869			       sb->s_blocksize);
   6870
    6871		/*
    6872		 * Record the start cpos so that we can use it to initialize
    6873		 * our xattr tree; we also set xh_num_buckets for the new
    6874		 * bucket.
    6875		 */
   6876		if (i == 0) {
   6877			*cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
   6878					    xh_entries[0].xe_name_hash);
   6879			bucket_xh(args->new_bucket)->xh_num_buckets =
   6880				cpu_to_le16(num_buckets);
   6881		}
   6882
   6883		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
   6884
   6885		ret = ocfs2_reflink_xattr_header(handle, args->reflink,
   6886					args->old_bucket->bu_bhs[0],
   6887					bucket_xh(args->old_bucket),
   6888					args->new_bucket->bu_bhs[0],
   6889					bucket_xh(args->new_bucket),
   6890					&vb, meta_ac,
   6891					ocfs2_get_reflink_xattr_value_root,
   6892					args);
   6893		if (ret) {
   6894			mlog_errno(ret);
   6895			break;
   6896		}
   6897
    6898		/*
    6899		 * Re-access and dirty the bucket to recalculate metaecc, because
    6900		 * we may have extended the transaction in reflink_xattr_header,
    6901		 * which invalidates the journal access done earlier.
    6902		 */
   6903		ret = ocfs2_xattr_bucket_journal_access(handle,
   6904						args->new_bucket,
   6905						OCFS2_JOURNAL_ACCESS_WRITE);
   6906		if (ret) {
   6907			mlog_errno(ret);
   6908			break;
   6909		}
   6910
   6911		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
   6912
   6913		ocfs2_xattr_bucket_relse(args->old_bucket);
   6914		ocfs2_xattr_bucket_relse(args->new_bucket);
   6915	}
   6916
   6917	ocfs2_xattr_bucket_relse(args->old_bucket);
   6918	ocfs2_xattr_bucket_relse(args->new_bucket);
   6919	return ret;
   6920}
   6921
   6922static int ocfs2_reflink_xattr_buckets(handle_t *handle,
   6923				struct inode *inode,
   6924				struct ocfs2_reflink_xattr_tree_args *args,
   6925				struct ocfs2_extent_tree *et,
   6926				struct ocfs2_alloc_context *meta_ac,
   6927				struct ocfs2_alloc_context *data_ac,
   6928				u64 blkno, u32 cpos, u32 len)
   6929{
   6930	int ret, first_inserted = 0;
   6931	u32 p_cluster, num_clusters, reflink_cpos = 0;
   6932	u64 new_blkno;
   6933	unsigned int num_buckets, reflink_buckets;
   6934	unsigned int bpc =
   6935		ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
   6936
   6937	ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
   6938	if (ret) {
   6939		mlog_errno(ret);
   6940		goto out;
   6941	}
   6942	num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
   6943	ocfs2_xattr_bucket_relse(args->old_bucket);
   6944
   6945	while (len && num_buckets) {
   6946		ret = ocfs2_claim_clusters(handle, data_ac,
   6947					   1, &p_cluster, &num_clusters);
   6948		if (ret) {
   6949			mlog_errno(ret);
   6950			goto out;
   6951		}
   6952
   6953		new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
   6954		reflink_buckets = min(num_buckets, bpc * num_clusters);
   6955
   6956		ret = ocfs2_reflink_xattr_bucket(handle, blkno,
   6957						 new_blkno, num_clusters,
   6958						 &reflink_cpos, reflink_buckets,
   6959						 meta_ac, data_ac, args);
   6960		if (ret) {
   6961			mlog_errno(ret);
   6962			goto out;
   6963		}
   6964
    6965		/*
    6966		 * For the 1st allocated cluster, we make it use the same cpos
    6967		 * so that the xattr tree looks the same as the original one
    6968		 * in most cases.
    6969		 */
   6970		if (!first_inserted) {
   6971			reflink_cpos = cpos;
   6972			first_inserted = 1;
   6973		}
   6974		ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
   6975					  num_clusters, 0, meta_ac);
   6976		if (ret)
   6977			mlog_errno(ret);
   6978
   6979		trace_ocfs2_reflink_xattr_buckets((unsigned long long)new_blkno,
   6980						  num_clusters, reflink_cpos);
   6981
   6982		len -= num_clusters;
   6983		blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
   6984		num_buckets -= reflink_buckets;
   6985	}
   6986out:
   6987	return ret;
   6988}
   6989
   6990/*
   6991 * Create the same xattr extent record in the new inode's xattr tree.
   6992 */
   6993static int ocfs2_reflink_xattr_rec(struct inode *inode,
   6994				   struct buffer_head *root_bh,
   6995				   u64 blkno,
   6996				   u32 cpos,
   6997				   u32 len,
   6998				   void *para)
   6999{
   7000	int ret, credits = 0;
   7001	handle_t *handle;
   7002	struct ocfs2_reflink_xattr_tree_args *args =
   7003			(struct ocfs2_reflink_xattr_tree_args *)para;
   7004	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   7005	struct ocfs2_alloc_context *meta_ac = NULL;
   7006	struct ocfs2_alloc_context *data_ac = NULL;
   7007	struct ocfs2_extent_tree et;
   7008
   7009	trace_ocfs2_reflink_xattr_rec((unsigned long long)blkno, len);
   7010
   7011	ocfs2_init_xattr_tree_extent_tree(&et,
   7012					  INODE_CACHE(args->reflink->new_inode),
   7013					  args->new_blk_bh);
   7014
   7015	ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
   7016						      len, &credits,
   7017						      &meta_ac, &data_ac);
   7018	if (ret) {
   7019		mlog_errno(ret);
   7020		goto out;
   7021	}
   7022
   7023	handle = ocfs2_start_trans(osb, credits);
   7024	if (IS_ERR(handle)) {
   7025		ret = PTR_ERR(handle);
   7026		mlog_errno(ret);
   7027		goto out;
   7028	}
   7029
   7030	ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
   7031					  meta_ac, data_ac,
   7032					  blkno, cpos, len);
   7033	if (ret)
   7034		mlog_errno(ret);
   7035
   7036	ocfs2_commit_trans(osb, handle);
   7037
   7038out:
   7039	if (meta_ac)
   7040		ocfs2_free_alloc_context(meta_ac);
   7041	if (data_ac)
   7042		ocfs2_free_alloc_context(data_ac);
   7043	return ret;
   7044}
   7045
    7046/*
    7047 * Create reflinked xattr buckets.
    7048 * We add the buckets one by one, and take a refcount on every xattr in a
    7049 * bucket whose value is stored outside of it.
    7050 */
   7051static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
   7052				    struct buffer_head *blk_bh,
   7053				    struct buffer_head *new_blk_bh)
   7054{
   7055	int ret;
   7056	struct ocfs2_reflink_xattr_tree_args para;
   7057
   7058	memset(&para, 0, sizeof(para));
   7059	para.reflink = args;
   7060	para.old_blk_bh = blk_bh;
   7061	para.new_blk_bh = new_blk_bh;
   7062
   7063	para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
   7064	if (!para.old_bucket) {
   7065		mlog_errno(-ENOMEM);
   7066		return -ENOMEM;
   7067	}
   7068
   7069	para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
   7070	if (!para.new_bucket) {
   7071		ret = -ENOMEM;
   7072		mlog_errno(ret);
   7073		goto out;
   7074	}
   7075
   7076	ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
   7077					      ocfs2_reflink_xattr_rec,
   7078					      &para);
   7079	if (ret)
   7080		mlog_errno(ret);
   7081
   7082out:
   7083	ocfs2_xattr_bucket_free(para.old_bucket);
   7084	ocfs2_xattr_bucket_free(para.new_bucket);
   7085	return ret;
   7086}
   7087
   7088static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
   7089					struct buffer_head *blk_bh)
   7090{
   7091	int ret, indexed = 0;
   7092	struct buffer_head *new_blk_bh = NULL;
   7093	struct ocfs2_xattr_block *xb =
   7094			(struct ocfs2_xattr_block *)blk_bh->b_data;
   7095
   7096
   7097	if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
   7098		indexed = 1;
   7099
   7100	ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
   7101					     &new_blk_bh, indexed);
   7102	if (ret) {
   7103		mlog_errno(ret);
   7104		goto out;
   7105	}
   7106
   7107	if (!indexed)
   7108		ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
   7109	else
   7110		ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
   7111	if (ret)
   7112		mlog_errno(ret);
   7113
   7114out:
   7115	brelse(new_blk_bh);
   7116	return ret;
   7117}
   7118
   7119static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
   7120{
   7121	int type = ocfs2_xattr_get_type(xe);
   7122
   7123	return type != OCFS2_XATTR_INDEX_SECURITY &&
   7124	       type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
   7125	       type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
   7126}
   7127
   7128int ocfs2_reflink_xattrs(struct inode *old_inode,
   7129			 struct buffer_head *old_bh,
   7130			 struct inode *new_inode,
   7131			 struct buffer_head *new_bh,
   7132			 bool preserve_security)
   7133{
   7134	int ret;
   7135	struct ocfs2_xattr_reflink args;
   7136	struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
   7137	struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
   7138	struct buffer_head *blk_bh = NULL;
   7139	struct ocfs2_cached_dealloc_ctxt dealloc;
   7140	struct ocfs2_refcount_tree *ref_tree;
   7141	struct buffer_head *ref_root_bh = NULL;
   7142
   7143	ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
   7144				       le64_to_cpu(di->i_refcount_loc),
   7145				       1, &ref_tree, &ref_root_bh);
   7146	if (ret) {
   7147		mlog_errno(ret);
   7148		goto out;
   7149	}
   7150
   7151	ocfs2_init_dealloc_ctxt(&dealloc);
   7152
   7153	args.old_inode = old_inode;
   7154	args.new_inode = new_inode;
   7155	args.old_bh = old_bh;
   7156	args.new_bh = new_bh;
   7157	args.ref_ci = &ref_tree->rf_ci;
   7158	args.ref_root_bh = ref_root_bh;
   7159	args.dealloc = &dealloc;
   7160	if (preserve_security)
   7161		args.xattr_reflinked = NULL;
   7162	else
   7163		args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
   7164
   7165	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
   7166		ret = ocfs2_reflink_xattr_inline(&args);
   7167		if (ret) {
   7168			mlog_errno(ret);
   7169			goto out_unlock;
   7170		}
   7171	}
   7172
   7173	if (!di->i_xattr_loc)
   7174		goto out_unlock;
   7175
   7176	ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
   7177				     &blk_bh);
   7178	if (ret < 0) {
   7179		mlog_errno(ret);
   7180		goto out_unlock;
   7181	}
   7182
   7183	ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
   7184	if (ret)
   7185		mlog_errno(ret);
   7186
   7187	brelse(blk_bh);
   7188
   7189out_unlock:
   7190	ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
   7191				   ref_tree, 1);
   7192	brelse(ref_root_bh);
   7193
   7194	if (ocfs2_dealloc_has_cluster(&dealloc)) {
   7195		ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
   7196		ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
   7197	}
   7198
   7199out:
   7200	return ret;
   7201}
   7202
    7203/*
    7204 * Initialize security and acl for an already created inode.
    7205 * Used when reflinking a file without preserving its security attributes.
    7206 *
    7207 * It uses common APIs such as ocfs2_xattr_set, so the caller
    7208 * must not hold any lock except i_rwsem.
    7209 */
   7210int ocfs2_init_security_and_acl(struct inode *dir,
   7211				struct inode *inode,
   7212				const struct qstr *qstr)
   7213{
   7214	int ret = 0;
   7215	struct buffer_head *dir_bh = NULL;
   7216
   7217	ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
   7218	if (ret) {
   7219		mlog_errno(ret);
   7220		goto leave;
   7221	}
   7222
   7223	ret = ocfs2_inode_lock(dir, &dir_bh, 0);
   7224	if (ret) {
   7225		mlog_errno(ret);
   7226		goto leave;
   7227	}
   7228	ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
   7229	if (ret)
   7230		mlog_errno(ret);
   7231
   7232	ocfs2_inode_unlock(dir, 0);
   7233	brelse(dir_bh);
   7234leave:
   7235	return ret;
   7236}
   7237
   7238/*
   7239 * 'security' attributes support
   7240 */
   7241static int ocfs2_xattr_security_get(const struct xattr_handler *handler,
   7242				    struct dentry *unused, struct inode *inode,
   7243				    const char *name, void *buffer, size_t size)
   7244{
   7245	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY,
   7246			       name, buffer, size);
   7247}
   7248
   7249static int ocfs2_xattr_security_set(const struct xattr_handler *handler,
   7250				    struct user_namespace *mnt_userns,
   7251				    struct dentry *unused, struct inode *inode,
   7252				    const char *name, const void *value,
   7253				    size_t size, int flags)
   7254{
   7255	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
   7256			       name, value, size, flags);
   7257}
   7258
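/*
 * Callback for security_inode_init_security().  The names in @xattr_array
 * are expected to arrive without the "security." prefix (for example,
 * SELinux typically passes just "selinux"), so pairing them with
 * OCFS2_XATTR_INDEX_SECURITY below reproduces the full security.<name>
 * attribute seen by userspace.
 */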
   7259static int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
   7260		     void *fs_info)
   7261{
   7262	const struct xattr *xattr;
   7263	int err = 0;
   7264
   7265	for (xattr = xattr_array; xattr->name != NULL; xattr++) {
   7266		err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
   7267				      xattr->name, xattr->value,
   7268				      xattr->value_len, XATTR_CREATE);
   7269		if (err)
   7270			break;
   7271	}
   7272	return err;
   7273}
   7274
   7275int ocfs2_init_security_get(struct inode *inode,
   7276			    struct inode *dir,
   7277			    const struct qstr *qstr,
   7278			    struct ocfs2_security_xattr_info *si)
   7279{
    7280	/* check whether ocfs2 supports the xattr feature */
   7281	if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
   7282		return -EOPNOTSUPP;
   7283	if (si)
   7284		return security_old_inode_init_security(inode, dir, qstr,
   7285							&si->name, &si->value,
   7286							&si->value_len);
   7287
   7288	return security_inode_init_security(inode, dir, qstr,
   7289					    &ocfs2_initxattrs, NULL);
   7290}
   7291
   7292int ocfs2_init_security_set(handle_t *handle,
   7293			    struct inode *inode,
   7294			    struct buffer_head *di_bh,
   7295			    struct ocfs2_security_xattr_info *si,
   7296			    struct ocfs2_alloc_context *xattr_ac,
   7297			    struct ocfs2_alloc_context *data_ac)
   7298{
   7299	return ocfs2_xattr_set_handle(handle, inode, di_bh,
   7300				     OCFS2_XATTR_INDEX_SECURITY,
   7301				     si->name, si->value, si->value_len, 0,
   7302				     xattr_ac, data_ac);
   7303}
   7304
   7305const struct xattr_handler ocfs2_xattr_security_handler = {
   7306	.prefix	= XATTR_SECURITY_PREFIX,
   7307	.get	= ocfs2_xattr_security_get,
   7308	.set	= ocfs2_xattr_security_set,
   7309};
   7310
   7311/*
   7312 * 'trusted' attributes support
   7313 */
   7314static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler,
   7315				   struct dentry *unused, struct inode *inode,
   7316				   const char *name, void *buffer, size_t size)
   7317{
   7318	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED,
   7319			       name, buffer, size);
   7320}
   7321
   7322static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler,
   7323				   struct user_namespace *mnt_userns,
   7324				   struct dentry *unused, struct inode *inode,
   7325				   const char *name, const void *value,
   7326				   size_t size, int flags)
   7327{
   7328	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED,
   7329			       name, value, size, flags);
   7330}
   7331
   7332const struct xattr_handler ocfs2_xattr_trusted_handler = {
   7333	.prefix	= XATTR_TRUSTED_PREFIX,
   7334	.get	= ocfs2_xattr_trusted_get,
   7335	.set	= ocfs2_xattr_trusted_set,
   7336};
   7337
   7338/*
   7339 * 'user' attributes support
   7340 */
   7341static int ocfs2_xattr_user_get(const struct xattr_handler *handler,
   7342				struct dentry *unused, struct inode *inode,
   7343				const char *name, void *buffer, size_t size)
   7344{
   7345	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   7346
   7347	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
   7348		return -EOPNOTSUPP;
   7349	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
   7350			       buffer, size);
   7351}
   7352
   7353static int ocfs2_xattr_user_set(const struct xattr_handler *handler,
   7354				struct user_namespace *mnt_userns,
   7355				struct dentry *unused, struct inode *inode,
   7356				const char *name, const void *value,
   7357				size_t size, int flags)
   7358{
   7359	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
   7360
   7361	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
   7362		return -EOPNOTSUPP;
   7363
   7364	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER,
   7365			       name, value, size, flags);
   7366}
   7367
   7368const struct xattr_handler ocfs2_xattr_user_handler = {
   7369	.prefix	= XATTR_USER_PREFIX,
   7370	.get	= ocfs2_xattr_user_get,
   7371	.set	= ocfs2_xattr_user_set,
   7372};
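
/*
 * Illustrative userspace view (an example, not part of this file): with the
 * three prefix handlers above registered, a call such as
 *
 *	setxattr("/mnt/ocfs2/file", "user.foo", "bar", 3, 0);
 *
 * is dispatched by the VFS on its "user." prefix to ocfs2_xattr_user_set(),
 * which returns -EOPNOTSUPP when user xattrs are disabled at mount time
 * (OCFS2_MOUNT_NOUSERXATTR) and otherwise stores the value under
 * OCFS2_XATTR_INDEX_USER.  "/mnt/ocfs2/file" is a made-up path.
 */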