cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

resize.c (63115B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 *  linux/fs/ext4/resize.c
      4 *
      5 * Support for resizing an ext4 filesystem while it is mounted.
      6 *
      7 * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
      8 *
      9 * This could probably be made into a module, because it is not often in use.
     10 */
     11
     12
     13#define EXT4FS_DEBUG
     14
     15#include <linux/errno.h>
     16#include <linux/slab.h>
     17#include <linux/jiffies.h>
     18
     19#include "ext4_jbd2.h"
     20
/*
 * Tracking node that lets a pointer be released from an RCU callback:
 * the node is queued with call_rcu() through @rcu and the callback then
 * frees @ptr.
 */
struct ext4_rcu_ptr {
	struct rcu_head rcu;	/* embedded head; container_of() recovers the node */
	void *ptr;		/* memory to release with kvfree() in the callback */
};
     25
     26static void ext4_rcu_ptr_callback(struct rcu_head *head)
     27{
     28	struct ext4_rcu_ptr *ptr;
     29
     30	ptr = container_of(head, struct ext4_rcu_ptr, rcu);
     31	kvfree(ptr->ptr);
     32	kfree(ptr);
     33}
     34
     35void ext4_kvfree_array_rcu(void *to_free)
     36{
     37	struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
     38
     39	if (ptr) {
     40		ptr->ptr = to_free;
     41		call_rcu(&ptr->rcu, ext4_rcu_ptr_callback);
     42		return;
     43	}
     44	synchronize_rcu();
     45	kvfree(to_free);
     46}
     47
     48int ext4_resize_begin(struct super_block *sb)
     49{
     50	struct ext4_sb_info *sbi = EXT4_SB(sb);
     51	int ret = 0;
     52
     53	if (!capable(CAP_SYS_RESOURCE))
     54		return -EPERM;
     55
     56	/*
     57	 * If the reserved GDT blocks is non-zero, the resize_inode feature
     58	 * should always be set.
     59	 */
     60	if (EXT4_SB(sb)->s_es->s_reserved_gdt_blocks &&
     61	    !ext4_has_feature_resize_inode(sb)) {
     62		ext4_error(sb, "resize_inode disabled but reserved GDT blocks non-zero");
     63		return -EFSCORRUPTED;
     64	}
     65
     66	/*
     67	 * If we are not using the primary superblock/GDT copy don't resize,
     68         * because the user tools have no way of handling this.  Probably a
     69         * bad time to do it anyways.
     70         */
     71	if (EXT4_B2C(sbi, sbi->s_sbh->b_blocknr) !=
     72	    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
     73		ext4_warning(sb, "won't resize using backup superblock at %llu",
     74			(unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
     75		return -EPERM;
     76	}
     77
     78	/*
     79	 * We are not allowed to do online-resizing on a filesystem mounted
     80	 * with error, because it can destroy the filesystem easily.
     81	 */
     82	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
     83		ext4_warning(sb, "There are errors in the filesystem, "
     84			     "so online resizing is not allowed");
     85		return -EPERM;
     86	}
     87
     88	if (ext4_has_feature_sparse_super2(sb)) {
     89		ext4_msg(sb, KERN_ERR, "Online resizing not supported with sparse_super2");
     90		return -EOPNOTSUPP;
     91	}
     92
     93	if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING,
     94				  &EXT4_SB(sb)->s_ext4_flags))
     95		ret = -EBUSY;
     96
     97	return ret;
     98}
     99
    100void ext4_resize_end(struct super_block *sb)
    101{
    102	clear_bit_unlock(EXT4_FLAGS_RESIZING, &EXT4_SB(sb)->s_ext4_flags);
    103	smp_mb__after_atomic();
    104}
    105
    106static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb,
    107					     ext4_group_t group) {
    108	return (group >> EXT4_DESC_PER_BLOCK_BITS(sb)) <<
    109	       EXT4_DESC_PER_BLOCK_BITS(sb);
    110}
    111
    112static ext4_fsblk_t ext4_meta_bg_first_block_no(struct super_block *sb,
    113					     ext4_group_t group) {
    114	group = ext4_meta_bg_first_group(sb, group);
    115	return ext4_group_first_block_no(sb, group);
    116}
    117
    118static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb,
    119						ext4_group_t group) {
    120	ext4_grpblk_t overhead;
    121	overhead = ext4_bg_num_gdb(sb, group);
    122	if (ext4_bg_has_super(sb, group))
    123		overhead += 1 +
    124			  le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
    125	return overhead;
    126}
    127
    128#define outside(b, first, last)	((b) < (first) || (b) >= (last))
    129#define inside(b, first, last)	((b) >= (first) && (b) < (last))
    130
    131static int verify_group_input(struct super_block *sb,
    132			      struct ext4_new_group_data *input)
    133{
    134	struct ext4_sb_info *sbi = EXT4_SB(sb);
    135	struct ext4_super_block *es = sbi->s_es;
    136	ext4_fsblk_t start = ext4_blocks_count(es);
    137	ext4_fsblk_t end = start + input->blocks_count;
    138	ext4_group_t group = input->group;
    139	ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
    140	unsigned overhead;
    141	ext4_fsblk_t metaend;
    142	struct buffer_head *bh = NULL;
    143	ext4_grpblk_t free_blocks_count, offset;
    144	int err = -EINVAL;
    145
    146	if (group != sbi->s_groups_count) {
    147		ext4_warning(sb, "Cannot add at group %u (only %u groups)",
    148			     input->group, sbi->s_groups_count);
    149		return -EINVAL;
    150	}
    151
    152	overhead = ext4_group_overhead_blocks(sb, group);
    153	metaend = start + overhead;
    154	input->free_clusters_count = free_blocks_count =
    155		input->blocks_count - 2 - overhead - sbi->s_itb_per_group;
    156
    157	if (test_opt(sb, DEBUG))
    158		printk(KERN_DEBUG "EXT4-fs: adding %s group %u: %u blocks "
    159		       "(%d free, %u reserved)\n",
    160		       ext4_bg_has_super(sb, input->group) ? "normal" :
    161		       "no-super", input->group, input->blocks_count,
    162		       free_blocks_count, input->reserved_blocks);
    163
    164	ext4_get_group_no_and_offset(sb, start, NULL, &offset);
    165	if (offset != 0)
    166			ext4_warning(sb, "Last group not full");
    167	else if (input->reserved_blocks > input->blocks_count / 5)
    168		ext4_warning(sb, "Reserved blocks too high (%u)",
    169			     input->reserved_blocks);
    170	else if (free_blocks_count < 0)
    171		ext4_warning(sb, "Bad blocks count %u",
    172			     input->blocks_count);
    173	else if (IS_ERR(bh = ext4_sb_bread(sb, end - 1, 0))) {
    174		err = PTR_ERR(bh);
    175		bh = NULL;
    176		ext4_warning(sb, "Cannot read last block (%llu)",
    177			     end - 1);
    178	} else if (outside(input->block_bitmap, start, end))
    179		ext4_warning(sb, "Block bitmap not in group (block %llu)",
    180			     (unsigned long long)input->block_bitmap);
    181	else if (outside(input->inode_bitmap, start, end))
    182		ext4_warning(sb, "Inode bitmap not in group (block %llu)",
    183			     (unsigned long long)input->inode_bitmap);
    184	else if (outside(input->inode_table, start, end) ||
    185		 outside(itend - 1, start, end))
    186		ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)",
    187			     (unsigned long long)input->inode_table, itend - 1);
    188	else if (input->inode_bitmap == input->block_bitmap)
    189		ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)",
    190			     (unsigned long long)input->block_bitmap);
    191	else if (inside(input->block_bitmap, input->inode_table, itend))
    192		ext4_warning(sb, "Block bitmap (%llu) in inode table "
    193			     "(%llu-%llu)",
    194			     (unsigned long long)input->block_bitmap,
    195			     (unsigned long long)input->inode_table, itend - 1);
    196	else if (inside(input->inode_bitmap, input->inode_table, itend))
    197		ext4_warning(sb, "Inode bitmap (%llu) in inode table "
    198			     "(%llu-%llu)",
    199			     (unsigned long long)input->inode_bitmap,
    200			     (unsigned long long)input->inode_table, itend - 1);
    201	else if (inside(input->block_bitmap, start, metaend))
    202		ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)",
    203			     (unsigned long long)input->block_bitmap,
    204			     start, metaend - 1);
    205	else if (inside(input->inode_bitmap, start, metaend))
    206		ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)",
    207			     (unsigned long long)input->inode_bitmap,
    208			     start, metaend - 1);
    209	else if (inside(input->inode_table, start, metaend) ||
    210		 inside(itend - 1, start, metaend))
    211		ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table "
    212			     "(%llu-%llu)",
    213			     (unsigned long long)input->inode_table,
    214			     itend - 1, start, metaend - 1);
    215	else
    216		err = 0;
    217	brelse(bh);
    218
    219	return err;
    220}
    221
/*
 * ext4_new_flex_group_data is used by the 64bit-resize interface to add one
 * flex group at a time.  @groups and @bg_flags are both indexed by a
 * group's position within the set of groups being added.
 */
struct ext4_new_flex_group_data {
	struct ext4_new_group_data *groups;	/* new_group_data for groups
						   in the flex group */
	__u16 *bg_flags;			/* block group flags of groups
						   in @groups */
	ext4_group_t count;			/* number of groups in @groups
						 */
};
    234
    235/*
    236 * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of
    237 * @flexbg_size.
    238 *
    239 * Returns NULL on failure otherwise address of the allocated structure.
    240 */
    241static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
    242{
    243	struct ext4_new_flex_group_data *flex_gd;
    244
    245	flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS);
    246	if (flex_gd == NULL)
    247		goto out3;
    248
    249	if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
    250		goto out2;
    251	flex_gd->count = flexbg_size;
    252
    253	flex_gd->groups = kmalloc_array(flexbg_size,
    254					sizeof(struct ext4_new_group_data),
    255					GFP_NOFS);
    256	if (flex_gd->groups == NULL)
    257		goto out2;
    258
    259	flex_gd->bg_flags = kmalloc_array(flexbg_size, sizeof(__u16),
    260					  GFP_NOFS);
    261	if (flex_gd->bg_flags == NULL)
    262		goto out1;
    263
    264	return flex_gd;
    265
    266out1:
    267	kfree(flex_gd->groups);
    268out2:
    269	kfree(flex_gd);
    270out3:
    271	return NULL;
    272}
    273
    274static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
    275{
    276	kfree(flex_gd->bg_flags);
    277	kfree(flex_gd->groups);
    278	kfree(flex_gd);
    279}
    280
    281/*
    282 * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps
    283 * and inode tables for a flex group.
    284 *
    285 * This function is used by 64bit-resize.  Note that this function allocates
    286 * group tables from the 1st group of groups contained by @flexgd, which may
    287 * be a partial of a flex group.
    288 *
    289 * @sb: super block of fs to which the groups belongs
    290 *
    291 * Returns 0 on a successful allocation of the metadata blocks in the
    292 * block group.
    293 */
    294static int ext4_alloc_group_tables(struct super_block *sb,
    295				struct ext4_new_flex_group_data *flex_gd,
    296				int flexbg_size)
    297{
    298	struct ext4_new_group_data *group_data = flex_gd->groups;
    299	ext4_fsblk_t start_blk;
    300	ext4_fsblk_t last_blk;
    301	ext4_group_t src_group;
    302	ext4_group_t bb_index = 0;
    303	ext4_group_t ib_index = 0;
    304	ext4_group_t it_index = 0;
    305	ext4_group_t group;
    306	ext4_group_t last_group;
    307	unsigned overhead;
    308	__u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0;
    309	int i;
    310
    311	BUG_ON(flex_gd->count == 0 || group_data == NULL);
    312
    313	src_group = group_data[0].group;
    314	last_group  = src_group + flex_gd->count - 1;
    315
    316	BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) !=
    317	       (last_group & ~(flexbg_size - 1))));
    318next_group:
    319	group = group_data[0].group;
    320	if (src_group >= group_data[0].group + flex_gd->count)
    321		return -ENOSPC;
    322	start_blk = ext4_group_first_block_no(sb, src_group);
    323	last_blk = start_blk + group_data[src_group - group].blocks_count;
    324
    325	overhead = ext4_group_overhead_blocks(sb, src_group);
    326
    327	start_blk += overhead;
    328
    329	/* We collect contiguous blocks as much as possible. */
    330	src_group++;
    331	for (; src_group <= last_group; src_group++) {
    332		overhead = ext4_group_overhead_blocks(sb, src_group);
    333		if (overhead == 0)
    334			last_blk += group_data[src_group - group].blocks_count;
    335		else
    336			break;
    337	}
    338
    339	/* Allocate block bitmaps */
    340	for (; bb_index < flex_gd->count; bb_index++) {
    341		if (start_blk >= last_blk)
    342			goto next_group;
    343		group_data[bb_index].block_bitmap = start_blk++;
    344		group = ext4_get_group_number(sb, start_blk - 1);
    345		group -= group_data[0].group;
    346		group_data[group].mdata_blocks++;
    347		flex_gd->bg_flags[group] &= uninit_mask;
    348	}
    349
    350	/* Allocate inode bitmaps */
    351	for (; ib_index < flex_gd->count; ib_index++) {
    352		if (start_blk >= last_blk)
    353			goto next_group;
    354		group_data[ib_index].inode_bitmap = start_blk++;
    355		group = ext4_get_group_number(sb, start_blk - 1);
    356		group -= group_data[0].group;
    357		group_data[group].mdata_blocks++;
    358		flex_gd->bg_flags[group] &= uninit_mask;
    359	}
    360
    361	/* Allocate inode tables */
    362	for (; it_index < flex_gd->count; it_index++) {
    363		unsigned int itb = EXT4_SB(sb)->s_itb_per_group;
    364		ext4_fsblk_t next_group_start;
    365
    366		if (start_blk + itb > last_blk)
    367			goto next_group;
    368		group_data[it_index].inode_table = start_blk;
    369		group = ext4_get_group_number(sb, start_blk);
    370		next_group_start = ext4_group_first_block_no(sb, group + 1);
    371		group -= group_data[0].group;
    372
    373		if (start_blk + itb > next_group_start) {
    374			flex_gd->bg_flags[group + 1] &= uninit_mask;
    375			overhead = start_blk + itb - next_group_start;
    376			group_data[group + 1].mdata_blocks += overhead;
    377			itb -= overhead;
    378		}
    379
    380		group_data[group].mdata_blocks += itb;
    381		flex_gd->bg_flags[group] &= uninit_mask;
    382		start_blk += EXT4_SB(sb)->s_itb_per_group;
    383	}
    384
    385	/* Update free clusters count to exclude metadata blocks */
    386	for (i = 0; i < flex_gd->count; i++) {
    387		group_data[i].free_clusters_count -=
    388				EXT4_NUM_B2C(EXT4_SB(sb),
    389					     group_data[i].mdata_blocks);
    390	}
    391
    392	if (test_opt(sb, DEBUG)) {
    393		int i;
    394		group = group_data[0].group;
    395
    396		printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
    397		       "%d groups, flexbg size is %d:\n", flex_gd->count,
    398		       flexbg_size);
    399
    400		for (i = 0; i < flex_gd->count; i++) {
    401			ext4_debug(
    402			       "adding %s group %u: %u blocks (%d free, %d mdata blocks)\n",
    403			       ext4_bg_has_super(sb, group + i) ? "normal" :
    404			       "no-super", group + i,
    405			       group_data[i].blocks_count,
    406			       group_data[i].free_clusters_count,
    407			       group_data[i].mdata_blocks);
    408		}
    409	}
    410	return 0;
    411}
    412
    413static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
    414				  ext4_fsblk_t blk)
    415{
    416	struct buffer_head *bh;
    417	int err;
    418
    419	bh = sb_getblk(sb, blk);
    420	if (unlikely(!bh))
    421		return ERR_PTR(-ENOMEM);
    422	BUFFER_TRACE(bh, "get_write_access");
    423	err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
    424	if (err) {
    425		brelse(bh);
    426		bh = ERR_PTR(err);
    427	} else {
    428		memset(bh->b_data, 0, sb->s_blocksize);
    429		set_buffer_uptodate(bh);
    430	}
    431
    432	return bh;
    433}
    434
    435static int ext4_resize_ensure_credits_batch(handle_t *handle, int credits)
    436{
    437	return ext4_journal_ensure_credits_fn(handle, credits,
    438		EXT4_MAX_TRANS_DATA, 0, 0);
    439}
    440
    441/*
    442 * set_flexbg_block_bitmap() mark clusters [@first_cluster, @last_cluster] used.
    443 *
    444 * Helper function for ext4_setup_new_group_blocks() which set .
    445 *
    446 * @sb: super block
    447 * @handle: journal handle
    448 * @flex_gd: flex group data
    449 */
    450static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
    451			struct ext4_new_flex_group_data *flex_gd,
    452			ext4_fsblk_t first_cluster, ext4_fsblk_t last_cluster)
    453{
    454	struct ext4_sb_info *sbi = EXT4_SB(sb);
    455	ext4_group_t count = last_cluster - first_cluster + 1;
    456	ext4_group_t count2;
    457
    458	ext4_debug("mark clusters [%llu-%llu] used\n", first_cluster,
    459		   last_cluster);
    460	for (count2 = count; count > 0;
    461	     count -= count2, first_cluster += count2) {
    462		ext4_fsblk_t start;
    463		struct buffer_head *bh;
    464		ext4_group_t group;
    465		int err;
    466
    467		group = ext4_get_group_number(sb, EXT4_C2B(sbi, first_cluster));
    468		start = EXT4_B2C(sbi, ext4_group_first_block_no(sb, group));
    469		group -= flex_gd->groups[0].group;
    470
    471		count2 = EXT4_CLUSTERS_PER_GROUP(sb) - (first_cluster - start);
    472		if (count2 > count)
    473			count2 = count;
    474
    475		if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) {
    476			BUG_ON(flex_gd->count > 1);
    477			continue;
    478		}
    479
    480		err = ext4_resize_ensure_credits_batch(handle, 1);
    481		if (err < 0)
    482			return err;
    483
    484		bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
    485		if (unlikely(!bh))
    486			return -ENOMEM;
    487
    488		BUFFER_TRACE(bh, "get_write_access");
    489		err = ext4_journal_get_write_access(handle, sb, bh,
    490						    EXT4_JTR_NONE);
    491		if (err) {
    492			brelse(bh);
    493			return err;
    494		}
    495		ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n",
    496			   first_cluster, first_cluster - start, count2);
    497		mb_set_bits(bh->b_data, first_cluster - start, count2);
    498
    499		err = ext4_handle_dirty_metadata(handle, NULL, bh);
    500		brelse(bh);
    501		if (unlikely(err))
    502			return err;
    503	}
    504
    505	return 0;
    506}
    507
    508/*
    509 * Set up the block and inode bitmaps, and the inode table for the new groups.
    510 * This doesn't need to be part of the main transaction, since we are only
    511 * changing blocks outside the actual filesystem.  We still do journaling to
    512 * ensure the recovery is correct in case of a failure just after resize.
    513 * If any part of this fails, we simply abort the resize.
    514 *
    515 * setup_new_flex_group_blocks handles a flex group as follow:
    516 *  1. copy super block and GDT, and initialize group tables if necessary.
    517 *     In this step, we only set bits in blocks bitmaps for blocks taken by
    518 *     super block and GDT.
    519 *  2. allocate group tables in block bitmaps, that is, set bits in block
    520 *     bitmap for blocks taken by group tables.
    521 */
    522static int setup_new_flex_group_blocks(struct super_block *sb,
    523				struct ext4_new_flex_group_data *flex_gd)
    524{
    525	int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group};
    526	ext4_fsblk_t start;
    527	ext4_fsblk_t block;
    528	struct ext4_sb_info *sbi = EXT4_SB(sb);
    529	struct ext4_super_block *es = sbi->s_es;
    530	struct ext4_new_group_data *group_data = flex_gd->groups;
    531	__u16 *bg_flags = flex_gd->bg_flags;
    532	handle_t *handle;
    533	ext4_group_t group, count;
    534	struct buffer_head *bh = NULL;
    535	int reserved_gdb, i, j, err = 0, err2;
    536	int meta_bg;
    537
    538	BUG_ON(!flex_gd->count || !group_data ||
    539	       group_data[0].group != sbi->s_groups_count);
    540
    541	reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
    542	meta_bg = ext4_has_feature_meta_bg(sb);
    543
    544	/* This transaction may be extended/restarted along the way */
    545	handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA);
    546	if (IS_ERR(handle))
    547		return PTR_ERR(handle);
    548
    549	group = group_data[0].group;
    550	for (i = 0; i < flex_gd->count; i++, group++) {
    551		unsigned long gdblocks;
    552		ext4_grpblk_t overhead;
    553
    554		gdblocks = ext4_bg_num_gdb(sb, group);
    555		start = ext4_group_first_block_no(sb, group);
    556
    557		if (meta_bg == 0 && !ext4_bg_has_super(sb, group))
    558			goto handle_itb;
    559
    560		if (meta_bg == 1) {
    561			ext4_group_t first_group;
    562			first_group = ext4_meta_bg_first_group(sb, group);
    563			if (first_group != group + 1 &&
    564			    first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1)
    565				goto handle_itb;
    566		}
    567
    568		block = start + ext4_bg_has_super(sb, group);
    569		/* Copy all of the GDT blocks into the backup in this group */
    570		for (j = 0; j < gdblocks; j++, block++) {
    571			struct buffer_head *gdb;
    572
    573			ext4_debug("update backup group %#04llx\n", block);
    574			err = ext4_resize_ensure_credits_batch(handle, 1);
    575			if (err < 0)
    576				goto out;
    577
    578			gdb = sb_getblk(sb, block);
    579			if (unlikely(!gdb)) {
    580				err = -ENOMEM;
    581				goto out;
    582			}
    583
    584			BUFFER_TRACE(gdb, "get_write_access");
    585			err = ext4_journal_get_write_access(handle, sb, gdb,
    586							    EXT4_JTR_NONE);
    587			if (err) {
    588				brelse(gdb);
    589				goto out;
    590			}
    591			memcpy(gdb->b_data, sbi_array_rcu_deref(sbi,
    592				s_group_desc, j)->b_data, gdb->b_size);
    593			set_buffer_uptodate(gdb);
    594
    595			err = ext4_handle_dirty_metadata(handle, NULL, gdb);
    596			if (unlikely(err)) {
    597				brelse(gdb);
    598				goto out;
    599			}
    600			brelse(gdb);
    601		}
    602
    603		/* Zero out all of the reserved backup group descriptor
    604		 * table blocks
    605		 */
    606		if (ext4_bg_has_super(sb, group)) {
    607			err = sb_issue_zeroout(sb, gdblocks + start + 1,
    608					reserved_gdb, GFP_NOFS);
    609			if (err)
    610				goto out;
    611		}
    612
    613handle_itb:
    614		/* Initialize group tables of the grop @group */
    615		if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
    616			goto handle_bb;
    617
    618		/* Zero out all of the inode table blocks */
    619		block = group_data[i].inode_table;
    620		ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
    621			   block, sbi->s_itb_per_group);
    622		err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group,
    623				       GFP_NOFS);
    624		if (err)
    625			goto out;
    626
    627handle_bb:
    628		if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT)
    629			goto handle_ib;
    630
    631		/* Initialize block bitmap of the @group */
    632		block = group_data[i].block_bitmap;
    633		err = ext4_resize_ensure_credits_batch(handle, 1);
    634		if (err < 0)
    635			goto out;
    636
    637		bh = bclean(handle, sb, block);
    638		if (IS_ERR(bh)) {
    639			err = PTR_ERR(bh);
    640			goto out;
    641		}
    642		overhead = ext4_group_overhead_blocks(sb, group);
    643		if (overhead != 0) {
    644			ext4_debug("mark backup superblock %#04llx (+0)\n",
    645				   start);
    646			mb_set_bits(bh->b_data, 0,
    647				      EXT4_NUM_B2C(sbi, overhead));
    648		}
    649		ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count),
    650				     sb->s_blocksize * 8, bh->b_data);
    651		err = ext4_handle_dirty_metadata(handle, NULL, bh);
    652		brelse(bh);
    653		if (err)
    654			goto out;
    655
    656handle_ib:
    657		if (bg_flags[i] & EXT4_BG_INODE_UNINIT)
    658			continue;
    659
    660		/* Initialize inode bitmap of the @group */
    661		block = group_data[i].inode_bitmap;
    662		err = ext4_resize_ensure_credits_batch(handle, 1);
    663		if (err < 0)
    664			goto out;
    665		/* Mark unused entries in inode bitmap used */
    666		bh = bclean(handle, sb, block);
    667		if (IS_ERR(bh)) {
    668			err = PTR_ERR(bh);
    669			goto out;
    670		}
    671
    672		ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
    673				     sb->s_blocksize * 8, bh->b_data);
    674		err = ext4_handle_dirty_metadata(handle, NULL, bh);
    675		brelse(bh);
    676		if (err)
    677			goto out;
    678	}
    679
    680	/* Mark group tables in block bitmap */
    681	for (j = 0; j < GROUP_TABLE_COUNT; j++) {
    682		count = group_table_count[j];
    683		start = (&group_data[0].block_bitmap)[j];
    684		block = start;
    685		for (i = 1; i < flex_gd->count; i++) {
    686			block += group_table_count[j];
    687			if (block == (&group_data[i].block_bitmap)[j]) {
    688				count += group_table_count[j];
    689				continue;
    690			}
    691			err = set_flexbg_block_bitmap(sb, handle,
    692						      flex_gd,
    693						      EXT4_B2C(sbi, start),
    694						      EXT4_B2C(sbi,
    695							       start + count
    696							       - 1));
    697			if (err)
    698				goto out;
    699			count = group_table_count[j];
    700			start = (&group_data[i].block_bitmap)[j];
    701			block = start;
    702		}
    703
    704		if (count) {
    705			err = set_flexbg_block_bitmap(sb, handle,
    706						      flex_gd,
    707						      EXT4_B2C(sbi, start),
    708						      EXT4_B2C(sbi,
    709							       start + count
    710							       - 1));
    711			if (err)
    712				goto out;
    713		}
    714	}
    715
    716out:
    717	err2 = ext4_journal_stop(handle);
    718	if (err2 && !err)
    719		err = err2;
    720
    721	return err;
    722}
    723
    724/*
    725 * Iterate through the groups which hold BACKUP superblock/GDT copies in an
    726 * ext4 filesystem.  The counters should be initialized to 1, 5, and 7 before
    727 * calling this for the first time.  In a sparse filesystem it will be the
    728 * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
    729 * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
    730 */
    731unsigned int ext4_list_backups(struct super_block *sb, unsigned int *three,
    732			       unsigned int *five, unsigned int *seven)
    733{
    734	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
    735	unsigned int *min = three;
    736	int mult = 3;
    737	unsigned int ret;
    738
    739	if (ext4_has_feature_sparse_super2(sb)) {
    740		do {
    741			if (*min > 2)
    742				return UINT_MAX;
    743			ret = le32_to_cpu(es->s_backup_bgs[*min - 1]);
    744			*min += 1;
    745		} while (!ret);
    746		return ret;
    747	}
    748
    749	if (!ext4_has_feature_sparse_super(sb)) {
    750		ret = *min;
    751		*min += 1;
    752		return ret;
    753	}
    754
    755	if (*five < *min) {
    756		min = five;
    757		mult = 5;
    758	}
    759	if (*seven < *min) {
    760		min = seven;
    761		mult = 7;
    762	}
    763
    764	ret = *min;
    765	*min *= mult;
    766
    767	return ret;
    768}
    769
    770/*
    771 * Check that all of the backup GDT blocks are held in the primary GDT block.
    772 * It is assumed that they are stored in group order.  Returns the number of
    773 * groups in current filesystem that have BACKUPS, or -ve error code.
    774 */
    775static int verify_reserved_gdb(struct super_block *sb,
    776			       ext4_group_t end,
    777			       struct buffer_head *primary)
    778{
    779	const ext4_fsblk_t blk = primary->b_blocknr;
    780	unsigned three = 1;
    781	unsigned five = 5;
    782	unsigned seven = 7;
    783	unsigned grp;
    784	__le32 *p = (__le32 *)primary->b_data;
    785	int gdbackups = 0;
    786
    787	while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
    788		if (le32_to_cpu(*p++) !=
    789		    grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
    790			ext4_warning(sb, "reserved GDT %llu"
    791				     " missing grp %d (%llu)",
    792				     blk, grp,
    793				     grp *
    794				     (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
    795				     blk);
    796			return -EINVAL;
    797		}
    798		if (++gdbackups > EXT4_ADDR_PER_BLOCK(sb))
    799			return -EFBIG;
    800	}
    801
    802	return gdbackups;
    803}
    804
    805/*
    806 * Called when we need to bring a reserved group descriptor table block into
    807 * use from the resize inode.  The primary copy of the new GDT block currently
    808 * is an indirect block (under the double indirect block in the resize inode).
    809 * The new backup GDT blocks will be stored as leaf blocks in this indirect
    810 * block, in group order.  Even though we know all the block numbers we need,
    811 * we check to ensure that the resize inode has actually reserved these blocks.
    812 *
    813 * Don't need to update the block bitmaps because the blocks are still in use.
    814 *
    815 * We get all of the error cases out of the way, so that we are sure to not
    816 * fail once we start modifying the data on disk, because JBD has no rollback.
    817 */
    818static int add_new_gdb(handle_t *handle, struct inode *inode,
    819		       ext4_group_t group)
    820{
    821	struct super_block *sb = inode->i_sb;
    822	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
    823	unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
    824	ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
    825	struct buffer_head **o_group_desc, **n_group_desc = NULL;
    826	struct buffer_head *dind = NULL;
    827	struct buffer_head *gdb_bh = NULL;
    828	int gdbackups;
    829	struct ext4_iloc iloc = { .bh = NULL };
    830	__le32 *data;
    831	int err;
    832
    833	if (test_opt(sb, DEBUG))
    834		printk(KERN_DEBUG
    835		       "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
    836		       gdb_num);
    837
    838	gdb_bh = ext4_sb_bread(sb, gdblock, 0);
    839	if (IS_ERR(gdb_bh))
    840		return PTR_ERR(gdb_bh);
    841
    842	gdbackups = verify_reserved_gdb(sb, group, gdb_bh);
    843	if (gdbackups < 0) {
    844		err = gdbackups;
    845		goto errout;
    846	}
    847
    848	data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
    849	dind = ext4_sb_bread(sb, le32_to_cpu(*data), 0);
    850	if (IS_ERR(dind)) {
    851		err = PTR_ERR(dind);
    852		dind = NULL;
    853		goto errout;
    854	}
    855
    856	data = (__le32 *)dind->b_data;
    857	if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
    858		ext4_warning(sb, "new group %u GDT block %llu not reserved",
    859			     group, gdblock);
    860		err = -EINVAL;
    861		goto errout;
    862	}
    863
    864	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
    865	err = ext4_journal_get_write_access(handle, sb, EXT4_SB(sb)->s_sbh,
    866					    EXT4_JTR_NONE);
    867	if (unlikely(err))
    868		goto errout;
    869
    870	BUFFER_TRACE(gdb_bh, "get_write_access");
    871	err = ext4_journal_get_write_access(handle, sb, gdb_bh, EXT4_JTR_NONE);
    872	if (unlikely(err))
    873		goto errout;
    874
    875	BUFFER_TRACE(dind, "get_write_access");
    876	err = ext4_journal_get_write_access(handle, sb, dind, EXT4_JTR_NONE);
    877	if (unlikely(err)) {
    878		ext4_std_error(sb, err);
    879		goto errout;
    880	}
    881
    882	/* ext4_reserve_inode_write() gets a reference on the iloc */
    883	err = ext4_reserve_inode_write(handle, inode, &iloc);
    884	if (unlikely(err))
    885		goto errout;
    886
    887	n_group_desc = kvmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
    888				GFP_KERNEL);
    889	if (!n_group_desc) {
    890		err = -ENOMEM;
    891		ext4_warning(sb, "not enough memory for %lu groups",
    892			     gdb_num + 1);
    893		goto errout;
    894	}
    895
    896	/*
    897	 * Finally, we have all of the possible failures behind us...
    898	 *
    899	 * Remove new GDT block from inode double-indirect block and clear out
    900	 * the new GDT block for use (which also "frees" the backup GDT blocks
    901	 * from the reserved inode).  We don't need to change the bitmaps for
    902	 * these blocks, because they are marked as in-use from being in the
    903	 * reserved inode, and will become GDT blocks (primary and backup).
    904	 */
    905	data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
    906	err = ext4_handle_dirty_metadata(handle, NULL, dind);
    907	if (unlikely(err)) {
    908		ext4_std_error(sb, err);
    909		goto errout;
    910	}
    911	inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >>
    912			   (9 - EXT4_SB(sb)->s_cluster_bits);
    913	ext4_mark_iloc_dirty(handle, inode, &iloc);
    914	memset(gdb_bh->b_data, 0, sb->s_blocksize);
    915	err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
    916	if (unlikely(err)) {
    917		ext4_std_error(sb, err);
    918		iloc.bh = NULL;
    919		goto errout;
    920	}
    921	brelse(dind);
    922
    923	rcu_read_lock();
    924	o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
    925	memcpy(n_group_desc, o_group_desc,
    926	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
    927	rcu_read_unlock();
    928	n_group_desc[gdb_num] = gdb_bh;
    929	rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
    930	EXT4_SB(sb)->s_gdb_count++;
    931	ext4_kvfree_array_rcu(o_group_desc);
    932
    933	lock_buffer(EXT4_SB(sb)->s_sbh);
    934	le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
    935	ext4_superblock_csum_set(sb);
    936	unlock_buffer(EXT4_SB(sb)->s_sbh);
    937	err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
    938	if (err)
    939		ext4_std_error(sb, err);
    940	return err;
    941errout:
    942	kvfree(n_group_desc);
    943	brelse(iloc.bh);
    944	brelse(dind);
    945	brelse(gdb_bh);
    946
    947	ext4_debug("leaving with error %d\n", err);
    948	return err;
    949}
    950
    951/*
    952 * add_new_gdb_meta_bg is the sister of add_new_gdb.
    953 */
    954static int add_new_gdb_meta_bg(struct super_block *sb,
    955			       handle_t *handle, ext4_group_t group) {
    956	ext4_fsblk_t gdblock;
    957	struct buffer_head *gdb_bh;
    958	struct buffer_head **o_group_desc, **n_group_desc;
    959	unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
    960	int err;
    961
    962	gdblock = ext4_meta_bg_first_block_no(sb, group) +
    963		   ext4_bg_has_super(sb, group);
    964	gdb_bh = ext4_sb_bread(sb, gdblock, 0);
    965	if (IS_ERR(gdb_bh))
    966		return PTR_ERR(gdb_bh);
    967	n_group_desc = kvmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
    968				GFP_KERNEL);
    969	if (!n_group_desc) {
    970		brelse(gdb_bh);
    971		err = -ENOMEM;
    972		ext4_warning(sb, "not enough memory for %lu groups",
    973			     gdb_num + 1);
    974		return err;
    975	}
    976
    977	rcu_read_lock();
    978	o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
    979	memcpy(n_group_desc, o_group_desc,
    980	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
    981	rcu_read_unlock();
    982	n_group_desc[gdb_num] = gdb_bh;
    983
    984	BUFFER_TRACE(gdb_bh, "get_write_access");
    985	err = ext4_journal_get_write_access(handle, sb, gdb_bh, EXT4_JTR_NONE);
    986	if (err) {
    987		kvfree(n_group_desc);
    988		brelse(gdb_bh);
    989		return err;
    990	}
    991
    992	rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
    993	EXT4_SB(sb)->s_gdb_count++;
    994	ext4_kvfree_array_rcu(o_group_desc);
    995	return err;
    996}
    997
    998/*
    999 * Called when we are adding a new group which has a backup copy of each of
   1000 * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
   1001 * We need to add these reserved backup GDT blocks to the resize inode, so
   1002 * that they are kept for future resizing and not allocated to files.
   1003 *
   1004 * Each reserved backup GDT block will go into a different indirect block.
   1005 * The indirect blocks are actually the primary reserved GDT blocks,
   1006 * so we know in advance what their block numbers are.  We only get the
   1007 * double-indirect block to verify it is pointing to the primary reserved
   1008 * GDT blocks so we don't overwrite a data block by accident.  The reserved
   1009 * backup GDT blocks are stored in their reserved primary GDT block.
   1010 */
   1011static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
   1012			      ext4_group_t group)
   1013{
   1014	struct super_block *sb = inode->i_sb;
   1015	int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
   1016	int cluster_bits = EXT4_SB(sb)->s_cluster_bits;
   1017	struct buffer_head **primary;
   1018	struct buffer_head *dind;
   1019	struct ext4_iloc iloc;
   1020	ext4_fsblk_t blk;
   1021	__le32 *data, *end;
   1022	int gdbackups = 0;
   1023	int res, i;
   1024	int err;
   1025
   1026	primary = kmalloc_array(reserved_gdb, sizeof(*primary), GFP_NOFS);
   1027	if (!primary)
   1028		return -ENOMEM;
   1029
   1030	data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK;
   1031	dind = ext4_sb_bread(sb, le32_to_cpu(*data), 0);
   1032	if (IS_ERR(dind)) {
   1033		err = PTR_ERR(dind);
   1034		dind = NULL;
   1035		goto exit_free;
   1036	}
   1037
   1038	blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count;
   1039	data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count %
   1040					 EXT4_ADDR_PER_BLOCK(sb));
   1041	end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb);
   1042
   1043	/* Get each reserved primary GDT block and verify it holds backups */
   1044	for (res = 0; res < reserved_gdb; res++, blk++) {
   1045		if (le32_to_cpu(*data) != blk) {
   1046			ext4_warning(sb, "reserved block %llu"
   1047				     " not at offset %ld",
   1048				     blk,
   1049				     (long)(data - (__le32 *)dind->b_data));
   1050			err = -EINVAL;
   1051			goto exit_bh;
   1052		}
   1053		primary[res] = ext4_sb_bread(sb, blk, 0);
   1054		if (IS_ERR(primary[res])) {
   1055			err = PTR_ERR(primary[res]);
   1056			primary[res] = NULL;
   1057			goto exit_bh;
   1058		}
   1059		gdbackups = verify_reserved_gdb(sb, group, primary[res]);
   1060		if (gdbackups < 0) {
   1061			brelse(primary[res]);
   1062			err = gdbackups;
   1063			goto exit_bh;
   1064		}
   1065		if (++data >= end)
   1066			data = (__le32 *)dind->b_data;
   1067	}
   1068
   1069	for (i = 0; i < reserved_gdb; i++) {
   1070		BUFFER_TRACE(primary[i], "get_write_access");
   1071		if ((err = ext4_journal_get_write_access(handle, sb, primary[i],
   1072							 EXT4_JTR_NONE)))
   1073			goto exit_bh;
   1074	}
   1075
   1076	if ((err = ext4_reserve_inode_write(handle, inode, &iloc)))
   1077		goto exit_bh;
   1078
   1079	/*
   1080	 * Finally we can add each of the reserved backup GDT blocks from
   1081	 * the new group to its reserved primary GDT block.
   1082	 */
   1083	blk = group * EXT4_BLOCKS_PER_GROUP(sb);
   1084	for (i = 0; i < reserved_gdb; i++) {
   1085		int err2;
   1086		data = (__le32 *)primary[i]->b_data;
   1087		/* printk("reserving backup %lu[%u] = %lu\n",
   1088		       primary[i]->b_blocknr, gdbackups,
   1089		       blk + primary[i]->b_blocknr); */
   1090		data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
   1091		err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]);
   1092		if (!err)
   1093			err = err2;
   1094	}
   1095
   1096	inode->i_blocks += reserved_gdb * sb->s_blocksize >> (9 - cluster_bits);
   1097	ext4_mark_iloc_dirty(handle, inode, &iloc);
   1098
   1099exit_bh:
   1100	while (--res >= 0)
   1101		brelse(primary[res]);
   1102	brelse(dind);
   1103
   1104exit_free:
   1105	kfree(primary);
   1106
   1107	return err;
   1108}
   1109
   1110/*
   1111 * Update the backup copies of the ext4 metadata.  These don't need to be part
   1112 * of the main resize transaction, because e2fsck will re-write them if there
   1113 * is a problem (basically only OOM will cause a problem).  However, we
   1114 * _should_ update the backups if possible, in case the primary gets trashed
   1115 * for some reason and we need to run e2fsck from a backup superblock.  The
   1116 * important part is that the new block and inode counts are in the backup
   1117 * superblocks, and the location of the new group metadata in the GDT backups.
   1118 *
   1119 * We do not need take the s_resize_lock for this, because these
   1120 * blocks are not otherwise touched by the filesystem code when it is
   1121 * mounted.  We don't need to worry about last changing from
   1122 * sbi->s_groups_count, because the worst that can happen is that we
   1123 * do not copy the full number of backups at this time.  The resize
   1124 * which changed s_groups_count will backup again.
   1125 */
   1126static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
   1127			   int size, int meta_bg)
   1128{
   1129	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1130	ext4_group_t last;
   1131	const int bpg = EXT4_BLOCKS_PER_GROUP(sb);
   1132	unsigned three = 1;
   1133	unsigned five = 5;
   1134	unsigned seven = 7;
   1135	ext4_group_t group = 0;
   1136	int rest = sb->s_blocksize - size;
   1137	handle_t *handle;
   1138	int err = 0, err2;
   1139
   1140	handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA);
   1141	if (IS_ERR(handle)) {
   1142		group = 1;
   1143		err = PTR_ERR(handle);
   1144		goto exit_err;
   1145	}
   1146
   1147	if (meta_bg == 0) {
   1148		group = ext4_list_backups(sb, &three, &five, &seven);
   1149		last = sbi->s_groups_count;
   1150	} else {
   1151		group = ext4_get_group_number(sb, blk_off) + 1;
   1152		last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2);
   1153	}
   1154
   1155	while (group < sbi->s_groups_count) {
   1156		struct buffer_head *bh;
   1157		ext4_fsblk_t backup_block;
   1158
   1159		/* Out of journal space, and can't get more - abort - so sad */
   1160		err = ext4_resize_ensure_credits_batch(handle, 1);
   1161		if (err < 0)
   1162			break;
   1163
   1164		if (meta_bg == 0)
   1165			backup_block = ((ext4_fsblk_t)group) * bpg + blk_off;
   1166		else
   1167			backup_block = (ext4_group_first_block_no(sb, group) +
   1168					ext4_bg_has_super(sb, group));
   1169
   1170		bh = sb_getblk(sb, backup_block);
   1171		if (unlikely(!bh)) {
   1172			err = -ENOMEM;
   1173			break;
   1174		}
   1175		ext4_debug("update metadata backup %llu(+%llu)\n",
   1176			   backup_block, backup_block -
   1177			   ext4_group_first_block_no(sb, group));
   1178		BUFFER_TRACE(bh, "get_write_access");
   1179		if ((err = ext4_journal_get_write_access(handle, sb, bh,
   1180							 EXT4_JTR_NONE)))
   1181			break;
   1182		lock_buffer(bh);
   1183		memcpy(bh->b_data, data, size);
   1184		if (rest)
   1185			memset(bh->b_data + size, 0, rest);
   1186		set_buffer_uptodate(bh);
   1187		unlock_buffer(bh);
   1188		err = ext4_handle_dirty_metadata(handle, NULL, bh);
   1189		if (unlikely(err))
   1190			ext4_std_error(sb, err);
   1191		brelse(bh);
   1192
   1193		if (meta_bg == 0)
   1194			group = ext4_list_backups(sb, &three, &five, &seven);
   1195		else if (group == last)
   1196			break;
   1197		else
   1198			group = last;
   1199	}
   1200	if ((err2 = ext4_journal_stop(handle)) && !err)
   1201		err = err2;
   1202
   1203	/*
   1204	 * Ugh! Need to have e2fsck write the backup copies.  It is too
   1205	 * late to revert the resize, we shouldn't fail just because of
   1206	 * the backup copies (they are only needed in case of corruption).
   1207	 *
   1208	 * However, if we got here we have a journal problem too, so we
   1209	 * can't really start a transaction to mark the superblock.
   1210	 * Chicken out and just set the flag on the hope it will be written
   1211	 * to disk, and if not - we will simply wait until next fsck.
   1212	 */
   1213exit_err:
   1214	if (err) {
   1215		ext4_warning(sb, "can't update backup for group %u (err %d), "
   1216			     "forcing fsck on next reboot", group, err);
   1217		sbi->s_mount_state &= ~EXT4_VALID_FS;
   1218		sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
   1219		mark_buffer_dirty(sbi->s_sbh);
   1220	}
   1221}
   1222
   1223/*
   1224 * ext4_add_new_descs() adds @count group descriptor of groups
   1225 * starting at @group
   1226 *
   1227 * @handle: journal handle
   1228 * @sb: super block
   1229 * @group: the group no. of the first group desc to be added
   1230 * @resize_inode: the resize inode
   1231 * @count: number of group descriptors to be added
   1232 */
   1233static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
   1234			      ext4_group_t group, struct inode *resize_inode,
   1235			      ext4_group_t count)
   1236{
   1237	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1238	struct ext4_super_block *es = sbi->s_es;
   1239	struct buffer_head *gdb_bh;
   1240	int i, gdb_off, gdb_num, err = 0;
   1241	int meta_bg;
   1242
   1243	meta_bg = ext4_has_feature_meta_bg(sb);
   1244	for (i = 0; i < count; i++, group++) {
   1245		int reserved_gdb = ext4_bg_has_super(sb, group) ?
   1246			le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
   1247
   1248		gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
   1249		gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
   1250
   1251		/*
   1252		 * We will only either add reserved group blocks to a backup group
   1253		 * or remove reserved blocks for the first group in a new group block.
   1254		 * Doing both would be mean more complex code, and sane people don't
   1255		 * use non-sparse filesystems anymore.  This is already checked above.
   1256		 */
   1257		if (gdb_off) {
   1258			gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
   1259						     gdb_num);
   1260			BUFFER_TRACE(gdb_bh, "get_write_access");
   1261			err = ext4_journal_get_write_access(handle, sb, gdb_bh,
   1262							    EXT4_JTR_NONE);
   1263
   1264			if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
   1265				err = reserve_backup_gdb(handle, resize_inode, group);
   1266		} else if (meta_bg != 0) {
   1267			err = add_new_gdb_meta_bg(sb, handle, group);
   1268		} else {
   1269			err = add_new_gdb(handle, resize_inode, group);
   1270		}
   1271		if (err)
   1272			break;
   1273	}
   1274	return err;
   1275}
   1276
   1277static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block)
   1278{
   1279	struct buffer_head *bh = sb_getblk(sb, block);
   1280	if (unlikely(!bh))
   1281		return NULL;
   1282	if (!bh_uptodate_or_lock(bh)) {
   1283		if (ext4_read_bh(bh, 0, NULL) < 0) {
   1284			brelse(bh);
   1285			return NULL;
   1286		}
   1287	}
   1288
   1289	return bh;
   1290}
   1291
   1292static int ext4_set_bitmap_checksums(struct super_block *sb,
   1293				     ext4_group_t group,
   1294				     struct ext4_group_desc *gdp,
   1295				     struct ext4_new_group_data *group_data)
   1296{
   1297	struct buffer_head *bh;
   1298
   1299	if (!ext4_has_metadata_csum(sb))
   1300		return 0;
   1301
   1302	bh = ext4_get_bitmap(sb, group_data->inode_bitmap);
   1303	if (!bh)
   1304		return -EIO;
   1305	ext4_inode_bitmap_csum_set(sb, group, gdp, bh,
   1306				   EXT4_INODES_PER_GROUP(sb) / 8);
   1307	brelse(bh);
   1308
   1309	bh = ext4_get_bitmap(sb, group_data->block_bitmap);
   1310	if (!bh)
   1311		return -EIO;
   1312	ext4_block_bitmap_csum_set(sb, group, gdp, bh);
   1313	brelse(bh);
   1314
   1315	return 0;
   1316}
   1317
   1318/*
   1319 * ext4_setup_new_descs() will set up the group descriptor descriptors of a flex bg
   1320 */
   1321static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
   1322				struct ext4_new_flex_group_data *flex_gd)
   1323{
   1324	struct ext4_new_group_data	*group_data = flex_gd->groups;
   1325	struct ext4_group_desc		*gdp;
   1326	struct ext4_sb_info		*sbi = EXT4_SB(sb);
   1327	struct buffer_head		*gdb_bh;
   1328	ext4_group_t			group;
   1329	__u16				*bg_flags = flex_gd->bg_flags;
   1330	int				i, gdb_off, gdb_num, err = 0;
   1331
   1332
   1333	for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) {
   1334		group = group_data->group;
   1335
   1336		gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
   1337		gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
   1338
   1339		/*
   1340		 * get_write_access() has been called on gdb_bh by ext4_add_new_desc().
   1341		 */
   1342		gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num);
   1343		/* Update group descriptor block for new group */
   1344		gdp = (struct ext4_group_desc *)(gdb_bh->b_data +
   1345						 gdb_off * EXT4_DESC_SIZE(sb));
   1346
   1347		memset(gdp, 0, EXT4_DESC_SIZE(sb));
   1348		ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap);
   1349		ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap);
   1350		err = ext4_set_bitmap_checksums(sb, group, gdp, group_data);
   1351		if (err) {
   1352			ext4_std_error(sb, err);
   1353			break;
   1354		}
   1355
   1356		ext4_inode_table_set(sb, gdp, group_data->inode_table);
   1357		ext4_free_group_clusters_set(sb, gdp,
   1358					     group_data->free_clusters_count);
   1359		ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
   1360		if (ext4_has_group_desc_csum(sb))
   1361			ext4_itable_unused_set(sb, gdp,
   1362					       EXT4_INODES_PER_GROUP(sb));
   1363		gdp->bg_flags = cpu_to_le16(*bg_flags);
   1364		ext4_group_desc_csum_set(sb, group, gdp);
   1365
   1366		err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
   1367		if (unlikely(err)) {
   1368			ext4_std_error(sb, err);
   1369			break;
   1370		}
   1371
   1372		/*
   1373		 * We can allocate memory for mb_alloc based on the new group
   1374		 * descriptor
   1375		 */
   1376		err = ext4_mb_add_groupinfo(sb, group, gdp);
   1377		if (err)
   1378			break;
   1379	}
   1380	return err;
   1381}
   1382
   1383/*
   1384 * ext4_update_super() updates the super block so that the newly added
   1385 * groups can be seen by the filesystem.
   1386 *
   1387 * @sb: super block
   1388 * @flex_gd: new added groups
   1389 */
   1390static void ext4_update_super(struct super_block *sb,
   1391			     struct ext4_new_flex_group_data *flex_gd)
   1392{
   1393	ext4_fsblk_t blocks_count = 0;
   1394	ext4_fsblk_t free_blocks = 0;
   1395	ext4_fsblk_t reserved_blocks = 0;
   1396	struct ext4_new_group_data *group_data = flex_gd->groups;
   1397	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1398	struct ext4_super_block *es = sbi->s_es;
   1399	int i;
   1400
   1401	BUG_ON(flex_gd->count == 0 || group_data == NULL);
   1402	/*
   1403	 * Make the new blocks and inodes valid next.  We do this before
   1404	 * increasing the group count so that once the group is enabled,
   1405	 * all of its blocks and inodes are already valid.
   1406	 *
   1407	 * We always allocate group-by-group, then block-by-block or
   1408	 * inode-by-inode within a group, so enabling these
   1409	 * blocks/inodes before the group is live won't actually let us
   1410	 * allocate the new space yet.
   1411	 */
   1412	for (i = 0; i < flex_gd->count; i++) {
   1413		blocks_count += group_data[i].blocks_count;
   1414		free_blocks += EXT4_C2B(sbi, group_data[i].free_clusters_count);
   1415	}
   1416
   1417	reserved_blocks = ext4_r_blocks_count(es) * 100;
   1418	reserved_blocks = div64_u64(reserved_blocks, ext4_blocks_count(es));
   1419	reserved_blocks *= blocks_count;
   1420	do_div(reserved_blocks, 100);
   1421
   1422	lock_buffer(sbi->s_sbh);
   1423	ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
   1424	ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks);
   1425	le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
   1426		     flex_gd->count);
   1427	le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) *
   1428		     flex_gd->count);
   1429
   1430	ext4_debug("free blocks count %llu", ext4_free_blocks_count(es));
   1431	/*
   1432	 * We need to protect s_groups_count against other CPUs seeing
   1433	 * inconsistent state in the superblock.
   1434	 *
   1435	 * The precise rules we use are:
   1436	 *
   1437	 * * Writers must perform a smp_wmb() after updating all
   1438	 *   dependent data and before modifying the groups count
   1439	 *
   1440	 * * Readers must perform an smp_rmb() after reading the groups
   1441	 *   count and before reading any dependent data.
   1442	 *
   1443	 * NB. These rules can be relaxed when checking the group count
   1444	 * while freeing data, as we can only allocate from a block
   1445	 * group after serialising against the group count, and we can
   1446	 * only then free after serialising in turn against that
   1447	 * allocation.
   1448	 */
   1449	smp_wmb();
   1450
   1451	/* Update the global fs size fields */
   1452	sbi->s_groups_count += flex_gd->count;
   1453	sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
   1454			(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
   1455
   1456	/* Update the reserved block counts only once the new group is
   1457	 * active. */
   1458	ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
   1459				reserved_blocks);
   1460	ext4_superblock_csum_set(sb);
   1461	unlock_buffer(sbi->s_sbh);
   1462
   1463	/* Update the free space counts */
   1464	percpu_counter_add(&sbi->s_freeclusters_counter,
   1465			   EXT4_NUM_B2C(sbi, free_blocks));
   1466	percpu_counter_add(&sbi->s_freeinodes_counter,
   1467			   EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
   1468
   1469	ext4_debug("free blocks count %llu",
   1470		   percpu_counter_read(&sbi->s_freeclusters_counter));
   1471	if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) {
   1472		ext4_group_t flex_group;
   1473		struct flex_groups *fg;
   1474
   1475		flex_group = ext4_flex_group(sbi, group_data[0].group);
   1476		fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
   1477		atomic64_add(EXT4_NUM_B2C(sbi, free_blocks),
   1478			     &fg->free_clusters);
   1479		atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
   1480			   &fg->free_inodes);
   1481	}
   1482
   1483	/*
   1484	 * Update the fs overhead information
   1485	 */
   1486	ext4_calculate_overhead(sb);
   1487
   1488	if (test_opt(sb, DEBUG))
   1489		printk(KERN_DEBUG "EXT4-fs: added group %u:"
   1490		       "%llu blocks(%llu free %llu reserved)\n", flex_gd->count,
   1491		       blocks_count, free_blocks, reserved_blocks);
   1492}
   1493
   1494/* Add a flex group to an fs. Ensure we handle all possible error conditions
   1495 * _before_ we start modifying the filesystem, because we cannot abort the
   1496 * transaction and not have it write the data to disk.
   1497 */
   1498static int ext4_flex_group_add(struct super_block *sb,
   1499			       struct inode *resize_inode,
   1500			       struct ext4_new_flex_group_data *flex_gd)
   1501{
   1502	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1503	struct ext4_super_block *es = sbi->s_es;
   1504	ext4_fsblk_t o_blocks_count;
   1505	ext4_grpblk_t last;
   1506	ext4_group_t group;
   1507	handle_t *handle;
   1508	unsigned reserved_gdb;
   1509	int err = 0, err2 = 0, credit;
   1510
   1511	BUG_ON(!flex_gd->count || !flex_gd->groups || !flex_gd->bg_flags);
   1512
   1513	reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
   1514	o_blocks_count = ext4_blocks_count(es);
   1515	ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
   1516	BUG_ON(last);
   1517
   1518	err = setup_new_flex_group_blocks(sb, flex_gd);
   1519	if (err)
   1520		goto exit;
   1521	/*
   1522	 * We will always be modifying at least the superblock and  GDT
   1523	 * blocks.  If we are adding a group past the last current GDT block,
   1524	 * we will also modify the inode and the dindirect block.  If we
   1525	 * are adding a group with superblock/GDT backups  we will also
   1526	 * modify each of the reserved GDT dindirect blocks.
   1527	 */
   1528	credit = 3;	/* sb, resize inode, resize inode dindirect */
   1529	/* GDT blocks */
   1530	credit += 1 + DIV_ROUND_UP(flex_gd->count, EXT4_DESC_PER_BLOCK(sb));
   1531	credit += reserved_gdb;	/* Reserved GDT dindirect blocks */
   1532	handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credit);
   1533	if (IS_ERR(handle)) {
   1534		err = PTR_ERR(handle);
   1535		goto exit;
   1536	}
   1537
   1538	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
   1539	err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh,
   1540					    EXT4_JTR_NONE);
   1541	if (err)
   1542		goto exit_journal;
   1543
   1544	group = flex_gd->groups[0].group;
   1545	BUG_ON(group != sbi->s_groups_count);
   1546	err = ext4_add_new_descs(handle, sb, group,
   1547				resize_inode, flex_gd->count);
   1548	if (err)
   1549		goto exit_journal;
   1550
   1551	err = ext4_setup_new_descs(handle, sb, flex_gd);
   1552	if (err)
   1553		goto exit_journal;
   1554
   1555	ext4_update_super(sb, flex_gd);
   1556
   1557	err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
   1558
   1559exit_journal:
   1560	err2 = ext4_journal_stop(handle);
   1561	if (!err)
   1562		err = err2;
   1563
   1564	if (!err) {
   1565		int gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
   1566		int gdb_num_end = ((group + flex_gd->count - 1) /
   1567				   EXT4_DESC_PER_BLOCK(sb));
   1568		int meta_bg = ext4_has_feature_meta_bg(sb);
   1569		sector_t old_gdb = 0;
   1570
   1571		update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
   1572			       sizeof(struct ext4_super_block), 0);
   1573		for (; gdb_num <= gdb_num_end; gdb_num++) {
   1574			struct buffer_head *gdb_bh;
   1575
   1576			gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
   1577						     gdb_num);
   1578			if (old_gdb == gdb_bh->b_blocknr)
   1579				continue;
   1580			update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
   1581				       gdb_bh->b_size, meta_bg);
   1582			old_gdb = gdb_bh->b_blocknr;
   1583		}
   1584	}
   1585exit:
   1586	return err;
   1587}
   1588
   1589static int ext4_setup_next_flex_gd(struct super_block *sb,
   1590				    struct ext4_new_flex_group_data *flex_gd,
   1591				    ext4_fsblk_t n_blocks_count,
   1592				    unsigned long flexbg_size)
   1593{
   1594	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1595	struct ext4_super_block *es = sbi->s_es;
   1596	struct ext4_new_group_data *group_data = flex_gd->groups;
   1597	ext4_fsblk_t o_blocks_count;
   1598	ext4_group_t n_group;
   1599	ext4_group_t group;
   1600	ext4_group_t last_group;
   1601	ext4_grpblk_t last;
   1602	ext4_grpblk_t clusters_per_group;
   1603	unsigned long i;
   1604
   1605	clusters_per_group = EXT4_CLUSTERS_PER_GROUP(sb);
   1606
   1607	o_blocks_count = ext4_blocks_count(es);
   1608
   1609	if (o_blocks_count == n_blocks_count)
   1610		return 0;
   1611
   1612	ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
   1613	BUG_ON(last);
   1614	ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);
   1615
   1616	last_group = group | (flexbg_size - 1);
   1617	if (last_group > n_group)
   1618		last_group = n_group;
   1619
   1620	flex_gd->count = last_group - group + 1;
   1621
   1622	for (i = 0; i < flex_gd->count; i++) {
   1623		int overhead;
   1624
   1625		group_data[i].group = group + i;
   1626		group_data[i].blocks_count = EXT4_BLOCKS_PER_GROUP(sb);
   1627		overhead = ext4_group_overhead_blocks(sb, group + i);
   1628		group_data[i].mdata_blocks = overhead;
   1629		group_data[i].free_clusters_count = EXT4_CLUSTERS_PER_GROUP(sb);
   1630		if (ext4_has_group_desc_csum(sb)) {
   1631			flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
   1632					       EXT4_BG_INODE_UNINIT;
   1633			if (!test_opt(sb, INIT_INODE_TABLE))
   1634				flex_gd->bg_flags[i] |= EXT4_BG_INODE_ZEROED;
   1635		} else
   1636			flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
   1637	}
   1638
   1639	if (last_group == n_group && ext4_has_group_desc_csum(sb))
   1640		/* We need to initialize block bitmap of last group. */
   1641		flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT;
   1642
   1643	if ((last_group == n_group) && (last != clusters_per_group - 1)) {
   1644		group_data[i - 1].blocks_count = EXT4_C2B(sbi, last + 1);
   1645		group_data[i - 1].free_clusters_count -= clusters_per_group -
   1646						       last - 1;
   1647	}
   1648
   1649	return 1;
   1650}
   1651
   1652/* Add group descriptor data to an existing or new group descriptor block.
   1653 * Ensure we handle all possible error conditions _before_ we start modifying
   1654 * the filesystem, because we cannot abort the transaction and not have it
   1655 * write the data to disk.
   1656 *
   1657 * If we are on a GDT block boundary, we need to get the reserved GDT block.
   1658 * Otherwise, we may need to add backup GDT blocks for a sparse group.
   1659 *
   1660 * We only need to hold the superblock lock while we are actually adding
   1661 * in the new group's counts to the superblock.  Prior to that we have
   1662 * not really "added" the group at all.  We re-check that we are still
   1663 * adding in the last group in case things have changed since verifying.
   1664 */
   1665int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
   1666{
   1667	struct ext4_new_flex_group_data flex_gd;
   1668	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1669	struct ext4_super_block *es = sbi->s_es;
   1670	int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
   1671		le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
   1672	struct inode *inode = NULL;
   1673	int gdb_off;
   1674	int err;
   1675	__u16 bg_flags = 0;
   1676
   1677	gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb);
   1678
   1679	if (gdb_off == 0 && !ext4_has_feature_sparse_super(sb)) {
   1680		ext4_warning(sb, "Can't resize non-sparse filesystem further");
   1681		return -EPERM;
   1682	}
   1683
   1684	if (ext4_blocks_count(es) + input->blocks_count <
   1685	    ext4_blocks_count(es)) {
   1686		ext4_warning(sb, "blocks_count overflow");
   1687		return -EINVAL;
   1688	}
   1689
   1690	if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
   1691	    le32_to_cpu(es->s_inodes_count)) {
   1692		ext4_warning(sb, "inodes_count overflow");
   1693		return -EINVAL;
   1694	}
   1695
   1696	if (reserved_gdb || gdb_off == 0) {
   1697		if (!ext4_has_feature_resize_inode(sb) ||
   1698		    !le16_to_cpu(es->s_reserved_gdt_blocks)) {
   1699			ext4_warning(sb,
   1700				     "No reserved GDT blocks, can't resize");
   1701			return -EPERM;
   1702		}
   1703		inode = ext4_iget(sb, EXT4_RESIZE_INO, EXT4_IGET_SPECIAL);
   1704		if (IS_ERR(inode)) {
   1705			ext4_warning(sb, "Error opening resize inode");
   1706			return PTR_ERR(inode);
   1707		}
   1708	}
   1709
   1710
   1711	err = verify_group_input(sb, input);
   1712	if (err)
   1713		goto out;
   1714
   1715	err = ext4_alloc_flex_bg_array(sb, input->group + 1);
   1716	if (err)
   1717		goto out;
   1718
   1719	err = ext4_mb_alloc_groupinfo(sb, input->group + 1);
   1720	if (err)
   1721		goto out;
   1722
   1723	flex_gd.count = 1;
   1724	flex_gd.groups = input;
   1725	flex_gd.bg_flags = &bg_flags;
   1726	err = ext4_flex_group_add(sb, inode, &flex_gd);
   1727out:
   1728	iput(inode);
   1729	return err;
   1730} /* ext4_group_add */
   1731
   1732/*
   1733 * extend a group without checking assuming that checking has been done.
   1734 */
   1735static int ext4_group_extend_no_check(struct super_block *sb,
   1736				      ext4_fsblk_t o_blocks_count, ext4_grpblk_t add)
   1737{
   1738	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
   1739	handle_t *handle;
   1740	int err = 0, err2;
   1741
   1742	/* We will update the superblock, one block bitmap, and
   1743	 * one group descriptor via ext4_group_add_blocks().
   1744	 */
   1745	handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, 3);
   1746	if (IS_ERR(handle)) {
   1747		err = PTR_ERR(handle);
   1748		ext4_warning(sb, "error %d on journal start", err);
   1749		return err;
   1750	}
   1751
   1752	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
   1753	err = ext4_journal_get_write_access(handle, sb, EXT4_SB(sb)->s_sbh,
   1754					    EXT4_JTR_NONE);
   1755	if (err) {
   1756		ext4_warning(sb, "error %d on journal write access", err);
   1757		goto errout;
   1758	}
   1759
   1760	lock_buffer(EXT4_SB(sb)->s_sbh);
   1761	ext4_blocks_count_set(es, o_blocks_count + add);
   1762	ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add);
   1763	ext4_superblock_csum_set(sb);
   1764	unlock_buffer(EXT4_SB(sb)->s_sbh);
   1765	ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
   1766		   o_blocks_count + add);
   1767	/* We add the blocks to the bitmap and set the group need init bit */
   1768	err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
   1769	if (err)
   1770		goto errout;
   1771	ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
   1772	ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
   1773		   o_blocks_count + add);
   1774errout:
   1775	err2 = ext4_journal_stop(handle);
   1776	if (err2 && !err)
   1777		err = err2;
   1778
   1779	if (!err) {
   1780		if (test_opt(sb, DEBUG))
   1781			printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
   1782			       "blocks\n", ext4_blocks_count(es));
   1783		update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr,
   1784			       (char *)es, sizeof(struct ext4_super_block), 0);
   1785	}
   1786	return err;
   1787}
   1788
   1789/*
   1790 * Extend the filesystem to the new number of blocks specified.  This entry
   1791 * point is only used to extend the current filesystem to the end of the last
   1792 * existing group.  It can be accessed via ioctl, or by "remount,resize=<size>"
   1793 * for emergencies (because it has no dependencies on reserved blocks).
   1794 *
   1795 * If we _really_ wanted, we could use default values to call ext4_group_add()
   1796 * allow the "remount" trick to work for arbitrary resizing, assuming enough
   1797 * GDT blocks are reserved to grow to the desired size.
   1798 */
   1799int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
   1800		      ext4_fsblk_t n_blocks_count)
   1801{
   1802	ext4_fsblk_t o_blocks_count;
   1803	ext4_grpblk_t last;
   1804	ext4_grpblk_t add;
   1805	struct buffer_head *bh;
   1806	int err;
   1807	ext4_group_t group;
   1808
   1809	o_blocks_count = ext4_blocks_count(es);
   1810
   1811	if (test_opt(sb, DEBUG))
   1812		ext4_msg(sb, KERN_DEBUG,
   1813			 "extending last group from %llu to %llu blocks",
   1814			 o_blocks_count, n_blocks_count);
   1815
   1816	if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
   1817		return 0;
   1818
   1819	if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
   1820		ext4_msg(sb, KERN_ERR,
   1821			 "filesystem too large to resize to %llu blocks safely",
   1822			 n_blocks_count);
   1823		return -EINVAL;
   1824	}
   1825
   1826	if (n_blocks_count < o_blocks_count) {
   1827		ext4_warning(sb, "can't shrink FS - resize aborted");
   1828		return -EINVAL;
   1829	}
   1830
   1831	/* Handle the remaining blocks in the last group only. */
   1832	ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
   1833
   1834	if (last == 0) {
   1835		ext4_warning(sb, "need to use ext2online to resize further");
   1836		return -EPERM;
   1837	}
   1838
   1839	add = EXT4_BLOCKS_PER_GROUP(sb) - last;
   1840
   1841	if (o_blocks_count + add < o_blocks_count) {
   1842		ext4_warning(sb, "blocks_count overflow");
   1843		return -EINVAL;
   1844	}
   1845
   1846	if (o_blocks_count + add > n_blocks_count)
   1847		add = n_blocks_count - o_blocks_count;
   1848
   1849	if (o_blocks_count + add < n_blocks_count)
   1850		ext4_warning(sb, "will only finish group (%llu blocks, %u new)",
   1851			     o_blocks_count + add, add);
   1852
   1853	/* See if the device is actually as big as what was requested */
   1854	bh = ext4_sb_bread(sb, o_blocks_count + add - 1, 0);
   1855	if (IS_ERR(bh)) {
   1856		ext4_warning(sb, "can't read last block, resize aborted");
   1857		return -ENOSPC;
   1858	}
   1859	brelse(bh);
   1860
   1861	err = ext4_group_extend_no_check(sb, o_blocks_count, add);
   1862	return err;
   1863} /* ext4_group_extend */
   1864
   1865
   1866static int num_desc_blocks(struct super_block *sb, ext4_group_t groups)
   1867{
   1868	return (groups + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb);
   1869}
   1870
   1871/*
   1872 * Release the resize inode and drop the resize_inode feature if there
   1873 * are no more reserved gdt blocks, and then convert the file system
   1874 * to enable meta_bg
   1875 */
   1876static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
   1877{
   1878	handle_t *handle;
   1879	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1880	struct ext4_super_block *es = sbi->s_es;
   1881	struct ext4_inode_info *ei = EXT4_I(inode);
   1882	ext4_fsblk_t nr;
   1883	int i, ret, err = 0;
   1884	int credits = 1;
   1885
   1886	ext4_msg(sb, KERN_INFO, "Converting file system to meta_bg");
   1887	if (inode) {
   1888		if (es->s_reserved_gdt_blocks) {
   1889			ext4_error(sb, "Unexpected non-zero "
   1890				   "s_reserved_gdt_blocks");
   1891			return -EPERM;
   1892		}
   1893
   1894		/* Do a quick sanity check of the resize inode */
   1895		if (inode->i_blocks != 1 << (inode->i_blkbits -
   1896					     (9 - sbi->s_cluster_bits)))
   1897			goto invalid_resize_inode;
   1898		for (i = 0; i < EXT4_N_BLOCKS; i++) {
   1899			if (i == EXT4_DIND_BLOCK) {
   1900				if (ei->i_data[i])
   1901					continue;
   1902				else
   1903					goto invalid_resize_inode;
   1904			}
   1905			if (ei->i_data[i])
   1906				goto invalid_resize_inode;
   1907		}
   1908		credits += 3;	/* block bitmap, bg descriptor, resize inode */
   1909	}
   1910
   1911	handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credits);
   1912	if (IS_ERR(handle))
   1913		return PTR_ERR(handle);
   1914
   1915	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
   1916	err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh,
   1917					    EXT4_JTR_NONE);
   1918	if (err)
   1919		goto errout;
   1920
   1921	lock_buffer(sbi->s_sbh);
   1922	ext4_clear_feature_resize_inode(sb);
   1923	ext4_set_feature_meta_bg(sb);
   1924	sbi->s_es->s_first_meta_bg =
   1925		cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count));
   1926	ext4_superblock_csum_set(sb);
   1927	unlock_buffer(sbi->s_sbh);
   1928
   1929	err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
   1930	if (err) {
   1931		ext4_std_error(sb, err);
   1932		goto errout;
   1933	}
   1934
   1935	if (inode) {
   1936		nr = le32_to_cpu(ei->i_data[EXT4_DIND_BLOCK]);
   1937		ext4_free_blocks(handle, inode, NULL, nr, 1,
   1938				 EXT4_FREE_BLOCKS_METADATA |
   1939				 EXT4_FREE_BLOCKS_FORGET);
   1940		ei->i_data[EXT4_DIND_BLOCK] = 0;
   1941		inode->i_blocks = 0;
   1942
   1943		err = ext4_mark_inode_dirty(handle, inode);
   1944		if (err)
   1945			ext4_std_error(sb, err);
   1946	}
   1947
   1948errout:
   1949	ret = ext4_journal_stop(handle);
   1950	if (!err)
   1951		err = ret;
   1952	return ret;
   1953
   1954invalid_resize_inode:
   1955	ext4_error(sb, "corrupted/inconsistent resize inode");
   1956	return -EINVAL;
   1957}
   1958
   1959/*
   1960 * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count
   1961 *
   1962 * @sb: super block of the fs to be resized
   1963 * @n_blocks_count: the number of blocks resides in the resized fs
   1964 */
   1965int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
   1966{
   1967	struct ext4_new_flex_group_data *flex_gd = NULL;
   1968	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1969	struct ext4_super_block *es = sbi->s_es;
   1970	struct buffer_head *bh;
   1971	struct inode *resize_inode = NULL;
   1972	ext4_grpblk_t add, offset;
   1973	unsigned long n_desc_blocks;
   1974	unsigned long o_desc_blocks;
   1975	ext4_group_t o_group;
   1976	ext4_group_t n_group;
   1977	ext4_fsblk_t o_blocks_count;
   1978	ext4_fsblk_t n_blocks_count_retry = 0;
   1979	unsigned long last_update_time = 0;
   1980	int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex;
   1981	int meta_bg;
   1982
   1983	/* See if the device is actually as big as what was requested */
   1984	bh = ext4_sb_bread(sb, n_blocks_count - 1, 0);
   1985	if (IS_ERR(bh)) {
   1986		ext4_warning(sb, "can't read last block, resize aborted");
   1987		return -ENOSPC;
   1988	}
   1989	brelse(bh);
   1990
   1991retry:
   1992	o_blocks_count = ext4_blocks_count(es);
   1993
   1994	ext4_msg(sb, KERN_INFO, "resizing filesystem from %llu "
   1995		 "to %llu blocks", o_blocks_count, n_blocks_count);
   1996
   1997	if (n_blocks_count < o_blocks_count) {
   1998		/* On-line shrinking not supported */
   1999		ext4_warning(sb, "can't shrink FS - resize aborted");
   2000		return -EINVAL;
   2001	}
   2002
   2003	if (n_blocks_count == o_blocks_count)
   2004		/* Nothing need to do */
   2005		return 0;
   2006
   2007	n_group = ext4_get_group_number(sb, n_blocks_count - 1);
   2008	if (n_group >= (0xFFFFFFFFUL / EXT4_INODES_PER_GROUP(sb))) {
   2009		ext4_warning(sb, "resize would cause inodes_count overflow");
   2010		return -EINVAL;
   2011	}
   2012	ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset);
   2013
   2014	n_desc_blocks = num_desc_blocks(sb, n_group + 1);
   2015	o_desc_blocks = num_desc_blocks(sb, sbi->s_groups_count);
   2016
   2017	meta_bg = ext4_has_feature_meta_bg(sb);
   2018
   2019	if (ext4_has_feature_resize_inode(sb)) {
   2020		if (meta_bg) {
   2021			ext4_error(sb, "resize_inode and meta_bg enabled "
   2022				   "simultaneously");
   2023			return -EINVAL;
   2024		}
   2025		if (n_desc_blocks > o_desc_blocks +
   2026		    le16_to_cpu(es->s_reserved_gdt_blocks)) {
   2027			n_blocks_count_retry = n_blocks_count;
   2028			n_desc_blocks = o_desc_blocks +
   2029				le16_to_cpu(es->s_reserved_gdt_blocks);
   2030			n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb);
   2031			n_blocks_count = (ext4_fsblk_t)n_group *
   2032				EXT4_BLOCKS_PER_GROUP(sb) +
   2033				le32_to_cpu(es->s_first_data_block);
   2034			n_group--; /* set to last group number */
   2035		}
   2036
   2037		if (!resize_inode)
   2038			resize_inode = ext4_iget(sb, EXT4_RESIZE_INO,
   2039						 EXT4_IGET_SPECIAL);
   2040		if (IS_ERR(resize_inode)) {
   2041			ext4_warning(sb, "Error opening resize inode");
   2042			return PTR_ERR(resize_inode);
   2043		}
   2044	}
   2045
   2046	if ((!resize_inode && !meta_bg) || n_blocks_count == o_blocks_count) {
   2047		err = ext4_convert_meta_bg(sb, resize_inode);
   2048		if (err)
   2049			goto out;
   2050		if (resize_inode) {
   2051			iput(resize_inode);
   2052			resize_inode = NULL;
   2053		}
   2054		if (n_blocks_count_retry) {
   2055			n_blocks_count = n_blocks_count_retry;
   2056			n_blocks_count_retry = 0;
   2057			goto retry;
   2058		}
   2059	}
   2060
   2061	/*
   2062	 * Make sure the last group has enough space so that it's
   2063	 * guaranteed to have enough space for all metadata blocks
   2064	 * that it might need to hold.  (We might not need to store
   2065	 * the inode table blocks in the last block group, but there
   2066	 * will be cases where this might be needed.)
   2067	 */
   2068	if ((ext4_group_first_block_no(sb, n_group) +
   2069	     ext4_group_overhead_blocks(sb, n_group) + 2 +
   2070	     sbi->s_itb_per_group + sbi->s_cluster_ratio) >= n_blocks_count) {
   2071		n_blocks_count = ext4_group_first_block_no(sb, n_group);
   2072		n_group--;
   2073		n_blocks_count_retry = 0;
   2074		if (resize_inode) {
   2075			iput(resize_inode);
   2076			resize_inode = NULL;
   2077		}
   2078		goto retry;
   2079	}
   2080
   2081	/* extend the last group */
   2082	if (n_group == o_group)
   2083		add = n_blocks_count - o_blocks_count;
   2084	else
   2085		add = EXT4_C2B(sbi, EXT4_CLUSTERS_PER_GROUP(sb) - (offset + 1));
   2086	if (add > 0) {
   2087		err = ext4_group_extend_no_check(sb, o_blocks_count, add);
   2088		if (err)
   2089			goto out;
   2090	}
   2091
   2092	if (ext4_blocks_count(es) == n_blocks_count)
   2093		goto out;
   2094
   2095	err = ext4_alloc_flex_bg_array(sb, n_group + 1);
   2096	if (err)
   2097		goto out;
   2098
   2099	err = ext4_mb_alloc_groupinfo(sb, n_group + 1);
   2100	if (err)
   2101		goto out;
   2102
   2103	flex_gd = alloc_flex_gd(flexbg_size);
   2104	if (flex_gd == NULL) {
   2105		err = -ENOMEM;
   2106		goto out;
   2107	}
   2108
   2109	/* Add flex groups. Note that a regular group is a
   2110	 * flex group with 1 group.
   2111	 */
   2112	while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
   2113					      flexbg_size)) {
   2114		if (time_is_before_jiffies(last_update_time + HZ * 10)) {
   2115			if (last_update_time)
   2116				ext4_msg(sb, KERN_INFO,
   2117					 "resized to %llu blocks",
   2118					 ext4_blocks_count(es));
   2119			last_update_time = jiffies;
   2120		}
   2121		if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0)
   2122			break;
   2123		err = ext4_flex_group_add(sb, resize_inode, flex_gd);
   2124		if (unlikely(err))
   2125			break;
   2126	}
   2127
   2128	if (!err && n_blocks_count_retry) {
   2129		n_blocks_count = n_blocks_count_retry;
   2130		n_blocks_count_retry = 0;
   2131		free_flex_gd(flex_gd);
   2132		flex_gd = NULL;
   2133		if (resize_inode) {
   2134			iput(resize_inode);
   2135			resize_inode = NULL;
   2136		}
   2137		goto retry;
   2138	}
   2139
   2140out:
   2141	if (flex_gd)
   2142		free_flex_gd(flex_gd);
   2143	if (resize_inode != NULL)
   2144		iput(resize_inode);
   2145	if (err)
   2146		ext4_warning(sb, "error (%d) occurred during "
   2147			     "file system resize", err);
   2148	ext4_msg(sb, KERN_INFO, "resized filesystem to %llu",
   2149		 ext4_blocks_count(es));
   2150	return err;
   2151}