cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

bitmap.c (40551B)


      1/*
      2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
      3 */
      4/* Reiserfs block (de)allocator, bitmap-based. */
      5
      6#include <linux/time.h>
      7#include "reiserfs.h"
      8#include <linux/errno.h>
      9#include <linux/buffer_head.h>
     10#include <linux/kernel.h>
     11#include <linux/pagemap.h>
     12#include <linux/vmalloc.h>
     13#include <linux/quotaops.h>
     14#include <linux/seq_file.h>
     15
     16#define PREALLOCATION_SIZE 9
     17
     18/* different reiserfs block allocator options */
     19
     20#define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)
     21
     22#define  _ALLOC_concentrating_formatted_nodes 0
     23#define  _ALLOC_displacing_large_files 1
     24#define  _ALLOC_displacing_new_packing_localities 2
     25#define  _ALLOC_old_hashed_relocation 3
     26#define  _ALLOC_new_hashed_relocation 4
     27#define  _ALLOC_skip_busy 5
     28#define  _ALLOC_displace_based_on_dirid 6
     29#define  _ALLOC_hashed_formatted_nodes 7
     30#define  _ALLOC_old_way 8
     31#define  _ALLOC_hundredth_slices 9
     32#define  _ALLOC_dirid_groups 10
     33#define  _ALLOC_oid_groups 11
     34#define  _ALLOC_packing_groups 12
     35
     36#define  concentrating_formatted_nodes(s)	test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
     37#define  displacing_large_files(s)		test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
     38#define  displacing_new_packing_localities(s)	test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
     39
     40#define SET_OPTION(optname) \
     41   do { \
     42	reiserfs_info(s, "block allocator option \"%s\" is set", #optname); \
     43	set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
     44    } while(0)
     45#define TEST_OPTION(optname, s) \
     46    test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
     47
     48static inline void get_bit_address(struct super_block *s,
     49				   b_blocknr_t block,
     50				   unsigned int *bmap_nr,
     51				   unsigned int *offset)
     52{
     53	/*
     54	 * It is in the bitmap block number equal to the block
     55	 * number divided by the number of bits in a block.
     56	 */
     57	*bmap_nr = block >> (s->s_blocksize_bits + 3);
     58	/* Within that bitmap block it is located at bit offset *offset. */
     59	*offset = block & ((s->s_blocksize << 3) - 1);
     60}
     61
     62int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
     63{
     64	unsigned int bmap, offset;
     65	unsigned int bmap_count = reiserfs_bmap_count(s);
     66
     67	if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
     68		reiserfs_error(s, "vs-4010",
     69			       "block number is out of range %lu (%u)",
     70			       block, SB_BLOCK_COUNT(s));
     71		return 0;
     72	}
     73
     74	get_bit_address(s, block, &bmap, &offset);
     75
     76	/*
     77	 * Old format filesystem? Unlikely, but the bitmaps are all
     78	 * up front so we need to account for it.
     79	 */
     80	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
     81			      &REISERFS_SB(s)->s_properties))) {
     82		b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
     83		if (block >= bmap1 &&
     84		    block <= bmap1 + bmap_count) {
     85			reiserfs_error(s, "vs-4019", "bitmap block %lu(%u) "
     86				       "can't be freed or reused",
     87				       block, bmap_count);
     88			return 0;
     89		}
     90	} else {
     91		if (offset == 0) {
     92			reiserfs_error(s, "vs-4020", "bitmap block %lu(%u) "
     93				       "can't be freed or reused",
     94				       block, bmap_count);
     95			return 0;
     96		}
     97	}
     98
     99	if (bmap >= bmap_count) {
    100		reiserfs_error(s, "vs-4030", "bitmap for requested block "
    101			       "is out of range: block=%lu, bitmap_nr=%u",
    102			       block, bmap);
    103		return 0;
    104	}
    105
    106	if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
    107		reiserfs_error(s, "vs-4050", "this is root block (%u), "
    108			       "it must be busy", SB_ROOT_BLOCK(s));
    109		return 0;
    110	}
    111
    112	return 1;
    113}
    114
    115/*
    116 * Searches in journal structures for a given block number (bmap, off).
    117 * If block is found in reiserfs journal it suggests next free block
    118 * candidate to test.
    119 */
    120static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
    121				      int off, int *next)
    122{
    123	b_blocknr_t tmp;
    124
    125	if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
    126		if (tmp) {	/* hint supplied */
    127			*next = tmp;
    128			PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
    129		} else {
    130			(*next) = off + 1;  /* inc offset to avoid looping. */
    131			PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
    132		}
    133		PROC_INFO_INC(s, scan_bitmap.retry);
    134		return 1;
    135	}
    136	return 0;
    137}
    138
    139/*
    140 * Searches for a window of zero bits with given minimum and maximum
    141 * lengths in one bitmap block
    142 */
    143static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
    144			     unsigned int bmap_n, int *beg, int boundary,
    145			     int min, int max, int unfm)
    146{
    147	struct super_block *s = th->t_super;
    148	struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
    149	struct buffer_head *bh;
    150	int end, next;
    151	int org = *beg;
    152
    153	BUG_ON(!th->t_trans_id);
    154	RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
    155	       "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
    156	PROC_INFO_INC(s, scan_bitmap.bmap);
    157
    158	if (!bi) {
    159		reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
    160			       "for bitmap %d", bmap_n);
    161		return 0;
    162	}
    163
    164	bh = reiserfs_read_bitmap_block(s, bmap_n);
    165	if (bh == NULL)
    166		return 0;
    167
    168	while (1) {
    169cont:
    170		if (bi->free_count < min) {
    171			brelse(bh);
    172			return 0;	/* No free blocks in this bitmap */
    173		}
    174
    175		/* search for a first zero bit -- beginning of a window */
    176		*beg = reiserfs_find_next_zero_le_bit
    177		    ((unsigned long *)(bh->b_data), boundary, *beg);
    178
    179		/*
    180		 * search for a zero bit fails or the rest of bitmap block
    181		 * cannot contain a zero window of minimum size
    182		 */
    183		if (*beg + min > boundary) {
    184			brelse(bh);
    185			return 0;
    186		}
    187
    188		if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
    189			continue;
    190		/* first zero bit found; we check next bits */
    191		for (end = *beg + 1;; end++) {
    192			if (end >= *beg + max || end >= boundary
    193			    || reiserfs_test_le_bit(end, bh->b_data)) {
    194				next = end;
    195				break;
    196			}
    197
    198			/*
    199			 * finding the other end of zero bit window requires
    200			 * looking into journal structures (in case of
    201			 * searching for free blocks for unformatted nodes)
    202			 */
    203			if (unfm && is_block_in_journal(s, bmap_n, end, &next))
    204				break;
    205		}
    206
    207		/*
    208		 * now (*beg) points to beginning of zero bits window,
    209		 * (end) points to one bit after the window end
    210		 */
    211
    212		/* found window of proper size */
    213		if (end - *beg >= min) {
    214			int i;
    215			reiserfs_prepare_for_journal(s, bh, 1);
    216			/*
    217			 * try to set all blocks used checking are
    218			 * they still free
    219			 */
    220			for (i = *beg; i < end; i++) {
    221				/* Don't check in journal again. */
    222				if (reiserfs_test_and_set_le_bit
    223				    (i, bh->b_data)) {
    224					/*
    225					 * bit was set by another process while
    226					 * we slept in prepare_for_journal()
    227					 */
    228					PROC_INFO_INC(s, scan_bitmap.stolen);
    229
    230					/*
    231					 * we can continue with smaller set
    232					 * of allocated blocks, if length of
    233					 * this set is more or equal to `min'
    234					 */
    235					if (i >= *beg + min) {
    236						end = i;
    237						break;
    238					}
    239
    240					/*
    241					 * otherwise we clear all bit
    242					 * were set ...
    243					 */
    244					while (--i >= *beg)
    245						reiserfs_clear_le_bit
    246						    (i, bh->b_data);
    247					reiserfs_restore_prepared_buffer(s, bh);
    248					*beg = org;
    249
    250					/*
    251					 * Search again in current block
    252					 * from beginning
    253					 */
    254					goto cont;
    255				}
    256			}
    257			bi->free_count -= (end - *beg);
    258			journal_mark_dirty(th, bh);
    259			brelse(bh);
    260
    261			/* free block count calculation */
    262			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
    263						     1);
    264			PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
    265			journal_mark_dirty(th, SB_BUFFER_WITH_SB(s));
    266
    267			return end - (*beg);
    268		} else {
    269			*beg = next;
    270		}
    271	}
    272}
    273
    274static int bmap_hash_id(struct super_block *s, u32 id)
    275{
    276	char *hash_in = NULL;
    277	unsigned long hash;
    278	unsigned bm;
    279
    280	if (id <= 2) {
    281		bm = 1;
    282	} else {
    283		hash_in = (char *)(&id);
    284		hash = keyed_hash(hash_in, 4);
    285		bm = hash % reiserfs_bmap_count(s);
    286		if (!bm)
    287			bm = 1;
    288	}
    289	/* this can only be true when SB_BMAP_NR = 1 */
    290	if (bm >= reiserfs_bmap_count(s))
    291		bm = 0;
    292	return bm;
    293}
    294
    295/*
    296 * hashes the id and then returns > 0 if the block group for the
    297 * corresponding hash is full
    298 */
    299static inline int block_group_used(struct super_block *s, u32 id)
    300{
    301	int bm = bmap_hash_id(s, id);
    302	struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm];
    303
    304	/*
    305	 * If we don't have cached information on this bitmap block, we're
    306	 * going to have to load it later anyway. Loading it here allows us
    307	 * to make a better decision. This favors long-term performance gain
    308	 * with a better on-disk layout vs. a short term gain of skipping the
    309	 * read and potentially having a bad placement.
    310	 */
    311	if (info->free_count == UINT_MAX) {
    312		struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
    313		brelse(bh);
    314	}
    315
    316	if (info->free_count > ((s->s_blocksize << 3) * 60 / 100)) {
    317		return 0;
    318	}
    319	return 1;
    320}
    321
    322/*
    323 * the packing is returned in disk byte order
    324 */
    325__le32 reiserfs_choose_packing(struct inode * dir)
    326{
    327	__le32 packing;
    328	if (TEST_OPTION(packing_groups, dir->i_sb)) {
    329		u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
    330		/*
    331		 * some versions of reiserfsck expect packing locality 1 to be
    332		 * special
    333		 */
    334		if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
    335			packing = INODE_PKEY(dir)->k_objectid;
    336		else
    337			packing = INODE_PKEY(dir)->k_dir_id;
    338	} else
    339		packing = INODE_PKEY(dir)->k_objectid;
    340	return packing;
    341}
    342
    343/*
    344 * Tries to find contiguous zero bit window (given size) in given region of
    345 * bitmap and place new blocks there. Returns number of allocated blocks.
    346 */
    347static int scan_bitmap(struct reiserfs_transaction_handle *th,
    348		       b_blocknr_t * start, b_blocknr_t finish,
    349		       int min, int max, int unfm, sector_t file_block)
    350{
    351	int nr_allocated = 0;
    352	struct super_block *s = th->t_super;
    353	unsigned int bm, off;
    354	unsigned int end_bm, end_off;
    355	unsigned int off_max = s->s_blocksize << 3;
    356
    357	BUG_ON(!th->t_trans_id);
    358	PROC_INFO_INC(s, scan_bitmap.call);
    359
    360	/* No point in looking for more free blocks */
    361	if (SB_FREE_BLOCKS(s) <= 0)
    362		return 0;
    363
    364	get_bit_address(s, *start, &bm, &off);
    365	get_bit_address(s, finish, &end_bm, &end_off);
    366	if (bm > reiserfs_bmap_count(s))
    367		return 0;
    368	if (end_bm > reiserfs_bmap_count(s))
    369		end_bm = reiserfs_bmap_count(s);
    370
    371	/*
    372	 * When the bitmap is more than 10% free, anyone can allocate.
    373	 * When it's less than 10% free, only files that already use the
    374	 * bitmap are allowed. Once we pass 80% full, this restriction
    375	 * is lifted.
    376	 *
    377	 * We do this so that files that grow later still have space close to
    378	 * their original allocation. This improves locality, and presumably
    379	 * performance as a result.
    380	 *
    381	 * This is only an allocation policy and does not make up for getting a
    382	 * bad hint. Decent hinting must be implemented for this to work well.
    383	 */
    384	if (TEST_OPTION(skip_busy, s)
    385	    && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
    386		for (; bm < end_bm; bm++, off = 0) {
    387			if ((off && (!unfm || (file_block != 0)))
    388			    || SB_AP_BITMAP(s)[bm].free_count >
    389			    (s->s_blocksize << 3) / 10)
    390				nr_allocated =
    391				    scan_bitmap_block(th, bm, &off, off_max,
    392						      min, max, unfm);
    393			if (nr_allocated)
    394				goto ret;
    395		}
    396		/* we know from above that start is a reasonable number */
    397		get_bit_address(s, *start, &bm, &off);
    398	}
    399
    400	for (; bm < end_bm; bm++, off = 0) {
    401		nr_allocated =
    402		    scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
    403		if (nr_allocated)
    404			goto ret;
    405	}
    406
    407	nr_allocated =
    408	    scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
    409
    410ret:
    411	*start = bm * off_max + off;
    412	return nr_allocated;
    413
    414}
    415
    416static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
    417				 struct inode *inode, b_blocknr_t block,
    418				 int for_unformatted)
    419{
    420	struct super_block *s = th->t_super;
    421	struct reiserfs_super_block *rs;
    422	struct buffer_head *sbh, *bmbh;
    423	struct reiserfs_bitmap_info *apbi;
    424	unsigned int nr, offset;
    425
    426	BUG_ON(!th->t_trans_id);
    427	PROC_INFO_INC(s, free_block);
    428	rs = SB_DISK_SUPER_BLOCK(s);
    429	sbh = SB_BUFFER_WITH_SB(s);
    430	apbi = SB_AP_BITMAP(s);
    431
    432	get_bit_address(s, block, &nr, &offset);
    433
    434	if (nr >= reiserfs_bmap_count(s)) {
    435		reiserfs_error(s, "vs-4075", "block %lu is out of range",
    436			       block);
    437		return;
    438	}
    439
    440	bmbh = reiserfs_read_bitmap_block(s, nr);
    441	if (!bmbh)
    442		return;
    443
    444	reiserfs_prepare_for_journal(s, bmbh, 1);
    445
    446	/* clear bit for the given block in bit map */
    447	if (!reiserfs_test_and_clear_le_bit(offset, bmbh->b_data)) {
    448		reiserfs_error(s, "vs-4080",
    449			       "block %lu: bit already cleared", block);
    450	}
    451	apbi[nr].free_count++;
    452	journal_mark_dirty(th, bmbh);
    453	brelse(bmbh);
    454
    455	reiserfs_prepare_for_journal(s, sbh, 1);
    456	/* update super block */
    457	set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
    458
    459	journal_mark_dirty(th, sbh);
    460	if (for_unformatted) {
    461		int depth = reiserfs_write_unlock_nested(s);
    462		dquot_free_block_nodirty(inode, 1);
    463		reiserfs_write_lock_nested(s, depth);
    464	}
    465}
    466
    467void reiserfs_free_block(struct reiserfs_transaction_handle *th,
    468			 struct inode *inode, b_blocknr_t block,
    469			 int for_unformatted)
    470{
    471	struct super_block *s = th->t_super;
    472
    473	BUG_ON(!th->t_trans_id);
    474	RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
    475	if (!is_reusable(s, block, 1))
    476		return;
    477
    478	if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
    479		reiserfs_error(th->t_super, "bitmap-4072",
    480			       "Trying to free block outside file system "
    481			       "boundaries (%lu > %lu)",
    482			       block, sb_block_count(REISERFS_SB(s)->s_rs));
    483		return;
    484	}
    485	/* mark it before we clear it, just in case */
    486	journal_mark_freed(th, s, block);
    487	_reiserfs_free_block(th, inode, block, for_unformatted);
    488}
    489
    490/* preallocated blocks don't need to be run through journal_mark_freed */
    491static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
    492					 struct inode *inode, b_blocknr_t block)
    493{
    494	BUG_ON(!th->t_trans_id);
    495	RFALSE(!th->t_super,
    496	       "vs-4060: trying to free block on nonexistent device");
    497	if (!is_reusable(th->t_super, block, 1))
    498		return;
    499	_reiserfs_free_block(th, inode, block, 1);
    500}
    501
    502static void __discard_prealloc(struct reiserfs_transaction_handle *th,
    503			       struct reiserfs_inode_info *ei)
    504{
    505	unsigned long save = ei->i_prealloc_block;
    506	int dirty = 0;
    507	struct inode *inode = &ei->vfs_inode;
    508
    509	BUG_ON(!th->t_trans_id);
    510#ifdef CONFIG_REISERFS_CHECK
    511	if (ei->i_prealloc_count < 0)
    512		reiserfs_error(th->t_super, "zam-4001",
    513			       "inode has negative prealloc blocks count.");
    514#endif
    515	while (ei->i_prealloc_count > 0) {
    516		b_blocknr_t block_to_free;
    517
    518		/*
    519		 * reiserfs_free_prealloc_block can drop the write lock,
    520		 * which could allow another caller to free the same block.
    521		 * We can protect against it by modifying the prealloc
    522		 * state before calling it.
    523		 */
    524		block_to_free = ei->i_prealloc_block++;
    525		ei->i_prealloc_count--;
    526		reiserfs_free_prealloc_block(th, inode, block_to_free);
    527		dirty = 1;
    528	}
    529	if (dirty)
    530		reiserfs_update_sd(th, inode);
    531	ei->i_prealloc_block = save;
    532	list_del_init(&ei->i_prealloc_list);
    533}
    534
    535/* FIXME: It should be inline function */
    536void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
    537			       struct inode *inode)
    538{
    539	struct reiserfs_inode_info *ei = REISERFS_I(inode);
    540
    541	BUG_ON(!th->t_trans_id);
    542	if (ei->i_prealloc_count)
    543		__discard_prealloc(th, ei);
    544}
    545
    546void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
    547{
    548	struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
    549
    550	BUG_ON(!th->t_trans_id);
    551	while (!list_empty(plist)) {
    552		struct reiserfs_inode_info *ei;
    553		ei = list_entry(plist->next, struct reiserfs_inode_info,
    554				i_prealloc_list);
    555#ifdef CONFIG_REISERFS_CHECK
    556		if (!ei->i_prealloc_count) {
    557			reiserfs_error(th->t_super, "zam-4001",
    558				       "inode is in prealloc list but has "
    559				       "no preallocated blocks.");
    560		}
    561#endif
    562		__discard_prealloc(th, ei);
    563	}
    564}
    565
    566void reiserfs_init_alloc_options(struct super_block *s)
    567{
    568	set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
    569	set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
    570	set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
    571}
    572
    573/* block allocator related options are parsed here */
    574int reiserfs_parse_alloc_options(struct super_block *s, char *options)
    575{
    576	char *this_char, *value;
    577
    578	/* clear default settings */
    579	REISERFS_SB(s)->s_alloc_options.bits = 0;
    580
    581	while ((this_char = strsep(&options, ":")) != NULL) {
    582		if ((value = strchr(this_char, '=')) != NULL)
    583			*value++ = 0;
    584
    585		if (!strcmp(this_char, "concentrating_formatted_nodes")) {
    586			int temp;
    587			SET_OPTION(concentrating_formatted_nodes);
    588			temp = (value
    589				&& *value) ? simple_strtoul(value, &value,
    590							    0) : 10;
    591			if (temp <= 0 || temp > 100) {
    592				REISERFS_SB(s)->s_alloc_options.border = 10;
    593			} else {
    594				REISERFS_SB(s)->s_alloc_options.border =
    595				    100 / temp;
    596			}
    597			continue;
    598		}
    599		if (!strcmp(this_char, "displacing_large_files")) {
    600			SET_OPTION(displacing_large_files);
    601			REISERFS_SB(s)->s_alloc_options.large_file_size =
    602			    (value
    603			     && *value) ? simple_strtoul(value, &value, 0) : 16;
    604			continue;
    605		}
    606		if (!strcmp(this_char, "displacing_new_packing_localities")) {
    607			SET_OPTION(displacing_new_packing_localities);
    608			continue;
    609		}
    610
    611		if (!strcmp(this_char, "old_hashed_relocation")) {
    612			SET_OPTION(old_hashed_relocation);
    613			continue;
    614		}
    615
    616		if (!strcmp(this_char, "new_hashed_relocation")) {
    617			SET_OPTION(new_hashed_relocation);
    618			continue;
    619		}
    620
    621		if (!strcmp(this_char, "dirid_groups")) {
    622			SET_OPTION(dirid_groups);
    623			continue;
    624		}
    625		if (!strcmp(this_char, "oid_groups")) {
    626			SET_OPTION(oid_groups);
    627			continue;
    628		}
    629		if (!strcmp(this_char, "packing_groups")) {
    630			SET_OPTION(packing_groups);
    631			continue;
    632		}
    633		if (!strcmp(this_char, "hashed_formatted_nodes")) {
    634			SET_OPTION(hashed_formatted_nodes);
    635			continue;
    636		}
    637
    638		if (!strcmp(this_char, "skip_busy")) {
    639			SET_OPTION(skip_busy);
    640			continue;
    641		}
    642
    643		if (!strcmp(this_char, "hundredth_slices")) {
    644			SET_OPTION(hundredth_slices);
    645			continue;
    646		}
    647
    648		if (!strcmp(this_char, "old_way")) {
    649			SET_OPTION(old_way);
    650			continue;
    651		}
    652
    653		if (!strcmp(this_char, "displace_based_on_dirid")) {
    654			SET_OPTION(displace_based_on_dirid);
    655			continue;
    656		}
    657
    658		if (!strcmp(this_char, "preallocmin")) {
    659			REISERFS_SB(s)->s_alloc_options.preallocmin =
    660			    (value
    661			     && *value) ? simple_strtoul(value, &value, 0) : 4;
    662			continue;
    663		}
    664
    665		if (!strcmp(this_char, "preallocsize")) {
    666			REISERFS_SB(s)->s_alloc_options.preallocsize =
    667			    (value
    668			     && *value) ? simple_strtoul(value, &value,
    669							 0) :
    670			    PREALLOCATION_SIZE;
    671			continue;
    672		}
    673
    674		reiserfs_warning(s, "zam-4001", "unknown option - %s",
    675				 this_char);
    676		return 1;
    677	}
    678
    679	reiserfs_info(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
    680	return 0;
    681}
    682
    683static void print_sep(struct seq_file *seq, int *first)
    684{
    685	if (!*first)
    686		seq_puts(seq, ":");
    687	else
    688		*first = 0;
    689}
    690
    691void show_alloc_options(struct seq_file *seq, struct super_block *s)
    692{
    693	int first = 1;
    694
    695	if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) |
    696		(1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups)))
    697		return;
    698
    699	seq_puts(seq, ",alloc=");
    700
    701	if (TEST_OPTION(concentrating_formatted_nodes, s)) {
    702		print_sep(seq, &first);
    703		if (REISERFS_SB(s)->s_alloc_options.border != 10) {
    704			seq_printf(seq, "concentrating_formatted_nodes=%d",
    705				100 / REISERFS_SB(s)->s_alloc_options.border);
    706		} else
    707			seq_puts(seq, "concentrating_formatted_nodes");
    708	}
    709	if (TEST_OPTION(displacing_large_files, s)) {
    710		print_sep(seq, &first);
    711		if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) {
    712			seq_printf(seq, "displacing_large_files=%lu",
    713			    REISERFS_SB(s)->s_alloc_options.large_file_size);
    714		} else
    715			seq_puts(seq, "displacing_large_files");
    716	}
    717	if (TEST_OPTION(displacing_new_packing_localities, s)) {
    718		print_sep(seq, &first);
    719		seq_puts(seq, "displacing_new_packing_localities");
    720	}
    721	if (TEST_OPTION(old_hashed_relocation, s)) {
    722		print_sep(seq, &first);
    723		seq_puts(seq, "old_hashed_relocation");
    724	}
    725	if (TEST_OPTION(new_hashed_relocation, s)) {
    726		print_sep(seq, &first);
    727		seq_puts(seq, "new_hashed_relocation");
    728	}
    729	if (TEST_OPTION(dirid_groups, s)) {
    730		print_sep(seq, &first);
    731		seq_puts(seq, "dirid_groups");
    732	}
    733	if (TEST_OPTION(oid_groups, s)) {
    734		print_sep(seq, &first);
    735		seq_puts(seq, "oid_groups");
    736	}
    737	if (TEST_OPTION(packing_groups, s)) {
    738		print_sep(seq, &first);
    739		seq_puts(seq, "packing_groups");
    740	}
    741	if (TEST_OPTION(hashed_formatted_nodes, s)) {
    742		print_sep(seq, &first);
    743		seq_puts(seq, "hashed_formatted_nodes");
    744	}
    745	if (TEST_OPTION(skip_busy, s)) {
    746		print_sep(seq, &first);
    747		seq_puts(seq, "skip_busy");
    748	}
    749	if (TEST_OPTION(hundredth_slices, s)) {
    750		print_sep(seq, &first);
    751		seq_puts(seq, "hundredth_slices");
    752	}
    753	if (TEST_OPTION(old_way, s)) {
    754		print_sep(seq, &first);
    755		seq_puts(seq, "old_way");
    756	}
    757	if (TEST_OPTION(displace_based_on_dirid, s)) {
    758		print_sep(seq, &first);
    759		seq_puts(seq, "displace_based_on_dirid");
    760	}
    761	if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) {
    762		print_sep(seq, &first);
    763		seq_printf(seq, "preallocmin=%d",
    764				REISERFS_SB(s)->s_alloc_options.preallocmin);
    765	}
    766	if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) {
    767		print_sep(seq, &first);
    768		seq_printf(seq, "preallocsize=%d",
    769				REISERFS_SB(s)->s_alloc_options.preallocsize);
    770	}
    771}
    772
    773static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
    774{
    775	char *hash_in;
    776
    777	if (hint->formatted_node) {
    778		hash_in = (char *)&hint->key.k_dir_id;
    779	} else {
    780		if (!hint->inode) {
    781			/*hint->search_start = hint->beg;*/
    782			hash_in = (char *)&hint->key.k_dir_id;
    783		} else
    784		    if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
    785			hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
    786		else
    787			hash_in =
    788			    (char *)(&INODE_PKEY(hint->inode)->k_objectid);
    789	}
    790
    791	hint->search_start =
    792	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
    793}
    794
    795/*
    796 * Relocation based on dirid, hashing them into a given bitmap block
    797 * files. Formatted nodes are unaffected, a separate policy covers them
    798 */
    799static void dirid_groups(reiserfs_blocknr_hint_t * hint)
    800{
    801	unsigned long hash;
    802	__u32 dirid = 0;
    803	int bm = 0;
    804	struct super_block *sb = hint->th->t_super;
    805
    806	if (hint->inode)
    807		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
    808	else if (hint->formatted_node)
    809		dirid = hint->key.k_dir_id;
    810
    811	if (dirid) {
    812		bm = bmap_hash_id(sb, dirid);
    813		hash = bm * (sb->s_blocksize << 3);
    814		/* give a portion of the block group to metadata */
    815		if (hint->inode)
    816			hash += sb->s_blocksize / 2;
    817		hint->search_start = hash;
    818	}
    819}
    820
    821/*
    822 * Relocation based on oid, hashing them into a given bitmap block
    823 * files. Formatted nodes are unaffected, a separate policy covers them
    824 */
    825static void oid_groups(reiserfs_blocknr_hint_t * hint)
    826{
    827	if (hint->inode) {
    828		unsigned long hash;
    829		__u32 oid;
    830		__u32 dirid;
    831		int bm;
    832
    833		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
    834
    835		/*
    836		 * keep the root dir and it's first set of subdirs close to
    837		 * the start of the disk
    838		 */
    839		if (dirid <= 2)
    840			hash = (hint->inode->i_sb->s_blocksize << 3);
    841		else {
    842			oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
    843			bm = bmap_hash_id(hint->inode->i_sb, oid);
    844			hash = bm * (hint->inode->i_sb->s_blocksize << 3);
    845		}
    846		hint->search_start = hash;
    847	}
    848}
    849
    850/*
    851 * returns 1 if it finds an indirect item and gets valid hint info
    852 * from it, otherwise 0
    853 */
    854static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
    855{
    856	struct treepath *path;
    857	struct buffer_head *bh;
    858	struct item_head *ih;
    859	int pos_in_item;
    860	__le32 *item;
    861	int ret = 0;
    862
    863	/*
    864	 * reiserfs code can call this function w/o pointer to path
    865	 * structure supplied; then we rely on supplied search_start
    866	 */
    867	if (!hint->path)
    868		return 0;
    869
    870	path = hint->path;
    871	bh = get_last_bh(path);
    872	RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
    873	ih = tp_item_head(path);
    874	pos_in_item = path->pos_in_item;
    875	item = tp_item_body(path);
    876
    877	hint->search_start = bh->b_blocknr;
    878
    879	/*
    880	 * for indirect item: go to left and look for the first non-hole entry
    881	 * in the indirect item
    882	 */
    883	if (!hint->formatted_node && is_indirect_le_ih(ih)) {
    884		if (pos_in_item == I_UNFM_NUM(ih))
    885			pos_in_item--;
    886		while (pos_in_item >= 0) {
    887			int t = get_block_num(item, pos_in_item);
    888			if (t) {
    889				hint->search_start = t;
    890				ret = 1;
    891				break;
    892			}
    893			pos_in_item--;
    894		}
    895	}
    896
    897	/* does result value fit into specified region? */
    898	return ret;
    899}
    900
    901/*
    902 * should be, if formatted node, then try to put on first part of the device
    903 * specified as number of percent with mount option device, else try to put
    904 * on last of device.  This is not to say it is good code to do so,
    905 * but the effect should be measured.
    906 */
    907static inline void set_border_in_hint(struct super_block *s,
    908				      reiserfs_blocknr_hint_t * hint)
    909{
    910	b_blocknr_t border =
    911	    SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
    912
    913	if (hint->formatted_node)
    914		hint->end = border - 1;
    915	else
    916		hint->beg = border;
    917}
    918
    919static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
    920{
    921	if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
    922		hint->search_start =
    923		    hint->beg +
    924		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id),
    925			       4) % (hint->end - hint->beg);
    926	else
    927		hint->search_start =
    928		    hint->beg +
    929		    keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid),
    930			       4) % (hint->end - hint->beg);
    931}
    932
    933static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
    934{
    935	char *hash_in;
    936
    937	if (!hint->inode)
    938		hash_in = (char *)&hint->key.k_dir_id;
    939	else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
    940		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
    941	else
    942		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
    943
    944	hint->search_start =
    945	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
    946}
    947
    948static inline int
    949this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
    950						   hint)
    951{
    952	return hint->block ==
    953	    REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
    954}
    955
    956#ifdef DISPLACE_NEW_PACKING_LOCALITIES
    957static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
    958{
    959	struct in_core_key *key = &hint->key;
    960
    961	hint->th->displace_new_blocks = 0;
    962	hint->search_start =
    963	    hint->beg + keyed_hash((char *)(&key->k_objectid),
    964				   4) % (hint->end - hint->beg);
    965}
    966#endif
    967
    968static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
    969{
    970	b_blocknr_t border;
    971	u32 hash_in;
    972
    973	if (hint->formatted_node || hint->inode == NULL) {
    974		return 0;
    975	}
    976
    977	hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
    978	border =
    979	    hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
    980					 4) % (hint->end - hint->beg - 1);
    981	if (border > hint->search_start)
    982		hint->search_start = border;
    983
    984	return 1;
    985}
    986
    987static inline int old_way(reiserfs_blocknr_hint_t * hint)
    988{
    989	b_blocknr_t border;
    990
    991	if (hint->formatted_node || hint->inode == NULL) {
    992		return 0;
    993	}
    994
    995	border =
    996	    hint->beg +
    997	    le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
    998							      hint->beg);
    999	if (border > hint->search_start)
   1000		hint->search_start = border;
   1001
   1002	return 1;
   1003}
   1004
   1005static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
   1006{
   1007	struct in_core_key *key = &hint->key;
   1008	b_blocknr_t slice_start;
   1009
   1010	slice_start =
   1011	    (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100);
   1012	if (slice_start > hint->search_start
   1013	    || slice_start + (hint->end / 100) <= hint->search_start) {
   1014		hint->search_start = slice_start;
   1015	}
   1016}
   1017
   1018static void determine_search_start(reiserfs_blocknr_hint_t * hint,
   1019				   int amount_needed)
   1020{
   1021	struct super_block *s = hint->th->t_super;
   1022	int unfm_hint;
   1023
   1024	hint->beg = 0;
   1025	hint->end = SB_BLOCK_COUNT(s) - 1;
   1026
   1027	/* This is former border algorithm. Now with tunable border offset */
   1028	if (concentrating_formatted_nodes(s))
   1029		set_border_in_hint(s, hint);
   1030
   1031#ifdef DISPLACE_NEW_PACKING_LOCALITIES
   1032	/*
   1033	 * whenever we create a new directory, we displace it.  At first
   1034	 * we will hash for location, later we might look for a moderately
   1035	 * empty place for it
   1036	 */
   1037	if (displacing_new_packing_localities(s)
   1038	    && hint->th->displace_new_blocks) {
   1039		displace_new_packing_locality(hint);
   1040
   1041		/*
   1042		 * we do not continue determine_search_start,
   1043		 * if new packing locality is being displaced
   1044		 */
   1045		return;
   1046	}
   1047#endif
   1048
   1049	/*
   1050	 * all persons should feel encouraged to add more special cases
   1051	 * here and test them
   1052	 */
   1053
   1054	if (displacing_large_files(s) && !hint->formatted_node
   1055	    && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
   1056		displace_large_file(hint);
   1057		return;
   1058	}
   1059
   1060	/*
   1061	 * if none of our special cases is relevant, use the left
   1062	 * neighbor in the tree order of the new node we are allocating for
   1063	 */
   1064	if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
   1065		hash_formatted_node(hint);
   1066		return;
   1067	}
   1068
   1069	unfm_hint = get_left_neighbor(hint);
   1070
   1071	/*
   1072	 * Mimic old block allocator behaviour, that is if VFS allowed for
   1073	 * preallocation, new blocks are displaced based on directory ID.
   1074	 * Also, if suggested search_start is less than last preallocated
   1075	 * block, we start searching from it, assuming that HDD dataflow
   1076	 * is faster in forward direction
   1077	 */
   1078	if (TEST_OPTION(old_way, s)) {
   1079		if (!hint->formatted_node) {
   1080			if (!reiserfs_hashed_relocation(s))
   1081				old_way(hint);
   1082			else if (!reiserfs_no_unhashed_relocation(s))
   1083				old_hashed_relocation(hint);
   1084
   1085			if (hint->inode
   1086			    && hint->search_start <
   1087			    REISERFS_I(hint->inode)->i_prealloc_block)
   1088				hint->search_start =
   1089				    REISERFS_I(hint->inode)->i_prealloc_block;
   1090		}
   1091		return;
   1092	}
   1093
   1094	/* This is an approach proposed by Hans */
   1095	if (TEST_OPTION(hundredth_slices, s)
   1096	    && !(displacing_large_files(s) && !hint->formatted_node)) {
   1097		hundredth_slices(hint);
   1098		return;
   1099	}
   1100
   1101	/* old_hashed_relocation only works on unformatted */
   1102	if (!unfm_hint && !hint->formatted_node &&
   1103	    TEST_OPTION(old_hashed_relocation, s)) {
   1104		old_hashed_relocation(hint);
   1105	}
   1106
   1107	/* new_hashed_relocation works with both formatted/unformatted nodes */
   1108	if ((!unfm_hint || hint->formatted_node) &&
   1109	    TEST_OPTION(new_hashed_relocation, s)) {
   1110		new_hashed_relocation(hint);
   1111	}
   1112
   1113	/* dirid grouping works only on unformatted nodes */
   1114	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
   1115		dirid_groups(hint);
   1116	}
   1117#ifdef DISPLACE_NEW_PACKING_LOCALITIES
   1118	if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
   1119		dirid_groups(hint);
   1120	}
   1121#endif
   1122
   1123	/* oid grouping works only on unformatted nodes */
   1124	if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) {
   1125		oid_groups(hint);
   1126	}
   1127	return;
   1128}
   1129
   1130static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
   1131{
   1132	/* make minimum size a mount option and benchmark both ways */
   1133	/* we preallocate blocks only for regular files, specific size */
   1134	/* benchmark preallocating always and see what happens */
   1135
   1136	hint->prealloc_size = 0;
   1137
   1138	if (!hint->formatted_node && hint->preallocate) {
   1139		if (S_ISREG(hint->inode->i_mode) && !IS_PRIVATE(hint->inode)
   1140		    && hint->inode->i_size >=
   1141		    REISERFS_SB(hint->th->t_super)->s_alloc_options.
   1142		    preallocmin * hint->inode->i_sb->s_blocksize)
   1143			hint->prealloc_size =
   1144			    REISERFS_SB(hint->th->t_super)->s_alloc_options.
   1145			    preallocsize - 1;
   1146	}
   1147	return CARRY_ON;
   1148}
   1149
   1150static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
   1151						 b_blocknr_t * new_blocknrs,
   1152						 b_blocknr_t start,
   1153						 b_blocknr_t finish, int min,
   1154						 int amount_needed,
   1155						 int prealloc_size)
   1156{
   1157	int rest = amount_needed;
   1158	int nr_allocated;
   1159
   1160	while (rest > 0 && start <= finish) {
   1161		nr_allocated = scan_bitmap(hint->th, &start, finish, min,
   1162					   rest + prealloc_size,
   1163					   !hint->formatted_node, hint->block);
   1164
   1165		if (nr_allocated == 0)	/* no new blocks allocated, return */
   1166			break;
   1167
   1168		/* fill free_blocknrs array first */
   1169		while (rest > 0 && nr_allocated > 0) {
   1170			*new_blocknrs++ = start++;
   1171			rest--;
   1172			nr_allocated--;
   1173		}
   1174
   1175		/* do we have something to fill prealloc. array also ? */
   1176		if (nr_allocated > 0) {
   1177			/*
   1178			 * it means prealloc_size was greater that 0 and
   1179			 * we do preallocation
   1180			 */
   1181			list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
   1182				 &SB_JOURNAL(hint->th->t_super)->
   1183				 j_prealloc_list);
   1184			REISERFS_I(hint->inode)->i_prealloc_block = start;
   1185			REISERFS_I(hint->inode)->i_prealloc_count =
   1186			    nr_allocated;
   1187			break;
   1188		}
   1189	}
   1190
   1191	return (amount_needed - rest);
   1192}
   1193
   1194static inline int blocknrs_and_prealloc_arrays_from_search_start
   1195    (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
   1196     int amount_needed) {
   1197	struct super_block *s = hint->th->t_super;
   1198	b_blocknr_t start = hint->search_start;
   1199	b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
   1200	int passno = 0;
   1201	int nr_allocated = 0;
   1202	int depth;
   1203
   1204	determine_prealloc_size(hint);
   1205	if (!hint->formatted_node) {
   1206		int quota_ret;
   1207#ifdef REISERQUOTA_DEBUG
   1208		reiserfs_debug(s, REISERFS_DEBUG_CODE,
   1209			       "reiserquota: allocating %d blocks id=%u",
   1210			       amount_needed, hint->inode->i_uid);
   1211#endif
   1212		depth = reiserfs_write_unlock_nested(s);
   1213		quota_ret =
   1214		    dquot_alloc_block_nodirty(hint->inode, amount_needed);
   1215		if (quota_ret) {	/* Quota exceeded? */
   1216			reiserfs_write_lock_nested(s, depth);
   1217			return QUOTA_EXCEEDED;
   1218		}
   1219		if (hint->preallocate && hint->prealloc_size) {
   1220#ifdef REISERQUOTA_DEBUG
   1221			reiserfs_debug(s, REISERFS_DEBUG_CODE,
   1222				       "reiserquota: allocating (prealloc) %d blocks id=%u",
   1223				       hint->prealloc_size, hint->inode->i_uid);
   1224#endif
   1225			quota_ret = dquot_prealloc_block_nodirty(hint->inode,
   1226							 hint->prealloc_size);
   1227			if (quota_ret)
   1228				hint->preallocate = hint->prealloc_size = 0;
   1229		}
   1230		/* for unformatted nodes, force large allocations */
   1231		reiserfs_write_lock_nested(s, depth);
   1232	}
   1233
   1234	do {
   1235		switch (passno++) {
   1236		case 0:	/* Search from hint->search_start to end of disk */
   1237			start = hint->search_start;
   1238			finish = SB_BLOCK_COUNT(s) - 1;
   1239			break;
   1240		case 1:	/* Search from hint->beg to hint->search_start */
   1241			start = hint->beg;
   1242			finish = hint->search_start;
   1243			break;
   1244		case 2:	/* Last chance: Search from 0 to hint->beg */
   1245			start = 0;
   1246			finish = hint->beg;
   1247			break;
   1248		default:
   1249			/* We've tried searching everywhere, not enough space */
   1250			/* Free the blocks */
   1251			if (!hint->formatted_node) {
   1252#ifdef REISERQUOTA_DEBUG
   1253				reiserfs_debug(s, REISERFS_DEBUG_CODE,
   1254					       "reiserquota: freeing (nospace) %d blocks id=%u",
   1255					       amount_needed +
   1256					       hint->prealloc_size -
   1257					       nr_allocated,
   1258					       hint->inode->i_uid);
   1259#endif
   1260				/* Free not allocated blocks */
   1261				depth = reiserfs_write_unlock_nested(s);
   1262				dquot_free_block_nodirty(hint->inode,
   1263					amount_needed + hint->prealloc_size -
   1264					nr_allocated);
   1265				reiserfs_write_lock_nested(s, depth);
   1266			}
   1267			while (nr_allocated--)
   1268				reiserfs_free_block(hint->th, hint->inode,
   1269						    new_blocknrs[nr_allocated],
   1270						    !hint->formatted_node);
   1271
   1272			return NO_DISK_SPACE;
   1273		}
   1274	} while ((nr_allocated += allocate_without_wrapping_disk(hint,
   1275								 new_blocknrs +
   1276								 nr_allocated,
   1277								 start, finish,
   1278								 1,
   1279								 amount_needed -
   1280								 nr_allocated,
   1281								 hint->
   1282								 prealloc_size))
   1283		 < amount_needed);
   1284	if (!hint->formatted_node &&
   1285	    amount_needed + hint->prealloc_size >
   1286	    nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
   1287		/* Some of preallocation blocks were not allocated */
   1288#ifdef REISERQUOTA_DEBUG
   1289		reiserfs_debug(s, REISERFS_DEBUG_CODE,
   1290			       "reiserquota: freeing (failed prealloc) %d blocks id=%u",
   1291			       amount_needed + hint->prealloc_size -
   1292			       nr_allocated -
   1293			       REISERFS_I(hint->inode)->i_prealloc_count,
   1294			       hint->inode->i_uid);
   1295#endif
   1296
   1297		depth = reiserfs_write_unlock_nested(s);
   1298		dquot_free_block_nodirty(hint->inode, amount_needed +
   1299					 hint->prealloc_size - nr_allocated -
   1300					 REISERFS_I(hint->inode)->
   1301					 i_prealloc_count);
   1302		reiserfs_write_lock_nested(s, depth);
   1303	}
   1304
   1305	return CARRY_ON;
   1306}
   1307
   1308/* grab new blocknrs from preallocated list */
   1309/* return amount still needed after using them */
   1310static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
   1311					      b_blocknr_t * new_blocknrs,
   1312					      int amount_needed)
   1313{
   1314	struct inode *inode = hint->inode;
   1315
   1316	if (REISERFS_I(inode)->i_prealloc_count > 0) {
   1317		while (amount_needed) {
   1318
   1319			*new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
   1320			REISERFS_I(inode)->i_prealloc_count--;
   1321
   1322			amount_needed--;
   1323
   1324			if (REISERFS_I(inode)->i_prealloc_count <= 0) {
   1325				list_del(&REISERFS_I(inode)->i_prealloc_list);
   1326				break;
   1327			}
   1328		}
   1329	}
   1330	/* return amount still needed after using preallocated blocks */
   1331	return amount_needed;
   1332}
   1333
   1334int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint,
   1335			       b_blocknr_t *new_blocknrs,
   1336			       int amount_needed,
   1337			       /* Amount of blocks we have already reserved */
   1338			       int reserved_by_us)
   1339{
   1340	int initial_amount_needed = amount_needed;
   1341	int ret;
   1342	struct super_block *s = hint->th->t_super;
   1343
   1344	/* Check if there is enough space, taking into account reserved space */
   1345	if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
   1346	    amount_needed - reserved_by_us)
   1347		return NO_DISK_SPACE;
   1348	/* should this be if !hint->inode &&  hint->preallocate? */
   1349	/* do you mean hint->formatted_node can be removed ? - Zam */
   1350	/*
   1351	 * hint->formatted_node cannot be removed because we try to access
   1352	 * inode information here, and there is often no inode associated with
   1353	 * metadata allocations - green
   1354	 */
   1355
   1356	if (!hint->formatted_node && hint->preallocate) {
   1357		amount_needed = use_preallocated_list_if_available
   1358		    (hint, new_blocknrs, amount_needed);
   1359
   1360		/*
   1361		 * We have all the block numbers we need from the
   1362		 * prealloc list
   1363		 */
   1364		if (amount_needed == 0)
   1365			return CARRY_ON;
   1366		new_blocknrs += (initial_amount_needed - amount_needed);
   1367	}
   1368
   1369	/* find search start and save it in hint structure */
   1370	determine_search_start(hint, amount_needed);
   1371	if (hint->search_start >= SB_BLOCK_COUNT(s))
   1372		hint->search_start = SB_BLOCK_COUNT(s) - 1;
   1373
   1374	/* allocation itself; fill new_blocknrs and preallocation arrays */
   1375	ret = blocknrs_and_prealloc_arrays_from_search_start
   1376	    (hint, new_blocknrs, amount_needed);
   1377
   1378	/*
   1379	 * We used prealloc. list to fill (partially) new_blocknrs array.
   1380	 * If final allocation fails we need to return blocks back to
   1381	 * prealloc. list or just free them. -- Zam (I chose second
   1382	 * variant)
   1383	 */
   1384	if (ret != CARRY_ON) {
   1385		while (amount_needed++ < initial_amount_needed) {
   1386			reiserfs_free_block(hint->th, hint->inode,
   1387					    *(--new_blocknrs), 1);
   1388		}
   1389	}
   1390	return ret;
   1391}
   1392
   1393void reiserfs_cache_bitmap_metadata(struct super_block *sb,
   1394                                    struct buffer_head *bh,
   1395                                    struct reiserfs_bitmap_info *info)
   1396{
   1397	unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
   1398
   1399	/* The first bit must ALWAYS be 1 */
   1400	if (!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data))
   1401		reiserfs_error(sb, "reiserfs-2025", "bitmap block %lu is "
   1402			       "corrupted: first bit must be 1", bh->b_blocknr);
   1403
   1404	info->free_count = 0;
   1405
   1406	while (--cur >= (unsigned long *)bh->b_data) {
   1407		/* 0 and ~0 are special, we can optimize for them */
   1408		if (*cur == 0)
   1409			info->free_count += BITS_PER_LONG;
   1410		else if (*cur != ~0L)	/* A mix, investigate */
   1411			info->free_count += BITS_PER_LONG - hweight_long(*cur);
   1412	}
   1413}
   1414
   1415struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
   1416                                               unsigned int bitmap)
   1417{
   1418	b_blocknr_t block = (sb->s_blocksize << 3) * bitmap;
   1419	struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap;
   1420	struct buffer_head *bh;
   1421
   1422	/*
   1423	 * Way old format filesystems had the bitmaps packed up front.
   1424	 * I doubt there are any of these left, but just in case...
   1425	 */
   1426	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
   1427			      &REISERFS_SB(sb)->s_properties)))
   1428		block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap;
   1429	else if (bitmap == 0)
   1430		block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
   1431
   1432	bh = sb_bread(sb, block);
   1433	if (bh == NULL)
   1434		reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
   1435		                 "reading failed", __func__, block);
   1436	else {
   1437		if (buffer_locked(bh)) {
   1438			int depth;
   1439			PROC_INFO_INC(sb, scan_bitmap.wait);
   1440			depth = reiserfs_write_unlock_nested(sb);
   1441			__wait_on_buffer(bh);
   1442			reiserfs_write_lock_nested(sb, depth);
   1443		}
   1444		BUG_ON(!buffer_uptodate(bh));
   1445		BUG_ON(atomic_read(&bh->b_count) == 0);
   1446
   1447		if (info->free_count == UINT_MAX)
   1448			reiserfs_cache_bitmap_metadata(sb, bh, info);
   1449	}
   1450
   1451	return bh;
   1452}
   1453
   1454int reiserfs_init_bitmap_cache(struct super_block *sb)
   1455{
   1456	struct reiserfs_bitmap_info *bitmap;
   1457	unsigned int bmap_nr = reiserfs_bmap_count(sb);
   1458
   1459	bitmap = vmalloc(array_size(bmap_nr, sizeof(*bitmap)));
   1460	if (bitmap == NULL)
   1461		return -ENOMEM;
   1462
   1463	memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr);
   1464
   1465	SB_AP_BITMAP(sb) = bitmap;
   1466
   1467	return 0;
   1468}
   1469
   1470void reiserfs_free_bitmap_cache(struct super_block *sb)
   1471{
   1472	if (SB_AP_BITMAP(sb)) {
   1473		vfree(SB_AP_BITMAP(sb));
   1474		SB_AP_BITMAP(sb) = NULL;
   1475	}
   1476}