super.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
super.c (70892B)
      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * super.c
      4 *
      5 * load/unload driver, mount/dismount volumes
      6 *
      7 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
      8 */
      9
     10#include <linux/module.h>
     11#include <linux/fs.h>
     12#include <linux/types.h>
     13#include <linux/slab.h>
     14#include <linux/highmem.h>
     15#include <linux/init.h>
     16#include <linux/random.h>
     17#include <linux/statfs.h>
     18#include <linux/moduleparam.h>
     19#include <linux/blkdev.h>
     20#include <linux/socket.h>
     21#include <linux/inet.h>
     22#include <linux/parser.h>
     23#include <linux/crc32.h>
     24#include <linux/debugfs.h>
     25#include <linux/mount.h>
     26#include <linux/seq_file.h>
     27#include <linux/quotaops.h>
     28#include <linux/signal.h>
     29
     30#define CREATE_TRACE_POINTS
     31#include "ocfs2_trace.h"
     32
     33#include <cluster/masklog.h>
     34
     35#include "ocfs2.h"
     36
     37/* this should be the only file to include a version 1 header */
     38#include "ocfs1_fs_compat.h"
     39
     40#include "alloc.h"
     41#include "aops.h"
     42#include "blockcheck.h"
     43#include "dlmglue.h"
     44#include "export.h"
     45#include "extent_map.h"
     46#include "heartbeat.h"
     47#include "inode.h"
     48#include "journal.h"
     49#include "localalloc.h"
     50#include "namei.h"
     51#include "slot_map.h"
     52#include "super.h"
     53#include "sysfile.h"
     54#include "uptodate.h"
     55#include "xattr.h"
     56#include "quota.h"
     57#include "refcounttree.h"
     58#include "suballoc.h"
     59
     60#include "buffer_head_io.h"
     61#include "filecheck.h"
     62
/* Slab cache backing ocfs2_alloc_inode() / ocfs2_free_inode(). */
static struct kmem_cache *ocfs2_inode_cachep;
/*
 * Non-static caches: visible to other translation units.
 * NOTE(review): presumably consumed by the quota code - confirm.
 */
struct kmem_cache *ocfs2_dquot_cachep;
struct kmem_cache *ocfs2_qf_chunk_cachep;

/* NOTE(review): presumably the top-level debugfs directory for ocfs2 - confirm. */
static struct dentry *ocfs2_debugfs_root;

/* Module metadata. */
MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("OCFS2 cluster file system");
     72
/*
 * Mount options in parsed form.  They are collected here first so that they
 * can be validated before anything is applied; ocfs2_remount() copies them
 * into the ocfs2_super only when the remount succeeds.
 */
struct mount_options
{
	/* copied to osb->osb_commit_interval on remount when non-zero */
	unsigned long	commit_interval;
	/* OCFS2_MOUNT_* flag bits; becomes osb->s_mount_opt */
	unsigned long	mount_opt;
	/* becomes osb->s_atime_quantum */
	unsigned int	atime_quantum;
	/* becomes osb->preferred_slot */
	unsigned short	slot;
	/* NOTE(review): presumably the "localalloc=" value - confirm */
	int		localalloc_opt;
	/* NOTE(review): presumably the "resv_level=" value - confirm */
	unsigned int	resv_level;
	/* NOTE(review): presumably the "dir_resv_level=" value - confirm */
	int		dir_resv_level;
	/* compared against osb->osb_cluster_stack at mount time */
	char		cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
};
     84
/*
 * Forward declarations of file-local functions, so that they can be
 * referenced (for example from ocfs2_sops) ahead of their definitions.
 */

/* Mount option handling. */
static int ocfs2_parse_options(struct super_block *sb, char *options,
			       struct mount_options *mopt,
			       int is_remount);
static int ocfs2_check_set_options(struct super_block *sb,
				   struct mount_options *options);
static int ocfs2_show_options(struct seq_file *s, struct dentry *root);
/* Mount / unmount / remount. */
static void ocfs2_put_super(struct super_block *sb);
static int ocfs2_mount_volume(struct super_block *sb);
static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err);
/* Slab caches and ocfs2_super teardown. */
static int ocfs2_initialize_mem_caches(void);
static void ocfs2_free_mem_caches(void);
static void ocfs2_delete_osb(struct ocfs2_super *osb);

static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf);

static int ocfs2_sync_fs(struct super_block *sb, int wait);

/* System inode setup and volume verification. */
static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb);
static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb);
static void ocfs2_release_system_inodes(struct ocfs2_super *osb);
static int ocfs2_check_volume(struct ocfs2_super *osb);
static int ocfs2_verify_volume(struct ocfs2_dinode *di,
			       struct buffer_head *bh,
			       u32 sectsize,
			       struct ocfs2_blockcheck_stats *stats);
static int ocfs2_initialize_super(struct super_block *sb,
				  struct buffer_head *bh,
				  int sector_size,
				  struct ocfs2_blockcheck_stats *stats);
static int ocfs2_get_sector(struct super_block *sb,
			    struct buffer_head **bh,
			    int block,
			    int sect_size);
/* Inode allocation and quota control. */
static struct inode *ocfs2_alloc_inode(struct super_block *sb);
static void ocfs2_free_inode(struct inode *inode);
static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
static int ocfs2_enable_quotas(struct ocfs2_super *osb);
static void ocfs2_disable_quotas(struct ocfs2_super *osb);
    124
    125static struct dquot **ocfs2_get_dquots(struct inode *inode)
    126{
    127	return OCFS2_I(inode)->i_dquot;
    128}
    129
    130static const struct super_operations ocfs2_sops = {
    131	.statfs		= ocfs2_statfs,
    132	.alloc_inode	= ocfs2_alloc_inode,
    133	.free_inode	= ocfs2_free_inode,
    134	.drop_inode	= ocfs2_drop_inode,
    135	.evict_inode	= ocfs2_evict_inode,
    136	.sync_fs	= ocfs2_sync_fs,
    137	.put_super	= ocfs2_put_super,
    138	.remount_fs	= ocfs2_remount,
    139	.show_options   = ocfs2_show_options,
    140	.quota_read	= ocfs2_quota_read,
    141	.quota_write	= ocfs2_quota_write,
    142	.get_dquots	= ocfs2_get_dquots,
    143};
    144
/*
 * Identifiers for the mount options listed in the tokens[] table.
 * Opt_err is paired with the NULL pattern that ends that table.
 */
enum {
	Opt_barrier,
	Opt_err_panic,
	Opt_err_ro,
	Opt_intr,
	Opt_nointr,
	Opt_hb_none,
	Opt_hb_local,
	Opt_hb_global,
	Opt_data_ordered,
	Opt_data_writeback,
	Opt_atime_quantum,
	Opt_slot,
	Opt_commit,
	Opt_localalloc,
	Opt_localflocks,
	Opt_stack,
	Opt_user_xattr,
	Opt_nouser_xattr,
	Opt_inode64,
	Opt_acl,
	Opt_noacl,
	Opt_usrquota,
	Opt_grpquota,
	Opt_coherency_buffered,
	Opt_coherency_full,
	Opt_resv_level,
	Opt_dir_resv_level,
	Opt_journal_async_commit,
	Opt_err_cont,
	Opt_nocluster,
	Opt_err,
};
    178
/*
 * Mount option patterns and the Opt_* identifier each one maps to.
 * Patterns containing %u, %d or %s carry an argument.  The table ends
 * with the {Opt_err, NULL} entry.
 * NOTE(review): presumably consumed through match_token() by
 * ocfs2_parse_options() - confirm.
 */
static const match_table_t tokens = {
	{Opt_barrier, "barrier=%u"},
	{Opt_err_panic, "errors=panic"},
	{Opt_err_ro, "errors=remount-ro"},
	{Opt_intr, "intr"},
	{Opt_nointr, "nointr"},
	{Opt_hb_none, OCFS2_HB_NONE},
	{Opt_hb_local, OCFS2_HB_LOCAL},
	{Opt_hb_global, OCFS2_HB_GLOBAL},
	{Opt_data_ordered, "data=ordered"},
	{Opt_data_writeback, "data=writeback"},
	{Opt_atime_quantum, "atime_quantum=%u"},
	{Opt_slot, "preferred_slot=%u"},
	{Opt_commit, "commit=%u"},
	{Opt_localalloc, "localalloc=%d"},
	{Opt_localflocks, "localflocks"},
	{Opt_stack, "cluster_stack=%s"},
	{Opt_user_xattr, "user_xattr"},
	{Opt_nouser_xattr, "nouser_xattr"},
	{Opt_inode64, "inode64"},
	{Opt_acl, "acl"},
	{Opt_noacl, "noacl"},
	{Opt_usrquota, "usrquota"},
	{Opt_grpquota, "grpquota"},
	{Opt_coherency_buffered, "coherency=buffered"},
	{Opt_coherency_full, "coherency=full"},
	{Opt_resv_level, "resv_level=%u"},
	{Opt_dir_resv_level, "dir_resv_level=%u"},
	{Opt_journal_async_commit, "journal_async_commit"},
	{Opt_err_cont, "errors=continue"},
	{Opt_nocluster, "nocluster"},
	{Opt_err, NULL}
};
    212
    213#ifdef CONFIG_DEBUG_FS
/*
 * Format a human-readable, multi-line snapshot of the state of @osb into
 * @buf, writing at most @len bytes.
 *
 * Every line is appended with scnprintf(), so the running total "out" is
 * the number of characters actually stored and is the return value.
 *
 * The down-convert line is produced under osb->dc_task_lock (irqsave);
 * the recovery and steal-slot lines are produced under osb->osb_lock.
 */
static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
{
	struct ocfs2_cluster_connection *cconn = osb->cconn;
	struct ocfs2_recovery_map *rm = osb->recovery_map;
	struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan;
	int i, out = 0;
	unsigned long flags;

	/* Device identity */
	out += scnprintf(buf + out, len - out,
			"%10s => Id: %-s  Uuid: %-s  Gen: 0x%X  Label: %-s\n",
			"Device", osb->dev_str, osb->uuid_str,
			osb->fs_generation, osb->vol_label);

	/* Volume state and flags */
	out += scnprintf(buf + out, len - out,
			"%10s => State: %d  Flags: 0x%lX\n", "Volume",
			atomic_read(&osb->vol_state), osb->osb_flags);

	/* Block and cluster sizes */
	out += scnprintf(buf + out, len - out,
			"%10s => Block: %lu  Cluster: %d\n", "Sizes",
			osb->sb->s_blocksize, osb->s_clustersize);

	/* Feature masks */
	out += scnprintf(buf + out, len - out,
			"%10s => Compat: 0x%X  Incompat: 0x%X  "
			"ROcompat: 0x%X\n",
			"Features", osb->s_feature_compat,
			osb->s_feature_incompat, osb->s_feature_ro_compat);

	/* Mount options */
	out += scnprintf(buf + out, len - out,
			"%10s => Opts: 0x%lX  AtimeQuanta: %u\n", "Mount",
			osb->s_mount_opt, osb->s_atime_quantum);

	/*
	 * Cluster line is only emitted when a cluster connection exists; an
	 * empty stack name is reported as "o2cb".
	 * NOTE(review): "%*s" sets a minimum field width, not a maximum, so
	 * this relies on cc_name being NUL-terminated - confirm.
	 */
	if (cconn) {
		out += scnprintf(buf + out, len - out,
				"%10s => Stack: %s  Name: %*s  "
				"Version: %d.%d\n", "Cluster",
				(*osb->osb_cluster_stack == '\0' ?
				 "o2cb" : osb->osb_cluster_stack),
				cconn->cc_namelen, cconn->cc_name,
				cconn->cc_version.pv_major,
				cconn->cc_version.pv_minor);
	}

	/* Down-convert thread; a missing task is printed as pid -1 */
	spin_lock_irqsave(&osb->dc_task_lock, flags);
	out += scnprintf(buf + out, len - out,
			"%10s => Pid: %d  Count: %lu  WakeSeq: %lu  "
			"WorkSeq: %lu\n", "DownCnvt",
			(osb->dc_task ?  task_pid_nr(osb->dc_task) : -1),
			osb->blocked_lock_count, osb->dc_wake_sequence,
			osb->dc_work_sequence);
	spin_unlock_irqrestore(&osb->dc_task_lock, flags);

	/* Recovery thread and the entries currently in the recovery map */
	spin_lock(&osb->osb_lock);
	out += scnprintf(buf + out, len - out, "%10s => Pid: %d  Nodes:",
			"Recovery",
			(osb->recovery_thread_task ?
			 task_pid_nr(osb->recovery_thread_task) : -1));
	if (rm->rm_used == 0)
		out += scnprintf(buf + out, len - out, " None\n");
	else {
		for (i = 0; i < rm->rm_used; i++)
			out += scnprintf(buf + out, len - out, " %d",
					rm->rm_entries[i]);
		out += scnprintf(buf + out, len - out, "\n");
	}
	spin_unlock(&osb->osb_lock);

	/* Commit thread */
	out += scnprintf(buf + out, len - out,
			"%10s => Pid: %d  Interval: %lu\n", "Commit",
			(osb->commit_task ? task_pid_nr(osb->commit_task) : -1),
			osb->osb_commit_interval);

	/* Journal state (osb->journal is dereferenced without a NULL check) */
	out += scnprintf(buf + out, len - out,
			"%10s => State: %d  TxnId: %lu  NumTxns: %d\n",
			"Journal", osb->journal->j_state,
			osb->journal->j_trans_id,
			atomic_read(&osb->journal->j_num_trans));

	/* Allocation statistics counters */
	out += scnprintf(buf + out, len - out,
			"%10s => GlobalAllocs: %d  LocalAllocs: %d  "
			"SubAllocs: %d  LAWinMoves: %d  SAExtends: %d\n",
			"Stats",
			atomic_read(&osb->alloc_stats.bitmap_data),
			atomic_read(&osb->alloc_stats.local_data),
			atomic_read(&osb->alloc_stats.bg_allocs),
			atomic_read(&osb->alloc_stats.moves),
			atomic_read(&osb->alloc_stats.bg_extends));

	/* Local alloc window */
	out += scnprintf(buf + out, len - out,
			"%10s => State: %u  Descriptor: %llu  Size: %u bits  "
			"Default: %u bits\n",
			"LocalAlloc", osb->local_alloc_state,
			(unsigned long long)osb->la_last_gd,
			osb->local_alloc_bits, osb->local_alloc_default_bits);

	/* Inode / metadata steal slots and counters */
	spin_lock(&osb->osb_lock);
	out += scnprintf(buf + out, len - out,
			"%10s => InodeSlot: %d  StolenInodes: %d, "
			"MetaSlot: %d  StolenMeta: %d\n", "Steal",
			osb->s_inode_steal_slot,
			atomic_read(&osb->s_num_inodes_stolen),
			osb->s_meta_steal_slot,
			atomic_read(&osb->s_num_meta_stolen));
	spin_unlock(&osb->osb_lock);

	/* Orphan scan: counters plus time since the last scan, or "Disabled" */
	out += scnprintf(buf + out, len - out, "OrphanScan => ");
	out += scnprintf(buf + out, len - out, "Local: %u  Global: %u ",
			os->os_count, os->os_seqno);
	out += scnprintf(buf + out, len - out, " Last Scan: ");
	if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
		out += scnprintf(buf + out, len - out, "Disabled\n");
	else
		out += scnprintf(buf + out, len - out, "%lu seconds ago\n",
				(unsigned long)(ktime_get_seconds() - os->os_scantime));

	/* One row per slot; the row for osb->slot_num is marked with '*' */
	out += scnprintf(buf + out, len - out, "%10s => %3s  %10s\n",
			"Slots", "Num", "RecoGen");
	for (i = 0; i < osb->max_slots; ++i) {
		out += scnprintf(buf + out, len - out,
				"%10s  %c %3d  %10d\n",
				" ",
				(i == osb->slot_num ? '*' : ' '),
				i, osb->slot_recovery_generations[i]);
	}

	return out;
}
    340
    341static int ocfs2_osb_debug_open(struct inode *inode, struct file *file)
    342{
    343	struct ocfs2_super *osb = inode->i_private;
    344	char *buf = NULL;
    345
    346	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
    347	if (!buf)
    348		goto bail;
    349
    350	i_size_write(inode, ocfs2_osb_dump(osb, buf, PAGE_SIZE));
    351
    352	file->private_data = buf;
    353
    354	return 0;
    355bail:
    356	return -ENOMEM;
    357}
    358
    359static int ocfs2_debug_release(struct inode *inode, struct file *file)
    360{
    361	kfree(file->private_data);
    362	return 0;
    363}
    364
    365static ssize_t ocfs2_debug_read(struct file *file, char __user *buf,
    366				size_t nbytes, loff_t *ppos)
    367{
    368	return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
    369				       i_size_read(file->f_mapping->host));
    370}
    371#else
/*
 * Stubs used when CONFIG_DEBUG_FS is not set: they keep
 * ocfs2_osb_debug_fops buildable and simply report success / no data.
 */
static int ocfs2_osb_debug_open(struct inode *inode, struct file *file)
{
	return 0;
}
static int ocfs2_debug_release(struct inode *inode, struct file *file)
{
	return 0;
}
/* Always returns 0 bytes read. */
static ssize_t ocfs2_debug_read(struct file *file, char __user *buf,
				size_t nbytes, loff_t *ppos)
{
	return 0;
}
    385#endif	/* CONFIG_DEBUG_FS */
    386
    387static const struct file_operations ocfs2_osb_debug_fops = {
    388	.open =		ocfs2_osb_debug_open,
    389	.release =	ocfs2_debug_release,
    390	.read =		ocfs2_debug_read,
    391	.llseek =	generic_file_llseek,
    392};
    393
    394static int ocfs2_sync_fs(struct super_block *sb, int wait)
    395{
    396	int status;
    397	tid_t target;
    398	struct ocfs2_super *osb = OCFS2_SB(sb);
    399
    400	if (ocfs2_is_hard_readonly(osb))
    401		return -EROFS;
    402
    403	if (wait) {
    404		status = ocfs2_flush_truncate_log(osb);
    405		if (status < 0)
    406			mlog_errno(status);
    407	} else {
    408		ocfs2_schedule_truncate_log_flush(osb, 0);
    409	}
    410
    411	if (jbd2_journal_start_commit(osb->journal->j_journal,
    412				      &target)) {
    413		if (wait)
    414			jbd2_log_wait_commit(osb->journal->j_journal,
    415					     target);
    416	}
    417	return 0;
    418}
    419
    420static int ocfs2_need_system_inode(struct ocfs2_super *osb, int ino)
    421{
    422	if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
    423	    && (ino == USER_QUOTA_SYSTEM_INODE
    424		|| ino == LOCAL_USER_QUOTA_SYSTEM_INODE))
    425		return 0;
    426	if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
    427	    && (ino == GROUP_QUOTA_SYSTEM_INODE
    428		|| ino == LOCAL_GROUP_QUOTA_SYSTEM_INODE))
    429		return 0;
    430	return 1;
    431}
    432
    433static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
    434{
    435	struct inode *new = NULL;
    436	int status = 0;
    437	int i;
    438
    439	new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE, 0);
    440	if (IS_ERR(new)) {
    441		status = PTR_ERR(new);
    442		mlog_errno(status);
    443		goto bail;
    444	}
    445	osb->root_inode = new;
    446
    447	new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE, 0);
    448	if (IS_ERR(new)) {
    449		status = PTR_ERR(new);
    450		mlog_errno(status);
    451		goto bail;
    452	}
    453	osb->sys_root_inode = new;
    454
    455	for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE;
    456	     i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) {
    457		if (!ocfs2_need_system_inode(osb, i))
    458			continue;
    459		new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
    460		if (!new) {
    461			ocfs2_release_system_inodes(osb);
    462			status = ocfs2_is_soft_readonly(osb) ? -EROFS : -EINVAL;
    463			mlog_errno(status);
    464			mlog(ML_ERROR, "Unable to load system inode %d, "
    465			     "possibly corrupt fs?", i);
    466			goto bail;
    467		}
    468		// the array now has one ref, so drop this one
    469		iput(new);
    470	}
    471
    472bail:
    473	if (status)
    474		mlog_errno(status);
    475	return status;
    476}
    477
    478static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
    479{
    480	struct inode *new = NULL;
    481	int status = 0;
    482	int i;
    483
    484	for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1;
    485	     i < NUM_SYSTEM_INODES;
    486	     i++) {
    487		if (!ocfs2_need_system_inode(osb, i))
    488			continue;
    489		new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
    490		if (!new) {
    491			ocfs2_release_system_inodes(osb);
    492			status = ocfs2_is_soft_readonly(osb) ? -EROFS : -EINVAL;
    493			mlog(ML_ERROR, "status=%d, sysfile=%d, slot=%d\n",
    494			     status, i, osb->slot_num);
    495			goto bail;
    496		}
    497		/* the array now has one ref, so drop this one */
    498		iput(new);
    499	}
    500
    501bail:
    502	if (status)
    503		mlog_errno(status);
    504	return status;
    505}
    506
    507static void ocfs2_release_system_inodes(struct ocfs2_super *osb)
    508{
    509	int i;
    510	struct inode *inode;
    511
    512	for (i = 0; i < NUM_GLOBAL_SYSTEM_INODES; i++) {
    513		inode = osb->global_system_inodes[i];
    514		if (inode) {
    515			iput(inode);
    516			osb->global_system_inodes[i] = NULL;
    517		}
    518	}
    519
    520	inode = osb->sys_root_inode;
    521	if (inode) {
    522		iput(inode);
    523		osb->sys_root_inode = NULL;
    524	}
    525
    526	inode = osb->root_inode;
    527	if (inode) {
    528		iput(inode);
    529		osb->root_inode = NULL;
    530	}
    531
    532	if (!osb->local_system_inodes)
    533		return;
    534
    535	for (i = 0; i < NUM_LOCAL_SYSTEM_INODES * osb->max_slots; i++) {
    536		if (osb->local_system_inodes[i]) {
    537			iput(osb->local_system_inodes[i]);
    538			osb->local_system_inodes[i] = NULL;
    539		}
    540	}
    541
    542	kfree(osb->local_system_inodes);
    543	osb->local_system_inodes = NULL;
    544}
    545
    546/* We're allocating fs objects, use GFP_NOFS */
    547static struct inode *ocfs2_alloc_inode(struct super_block *sb)
    548{
    549	struct ocfs2_inode_info *oi;
    550
    551	oi = alloc_inode_sb(sb, ocfs2_inode_cachep, GFP_NOFS);
    552	if (!oi)
    553		return NULL;
    554
    555	oi->i_sync_tid = 0;
    556	oi->i_datasync_tid = 0;
    557	memset(&oi->i_dquot, 0, sizeof(oi->i_dquot));
    558
    559	jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode);
    560	return &oi->vfs_inode;
    561}
    562
    563static void ocfs2_free_inode(struct inode *inode)
    564{
    565	kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
    566}
    567
    568static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
    569						unsigned int cbits)
    570{
    571	unsigned int bytes = 1 << cbits;
    572	unsigned int trim = bytes;
    573	unsigned int bitshift = 32;
    574
    575	/*
    576	 * i_size and all block offsets in ocfs2 are always 64 bits
    577	 * wide. i_clusters is 32 bits, in cluster-sized units. So on
    578	 * 64 bit platforms, cluster size will be the limiting factor.
    579	 */
    580
    581#if BITS_PER_LONG == 32
    582	BUILD_BUG_ON(sizeof(sector_t) != 8);
    583	/*
    584	 * We might be limited by page cache size.
    585	 */
    586	if (bytes > PAGE_SIZE) {
    587		bytes = PAGE_SIZE;
    588		trim = 1;
    589		/*
    590		 * Shift by 31 here so that we don't get larger than
    591		 * MAX_LFS_FILESIZE
    592		 */
    593		bitshift = 31;
    594	}
    595#endif
    596
    597	/*
    598	 * Trim by a whole cluster when we can actually approach the
    599	 * on-disk limits. Otherwise we can overflow i_clusters when
    600	 * an extent start is at the max offset.
    601	 */
    602	return (((unsigned long long)bytes) << bitshift) - trim;
    603}
    604
/*
 * ->remount_fs: re-parse the mount options in @data and apply those that
 * may change on a live mount.
 *
 * The nocluster flag, the heartbeat mode and the data mode must match the
 * current mount, and inode64 may not be newly enabled; each of those is
 * rejected with -EINVAL.  A change of the SB_RDONLY state in @flags is
 * handled under osb->osb_lock, with quotas suspended before going
 * read-only and resumed/enabled after going read-write.
 *
 * The parsed options are stored in @osb only if everything succeeded.
 *
 * Returns 0 on success; -EINVAL for bad or unchangeable options or
 * unsupported ro-compat features; -EROFS on a hard read-only device or
 * after a previous filesystem error; or an error from the quota helpers.
 */
static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
{
	int incompat_features;
	int ret = 0;
	struct mount_options parsed_options;
	struct ocfs2_super *osb = OCFS2_SB(sb);
	u32 tmp;

	sync_filesystem(sb);

	/* Both helpers return non-zero on success. */
	if (!ocfs2_parse_options(sb, data, &parsed_options, 1) ||
	    !ocfs2_check_set_options(sb, &parsed_options)) {
		ret = -EINVAL;
		goto out;
	}

	tmp = OCFS2_MOUNT_NOCLUSTER;
	if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
		ret = -EINVAL;
		mlog(ML_ERROR, "Cannot change nocluster option on remount\n");
		goto out;
	}

	tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
		OCFS2_MOUNT_HB_NONE;
	if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
		ret = -EINVAL;
		mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n");
		goto out;
	}

	if ((osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) !=
	    (parsed_options.mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)) {
		ret = -EINVAL;
		mlog(ML_ERROR, "Cannot change data mode on remount\n");
		goto out;
	}

	/* Probably don't want this on remount; it might
	 * mess with other nodes */
	if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64) &&
	    (parsed_options.mount_opt & OCFS2_MOUNT_INODE64)) {
		ret = -EINVAL;
		mlog(ML_ERROR, "Cannot enable inode64 on remount\n");
		goto out;
	}

	/* We're going to/from readonly mode. */
	if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
		/* Disable quota accounting before remounting RO */
		if (*flags & SB_RDONLY) {
			ret = ocfs2_susp_quotas(osb, 0);
			if (ret < 0)
				goto out;
		}
		/* Lock here so the check of HARD_RO and the potential
		 * setting of SOFT_RO is atomic. */
		spin_lock(&osb->osb_lock);
		if (osb->osb_flags & OCFS2_OSB_HARD_RO) {
			mlog(ML_ERROR, "Remount on readonly device is forbidden.\n");
			ret = -EROFS;
			goto unlock_osb;
		}

		if (*flags & SB_RDONLY) {
			sb->s_flags |= SB_RDONLY;
			osb->osb_flags |= OCFS2_OSB_SOFT_RO;
		} else {
			if (osb->osb_flags & OCFS2_OSB_ERROR_FS) {
				mlog(ML_ERROR, "Cannot remount RDWR "
				     "filesystem due to previous errors.\n");
				ret = -EROFS;
				goto unlock_osb;
			}
			/* Non-zero if the volume has ro-compat bits outside the supported set. */
			incompat_features = OCFS2_HAS_RO_COMPAT_FEATURE(sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP);
			if (incompat_features) {
				mlog(ML_ERROR, "Cannot remount RDWR because "
				     "of unsupported optional features "
				     "(%x).\n", incompat_features);
				ret = -EINVAL;
				goto unlock_osb;
			}
			sb->s_flags &= ~SB_RDONLY;
			osb->osb_flags &= ~OCFS2_OSB_SOFT_RO;
		}
		trace_ocfs2_remount(sb->s_flags, osb->osb_flags, *flags);
unlock_osb:
		spin_unlock(&osb->osb_lock);
		/* Enable quota accounting after remounting RW */
		if (!ret && !(*flags & SB_RDONLY)) {
			if (sb_any_quota_suspended(sb))
				ret = ocfs2_susp_quotas(osb, 1);
			else
				ret = ocfs2_enable_quotas(osb);
			if (ret < 0) {
				/* Return back changes... */
				spin_lock(&osb->osb_lock);
				sb->s_flags |= SB_RDONLY;
				osb->osb_flags |= OCFS2_OSB_SOFT_RO;
				spin_unlock(&osb->osb_lock);
				goto out;
			}
		}
	}

	if (!ret) {
		/* Only save off the new mount options in case of a successful
		 * remount. */
		osb->s_mount_opt = parsed_options.mount_opt;
		osb->s_atime_quantum = parsed_options.atime_quantum;
		osb->preferred_slot = parsed_options.slot;
		/* A zero commit interval leaves the current value in place. */
		if (parsed_options.commit_interval)
			osb->osb_commit_interval = parsed_options.commit_interval;

		if (!ocfs2_is_hard_readonly(osb))
			ocfs2_set_journal_params(osb);

		/* Keep SB_POSIXACL in step with the acl mount option. */
		sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
			((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ?
							SB_POSIXACL : 0);
	}
out:
	return ret;
}
    729
    730static int ocfs2_sb_probe(struct super_block *sb,
    731			  struct buffer_head **bh,
    732			  int *sector_size,
    733			  struct ocfs2_blockcheck_stats *stats)
    734{
    735	int status, tmpstat;
    736	struct ocfs1_vol_disk_hdr *hdr;
    737	struct ocfs2_dinode *di;
    738	int blksize;
    739
    740	*bh = NULL;
    741
    742	/* may be > 512 */
    743	*sector_size = bdev_logical_block_size(sb->s_bdev);
    744	if (*sector_size > OCFS2_MAX_BLOCKSIZE) {
    745		mlog(ML_ERROR, "Hardware sector size too large: %d (max=%d)\n",
    746		     *sector_size, OCFS2_MAX_BLOCKSIZE);
    747		status = -EINVAL;
    748		goto bail;
    749	}
    750
    751	/* Can this really happen? */
    752	if (*sector_size < OCFS2_MIN_BLOCKSIZE)
    753		*sector_size = OCFS2_MIN_BLOCKSIZE;
    754
    755	/* check block zero for old format */
    756	status = ocfs2_get_sector(sb, bh, 0, *sector_size);
    757	if (status < 0) {
    758		mlog_errno(status);
    759		goto bail;
    760	}
    761	hdr = (struct ocfs1_vol_disk_hdr *) (*bh)->b_data;
    762	if (hdr->major_version == OCFS1_MAJOR_VERSION) {
    763		mlog(ML_ERROR, "incompatible version: %u.%u\n",
    764		     hdr->major_version, hdr->minor_version);
    765		status = -EINVAL;
    766	}
    767	if (memcmp(hdr->signature, OCFS1_VOLUME_SIGNATURE,
    768		   strlen(OCFS1_VOLUME_SIGNATURE)) == 0) {
    769		mlog(ML_ERROR, "incompatible volume signature: %8s\n",
    770		     hdr->signature);
    771		status = -EINVAL;
    772	}
    773	brelse(*bh);
    774	*bh = NULL;
    775	if (status < 0) {
    776		mlog(ML_ERROR, "This is an ocfs v1 filesystem which must be "
    777		     "upgraded before mounting with ocfs v2\n");
    778		goto bail;
    779	}
    780
    781	/*
    782	 * Now check at magic offset for 512, 1024, 2048, 4096
    783	 * blocksizes.  4096 is the maximum blocksize because it is
    784	 * the minimum clustersize.
    785	 */
    786	status = -EINVAL;
    787	for (blksize = *sector_size;
    788	     blksize <= OCFS2_MAX_BLOCKSIZE;
    789	     blksize <<= 1) {
    790		tmpstat = ocfs2_get_sector(sb, bh,
    791					   OCFS2_SUPER_BLOCK_BLKNO,
    792					   blksize);
    793		if (tmpstat < 0) {
    794			status = tmpstat;
    795			mlog_errno(status);
    796			break;
    797		}
    798		di = (struct ocfs2_dinode *) (*bh)->b_data;
    799		memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats));
    800		spin_lock_init(&stats->b_lock);
    801		tmpstat = ocfs2_verify_volume(di, *bh, blksize, stats);
    802		if (tmpstat < 0) {
    803			brelse(*bh);
    804			*bh = NULL;
    805		}
    806		if (tmpstat != -EAGAIN) {
    807			status = tmpstat;
    808			break;
    809		}
    810	}
    811
    812bail:
    813	return status;
    814}
    815
    816static int ocfs2_verify_heartbeat(struct ocfs2_super *osb)
    817{
    818	u32 hb_enabled = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL;
    819
    820	if (osb->s_mount_opt & hb_enabled) {
    821		if (ocfs2_mount_local(osb)) {
    822			mlog(ML_ERROR, "Cannot heartbeat on a locally "
    823			     "mounted device.\n");
    824			return -EINVAL;
    825		}
    826		if (ocfs2_userspace_stack(osb)) {
    827			mlog(ML_ERROR, "Userspace stack expected, but "
    828			     "o2cb heartbeat arguments passed to mount\n");
    829			return -EINVAL;
    830		}
    831		if (((osb->s_mount_opt & OCFS2_MOUNT_HB_GLOBAL) &&
    832		     !ocfs2_cluster_o2cb_global_heartbeat(osb)) ||
    833		    ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) &&
    834		     ocfs2_cluster_o2cb_global_heartbeat(osb))) {
    835			mlog(ML_ERROR, "Mismatching o2cb heartbeat modes\n");
    836			return -EINVAL;
    837		}
    838	}
    839
    840	if (!(osb->s_mount_opt & hb_enabled)) {
    841		if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) &&
    842		    !ocfs2_userspace_stack(osb)) {
    843			mlog(ML_ERROR, "Heartbeat has to be started to mount "
    844			     "a read-write clustered device.\n");
    845			return -EINVAL;
    846		}
    847	}
    848
    849	return 0;
    850}
    851
    852/*
    853 * If we're using a userspace stack, mount should have passed
    854 * a name that matches the disk.  If not, mount should not
    855 * have passed a stack.
    856 */
    857static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
    858					struct mount_options *mopt)
    859{
    860	if (!ocfs2_userspace_stack(osb) && mopt->cluster_stack[0]) {
    861		mlog(ML_ERROR,
    862		     "cluster stack passed to mount, but this filesystem "
    863		     "does not support it\n");
    864		return -EINVAL;
    865	}
    866
    867	if (ocfs2_userspace_stack(osb) &&
    868	    !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
    869	    strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
    870		    OCFS2_STACK_LABEL_LEN)) {
    871		mlog(ML_ERROR,
    872		     "cluster stack passed to mount (\"%s\") does not "
    873		     "match the filesystem (\"%s\")\n",
    874		     mopt->cluster_stack,
    875		     osb->osb_cluster_stack);
    876		return -EINVAL;
    877	}
    878
    879	return 0;
    880}
    881
    882static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend)
    883{
    884	int type;
    885	struct super_block *sb = osb->sb;
    886	unsigned int feature[OCFS2_MAXQUOTAS] = {
    887					OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
    888					OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
    889	int status = 0;
    890
    891	for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
    892		if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
    893			continue;
    894		if (unsuspend)
    895			status = dquot_resume(sb, type);
    896		else {
    897			struct ocfs2_mem_dqinfo *oinfo;
    898
    899			/* Cancel periodic syncing before suspending */
    900			oinfo = sb_dqinfo(sb, type)->dqi_priv;
    901			cancel_delayed_work_sync(&oinfo->dqi_sync_work);
    902			status = dquot_suspend(sb, type);
    903		}
    904		if (status < 0)
    905			break;
    906	}
    907	if (status < 0)
    908		mlog(ML_ERROR, "Failed to suspend/unsuspend quotas on "
    909		     "remount (error = %d).\n", status);
    910	return status;
    911}
    912
    913static int ocfs2_enable_quotas(struct ocfs2_super *osb)
    914{
    915	struct inode *inode[OCFS2_MAXQUOTAS] = { NULL, NULL };
    916	struct super_block *sb = osb->sb;
    917	unsigned int feature[OCFS2_MAXQUOTAS] = {
    918					OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
    919					OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
    920	unsigned int ino[OCFS2_MAXQUOTAS] = {
    921					LOCAL_USER_QUOTA_SYSTEM_INODE,
    922					LOCAL_GROUP_QUOTA_SYSTEM_INODE };
    923	int status;
    924	int type;
    925
    926	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NEGATIVE_USAGE;
    927	for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
    928		if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
    929			continue;
    930		inode[type] = ocfs2_get_system_file_inode(osb, ino[type],
    931							osb->slot_num);
    932		if (!inode[type]) {
    933			status = -ENOENT;
    934			goto out_quota_off;
    935		}
    936		status = dquot_load_quota_inode(inode[type], type, QFMT_OCFS2,
    937						DQUOT_USAGE_ENABLED);
    938		if (status < 0)
    939			goto out_quota_off;
    940	}
    941
    942	for (type = 0; type < OCFS2_MAXQUOTAS; type++)
    943		iput(inode[type]);
    944	return 0;
    945out_quota_off:
    946	ocfs2_disable_quotas(osb);
    947	for (type = 0; type < OCFS2_MAXQUOTAS; type++)
    948		iput(inode[type]);
    949	mlog_errno(status);
    950	return status;
    951}
    952
    953static void ocfs2_disable_quotas(struct ocfs2_super *osb)
    954{
    955	int type;
    956	struct inode *inode;
    957	struct super_block *sb = osb->sb;
    958	struct ocfs2_mem_dqinfo *oinfo;
    959
    960	/* We mostly ignore errors in this function because there's not much
    961	 * we can do when we see them */
    962	for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
    963		if (!sb_has_quota_loaded(sb, type))
    964			continue;
    965		oinfo = sb_dqinfo(sb, type)->dqi_priv;
    966		cancel_delayed_work_sync(&oinfo->dqi_sync_work);
    967		inode = igrab(sb->s_dquot.files[type]);
    968		/* Turn off quotas. This will remove all dquot structures from
    969		 * memory and so they will be automatically synced to global
    970		 * quota files */
    971		dquot_disable(sb, type, DQUOT_USAGE_ENABLED |
    972					DQUOT_LIMITS_ENABLED);
    973		iput(inode);
    974	}
    975}
    976
    977static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
    978{
    979	struct dentry *root;
    980	int status, sector_size;
    981	struct mount_options parsed_options;
    982	struct inode *inode = NULL;
    983	struct ocfs2_super *osb = NULL;
    984	struct buffer_head *bh = NULL;
    985	char nodestr[12];
    986	struct ocfs2_blockcheck_stats stats;
    987
    988	trace_ocfs2_fill_super(sb, data, silent);
    989
    990	if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) {
    991		status = -EINVAL;
    992		goto out;
    993	}
    994
    995	/* probe for superblock */
    996	status = ocfs2_sb_probe(sb, &bh, &sector_size, &stats);
    997	if (status < 0) {
    998		mlog(ML_ERROR, "superblock probe failed!\n");
    999		goto out;
   1000	}
   1001
   1002	status = ocfs2_initialize_super(sb, bh, sector_size, &stats);
   1003	brelse(bh);
   1004	bh = NULL;
   1005	if (status < 0)
   1006		goto out;
   1007
   1008	osb = OCFS2_SB(sb);
   1009
   1010	if (!ocfs2_check_set_options(sb, &parsed_options)) {
   1011		status = -EINVAL;
   1012		goto out_super;
   1013	}
   1014	osb->s_mount_opt = parsed_options.mount_opt;
   1015	osb->s_atime_quantum = parsed_options.atime_quantum;
   1016	osb->preferred_slot = parsed_options.slot;
   1017	osb->osb_commit_interval = parsed_options.commit_interval;
   1018
   1019	ocfs2_la_set_sizes(osb, parsed_options.localalloc_opt);
   1020	osb->osb_resv_level = parsed_options.resv_level;
   1021	osb->osb_dir_resv_level = parsed_options.resv_level;
   1022	if (parsed_options.dir_resv_level == -1)
   1023		osb->osb_dir_resv_level = parsed_options.resv_level;
   1024	else
   1025		osb->osb_dir_resv_level = parsed_options.dir_resv_level;
   1026
   1027	status = ocfs2_verify_userspace_stack(osb, &parsed_options);
   1028	if (status)
   1029		goto out_super;
   1030
   1031	sb->s_magic = OCFS2_SUPER_MAGIC;
   1032
   1033	sb->s_flags = (sb->s_flags & ~(SB_POSIXACL | SB_NOSEC)) |
   1034		((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? SB_POSIXACL : 0);
   1035
   1036	/* Hard readonly mode only if: bdev_read_only, SB_RDONLY,
   1037	 * heartbeat=none */
   1038	if (bdev_read_only(sb->s_bdev)) {
   1039		if (!sb_rdonly(sb)) {
   1040			status = -EACCES;
   1041			mlog(ML_ERROR, "Readonly device detected but readonly "
   1042			     "mount was not specified.\n");
   1043			goto out_super;
   1044		}
   1045
   1046		/* You should not be able to start a local heartbeat
   1047		 * on a readonly device. */
   1048		if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) {
   1049			status = -EROFS;
   1050			mlog(ML_ERROR, "Local heartbeat specified on readonly "
   1051			     "device.\n");
   1052			goto out_super;
   1053		}
   1054
   1055		status = ocfs2_check_journals_nolocks(osb);
   1056		if (status < 0) {
   1057			if (status == -EROFS)
   1058				mlog(ML_ERROR, "Recovery required on readonly "
   1059				     "file system, but write access is "
   1060				     "unavailable.\n");
   1061			goto out_super;
   1062		}
   1063
   1064		ocfs2_set_ro_flag(osb, 1);
   1065
   1066		printk(KERN_NOTICE "ocfs2: Readonly device (%s) detected. "
   1067		       "Cluster services will not be used for this mount. "
   1068		       "Recovery will be skipped.\n", osb->dev_str);
   1069	}
   1070
   1071	if (!ocfs2_is_hard_readonly(osb)) {
   1072		if (sb_rdonly(sb))
   1073			ocfs2_set_ro_flag(osb, 0);
   1074	}
   1075
   1076	status = ocfs2_verify_heartbeat(osb);
   1077	if (status < 0)
   1078		goto out_super;
   1079
   1080	osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
   1081						 ocfs2_debugfs_root);
   1082
   1083	debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root,
   1084			    osb, &ocfs2_osb_debug_fops);
   1085
   1086	if (ocfs2_meta_ecc(osb))
   1087		ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats,
   1088							osb->osb_debug_root);
   1089
   1090	status = ocfs2_mount_volume(sb);
   1091	if (status < 0)
   1092		goto out_debugfs;
   1093
   1094	if (osb->root_inode)
   1095		inode = igrab(osb->root_inode);
   1096
   1097	if (!inode) {
   1098		status = -EIO;
   1099		goto out_dismount;
   1100	}
   1101
   1102	osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL,
   1103						&ocfs2_kset->kobj);
   1104	if (!osb->osb_dev_kset) {
   1105		status = -ENOMEM;
   1106		mlog(ML_ERROR, "Unable to create device kset %s.\n", sb->s_id);
   1107		goto out_dismount;
   1108	}
   1109
   1110	/* Create filecheck sysfs related directories/files at
   1111	 * /sys/fs/ocfs2/<devname>/filecheck */
   1112	if (ocfs2_filecheck_create_sysfs(osb)) {
   1113		status = -ENOMEM;
   1114		mlog(ML_ERROR, "Unable to create filecheck sysfs directory at "
   1115			"/sys/fs/ocfs2/%s/filecheck.\n", sb->s_id);
   1116		goto out_dismount;
   1117	}
   1118
   1119	root = d_make_root(inode);
   1120	if (!root) {
   1121		status = -ENOMEM;
   1122		goto out_dismount;
   1123	}
   1124
   1125	sb->s_root = root;
   1126
   1127	ocfs2_complete_mount_recovery(osb);
   1128
   1129	if (ocfs2_mount_local(osb))
   1130		snprintf(nodestr, sizeof(nodestr), "local");
   1131	else
   1132		snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num);
   1133
   1134	printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) "
   1135	       "with %s data mode.\n",
   1136	       osb->dev_str, nodestr, osb->slot_num,
   1137	       osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
   1138	       "ordered");
   1139
   1140	if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
   1141	   !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT))
   1142		printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted "
   1143		       "without cluster aware mode.\n", osb->dev_str);
   1144
   1145	atomic_set(&osb->vol_state, VOLUME_MOUNTED);
   1146	wake_up(&osb->osb_mount_event);
   1147
   1148	/* Now we can initialize quotas because we can afford to wait
   1149	 * for cluster locks recovery now. That also means that truncation
   1150	 * log recovery can happen but that waits for proper quota setup */
   1151	if (!sb_rdonly(sb)) {
   1152		status = ocfs2_enable_quotas(osb);
   1153		if (status < 0) {
   1154			/* We have to err-out specially here because
   1155			 * s_root is already set */
   1156			mlog_errno(status);
   1157			atomic_set(&osb->vol_state, VOLUME_DISABLED);
   1158			wake_up(&osb->osb_mount_event);
   1159			return status;
   1160		}
   1161	}
   1162
   1163	ocfs2_complete_quota_recovery(osb);
   1164
   1165	/* Now we wake up again for processes waiting for quotas */
   1166	atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS);
   1167	wake_up(&osb->osb_mount_event);
   1168
   1169	/* Start this when the mount is almost sure of being successful */
   1170	ocfs2_orphan_scan_start(osb);
   1171
   1172	return status;
   1173
   1174out_dismount:
   1175	atomic_set(&osb->vol_state, VOLUME_DISABLED);
   1176	wake_up(&osb->osb_mount_event);
   1177	ocfs2_dismount_volume(sb, 1);
   1178	goto out;
   1179
   1180out_debugfs:
   1181	debugfs_remove_recursive(osb->osb_debug_root);
   1182out_super:
   1183	ocfs2_release_system_inodes(osb);
   1184	kfree(osb->recovery_map);
   1185	ocfs2_delete_osb(osb);
   1186	kfree(osb);
   1187out:
   1188	mlog_errno(status);
   1189
   1190	return status;
   1191}
   1192
   1193static struct dentry *ocfs2_mount(struct file_system_type *fs_type,
   1194			int flags,
   1195			const char *dev_name,
   1196			void *data)
   1197{
   1198	return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super);
   1199}
   1200
   1201static struct file_system_type ocfs2_fs_type = {
   1202	.owner          = THIS_MODULE,
   1203	.name           = "ocfs2",
   1204	.mount          = ocfs2_mount,
   1205	.kill_sb        = kill_block_super,
   1206	.fs_flags       = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
   1207	.next           = NULL
   1208};
   1209MODULE_ALIAS_FS("ocfs2");
   1210
   1211static int ocfs2_check_set_options(struct super_block *sb,
   1212				   struct mount_options *options)
   1213{
   1214	if (options->mount_opt & OCFS2_MOUNT_USRQUOTA &&
   1215	    !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
   1216					 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
   1217		mlog(ML_ERROR, "User quotas were requested, but this "
   1218		     "filesystem does not have the feature enabled.\n");
   1219		return 0;
   1220	}
   1221	if (options->mount_opt & OCFS2_MOUNT_GRPQUOTA &&
   1222	    !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
   1223					 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
   1224		mlog(ML_ERROR, "Group quotas were requested, but this "
   1225		     "filesystem does not have the feature enabled.\n");
   1226		return 0;
   1227	}
   1228	if (options->mount_opt & OCFS2_MOUNT_POSIX_ACL &&
   1229	    !OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR)) {
   1230		mlog(ML_ERROR, "ACL support requested but extended attributes "
   1231		     "feature is not enabled\n");
   1232		return 0;
   1233	}
   1234	/* No ACL setting specified? Use XATTR feature... */
   1235	if (!(options->mount_opt & (OCFS2_MOUNT_POSIX_ACL |
   1236				    OCFS2_MOUNT_NO_POSIX_ACL))) {
   1237		if (OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR))
   1238			options->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
   1239		else
   1240			options->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
   1241	}
   1242	return 1;
   1243}
   1244
   1245static int ocfs2_parse_options(struct super_block *sb,
   1246			       char *options,
   1247			       struct mount_options *mopt,
   1248			       int is_remount)
   1249{
   1250	int status, user_stack = 0;
   1251	char *p;
   1252	u32 tmp;
   1253	int token, option;
   1254	substring_t args[MAX_OPT_ARGS];
   1255
   1256	trace_ocfs2_parse_options(is_remount, options ? options : "(none)");
   1257
   1258	mopt->commit_interval = 0;
   1259	mopt->mount_opt = OCFS2_MOUNT_NOINTR;
   1260	mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
   1261	mopt->slot = OCFS2_INVALID_SLOT;
   1262	mopt->localalloc_opt = -1;
   1263	mopt->cluster_stack[0] = '\0';
   1264	mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL;
   1265	mopt->dir_resv_level = -1;
   1266
   1267	if (!options) {
   1268		status = 1;
   1269		goto bail;
   1270	}
   1271
   1272	while ((p = strsep(&options, ",")) != NULL) {
   1273		if (!*p)
   1274			continue;
   1275
   1276		token = match_token(p, tokens, args);
   1277		switch (token) {
   1278		case Opt_hb_local:
   1279			mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL;
   1280			break;
   1281		case Opt_hb_none:
   1282			mopt->mount_opt |= OCFS2_MOUNT_HB_NONE;
   1283			break;
   1284		case Opt_hb_global:
   1285			mopt->mount_opt |= OCFS2_MOUNT_HB_GLOBAL;
   1286			break;
   1287		case Opt_barrier:
   1288			if (match_int(&args[0], &option)) {
   1289				status = 0;
   1290				goto bail;
   1291			}
   1292			if (option)
   1293				mopt->mount_opt |= OCFS2_MOUNT_BARRIER;
   1294			else
   1295				mopt->mount_opt &= ~OCFS2_MOUNT_BARRIER;
   1296			break;
   1297		case Opt_intr:
   1298			mopt->mount_opt &= ~OCFS2_MOUNT_NOINTR;
   1299			break;
   1300		case Opt_nointr:
   1301			mopt->mount_opt |= OCFS2_MOUNT_NOINTR;
   1302			break;
   1303		case Opt_err_panic:
   1304			mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_CONT;
   1305			mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_ROFS;
   1306			mopt->mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
   1307			break;
   1308		case Opt_err_ro:
   1309			mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_CONT;
   1310			mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC;
   1311			mopt->mount_opt |= OCFS2_MOUNT_ERRORS_ROFS;
   1312			break;
   1313		case Opt_err_cont:
   1314			mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_ROFS;
   1315			mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC;
   1316			mopt->mount_opt |= OCFS2_MOUNT_ERRORS_CONT;
   1317			break;
   1318		case Opt_data_ordered:
   1319			mopt->mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK;
   1320			break;
   1321		case Opt_data_writeback:
   1322			mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK;
   1323			break;
   1324		case Opt_user_xattr:
   1325			mopt->mount_opt &= ~OCFS2_MOUNT_NOUSERXATTR;
   1326			break;
   1327		case Opt_nouser_xattr:
   1328			mopt->mount_opt |= OCFS2_MOUNT_NOUSERXATTR;
   1329			break;
   1330		case Opt_atime_quantum:
   1331			if (match_int(&args[0], &option)) {
   1332				status = 0;
   1333				goto bail;
   1334			}
   1335			if (option >= 0)
   1336				mopt->atime_quantum = option;
   1337			break;
   1338		case Opt_slot:
   1339			if (match_int(&args[0], &option)) {
   1340				status = 0;
   1341				goto bail;
   1342			}
   1343			if (option)
   1344				mopt->slot = (u16)option;
   1345			break;
   1346		case Opt_commit:
   1347			if (match_int(&args[0], &option)) {
   1348				status = 0;
   1349				goto bail;
   1350			}
   1351			if (option < 0)
   1352				return 0;
   1353			if (option == 0)
   1354				option = JBD2_DEFAULT_MAX_COMMIT_AGE;
   1355			mopt->commit_interval = HZ * option;
   1356			break;
   1357		case Opt_localalloc:
   1358			if (match_int(&args[0], &option)) {
   1359				status = 0;
   1360				goto bail;
   1361			}
   1362			if (option >= 0)
   1363				mopt->localalloc_opt = option;
   1364			break;
   1365		case Opt_localflocks:
   1366			/*
   1367			 * Changing this during remount could race
   1368			 * flock() requests, or "unbalance" existing
   1369			 * ones (e.g., a lock is taken in one mode but
   1370			 * dropped in the other). If users care enough
   1371			 * to flip locking modes during remount, we
   1372			 * could add a "local" flag to individual
   1373			 * flock structures for proper tracking of
   1374			 * state.
   1375			 */
   1376			if (!is_remount)
   1377				mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS;
   1378			break;
   1379		case Opt_stack:
   1380			/* Check both that the option we were passed
   1381			 * is of the right length and that it is a proper
   1382			 * string of the right length.
   1383			 */
   1384			if (((args[0].to - args[0].from) !=
   1385			     OCFS2_STACK_LABEL_LEN) ||
   1386			    (strnlen(args[0].from,
   1387				     OCFS2_STACK_LABEL_LEN) !=
   1388			     OCFS2_STACK_LABEL_LEN)) {
   1389				mlog(ML_ERROR,
   1390				     "Invalid cluster_stack option\n");
   1391				status = 0;
   1392				goto bail;
   1393			}
   1394			memcpy(mopt->cluster_stack, args[0].from,
   1395			       OCFS2_STACK_LABEL_LEN);
   1396			mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
   1397			/*
   1398			 * Open code the memcmp here as we don't have
   1399			 * an osb to pass to
   1400			 * ocfs2_userspace_stack().
   1401			 */
   1402			if (memcmp(mopt->cluster_stack,
   1403				   OCFS2_CLASSIC_CLUSTER_STACK,
   1404				   OCFS2_STACK_LABEL_LEN))
   1405				user_stack = 1;
   1406			break;
   1407		case Opt_inode64:
   1408			mopt->mount_opt |= OCFS2_MOUNT_INODE64;
   1409			break;
   1410		case Opt_usrquota:
   1411			mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
   1412			break;
   1413		case Opt_grpquota:
   1414			mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
   1415			break;
   1416		case Opt_coherency_buffered:
   1417			mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED;
   1418			break;
   1419		case Opt_coherency_full:
   1420			mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED;
   1421			break;
   1422		case Opt_acl:
   1423			mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
   1424			mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL;
   1425			break;
   1426		case Opt_noacl:
   1427			mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
   1428			mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
   1429			break;
   1430		case Opt_resv_level:
   1431			if (is_remount)
   1432				break;
   1433			if (match_int(&args[0], &option)) {
   1434				status = 0;
   1435				goto bail;
   1436			}
   1437			if (option >= OCFS2_MIN_RESV_LEVEL &&
   1438			    option < OCFS2_MAX_RESV_LEVEL)
   1439				mopt->resv_level = option;
   1440			break;
   1441		case Opt_dir_resv_level:
   1442			if (is_remount)
   1443				break;
   1444			if (match_int(&args[0], &option)) {
   1445				status = 0;
   1446				goto bail;
   1447			}
   1448			if (option >= OCFS2_MIN_RESV_LEVEL &&
   1449			    option < OCFS2_MAX_RESV_LEVEL)
   1450				mopt->dir_resv_level = option;
   1451			break;
   1452		case Opt_journal_async_commit:
   1453			mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
   1454			break;
   1455		case Opt_nocluster:
   1456			mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER;
   1457			break;
   1458		default:
   1459			mlog(ML_ERROR,
   1460			     "Unrecognized mount option \"%s\" "
   1461			     "or missing value\n", p);
   1462			status = 0;
   1463			goto bail;
   1464		}
   1465	}
   1466
   1467	if (user_stack == 0) {
   1468		/* Ensure only one heartbeat mode */
   1469		tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
   1470					 OCFS2_MOUNT_HB_GLOBAL |
   1471					 OCFS2_MOUNT_HB_NONE);
   1472		if (hweight32(tmp) != 1) {
   1473			mlog(ML_ERROR, "Invalid heartbeat mount options\n");
   1474			status = 0;
   1475			goto bail;
   1476		}
   1477	}
   1478
   1479	status = 1;
   1480
   1481bail:
   1482	return status;
   1483}
   1484
   1485static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
   1486{
   1487	struct ocfs2_super *osb = OCFS2_SB(root->d_sb);
   1488	unsigned long opts = osb->s_mount_opt;
   1489	unsigned int local_alloc_megs;
   1490
   1491	if (opts & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL)) {
   1492		seq_printf(s, ",_netdev");
   1493		if (opts & OCFS2_MOUNT_HB_LOCAL)
   1494			seq_printf(s, ",%s", OCFS2_HB_LOCAL);
   1495		else
   1496			seq_printf(s, ",%s", OCFS2_HB_GLOBAL);
   1497	} else
   1498		seq_printf(s, ",%s", OCFS2_HB_NONE);
   1499
   1500	if (opts & OCFS2_MOUNT_NOINTR)
   1501		seq_printf(s, ",nointr");
   1502
   1503	if (opts & OCFS2_MOUNT_DATA_WRITEBACK)
   1504		seq_printf(s, ",data=writeback");
   1505	else
   1506		seq_printf(s, ",data=ordered");
   1507
   1508	if (opts & OCFS2_MOUNT_BARRIER)
   1509		seq_printf(s, ",barrier=1");
   1510
   1511	if (opts & OCFS2_MOUNT_ERRORS_PANIC)
   1512		seq_printf(s, ",errors=panic");
   1513	else if (opts & OCFS2_MOUNT_ERRORS_CONT)
   1514		seq_printf(s, ",errors=continue");
   1515	else
   1516		seq_printf(s, ",errors=remount-ro");
   1517
   1518	if (osb->preferred_slot != OCFS2_INVALID_SLOT)
   1519		seq_printf(s, ",preferred_slot=%d", osb->preferred_slot);
   1520
   1521	seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
   1522
   1523	if (osb->osb_commit_interval)
   1524		seq_printf(s, ",commit=%u",
   1525			   (unsigned) (osb->osb_commit_interval / HZ));
   1526
   1527	local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits);
   1528	if (local_alloc_megs != ocfs2_la_default_mb(osb))
   1529		seq_printf(s, ",localalloc=%d", local_alloc_megs);
   1530
   1531	if (opts & OCFS2_MOUNT_LOCALFLOCKS)
   1532		seq_printf(s, ",localflocks,");
   1533
   1534	if (osb->osb_cluster_stack[0])
   1535		seq_show_option_n(s, "cluster_stack", osb->osb_cluster_stack,
   1536				  OCFS2_STACK_LABEL_LEN);
   1537	if (opts & OCFS2_MOUNT_USRQUOTA)
   1538		seq_printf(s, ",usrquota");
   1539	if (opts & OCFS2_MOUNT_GRPQUOTA)
   1540		seq_printf(s, ",grpquota");
   1541
   1542	if (opts & OCFS2_MOUNT_COHERENCY_BUFFERED)
   1543		seq_printf(s, ",coherency=buffered");
   1544	else
   1545		seq_printf(s, ",coherency=full");
   1546
   1547	if (opts & OCFS2_MOUNT_NOUSERXATTR)
   1548		seq_printf(s, ",nouser_xattr");
   1549	else
   1550		seq_printf(s, ",user_xattr");
   1551
   1552	if (opts & OCFS2_MOUNT_INODE64)
   1553		seq_printf(s, ",inode64");
   1554
   1555	if (opts & OCFS2_MOUNT_POSIX_ACL)
   1556		seq_printf(s, ",acl");
   1557	else
   1558		seq_printf(s, ",noacl");
   1559
   1560	if (osb->osb_resv_level != OCFS2_DEFAULT_RESV_LEVEL)
   1561		seq_printf(s, ",resv_level=%d", osb->osb_resv_level);
   1562
   1563	if (osb->osb_dir_resv_level != osb->osb_resv_level)
   1564		seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level);
   1565
   1566	if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
   1567		seq_printf(s, ",journal_async_commit");
   1568
   1569	if (opts & OCFS2_MOUNT_NOCLUSTER)
   1570		seq_printf(s, ",nocluster");
   1571
   1572	return 0;
   1573}
   1574
   1575static int __init ocfs2_init(void)
   1576{
   1577	int status;
   1578
   1579	status = init_ocfs2_uptodate_cache();
   1580	if (status < 0)
   1581		goto out1;
   1582
   1583	status = ocfs2_initialize_mem_caches();
   1584	if (status < 0)
   1585		goto out2;
   1586
   1587	ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
   1588
   1589	ocfs2_set_locking_protocol();
   1590
   1591	status = register_quota_format(&ocfs2_quota_format);
   1592	if (status < 0)
   1593		goto out3;
   1594	status = register_filesystem(&ocfs2_fs_type);
   1595	if (!status)
   1596		return 0;
   1597
   1598	unregister_quota_format(&ocfs2_quota_format);
   1599out3:
   1600	debugfs_remove(ocfs2_debugfs_root);
   1601	ocfs2_free_mem_caches();
   1602out2:
   1603	exit_ocfs2_uptodate_cache();
   1604out1:
   1605	mlog_errno(status);
   1606	return status;
   1607}
   1608
/*
 * Module exit: releases what ocfs2_init() set up -- the quota format
 * registration, the debugfs root, the slab caches, the filesystem type
 * registration and the uptodate cache.
 *
 * NOTE(review): the slab caches are freed before the filesystem type is
 * unregistered, which is not the reverse of the order used in
 * ocfs2_init(); confirm this ordering is intentional.
 */
static void __exit ocfs2_exit(void)
{
	unregister_quota_format(&ocfs2_quota_format);

	debugfs_remove(ocfs2_debugfs_root);

	ocfs2_free_mem_caches();

	unregister_filesystem(&ocfs2_fs_type);

	exit_ocfs2_uptodate_cache();
}
   1621
/*
 * Regular unmount path: sync the block device, then tear the volume down.
 * mnt_err is passed as 0 because this is not a failed-mount cleanup.
 */
static void ocfs2_put_super(struct super_block *sb)
{
	trace_ocfs2_put_super(sb);

	ocfs2_sync_blockdev(sb);
	ocfs2_dismount_volume(sb, 0);
}
   1629
   1630static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
   1631{
   1632	struct ocfs2_super *osb;
   1633	u32 numbits, freebits;
   1634	int status;
   1635	struct ocfs2_dinode *bm_lock;
   1636	struct buffer_head *bh = NULL;
   1637	struct inode *inode = NULL;
   1638
   1639	trace_ocfs2_statfs(dentry->d_sb, buf);
   1640
   1641	osb = OCFS2_SB(dentry->d_sb);
   1642
   1643	inode = ocfs2_get_system_file_inode(osb,
   1644					    GLOBAL_BITMAP_SYSTEM_INODE,
   1645					    OCFS2_INVALID_SLOT);
   1646	if (!inode) {
   1647		mlog(ML_ERROR, "failed to get bitmap inode\n");
   1648		status = -EIO;
   1649		goto bail;
   1650	}
   1651
   1652	status = ocfs2_inode_lock(inode, &bh, 0);
   1653	if (status < 0) {
   1654		mlog_errno(status);
   1655		goto bail;
   1656	}
   1657
   1658	bm_lock = (struct ocfs2_dinode *) bh->b_data;
   1659
   1660	numbits = le32_to_cpu(bm_lock->id1.bitmap1.i_total);
   1661	freebits = numbits - le32_to_cpu(bm_lock->id1.bitmap1.i_used);
   1662
   1663	buf->f_type = OCFS2_SUPER_MAGIC;
   1664	buf->f_bsize = dentry->d_sb->s_blocksize;
   1665	buf->f_namelen = OCFS2_MAX_FILENAME_LEN;
   1666	buf->f_blocks = ((sector_t) numbits) *
   1667			(osb->s_clustersize >> osb->sb->s_blocksize_bits);
   1668	buf->f_bfree = ((sector_t) freebits) *
   1669		       (osb->s_clustersize >> osb->sb->s_blocksize_bits);
   1670	buf->f_bavail = buf->f_bfree;
   1671	buf->f_files = numbits;
   1672	buf->f_ffree = freebits;
   1673	buf->f_fsid.val[0] = crc32_le(0, osb->uuid_str, OCFS2_VOL_UUID_LEN)
   1674				& 0xFFFFFFFFUL;
   1675	buf->f_fsid.val[1] = crc32_le(0, osb->uuid_str + OCFS2_VOL_UUID_LEN,
   1676				OCFS2_VOL_UUID_LEN) & 0xFFFFFFFFUL;
   1677
   1678	brelse(bh);
   1679
   1680	ocfs2_inode_unlock(inode, 0);
   1681	status = 0;
   1682bail:
   1683	iput(inode);
   1684
   1685	if (status)
   1686		mlog_errno(status);
   1687
   1688	return status;
   1689}
   1690
/*
 * Constructor passed to kmem_cache_create() for the ocfs2 inode cache
 * (see ocfs2_initialize_mem_caches()).
 *
 * @data: the struct ocfs2_inode_info object being constructed
 *
 * Zeroes the scalar fields and initializes the lock, the lists, the
 * semaphores, the mutex, the reservation, the lock resources and the
 * metadata cache, then initializes the embedded VFS inode.
 */
static void ocfs2_inode_init_once(void *data)
{
	struct ocfs2_inode_info *oi = data;

	oi->ip_flags = 0;
	oi->ip_open_count = 0;
	spin_lock_init(&oi->ip_lock);
	ocfs2_extent_map_init(&oi->vfs_inode);
	INIT_LIST_HEAD(&oi->ip_io_markers);
	INIT_LIST_HEAD(&oi->ip_unwritten_list);
	oi->ip_dir_start_lookup = 0;
	init_rwsem(&oi->ip_alloc_sem);
	init_rwsem(&oi->ip_xattr_sem);
	mutex_init(&oi->ip_io_mutex);

	oi->ip_blkno = 0ULL;
	oi->ip_clusters = 0;
	oi->ip_next_orphan = NULL;

	ocfs2_resv_init_once(&oi->ip_la_data_resv);

	/* One lock resource each for rw, inode (meta) and open locking */
	ocfs2_lock_res_init_once(&oi->ip_rw_lockres);
	ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
	ocfs2_lock_res_init_once(&oi->ip_open_lockres);

	ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode),
				  &ocfs2_inode_caching_ops);

	/* Generic VFS one-time initialization of the embedded inode */
	inode_init_once(&oi->vfs_inode);
}
   1721
   1722static int ocfs2_initialize_mem_caches(void)
   1723{
   1724	ocfs2_inode_cachep = kmem_cache_create("ocfs2_inode_cache",
   1725				       sizeof(struct ocfs2_inode_info),
   1726				       0,
   1727				       (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
   1728						SLAB_MEM_SPREAD|SLAB_ACCOUNT),
   1729				       ocfs2_inode_init_once);
   1730	ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache",
   1731					sizeof(struct ocfs2_dquot),
   1732					0,
   1733					(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
   1734						SLAB_MEM_SPREAD),
   1735					NULL);
   1736	ocfs2_qf_chunk_cachep = kmem_cache_create("ocfs2_qf_chunk_cache",
   1737					sizeof(struct ocfs2_quota_chunk),
   1738					0,
   1739					(SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
   1740					NULL);
   1741	if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep ||
   1742	    !ocfs2_qf_chunk_cachep) {
   1743		kmem_cache_destroy(ocfs2_inode_cachep);
   1744		kmem_cache_destroy(ocfs2_dquot_cachep);
   1745		kmem_cache_destroy(ocfs2_qf_chunk_cachep);
   1746		return -ENOMEM;
   1747	}
   1748
   1749	return 0;
   1750}
   1751
/*
 * Destroy the three slab caches created by ocfs2_initialize_mem_caches()
 * and reset the global cache pointers to NULL.
 */
static void ocfs2_free_mem_caches(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(ocfs2_inode_cachep);
	ocfs2_inode_cachep = NULL;

	kmem_cache_destroy(ocfs2_dquot_cachep);
	ocfs2_dquot_cachep = NULL;

	kmem_cache_destroy(ocfs2_qf_chunk_cachep);
	ocfs2_qf_chunk_cachep = NULL;
}
   1768
   1769static int ocfs2_get_sector(struct super_block *sb,
   1770			    struct buffer_head **bh,
   1771			    int block,
   1772			    int sect_size)
   1773{
   1774	if (!sb_set_blocksize(sb, sect_size)) {
   1775		mlog(ML_ERROR, "unable to set blocksize\n");
   1776		return -EIO;
   1777	}
   1778
   1779	*bh = sb_getblk(sb, block);
   1780	if (!*bh) {
   1781		mlog_errno(-ENOMEM);
   1782		return -ENOMEM;
   1783	}
   1784	lock_buffer(*bh);
   1785	if (!buffer_dirty(*bh))
   1786		clear_buffer_uptodate(*bh);
   1787	unlock_buffer(*bh);
   1788	ll_rw_block(REQ_OP_READ, 0, 1, bh);
   1789	wait_on_buffer(*bh);
   1790	if (!buffer_uptodate(*bh)) {
   1791		mlog_errno(-EIO);
   1792		brelse(*bh);
   1793		*bh = NULL;
   1794		return -EIO;
   1795	}
   1796
   1797	return 0;
   1798}
   1799
/*
 * Bring the volume online: start the DLM, take the super lock, find a
 * slot, load the node-local system inodes, check the volume and
 * initialize the truncate log.
 *
 * @sb: superblock whose ocfs2_super has already been initialized
 *
 * Returns 0 on success and a negative errno on failure.  On failure the
 * steps already completed are undone through the label ladder at the end.
 * A hard-readonly mount skips all of the above and returns 0.
 */
static int ocfs2_mount_volume(struct super_block *sb)
{
	int status = 0;
	struct ocfs2_super *osb = OCFS2_SB(sb);

	/* Hard readonly: nothing below runs; status is still 0 here */
	if (ocfs2_is_hard_readonly(osb))
		goto out;

	mutex_init(&osb->obs_trim_fs_mutex);

	status = ocfs2_dlm_init(osb);
	if (status < 0) {
		mlog_errno(status);
		if (status == -EBADR && ocfs2_userspace_stack(osb))
			mlog(ML_ERROR, "couldn't mount because cluster name on"
			" disk does not match the running cluster name.\n");
		goto out;
	}

	/* Held (second argument 1) until setup is done or unwound below */
	status = ocfs2_super_lock(osb, 1);
	if (status < 0) {
		mlog_errno(status);
		goto out_dlm;
	}

	/* This will load up the node map and add ourselves to it. */
	status = ocfs2_find_slot(osb);
	if (status < 0) {
		mlog_errno(status);
		goto out_super_lock;
	}

	/* load all node-local system inodes */
	status = ocfs2_init_local_system_inodes(osb);
	if (status < 0) {
		mlog_errno(status);
		goto out_super_lock;
	}

	status = ocfs2_check_volume(osb);
	if (status < 0) {
		mlog_errno(status);
		goto out_system_inodes;
	}

	status = ocfs2_truncate_log_init(osb);
	if (status < 0) {
		mlog_errno(status);
		goto out_system_inodes;
	}

	ocfs2_super_unlock(osb, 1);
	return 0;

	/* Error unwinding: each label falls through to the ones below it */
out_system_inodes:
	if (osb->local_alloc_state == OCFS2_LA_ENABLED)
		ocfs2_shutdown_local_alloc(osb);
	ocfs2_release_system_inodes(osb);
	/* before journal shutdown, we should release slot_info */
	ocfs2_free_slot_info(osb);
	ocfs2_journal_shutdown(osb);
out_super_lock:
	ocfs2_super_unlock(osb, 1);
out_dlm:
	ocfs2_dlm_shutdown(osb, 0);
out:
	return status;
}
   1868
/*
 * Tear down a mounted (or partially mounted) volume and free its osb.
 *
 * @sb:      superblock being dismounted; must not be NULL
 * @mnt_err: non-zero when called to clean up after a failed mount
 *           (ocfs2_fill_super() passes 1), zero on a regular unmount
 *           (ocfs2_put_super() passes 0).  A non-zero value suppresses
 *           the cluster hangup.
 *
 * If the super lock cannot be taken while a cluster connection exists,
 * the function logs the error and returns early, leaving the remaining
 * teardown steps undone.
 */
static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
{
	int tmp, hangup_needed = 0;
	struct ocfs2_super *osb = NULL;
	char nodestr[12];

	trace_ocfs2_dismount_volume(sb);

	BUG_ON(!sb);
	osb = OCFS2_SB(sb);
	BUG_ON(!osb);

	/* Remove file check sysfs related directories/files,
	 * and wait for the pending file check operations */
	ocfs2_filecheck_remove_sysfs(osb);

	kset_unregister(osb->osb_dev_kset);

	/* Orphan scan should be stopped as early as possible */
	ocfs2_orphan_scan_stop(osb);

	ocfs2_disable_quotas(osb);

	/* All dquots should be freed by now */
	WARN_ON(!llist_empty(&osb->dquot_drop_list));
	/* Wait for worker to be done with the work structure in osb */
	cancel_work_sync(&osb->dquot_drop_work);

	ocfs2_shutdown_local_alloc(osb);

	ocfs2_truncate_log_shutdown(osb);

	/* This will disable recovery and flush any recovery work. */
	ocfs2_recovery_exit(osb);

	ocfs2_sync_blockdev(sb);

	ocfs2_purge_refcount_trees(osb);

	/* No cluster connection means we've failed during mount, so skip
	 * all the steps which depended on that to complete. */
	if (osb->cconn) {
		tmp = ocfs2_super_lock(osb, 1);
		if (tmp < 0) {
			mlog_errno(tmp);
			return;
		}
	}

	/* Give the slot back while the super lock (if any) is held */
	if (osb->slot_num != OCFS2_INVALID_SLOT)
		ocfs2_put_slot(osb);

	if (osb->cconn)
		ocfs2_super_unlock(osb, 1);

	ocfs2_release_system_inodes(osb);

	ocfs2_journal_shutdown(osb);

	/*
	 * If we're dismounting due to mount error, mount.ocfs2 will clean
	 * up heartbeat.  If we're a local mount, there is no heartbeat.
	 * If we failed before we got a uuid_str yet, we can't stop
	 * heartbeat.  Otherwise, do it.
	 */
	if (!mnt_err && !ocfs2_mount_local(osb) && osb->uuid_str &&
	    !ocfs2_is_hard_readonly(osb))
		hangup_needed = 1;

	if (osb->cconn)
		ocfs2_dlm_shutdown(osb, hangup_needed);

	ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);
	debugfs_remove_recursive(osb->osb_debug_root);

	if (hangup_needed)
		ocfs2_cluster_hangup(osb->uuid_str, strlen(osb->uuid_str));

	atomic_set(&osb->vol_state, VOLUME_DISMOUNTED);

	if (ocfs2_mount_local(osb))
		snprintf(nodestr, sizeof(nodestr), "local");
	else
		snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num);

	printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n",
	       osb->dev_str, nodestr);

	/* The osb is gone after this point; detach it from the superblock */
	ocfs2_delete_osb(osb);
	kfree(osb);
	sb->s_dev = 0;
	sb->s_fs_info = NULL;
}
   1962
   1963static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uuid,
   1964				unsigned uuid_bytes)
   1965{
   1966	int i, ret;
   1967	char *ptr;
   1968
   1969	BUG_ON(uuid_bytes != OCFS2_VOL_UUID_LEN);
   1970
   1971	osb->uuid_str = kzalloc(OCFS2_VOL_UUID_LEN * 2 + 1, GFP_KERNEL);
   1972	if (osb->uuid_str == NULL)
   1973		return -ENOMEM;
   1974
   1975	for (i = 0, ptr = osb->uuid_str; i < OCFS2_VOL_UUID_LEN; i++) {
   1976		/* print with null */
   1977		ret = snprintf(ptr, 3, "%02X", uuid[i]);
   1978		if (ret != 2) /* drop super cleans up */
   1979			return -EINVAL;
   1980		/* then only advance past the last char */
   1981		ptr += 2;
   1982	}
   1983
   1984	return 0;
   1985}
   1986
   1987/* Make sure entire volume is addressable by our journal.  Requires
   1988   osb_clusters_at_boot to be valid and for the journal to have been
   1989   initialized by ocfs2_journal_init(). */
   1990static int ocfs2_journal_addressable(struct ocfs2_super *osb)
   1991{
   1992	int status = 0;
   1993	u64 max_block =
   1994		ocfs2_clusters_to_blocks(osb->sb,
   1995					 osb->osb_clusters_at_boot) - 1;
   1996
   1997	/* 32-bit block number is always OK. */
   1998	if (max_block <= (u32)~0ULL)
   1999		goto out;
   2000
   2001	/* Volume is "huge", so see if our journal is new enough to
   2002	   support it. */
   2003	if (!(OCFS2_HAS_COMPAT_FEATURE(osb->sb,
   2004				       OCFS2_FEATURE_COMPAT_JBD2_SB) &&
   2005	      jbd2_journal_check_used_features(osb->journal->j_journal, 0, 0,
   2006					       JBD2_FEATURE_INCOMPAT_64BIT))) {
   2007		mlog(ML_ERROR, "The journal cannot address the entire volume. "
   2008		     "Enable the 'block64' journal option with tunefs.ocfs2");
   2009		status = -EFBIG;
   2010		goto out;
   2011	}
   2012
   2013 out:
   2014	return status;
   2015}
   2016
   2017static int ocfs2_initialize_super(struct super_block *sb,
   2018				  struct buffer_head *bh,
   2019				  int sector_size,
   2020				  struct ocfs2_blockcheck_stats *stats)
   2021{
   2022	int status;
   2023	int i, cbits, bbits;
   2024	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
   2025	struct inode *inode = NULL;
   2026	struct ocfs2_super *osb;
   2027	u64 total_blocks;
   2028
   2029	osb = kzalloc(sizeof(struct ocfs2_super), GFP_KERNEL);
   2030	if (!osb) {
   2031		status = -ENOMEM;
   2032		mlog_errno(status);
   2033		goto out;
   2034	}
   2035
   2036	sb->s_fs_info = osb;
   2037	sb->s_op = &ocfs2_sops;
   2038	sb->s_d_op = &ocfs2_dentry_ops;
   2039	sb->s_export_op = &ocfs2_export_ops;
   2040	sb->s_qcop = &dquot_quotactl_sysfile_ops;
   2041	sb->dq_op = &ocfs2_quota_operations;
   2042	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
   2043	sb->s_xattr = ocfs2_xattr_handlers;
   2044	sb->s_time_gran = 1;
   2045	sb->s_flags |= SB_NOATIME;
   2046	/* this is needed to support O_LARGEFILE */
   2047	cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
   2048	bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
   2049	sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
   2050	memcpy(&sb->s_uuid, di->id2.i_super.s_uuid,
   2051	       sizeof(di->id2.i_super.s_uuid));
   2052
   2053	osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
   2054
   2055	for (i = 0; i < 3; i++)
   2056		osb->osb_dx_seed[i] = le32_to_cpu(di->id2.i_super.s_dx_seed[i]);
   2057	osb->osb_dx_seed[3] = le32_to_cpu(di->id2.i_super.s_uuid_hash);
   2058
   2059	osb->sb = sb;
   2060	osb->s_sectsize_bits = blksize_bits(sector_size);
   2061	BUG_ON(!osb->s_sectsize_bits);
   2062
   2063	spin_lock_init(&osb->dc_task_lock);
   2064	init_waitqueue_head(&osb->dc_event);
   2065	osb->dc_work_sequence = 0;
   2066	osb->dc_wake_sequence = 0;
   2067	INIT_LIST_HEAD(&osb->blocked_lock_list);
   2068	osb->blocked_lock_count = 0;
   2069	spin_lock_init(&osb->osb_lock);
   2070	spin_lock_init(&osb->osb_xattr_lock);
   2071	ocfs2_init_steal_slots(osb);
   2072
   2073	mutex_init(&osb->system_file_mutex);
   2074
   2075	atomic_set(&osb->alloc_stats.moves, 0);
   2076	atomic_set(&osb->alloc_stats.local_data, 0);
   2077	atomic_set(&osb->alloc_stats.bitmap_data, 0);
   2078	atomic_set(&osb->alloc_stats.bg_allocs, 0);
   2079	atomic_set(&osb->alloc_stats.bg_extends, 0);
   2080
   2081	/* Copy the blockcheck stats from the superblock probe */
   2082	osb->osb_ecc_stats = *stats;
   2083
   2084	ocfs2_init_node_maps(osb);
   2085
   2086	snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
   2087		 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
   2088
   2089	osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots);
   2090	if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) {
   2091		mlog(ML_ERROR, "Invalid number of node slots (%u)\n",
   2092		     osb->max_slots);
   2093		status = -EINVAL;
   2094		goto out;
   2095	}
   2096
   2097	ocfs2_orphan_scan_init(osb);
   2098
   2099	status = ocfs2_recovery_init(osb);
   2100	if (status) {
   2101		mlog(ML_ERROR, "Unable to initialize recovery state\n");
   2102		mlog_errno(status);
   2103		goto out;
   2104	}
   2105
   2106	init_waitqueue_head(&osb->checkpoint_event);
   2107
   2108	osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
   2109
   2110	osb->slot_num = OCFS2_INVALID_SLOT;
   2111
   2112	osb->s_xattr_inline_size = le16_to_cpu(
   2113					di->id2.i_super.s_xattr_inline_size);
   2114
   2115	osb->local_alloc_state = OCFS2_LA_UNUSED;
   2116	osb->local_alloc_bh = NULL;
   2117	INIT_DELAYED_WORK(&osb->la_enable_wq, ocfs2_la_enable_worker);
   2118
   2119	init_waitqueue_head(&osb->osb_mount_event);
   2120
   2121	ocfs2_resmap_init(osb, &osb->osb_la_resmap);
   2122
   2123	osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
   2124	if (!osb->vol_label) {
   2125		mlog(ML_ERROR, "unable to alloc vol label\n");
   2126		status = -ENOMEM;
   2127		goto out_recovery_map;
   2128	}
   2129
   2130	osb->slot_recovery_generations =
   2131		kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations),
   2132			GFP_KERNEL);
   2133	if (!osb->slot_recovery_generations) {
   2134		status = -ENOMEM;
   2135		mlog_errno(status);
   2136		goto out_vol_label;
   2137	}
   2138
   2139	init_waitqueue_head(&osb->osb_wipe_event);
   2140	osb->osb_orphan_wipes = kcalloc(osb->max_slots,
   2141					sizeof(*osb->osb_orphan_wipes),
   2142					GFP_KERNEL);
   2143	if (!osb->osb_orphan_wipes) {
   2144		status = -ENOMEM;
   2145		mlog_errno(status);
   2146		goto out_slot_recovery_gen;
   2147	}
   2148
   2149	osb->osb_rf_lock_tree = RB_ROOT;
   2150
   2151	osb->s_feature_compat =
   2152		le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat);
   2153	osb->s_feature_ro_compat =
   2154		le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_ro_compat);
   2155	osb->s_feature_incompat =
   2156		le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_incompat);
   2157
   2158	if ((i = OCFS2_HAS_INCOMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_INCOMPAT_SUPP))) {
   2159		mlog(ML_ERROR, "couldn't mount because of unsupported "
   2160		     "optional features (%x).\n", i);
   2161		status = -EINVAL;
   2162		goto out_orphan_wipes;
   2163	}
   2164	if (!sb_rdonly(osb->sb) && (i = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP))) {
   2165		mlog(ML_ERROR, "couldn't mount RDWR because of "
   2166		     "unsupported optional features (%x).\n", i);
   2167		status = -EINVAL;
   2168		goto out_orphan_wipes;
   2169	}
   2170
   2171	if (ocfs2_clusterinfo_valid(osb)) {
   2172		/*
   2173		 * ci_stack and ci_cluster in ocfs2_cluster_info may not be null
   2174		 * terminated, so make sure no overflow happens here by using
   2175		 * memcpy. Destination strings will always be null terminated
   2176		 * because osb is allocated using kzalloc.
   2177		 */
   2178		osb->osb_stackflags =
   2179			OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
   2180		memcpy(osb->osb_cluster_stack,
   2181		       OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
   2182		       OCFS2_STACK_LABEL_LEN);
   2183		if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
   2184			mlog(ML_ERROR,
   2185			     "couldn't mount because of an invalid "
   2186			     "cluster stack label (%s) \n",
   2187			     osb->osb_cluster_stack);
   2188			status = -EINVAL;
   2189			goto out_orphan_wipes;
   2190		}
   2191		memcpy(osb->osb_cluster_name,
   2192			OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
   2193			OCFS2_CLUSTER_NAME_LEN);
   2194	} else {
   2195		/* The empty string is identical with classic tools that
   2196		 * don't know about s_cluster_info. */
   2197		osb->osb_cluster_stack[0] = '\0';
   2198	}
   2199
   2200	get_random_bytes(&osb->s_next_generation, sizeof(u32));
   2201
   2202	/*
   2203	 * FIXME
   2204	 * This should be done in ocfs2_journal_init(), but any inode
   2205	 * writes back operation will cause the filesystem to crash.
   2206	 */
   2207	status = ocfs2_journal_alloc(osb);
   2208	if (status < 0)
   2209		goto out_orphan_wipes;
   2210
   2211	INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs);
   2212	init_llist_head(&osb->dquot_drop_list);
   2213
   2214	/* get some pseudo constants for clustersize bits */
   2215	osb->s_clustersize_bits =
   2216		le32_to_cpu(di->id2.i_super.s_clustersize_bits);
   2217	osb->s_clustersize = 1 << osb->s_clustersize_bits;
   2218
   2219	if (osb->s_clustersize < OCFS2_MIN_CLUSTERSIZE ||
   2220	    osb->s_clustersize > OCFS2_MAX_CLUSTERSIZE) {
   2221		mlog(ML_ERROR, "Volume has invalid cluster size (%d)\n",
   2222		     osb->s_clustersize);
   2223		status = -EINVAL;
   2224		goto out_journal;
   2225	}
   2226
   2227	total_blocks = ocfs2_clusters_to_blocks(osb->sb,
   2228						le32_to_cpu(di->i_clusters));
   2229
   2230	status = generic_check_addressable(osb->sb->s_blocksize_bits,
   2231					   total_blocks);
   2232	if (status) {
   2233		mlog(ML_ERROR, "Volume too large "
   2234		     "to mount safely on this system");
   2235		status = -EFBIG;
   2236		goto out_journal;
   2237	}
   2238
   2239	if (ocfs2_setup_osb_uuid(osb, di->id2.i_super.s_uuid,
   2240				 sizeof(di->id2.i_super.s_uuid))) {
   2241		mlog(ML_ERROR, "Out of memory trying to setup our uuid.\n");
   2242		status = -ENOMEM;
   2243		goto out_journal;
   2244	}
   2245
   2246	strlcpy(osb->vol_label, di->id2.i_super.s_label,
   2247		OCFS2_MAX_VOL_LABEL_LEN);
   2248	osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno);
   2249	osb->system_dir_blkno = le64_to_cpu(di->id2.i_super.s_system_dir_blkno);
   2250	osb->first_cluster_group_blkno =
   2251		le64_to_cpu(di->id2.i_super.s_first_cluster_group);
   2252	osb->fs_generation = le32_to_cpu(di->i_fs_generation);
   2253	osb->uuid_hash = le32_to_cpu(di->id2.i_super.s_uuid_hash);
   2254	trace_ocfs2_initialize_super(osb->vol_label, osb->uuid_str,
   2255				     (unsigned long long)osb->root_blkno,
   2256				     (unsigned long long)osb->system_dir_blkno,
   2257				     osb->s_clustersize_bits);
   2258
   2259	osb->osb_dlm_debug = ocfs2_new_dlm_debug();
   2260	if (!osb->osb_dlm_debug) {
   2261		status = -ENOMEM;
   2262		mlog_errno(status);
   2263		goto out_uuid_str;
   2264	}
   2265
   2266	atomic_set(&osb->vol_state, VOLUME_INIT);
   2267
   2268	/* load root, system_dir, and all global system inodes */
   2269	status = ocfs2_init_global_system_inodes(osb);
   2270	if (status < 0) {
   2271		mlog_errno(status);
   2272		goto out_dlm_out;
   2273	}
   2274
   2275	/*
   2276	 * global bitmap
   2277	 */
   2278	inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE,
   2279					    OCFS2_INVALID_SLOT);
   2280	if (!inode) {
   2281		status = -EINVAL;
   2282		mlog_errno(status);
   2283		goto out_system_inodes;
   2284	}
   2285
   2286	osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
   2287	osb->osb_clusters_at_boot = OCFS2_I(inode)->ip_clusters;
   2288	iput(inode);
   2289
   2290	osb->bitmap_cpg = ocfs2_group_bitmap_size(sb, 0,
   2291				 osb->s_feature_incompat) * 8;
   2292
   2293	status = ocfs2_init_slot_info(osb);
   2294	if (status < 0) {
   2295		mlog_errno(status);
   2296		goto out_system_inodes;
   2297	}
   2298
   2299	osb->ocfs2_wq = alloc_ordered_workqueue("ocfs2_wq", WQ_MEM_RECLAIM);
   2300	if (!osb->ocfs2_wq) {
   2301		status = -ENOMEM;
   2302		mlog_errno(status);
   2303		goto out_slot_info;
   2304	}
   2305
   2306	return status;
   2307
   2308out_slot_info:
   2309	ocfs2_free_slot_info(osb);
   2310out_system_inodes:
   2311	ocfs2_release_system_inodes(osb);
   2312out_dlm_out:
   2313	ocfs2_put_dlm_debug(osb->osb_dlm_debug);
   2314out_uuid_str:
   2315	kfree(osb->uuid_str);
   2316out_journal:
   2317	kfree(osb->journal);
   2318out_orphan_wipes:
   2319	kfree(osb->osb_orphan_wipes);
   2320out_slot_recovery_gen:
   2321	kfree(osb->slot_recovery_generations);
   2322out_vol_label:
   2323	kfree(osb->vol_label);
   2324out_recovery_map:
   2325	kfree(osb->recovery_map);
   2326out:
   2327	kfree(osb);
   2328	sb->s_fs_info = NULL;
   2329	return status;
   2330}
   2331
   2332/*
   2333 * will return: -EAGAIN if it is ok to keep searching for superblocks
   2334 *              -EINVAL if there is a bad superblock
   2335 *              0 on success
   2336 */
   2337static int ocfs2_verify_volume(struct ocfs2_dinode *di,
   2338			       struct buffer_head *bh,
   2339			       u32 blksz,
   2340			       struct ocfs2_blockcheck_stats *stats)
   2341{
   2342	int status = -EAGAIN;
   2343
   2344	if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
   2345		   strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
   2346		/* We have to do a raw check of the feature here */
   2347		if (le32_to_cpu(di->id2.i_super.s_feature_incompat) &
   2348		    OCFS2_FEATURE_INCOMPAT_META_ECC) {
   2349			status = ocfs2_block_check_validate(bh->b_data,
   2350							    bh->b_size,
   2351							    &di->i_check,
   2352							    stats);
   2353			if (status)
   2354				goto out;
   2355		}
   2356		status = -EINVAL;
   2357		if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) {
   2358			mlog(ML_ERROR, "found superblock with incorrect block "
   2359			     "size: found %u, should be %u\n",
   2360			     1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits),
   2361			       blksz);
   2362		} else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) !=
   2363			   OCFS2_MAJOR_REV_LEVEL ||
   2364			   le16_to_cpu(di->id2.i_super.s_minor_rev_level) !=
   2365			   OCFS2_MINOR_REV_LEVEL) {
   2366			mlog(ML_ERROR, "found superblock with bad version: "
   2367			     "found %u.%u, should be %u.%u\n",
   2368			     le16_to_cpu(di->id2.i_super.s_major_rev_level),
   2369			     le16_to_cpu(di->id2.i_super.s_minor_rev_level),
   2370			     OCFS2_MAJOR_REV_LEVEL,
   2371			     OCFS2_MINOR_REV_LEVEL);
   2372		} else if (bh->b_blocknr != le64_to_cpu(di->i_blkno)) {
   2373			mlog(ML_ERROR, "bad block number on superblock: "
   2374			     "found %llu, should be %llu\n",
   2375			     (unsigned long long)le64_to_cpu(di->i_blkno),
   2376			     (unsigned long long)bh->b_blocknr);
   2377		} else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 ||
   2378			    le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) {
   2379			mlog(ML_ERROR, "bad cluster size found: %u\n",
   2380			     1 << le32_to_cpu(di->id2.i_super.s_clustersize_bits));
   2381		} else if (!le64_to_cpu(di->id2.i_super.s_root_blkno)) {
   2382			mlog(ML_ERROR, "bad root_blkno: 0\n");
   2383		} else if (!le64_to_cpu(di->id2.i_super.s_system_dir_blkno)) {
   2384			mlog(ML_ERROR, "bad system_dir_blkno: 0\n");
   2385		} else if (le16_to_cpu(di->id2.i_super.s_max_slots) > OCFS2_MAX_SLOTS) {
   2386			mlog(ML_ERROR,
   2387			     "Superblock slots found greater than file system "
   2388			     "maximum: found %u, max %u\n",
   2389			     le16_to_cpu(di->id2.i_super.s_max_slots),
   2390			     OCFS2_MAX_SLOTS);
   2391		} else {
   2392			/* found it! */
   2393			status = 0;
   2394		}
   2395	}
   2396
   2397out:
   2398	if (status && status != -EAGAIN)
   2399		mlog_errno(status);
   2400	return status;
   2401}
   2402
   2403static int ocfs2_check_volume(struct ocfs2_super *osb)
   2404{
   2405	int status;
   2406	int dirty;
   2407	int local;
   2408	struct ocfs2_dinode *local_alloc = NULL; /* only used if we
   2409						  * recover
   2410						  * ourselves. */
   2411
   2412	/* Init our journal object. */
   2413	status = ocfs2_journal_init(osb, &dirty);
   2414	if (status < 0) {
   2415		mlog(ML_ERROR, "Could not initialize journal!\n");
   2416		goto finally;
   2417	}
   2418
   2419	/* Now that journal has been initialized, check to make sure
   2420	   entire volume is addressable. */
   2421	status = ocfs2_journal_addressable(osb);
   2422	if (status)
   2423		goto finally;
   2424
   2425	/* If the journal was unmounted cleanly then we don't want to
   2426	 * recover anything. Otherwise, journal_load will do that
   2427	 * dirty work for us :) */
   2428	if (!dirty) {
   2429		status = ocfs2_journal_wipe(osb->journal, 0);
   2430		if (status < 0) {
   2431			mlog_errno(status);
   2432			goto finally;
   2433		}
   2434	} else {
   2435		printk(KERN_NOTICE "ocfs2: File system on device (%s) was not "
   2436		       "unmounted cleanly, recovering it.\n", osb->dev_str);
   2437	}
   2438
   2439	local = ocfs2_mount_local(osb);
   2440
   2441	/* will play back anything left in the journal. */
   2442	status = ocfs2_journal_load(osb->journal, local, dirty);
   2443	if (status < 0) {
   2444		mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status);
   2445		goto finally;
   2446	}
   2447
   2448	if (osb->s_mount_opt & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
   2449		jbd2_journal_set_features(osb->journal->j_journal,
   2450				JBD2_FEATURE_COMPAT_CHECKSUM, 0,
   2451				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
   2452	else
   2453		jbd2_journal_clear_features(osb->journal->j_journal,
   2454				JBD2_FEATURE_COMPAT_CHECKSUM, 0,
   2455				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
   2456
   2457	if (dirty) {
   2458		/* recover my local alloc if we didn't unmount cleanly. */
   2459		status = ocfs2_begin_local_alloc_recovery(osb,
   2460							  osb->slot_num,
   2461							  &local_alloc);
   2462		if (status < 0) {
   2463			mlog_errno(status);
   2464			goto finally;
   2465		}
   2466		/* we complete the recovery process after we've marked
   2467		 * ourselves as mounted. */
   2468	}
   2469
   2470	status = ocfs2_load_local_alloc(osb);
   2471	if (status < 0) {
   2472		mlog_errno(status);
   2473		goto finally;
   2474	}
   2475
   2476	if (dirty) {
   2477		/* Recovery will be completed after we've mounted the
   2478		 * rest of the volume. */
   2479		osb->local_alloc_copy = local_alloc;
   2480		local_alloc = NULL;
   2481	}
   2482
   2483	/* go through each journal, trylock it and if you get the
   2484	 * lock, and it's marked as dirty, set the bit in the recover
   2485	 * map and launch a recovery thread for it. */
   2486	status = ocfs2_mark_dead_nodes(osb);
   2487	if (status < 0) {
   2488		mlog_errno(status);
   2489		goto finally;
   2490	}
   2491
   2492	status = ocfs2_compute_replay_slots(osb);
   2493	if (status < 0)
   2494		mlog_errno(status);
   2495
   2496finally:
   2497	kfree(local_alloc);
   2498
   2499	if (status)
   2500		mlog_errno(status);
   2501	return status;
   2502}
   2503
   2504/*
   2505 * The routine gets called from dismount or close whenever a dismount on
   2506 * volume is requested and the osb open count becomes 1.
   2507 * It will remove the osb from the global list and also free up all the
   2508 * initialized resources and fileobject.
   2509 */
   2510static void ocfs2_delete_osb(struct ocfs2_super *osb)
   2511{
   2512	/* This function assumes that the caller has the main osb resource */
   2513
   2514	/* ocfs2_initializer_super have already created this workqueue */
   2515	if (osb->ocfs2_wq)
   2516		destroy_workqueue(osb->ocfs2_wq);
   2517
   2518	ocfs2_free_slot_info(osb);
   2519
   2520	kfree(osb->osb_orphan_wipes);
   2521	kfree(osb->slot_recovery_generations);
   2522	/* FIXME
   2523	 * This belongs in journal shutdown, but because we have to
   2524	 * allocate osb->journal at the middle of ocfs2_initialize_super(),
   2525	 * we free it here.
   2526	 */
   2527	kfree(osb->journal);
   2528	kfree(osb->local_alloc_copy);
   2529	kfree(osb->uuid_str);
   2530	kfree(osb->vol_label);
   2531	ocfs2_put_dlm_debug(osb->osb_dlm_debug);
   2532	memset(osb, 0, sizeof(struct ocfs2_super));
   2533}
   2534
   2535/* Depending on the mount option passed, perform one of the following:
   2536 * Put OCFS2 into a readonly state (default)
   2537 * Return EIO so that only the process errs
   2538 * Fix the error as if fsck.ocfs2 -y
   2539 * panic
   2540 */
   2541static int ocfs2_handle_error(struct super_block *sb)
   2542{
   2543	struct ocfs2_super *osb = OCFS2_SB(sb);
   2544	int rv = 0;
   2545
   2546	ocfs2_set_osb_flag(osb, OCFS2_OSB_ERROR_FS);
   2547	pr_crit("On-disk corruption discovered. "
   2548		"Please run fsck.ocfs2 once the filesystem is unmounted.\n");
   2549
   2550	if (osb->s_mount_opt & OCFS2_MOUNT_ERRORS_PANIC) {
   2551		panic("OCFS2: (device %s): panic forced after error\n",
   2552		      sb->s_id);
   2553	} else if (osb->s_mount_opt & OCFS2_MOUNT_ERRORS_CONT) {
   2554		pr_crit("OCFS2: Returning error to the calling process.\n");
   2555		rv = -EIO;
   2556	} else { /* default option */
   2557		rv = -EROFS;
   2558		if (sb_rdonly(sb) && (ocfs2_is_soft_readonly(osb) || ocfs2_is_hard_readonly(osb)))
   2559			return rv;
   2560
   2561		pr_crit("OCFS2: File system is now read-only.\n");
   2562		sb->s_flags |= SB_RDONLY;
   2563		ocfs2_set_ro_flag(osb, 0);
   2564	}
   2565
   2566	return rv;
   2567}
   2568
   2569int __ocfs2_error(struct super_block *sb, const char *function,
   2570		  const char *fmt, ...)
   2571{
   2572	struct va_format vaf;
   2573	va_list args;
   2574
   2575	va_start(args, fmt);
   2576	vaf.fmt = fmt;
   2577	vaf.va = &args;
   2578
   2579	/* Not using mlog here because we want to show the actual
   2580	 * function the error came from. */
   2581	printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV",
   2582	       sb->s_id, function, &vaf);
   2583
   2584	va_end(args);
   2585
   2586	return ocfs2_handle_error(sb);
   2587}
   2588
   2589/* Handle critical errors. This is intentionally more drastic than
   2590 * ocfs2_handle_error, so we only use for things like journal errors,
   2591 * etc. */
   2592void __ocfs2_abort(struct super_block *sb, const char *function,
   2593		   const char *fmt, ...)
   2594{
   2595	struct va_format vaf;
   2596	va_list args;
   2597
   2598	va_start(args, fmt);
   2599
   2600	vaf.fmt = fmt;
   2601	vaf.va = &args;
   2602
   2603	printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV",
   2604	       sb->s_id, function, &vaf);
   2605
   2606	va_end(args);
   2607
   2608	/* We don't have the cluster support yet to go straight to
   2609	 * hard readonly in here. Until then, we want to keep
   2610	 * ocfs2_abort() so that we can at least mark critical
   2611	 * errors.
   2612	 *
   2613	 * TODO: This should abort the journal and alert other nodes
   2614	 * that our slot needs recovery. */
   2615
   2616	/* Force a panic(). This stinks, but it's better than letting
   2617	 * things continue without having a proper hard readonly
   2618	 * here. */
   2619	if (!ocfs2_mount_local(OCFS2_SB(sb)))
   2620		OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
   2621	ocfs2_handle_error(sb);
   2622}
   2623
   2624/*
   2625 * Void signal blockers, because in-kernel sigprocmask() only fails
   2626 * when SIG_* is wrong.
   2627 */
   2628void ocfs2_block_signals(sigset_t *oldset)
   2629{
   2630	int rc;
   2631	sigset_t blocked;
   2632
   2633	sigfillset(&blocked);
   2634	rc = sigprocmask(SIG_BLOCK, &blocked, oldset);
   2635	BUG_ON(rc);
   2636}
   2637
   2638void ocfs2_unblock_signals(sigset_t *oldset)
   2639{
   2640	int rc = sigprocmask(SIG_SETMASK, oldset, NULL);
   2641	BUG_ON(rc);
   2642}
   2643
   2644module_init(ocfs2_init);
   2645module_exit(ocfs2_exit);