cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

dlmfs.c (15015B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * dlmfs.c
      4 *
      5 * Code which implements the kernel side of a minimal userspace
      6 * interface to our DLM. This file handles the virtual file system
      7 * used for communication with userspace. Credit should go to ramfs,
      8 * which was a template for the fs side of this module.
      9 *
     10 * Copyright (C) 2003, 2004 Oracle.  All rights reserved.
     11 */
     12
     13/* Simple VFS hooks based on: */
     14/*
     15 * Resizable simple ram filesystem for Linux.
     16 *
     17 * Copyright (C) 2000 Linus Torvalds.
     18 *               2000 Transmeta Corp.
     19 */
     20
     21#include <linux/module.h>
     22#include <linux/fs.h>
     23#include <linux/pagemap.h>
     24#include <linux/types.h>
     25#include <linux/slab.h>
     26#include <linux/highmem.h>
     27#include <linux/init.h>
     28#include <linux/string.h>
     29#include <linux/backing-dev.h>
     30#include <linux/poll.h>
     31
     32#include <linux/uaccess.h>
     33
     34#include "../stackglue.h"
     35#include "userdlm.h"
     36
     37#define MLOG_MASK_PREFIX ML_DLMFS
     38#include "../cluster/masklog.h"
     39
     40
     41static const struct super_operations dlmfs_ops;
     42static const struct file_operations dlmfs_file_operations;
     43static const struct inode_operations dlmfs_dir_inode_operations;
     44static const struct inode_operations dlmfs_root_inode_operations;
     45static const struct inode_operations dlmfs_file_inode_operations;
     46static struct kmem_cache *dlmfs_inode_cache;
     47
     48struct workqueue_struct *user_dlm_worker;
     49
     50
     51
     52/*
     53 * These are the ABI capabilities of dlmfs.
     54 *
     55 * Over time, dlmfs has added some features that were not part of the
     56 * initial ABI.  Unfortunately, some of these features are not detectable
     57 * via standard usage.  For example, Linux's default poll always returns
     58 * EPOLLIN, so there is no way for a caller of poll(2) to know when dlmfs
     59 * added poll support.  Instead, we provide this list of new capabilities.
     60 *
     61 * Capabilities is a read-only attribute.  We do it as a module parameter
     62 * so we can discover it whether dlmfs is built in, loaded, or even not
     63 * loaded.
     64 *
     65 * The ABI features are local to this machine's dlmfs mount.  This is
     66 * distinct from the locking protocol, which is concerned with inter-node
     67 * interaction.
     68 *
     69 * Capabilities:
     70 * - bast	: EPOLLIN against the file descriptor of a held lock
     71 *		  signifies a bast fired on the lock.
     72 */
     73#define DLMFS_CAPABILITIES "bast stackglue"
     74static int param_set_dlmfs_capabilities(const char *val,
     75					const struct kernel_param *kp)
     76{
     77	printk(KERN_ERR "%s: readonly parameter\n", kp->name);
     78	return -EINVAL;
     79}
     80static int param_get_dlmfs_capabilities(char *buffer,
     81					const struct kernel_param *kp)
     82{
     83	return strlcpy(buffer, DLMFS_CAPABILITIES,
     84		       strlen(DLMFS_CAPABILITIES) + 1);
     85}
     86module_param_call(capabilities, param_set_dlmfs_capabilities,
     87		  param_get_dlmfs_capabilities, NULL, 0444);
     88MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES);
     89
     90
     91/*
     92 * decodes a set of open flags into a valid lock level and a set of flags.
     93 * returns < 0 if we have invalid flags
     94 * flags which mean something to us:
     95 * O_RDONLY -> PRMODE level
     96 * O_WRONLY -> EXMODE level
     97 *
     98 * O_NONBLOCK -> NOQUEUE
     99 */
    100static int dlmfs_decode_open_flags(int open_flags,
    101				   int *level,
    102				   int *flags)
    103{
    104	if (open_flags & (O_WRONLY|O_RDWR))
    105		*level = DLM_LOCK_EX;
    106	else
    107		*level = DLM_LOCK_PR;
    108
    109	*flags = 0;
    110	if (open_flags & O_NONBLOCK)
    111		*flags |= DLM_LKF_NOQUEUE;
    112
    113	return 0;
    114}
    115
    116static int dlmfs_file_open(struct inode *inode,
    117			   struct file *file)
    118{
    119	int status, level, flags;
    120	struct dlmfs_filp_private *fp = NULL;
    121	struct dlmfs_inode_private *ip;
    122
    123	if (S_ISDIR(inode->i_mode))
    124		BUG();
    125
    126	mlog(0, "open called on inode %lu, flags 0x%x\n", inode->i_ino,
    127		file->f_flags);
    128
    129	status = dlmfs_decode_open_flags(file->f_flags, &level, &flags);
    130	if (status < 0)
    131		goto bail;
    132
    133	/* We don't want to honor O_APPEND at read/write time as it
    134	 * doesn't make sense for LVB writes. */
    135	file->f_flags &= ~O_APPEND;
    136
    137	fp = kmalloc(sizeof(*fp), GFP_NOFS);
    138	if (!fp) {
    139		status = -ENOMEM;
    140		goto bail;
    141	}
    142	fp->fp_lock_level = level;
    143
    144	ip = DLMFS_I(inode);
    145
    146	status = user_dlm_cluster_lock(&ip->ip_lockres, level, flags);
    147	if (status < 0) {
    148		/* this is a strange error to return here but I want
    149		 * to be able userspace to be able to distinguish a
    150		 * valid lock request from one that simply couldn't be
    151		 * granted. */
    152		if (flags & DLM_LKF_NOQUEUE && status == -EAGAIN)
    153			status = -ETXTBSY;
    154		kfree(fp);
    155		goto bail;
    156	}
    157
    158	file->private_data = fp;
    159bail:
    160	return status;
    161}
    162
    163static int dlmfs_file_release(struct inode *inode,
    164			      struct file *file)
    165{
    166	int level;
    167	struct dlmfs_inode_private *ip = DLMFS_I(inode);
    168	struct dlmfs_filp_private *fp = file->private_data;
    169
    170	if (S_ISDIR(inode->i_mode))
    171		BUG();
    172
    173	mlog(0, "close called on inode %lu\n", inode->i_ino);
    174
    175	if (fp) {
    176		level = fp->fp_lock_level;
    177		if (level != DLM_LOCK_IV)
    178			user_dlm_cluster_unlock(&ip->ip_lockres, level);
    179
    180		kfree(fp);
    181		file->private_data = NULL;
    182	}
    183
    184	return 0;
    185}
    186
    187/*
    188 * We do ->setattr() just to override size changes.  Our size is the size
    189 * of the LVB and nothing else.
    190 */
    191static int dlmfs_file_setattr(struct user_namespace *mnt_userns,
    192			      struct dentry *dentry, struct iattr *attr)
    193{
    194	int error;
    195	struct inode *inode = d_inode(dentry);
    196
    197	attr->ia_valid &= ~ATTR_SIZE;
    198	error = setattr_prepare(&init_user_ns, dentry, attr);
    199	if (error)
    200		return error;
    201
    202	setattr_copy(&init_user_ns, inode, attr);
    203	mark_inode_dirty(inode);
    204	return 0;
    205}
    206
    207static __poll_t dlmfs_file_poll(struct file *file, poll_table *wait)
    208{
    209	__poll_t event = 0;
    210	struct inode *inode = file_inode(file);
    211	struct dlmfs_inode_private *ip = DLMFS_I(inode);
    212
    213	poll_wait(file, &ip->ip_lockres.l_event, wait);
    214
    215	spin_lock(&ip->ip_lockres.l_lock);
    216	if (ip->ip_lockres.l_flags & USER_LOCK_BLOCKED)
    217		event = EPOLLIN | EPOLLRDNORM;
    218	spin_unlock(&ip->ip_lockres.l_lock);
    219
    220	return event;
    221}
    222
    223static ssize_t dlmfs_file_read(struct file *file,
    224			       char __user *buf,
    225			       size_t count,
    226			       loff_t *ppos)
    227{
    228	char lvb[DLM_LVB_LEN];
    229
    230	if (!user_dlm_read_lvb(file_inode(file), lvb))
    231		return 0;
    232
    233	return simple_read_from_buffer(buf, count, ppos, lvb, sizeof(lvb));
    234}
    235
    236static ssize_t dlmfs_file_write(struct file *filp,
    237				const char __user *buf,
    238				size_t count,
    239				loff_t *ppos)
    240{
    241	char lvb_buf[DLM_LVB_LEN];
    242	int bytes_left;
    243	struct inode *inode = file_inode(filp);
    244
    245	mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
    246		inode->i_ino, count, *ppos);
    247
    248	if (*ppos >= DLM_LVB_LEN)
    249		return -ENOSPC;
    250
    251	/* don't write past the lvb */
    252	if (count > DLM_LVB_LEN - *ppos)
    253		count = DLM_LVB_LEN - *ppos;
    254
    255	if (!count)
    256		return 0;
    257
    258	bytes_left = copy_from_user(lvb_buf, buf, count);
    259	count -= bytes_left;
    260	if (count)
    261		user_dlm_write_lvb(inode, lvb_buf, count);
    262
    263	*ppos = *ppos + count;
    264	mlog(0, "wrote %zu bytes\n", count);
    265	return count;
    266}
    267
    268static void dlmfs_init_once(void *foo)
    269{
    270	struct dlmfs_inode_private *ip =
    271		(struct dlmfs_inode_private *) foo;
    272
    273	ip->ip_conn = NULL;
    274	ip->ip_parent = NULL;
    275
    276	inode_init_once(&ip->ip_vfs_inode);
    277}
    278
    279static struct inode *dlmfs_alloc_inode(struct super_block *sb)
    280{
    281	struct dlmfs_inode_private *ip;
    282
    283	ip = alloc_inode_sb(sb, dlmfs_inode_cache, GFP_NOFS);
    284	if (!ip)
    285		return NULL;
    286
    287	return &ip->ip_vfs_inode;
    288}
    289
    290static void dlmfs_free_inode(struct inode *inode)
    291{
    292	kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
    293}
    294
    295static void dlmfs_evict_inode(struct inode *inode)
    296{
    297	int status;
    298	struct dlmfs_inode_private *ip;
    299
    300	clear_inode(inode);
    301
    302	mlog(0, "inode %lu\n", inode->i_ino);
    303
    304	ip = DLMFS_I(inode);
    305
    306	if (S_ISREG(inode->i_mode)) {
    307		status = user_dlm_destroy_lock(&ip->ip_lockres);
    308		if (status < 0)
    309			mlog_errno(status);
    310		iput(ip->ip_parent);
    311		goto clear_fields;
    312	}
    313
    314	mlog(0, "we're a directory, ip->ip_conn = 0x%p\n", ip->ip_conn);
    315	/* we must be a directory. If required, lets unregister the
    316	 * dlm context now. */
    317	if (ip->ip_conn)
    318		user_dlm_unregister(ip->ip_conn);
    319clear_fields:
    320	ip->ip_parent = NULL;
    321	ip->ip_conn = NULL;
    322}
    323
    324static struct inode *dlmfs_get_root_inode(struct super_block *sb)
    325{
    326	struct inode *inode = new_inode(sb);
    327	umode_t mode = S_IFDIR | 0755;
    328
    329	if (inode) {
    330		inode->i_ino = get_next_ino();
    331		inode_init_owner(&init_user_ns, inode, NULL, mode);
    332		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
    333		inc_nlink(inode);
    334
    335		inode->i_fop = &simple_dir_operations;
    336		inode->i_op = &dlmfs_root_inode_operations;
    337	}
    338
    339	return inode;
    340}
    341
    342static struct inode *dlmfs_get_inode(struct inode *parent,
    343				     struct dentry *dentry,
    344				     umode_t mode)
    345{
    346	struct super_block *sb = parent->i_sb;
    347	struct inode * inode = new_inode(sb);
    348	struct dlmfs_inode_private *ip;
    349
    350	if (!inode)
    351		return NULL;
    352
    353	inode->i_ino = get_next_ino();
    354	inode_init_owner(&init_user_ns, inode, parent, mode);
    355	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
    356
    357	ip = DLMFS_I(inode);
    358	ip->ip_conn = DLMFS_I(parent)->ip_conn;
    359
    360	switch (mode & S_IFMT) {
    361	default:
    362		/* for now we don't support anything other than
    363		 * directories and regular files. */
    364		BUG();
    365		break;
    366	case S_IFREG:
    367		inode->i_op = &dlmfs_file_inode_operations;
    368		inode->i_fop = &dlmfs_file_operations;
    369
    370		i_size_write(inode,  DLM_LVB_LEN);
    371
    372		user_dlm_lock_res_init(&ip->ip_lockres, dentry);
    373
    374		/* released at clear_inode time, this insures that we
    375		 * get to drop the dlm reference on each lock *before*
    376		 * we call the unregister code for releasing parent
    377		 * directories. */
    378		ip->ip_parent = igrab(parent);
    379		BUG_ON(!ip->ip_parent);
    380		break;
    381	case S_IFDIR:
    382		inode->i_op = &dlmfs_dir_inode_operations;
    383		inode->i_fop = &simple_dir_operations;
    384
    385		/* directory inodes start off with i_nlink ==
    386		 * 2 (for "." entry) */
    387		inc_nlink(inode);
    388		break;
    389	}
    390	return inode;
    391}
    392
    393/*
    394 * File creation. Allocate an inode, and we're done..
    395 */
    396/* SMP-safe */
    397static int dlmfs_mkdir(struct user_namespace * mnt_userns,
    398		       struct inode * dir,
    399		       struct dentry * dentry,
    400		       umode_t mode)
    401{
    402	int status;
    403	struct inode *inode = NULL;
    404	const struct qstr *domain = &dentry->d_name;
    405	struct dlmfs_inode_private *ip;
    406	struct ocfs2_cluster_connection *conn;
    407
    408	mlog(0, "mkdir %.*s\n", domain->len, domain->name);
    409
    410	/* verify that we have a proper domain */
    411	if (domain->len >= GROUP_NAME_MAX) {
    412		status = -EINVAL;
    413		mlog(ML_ERROR, "invalid domain name for directory.\n");
    414		goto bail;
    415	}
    416
    417	inode = dlmfs_get_inode(dir, dentry, mode | S_IFDIR);
    418	if (!inode) {
    419		status = -ENOMEM;
    420		mlog_errno(status);
    421		goto bail;
    422	}
    423
    424	ip = DLMFS_I(inode);
    425
    426	conn = user_dlm_register(domain);
    427	if (IS_ERR(conn)) {
    428		status = PTR_ERR(conn);
    429		mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n",
    430		     status, domain->len, domain->name);
    431		goto bail;
    432	}
    433	ip->ip_conn = conn;
    434
    435	inc_nlink(dir);
    436	d_instantiate(dentry, inode);
    437	dget(dentry);	/* Extra count - pin the dentry in core */
    438
    439	status = 0;
    440bail:
    441	if (status < 0)
    442		iput(inode);
    443	return status;
    444}
    445
    446static int dlmfs_create(struct user_namespace *mnt_userns,
    447			struct inode *dir,
    448			struct dentry *dentry,
    449			umode_t mode,
    450			bool excl)
    451{
    452	int status = 0;
    453	struct inode *inode;
    454	const struct qstr *name = &dentry->d_name;
    455
    456	mlog(0, "create %.*s\n", name->len, name->name);
    457
    458	/* verify name is valid and doesn't contain any dlm reserved
    459	 * characters */
    460	if (name->len >= USER_DLM_LOCK_ID_MAX_LEN ||
    461	    name->name[0] == '$') {
    462		status = -EINVAL;
    463		mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len,
    464		     name->name);
    465		goto bail;
    466	}
    467
    468	inode = dlmfs_get_inode(dir, dentry, mode | S_IFREG);
    469	if (!inode) {
    470		status = -ENOMEM;
    471		mlog_errno(status);
    472		goto bail;
    473	}
    474
    475	d_instantiate(dentry, inode);
    476	dget(dentry);	/* Extra count - pin the dentry in core */
    477bail:
    478	return status;
    479}
    480
    481static int dlmfs_unlink(struct inode *dir,
    482			struct dentry *dentry)
    483{
    484	int status;
    485	struct inode *inode = d_inode(dentry);
    486
    487	mlog(0, "unlink inode %lu\n", inode->i_ino);
    488
    489	/* if there are no current holders, or none that are waiting
    490	 * to acquire a lock, this basically destroys our lockres. */
    491	status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres);
    492	if (status < 0) {
    493		mlog(ML_ERROR, "unlink %pd, error %d from destroy\n",
    494		     dentry, status);
    495		goto bail;
    496	}
    497	status = simple_unlink(dir, dentry);
    498bail:
    499	return status;
    500}
    501
    502static int dlmfs_fill_super(struct super_block * sb,
    503			    void * data,
    504			    int silent)
    505{
    506	sb->s_maxbytes = MAX_LFS_FILESIZE;
    507	sb->s_blocksize = PAGE_SIZE;
    508	sb->s_blocksize_bits = PAGE_SHIFT;
    509	sb->s_magic = DLMFS_MAGIC;
    510	sb->s_op = &dlmfs_ops;
    511	sb->s_root = d_make_root(dlmfs_get_root_inode(sb));
    512	if (!sb->s_root)
    513		return -ENOMEM;
    514	return 0;
    515}
    516
    517static const struct file_operations dlmfs_file_operations = {
    518	.open		= dlmfs_file_open,
    519	.release	= dlmfs_file_release,
    520	.poll		= dlmfs_file_poll,
    521	.read		= dlmfs_file_read,
    522	.write		= dlmfs_file_write,
    523	.llseek		= default_llseek,
    524};
    525
    526static const struct inode_operations dlmfs_dir_inode_operations = {
    527	.create		= dlmfs_create,
    528	.lookup		= simple_lookup,
    529	.unlink		= dlmfs_unlink,
    530};
    531
    532/* this way we can restrict mkdir to only the toplevel of the fs. */
    533static const struct inode_operations dlmfs_root_inode_operations = {
    534	.lookup		= simple_lookup,
    535	.mkdir		= dlmfs_mkdir,
    536	.rmdir		= simple_rmdir,
    537};
    538
    539static const struct super_operations dlmfs_ops = {
    540	.statfs		= simple_statfs,
    541	.alloc_inode	= dlmfs_alloc_inode,
    542	.free_inode	= dlmfs_free_inode,
    543	.evict_inode	= dlmfs_evict_inode,
    544	.drop_inode	= generic_delete_inode,
    545};
    546
    547static const struct inode_operations dlmfs_file_inode_operations = {
    548	.getattr	= simple_getattr,
    549	.setattr	= dlmfs_file_setattr,
    550};
    551
    552static struct dentry *dlmfs_mount(struct file_system_type *fs_type,
    553	int flags, const char *dev_name, void *data)
    554{
    555	return mount_nodev(fs_type, flags, data, dlmfs_fill_super);
    556}
    557
    558static struct file_system_type dlmfs_fs_type = {
    559	.owner		= THIS_MODULE,
    560	.name		= "ocfs2_dlmfs",
    561	.mount		= dlmfs_mount,
    562	.kill_sb	= kill_litter_super,
    563};
    564MODULE_ALIAS_FS("ocfs2_dlmfs");
    565
    566static int __init init_dlmfs_fs(void)
    567{
    568	int status;
    569	int cleanup_inode = 0, cleanup_worker = 0;
    570
    571	dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache",
    572				sizeof(struct dlmfs_inode_private),
    573				0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
    574					SLAB_MEM_SPREAD|SLAB_ACCOUNT),
    575				dlmfs_init_once);
    576	if (!dlmfs_inode_cache) {
    577		status = -ENOMEM;
    578		goto bail;
    579	}
    580	cleanup_inode = 1;
    581
    582	user_dlm_worker = alloc_workqueue("user_dlm", WQ_MEM_RECLAIM, 0);
    583	if (!user_dlm_worker) {
    584		status = -ENOMEM;
    585		goto bail;
    586	}
    587	cleanup_worker = 1;
    588
    589	user_dlm_set_locking_protocol();
    590	status = register_filesystem(&dlmfs_fs_type);
    591bail:
    592	if (status) {
    593		if (cleanup_inode)
    594			kmem_cache_destroy(dlmfs_inode_cache);
    595		if (cleanup_worker)
    596			destroy_workqueue(user_dlm_worker);
    597	} else
    598		printk("OCFS2 User DLM kernel interface loaded\n");
    599	return status;
    600}
    601
    602static void __exit exit_dlmfs_fs(void)
    603{
    604	unregister_filesystem(&dlmfs_fs_type);
    605
    606	destroy_workqueue(user_dlm_worker);
    607
    608	/*
    609	 * Make sure all delayed rcu free inodes are flushed before we
    610	 * destroy cache.
    611	 */
    612	rcu_barrier();
    613	kmem_cache_destroy(dlmfs_inode_cache);
    614
    615}
    616
    617MODULE_AUTHOR("Oracle");
    618MODULE_LICENSE("GPL");
    619MODULE_DESCRIPTION("OCFS2 DLM-Filesystem");
    620
    621module_init(init_dlmfs_fs)
    622module_exit(exit_dlmfs_fs)