cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

file.c (16013B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (C) 2017 Red Hat, Inc.
      4 */
      5
      6#include <linux/cred.h>
      7#include <linux/file.h>
      8#include <linux/mount.h>
      9#include <linux/xattr.h>
     10#include <linux/uio.h>
     11#include <linux/uaccess.h>
     12#include <linux/splice.h>
     13#include <linux/security.h>
     14#include <linux/mm.h>
     15#include <linux/fs.h>
     16#include "overlayfs.h"
     17
/* Per-request state for an async read/write forwarded to the real file. */
struct ovl_aio_req {
	/* Clone of orig_iocb that is submitted against the real file */
	struct kiocb iocb;
	/* Set to 2 at submission; the request is freed when it drops to zero */
	refcount_t ref;
	/* The overlay iocb whose ki_pos/ki_complete are updated on completion */
	struct kiocb *orig_iocb;
	/* Real file reference; released by ovl_aio_put() on the final put */
	struct fd fd;
};
     24
/* Slab cache backing struct ovl_aio_req allocations in this file */
static struct kmem_cache *ovl_aio_request_cachep;
     26
     27static char ovl_whatisit(struct inode *inode, struct inode *realinode)
     28{
     29	if (realinode != ovl_inode_upper(inode))
     30		return 'l';
     31	if (ovl_has_upperdata(inode))
     32		return 'u';
     33	else
     34		return 'm';
     35}
     36
/*
 * No atime modification nor notify on underlying.
 * These flags are OR-ed into f_flags whenever a real file is opened and are
 * ignored when comparing overlay and real file flags.
 */
#define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
     39
     40static struct file *ovl_open_realfile(const struct file *file,
     41				      struct path *realpath)
     42{
     43	struct inode *realinode = d_inode(realpath->dentry);
     44	struct inode *inode = file_inode(file);
     45	struct user_namespace *real_mnt_userns;
     46	struct file *realfile;
     47	const struct cred *old_cred;
     48	int flags = file->f_flags | OVL_OPEN_FLAGS;
     49	int acc_mode = ACC_MODE(flags);
     50	int err;
     51
     52	if (flags & O_APPEND)
     53		acc_mode |= MAY_APPEND;
     54
     55	old_cred = ovl_override_creds(inode->i_sb);
     56	real_mnt_userns = mnt_user_ns(realpath->mnt);
     57	err = inode_permission(real_mnt_userns, realinode, MAY_OPEN | acc_mode);
     58	if (err) {
     59		realfile = ERR_PTR(err);
     60	} else {
     61		if (!inode_owner_or_capable(real_mnt_userns, realinode))
     62			flags &= ~O_NOATIME;
     63
     64		realfile = open_with_fake_path(&file->f_path, flags, realinode,
     65					       current_cred());
     66	}
     67	revert_creds(old_cred);
     68
     69	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
     70		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
     71		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
     72
     73	return realfile;
     74}
     75
/* The only f_flags bits that ovl_change_flags() will update on a file */
#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
     77
     78static int ovl_change_flags(struct file *file, unsigned int flags)
     79{
     80	struct inode *inode = file_inode(file);
     81	int err;
     82
     83	flags &= OVL_SETFL_MASK;
     84
     85	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
     86		return -EPERM;
     87
     88	if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
     89		return -EINVAL;
     90
     91	if (file->f_op->check_flags) {
     92		err = file->f_op->check_flags(flags);
     93		if (err)
     94			return err;
     95	}
     96
     97	spin_lock(&file->f_lock);
     98	file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
     99	spin_unlock(&file->f_lock);
    100
    101	return 0;
    102}
    103
    104static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
    105			       bool allow_meta)
    106{
    107	struct dentry *dentry = file_dentry(file);
    108	struct path realpath;
    109
    110	real->flags = 0;
    111	real->file = file->private_data;
    112
    113	if (allow_meta)
    114		ovl_path_real(dentry, &realpath);
    115	else
    116		ovl_path_realdata(dentry, &realpath);
    117
    118	/* Has it been copied up since we'd opened it? */
    119	if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) {
    120		real->flags = FDPUT_FPUT;
    121		real->file = ovl_open_realfile(file, &realpath);
    122
    123		return PTR_ERR_OR_ZERO(real->file);
    124	}
    125
    126	/* Did the flags change since open? */
    127	if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
    128		return ovl_change_flags(real->file, file->f_flags);
    129
    130	return 0;
    131}
    132
    133static int ovl_real_fdget(const struct file *file, struct fd *real)
    134{
    135	if (d_is_dir(file_dentry(file))) {
    136		real->flags = 0;
    137		real->file = ovl_dir_real_file(file, false);
    138
    139		return PTR_ERR_OR_ZERO(real->file);
    140	}
    141
    142	return ovl_real_fdget_meta(file, real, false);
    143}
    144
    145static int ovl_open(struct inode *inode, struct file *file)
    146{
    147	struct dentry *dentry = file_dentry(file);
    148	struct file *realfile;
    149	struct path realpath;
    150	int err;
    151
    152	err = ovl_maybe_copy_up(dentry, file->f_flags);
    153	if (err)
    154		return err;
    155
    156	/* No longer need these flags, so don't pass them on to underlying fs */
    157	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
    158
    159	ovl_path_realdata(dentry, &realpath);
    160	realfile = ovl_open_realfile(file, &realpath);
    161	if (IS_ERR(realfile))
    162		return PTR_ERR(realfile);
    163
    164	file->private_data = realfile;
    165
    166	return 0;
    167}
    168
    169static int ovl_release(struct inode *inode, struct file *file)
    170{
    171	fput(file->private_data);
    172
    173	return 0;
    174}
    175
    176static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
    177{
    178	struct inode *inode = file_inode(file);
    179	struct fd real;
    180	const struct cred *old_cred;
    181	loff_t ret;
    182
    183	/*
    184	 * The two special cases below do not need to involve real fs,
    185	 * so we can optimizing concurrent callers.
    186	 */
    187	if (offset == 0) {
    188		if (whence == SEEK_CUR)
    189			return file->f_pos;
    190
    191		if (whence == SEEK_SET)
    192			return vfs_setpos(file, 0, 0);
    193	}
    194
    195	ret = ovl_real_fdget(file, &real);
    196	if (ret)
    197		return ret;
    198
    199	/*
    200	 * Overlay file f_pos is the master copy that is preserved
    201	 * through copy up and modified on read/write, but only real
    202	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
    203	 * limitations that are more strict than ->s_maxbytes for specific
    204	 * files, so we use the real file to perform seeks.
    205	 */
    206	ovl_inode_lock(inode);
    207	real.file->f_pos = file->f_pos;
    208
    209	old_cred = ovl_override_creds(inode->i_sb);
    210	ret = vfs_llseek(real.file, offset, whence);
    211	revert_creds(old_cred);
    212
    213	file->f_pos = real.file->f_pos;
    214	ovl_inode_unlock(inode);
    215
    216	fdput(real);
    217
    218	return ret;
    219}
    220
    221static void ovl_file_accessed(struct file *file)
    222{
    223	struct inode *inode, *upperinode;
    224
    225	if (file->f_flags & O_NOATIME)
    226		return;
    227
    228	inode = file_inode(file);
    229	upperinode = ovl_inode_upper(inode);
    230
    231	if (!upperinode)
    232		return;
    233
    234	if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
    235	     !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
    236		inode->i_mtime = upperinode->i_mtime;
    237		inode->i_ctime = upperinode->i_ctime;
    238	}
    239
    240	touch_atime(&file->f_path);
    241}
    242
    243static rwf_t ovl_iocb_to_rwf(int ifl)
    244{
    245	rwf_t flags = 0;
    246
    247	if (ifl & IOCB_NOWAIT)
    248		flags |= RWF_NOWAIT;
    249	if (ifl & IOCB_HIPRI)
    250		flags |= RWF_HIPRI;
    251	if (ifl & IOCB_DSYNC)
    252		flags |= RWF_DSYNC;
    253	if (ifl & IOCB_SYNC)
    254		flags |= RWF_SYNC;
    255
    256	return flags;
    257}
    258
    259static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
    260{
    261	if (refcount_dec_and_test(&aio_req->ref)) {
    262		fdput(aio_req->fd);
    263		kmem_cache_free(ovl_aio_request_cachep, aio_req);
    264	}
    265}
    266
    267static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
    268{
    269	struct kiocb *iocb = &aio_req->iocb;
    270	struct kiocb *orig_iocb = aio_req->orig_iocb;
    271
    272	if (iocb->ki_flags & IOCB_WRITE) {
    273		struct inode *inode = file_inode(orig_iocb->ki_filp);
    274
    275		/* Actually acquired in ovl_write_iter() */
    276		__sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
    277				      SB_FREEZE_WRITE);
    278		file_end_write(iocb->ki_filp);
    279		ovl_copyattr(inode);
    280	}
    281
    282	orig_iocb->ki_pos = iocb->ki_pos;
    283	ovl_aio_put(aio_req);
    284}
    285
    286static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
    287{
    288	struct ovl_aio_req *aio_req = container_of(iocb,
    289						   struct ovl_aio_req, iocb);
    290	struct kiocb *orig_iocb = aio_req->orig_iocb;
    291
    292	ovl_aio_cleanup_handler(aio_req);
    293	orig_iocb->ki_complete(orig_iocb, res);
    294}
    295
/*
 * ->read_iter() for overlay files: forward the read to the real file.
 *
 * Returns 0 for an empty @iter; a negative errno if the real file cannot be
 * obtained; -EINVAL if IOCB_DIRECT is set but the real file lacks
 * FMODE_CAN_ODIRECT; -ENOMEM if the async request cannot be allocated;
 * otherwise the result of the read on the real file (which may be
 * -EIOCBQUEUED for an async request).
 */
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct file *file = iocb->ki_filp;
	struct fd real;
	const struct cred *old_cred;
	ssize_t ret;

	if (!iov_iter_count(iter))
		return 0;

	ret = ovl_real_fdget(file, &real);
	if (ret)
		return ret;

	ret = -EINVAL;
	if (iocb->ki_flags & IOCB_DIRECT &&
	    !(real.file->f_mode & FMODE_CAN_ODIRECT))
		goto out_fdput;

	old_cred = ovl_override_creds(file_inode(file)->i_sb);
	if (is_sync_kiocb(iocb)) {
		/* Synchronous: read straight into the caller's position */
		ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
				    ovl_iocb_to_rwf(iocb->ki_flags));
	} else {
		struct ovl_aio_req *aio_req;

		ret = -ENOMEM;
		aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
		if (!aio_req)
			goto out;

		/*
		 * Hand the fd (including its flags) over to aio_req and clear
		 * the local flags - presumably so the fdput(real) below does
		 * not also release the reference now owned by aio_req.
		 */
		aio_req->fd = real;
		real.flags = 0;
		aio_req->orig_iocb = iocb;
		kiocb_clone(&aio_req->iocb, iocb, real.file);
		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
		/*
		 * Two references: one is dropped right after submission
		 * below, the other by ovl_aio_cleanup_handler().
		 */
		refcount_set(&aio_req->ref, 2);
		ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
		ovl_aio_put(aio_req);
		/* Not queued: no completion callback will run, clean up here */
		if (ret != -EIOCBQUEUED)
			ovl_aio_cleanup_handler(aio_req);
	}
out:
	revert_creds(old_cred);
	ovl_file_accessed(file);
out_fdput:
	fdput(real);

	return ret;
}
    346
/*
 * ->write_iter() for overlay files: forward the write to the real file while
 * holding the overlay inode lock.
 *
 * Returns 0 for an empty @iter; the error from file_remove_privs() or from
 * obtaining the real file; -EINVAL if IOCB_DIRECT is set but the real file
 * lacks FMODE_CAN_ODIRECT; -ENOMEM if the async request cannot be allocated;
 * otherwise the result of the write on the real file (which may be
 * -EIOCBQUEUED for an async request).
 */
static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct fd real;
	const struct cred *old_cred;
	ssize_t ret;
	int ifl = iocb->ki_flags;

	if (!iov_iter_count(iter))
		return 0;

	inode_lock(inode);
	/* Update mode */
	ovl_copyattr(inode);
	ret = file_remove_privs(file);
	if (ret)
		goto out_unlock;

	ret = ovl_real_fdget(file, &real);
	if (ret)
		goto out_unlock;

	ret = -EINVAL;
	if (iocb->ki_flags & IOCB_DIRECT &&
	    !(real.file->f_mode & FMODE_CAN_ODIRECT))
		goto out_fdput;

	/* Strip the sync flags when ovl_should_sync() says not to sync */
	if (!ovl_should_sync(OVL_FS(inode->i_sb)))
		ifl &= ~(IOCB_DSYNC | IOCB_SYNC);

	old_cred = ovl_override_creds(file_inode(file)->i_sb);
	if (is_sync_kiocb(iocb)) {
		file_start_write(real.file);
		ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
				     ovl_iocb_to_rwf(ifl));
		file_end_write(real.file);
		/* Update size */
		ovl_copyattr(inode);
	} else {
		struct ovl_aio_req *aio_req;

		ret = -ENOMEM;
		aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
		if (!aio_req)
			goto out;

		/*
		 * The matching file_end_write() is issued from
		 * ovl_aio_cleanup_handler().
		 */
		file_start_write(real.file);
		/* Pacify lockdep, same trick as done in aio_write() */
		__sb_writers_release(file_inode(real.file)->i_sb,
				     SB_FREEZE_WRITE);
		/*
		 * Hand the fd (including its flags) over to aio_req and clear
		 * the local flags - presumably so the fdput(real) below does
		 * not also release the reference now owned by aio_req.
		 */
		aio_req->fd = real;
		real.flags = 0;
		aio_req->orig_iocb = iocb;
		kiocb_clone(&aio_req->iocb, iocb, real.file);
		/* Use the possibly sync-stripped flags for the real write */
		aio_req->iocb.ki_flags = ifl;
		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
		/*
		 * Two references: one is dropped right after submission
		 * below, the other by ovl_aio_cleanup_handler().
		 */
		refcount_set(&aio_req->ref, 2);
		ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
		ovl_aio_put(aio_req);
		/* Not queued: no completion callback will run, clean up here */
		if (ret != -EIOCBQUEUED)
			ovl_aio_cleanup_handler(aio_req);
	}
out:
	revert_creds(old_cred);
out_fdput:
	fdput(real);

out_unlock:
	inode_unlock(inode);

	return ret;
}
    420
    421/*
    422 * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
    423 * due to lock order inversion between pipe->mutex in iter_file_splice_write()
    424 * and file_start_write(real.file) in ovl_write_iter().
    425 *
    426 * So do everything ovl_write_iter() does and call iter_file_splice_write() on
    427 * the real file.
    428 */
    429static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
    430				loff_t *ppos, size_t len, unsigned int flags)
    431{
    432	struct fd real;
    433	const struct cred *old_cred;
    434	struct inode *inode = file_inode(out);
    435	ssize_t ret;
    436
    437	inode_lock(inode);
    438	/* Update mode */
    439	ovl_copyattr(inode);
    440	ret = file_remove_privs(out);
    441	if (ret)
    442		goto out_unlock;
    443
    444	ret = ovl_real_fdget(out, &real);
    445	if (ret)
    446		goto out_unlock;
    447
    448	old_cred = ovl_override_creds(inode->i_sb);
    449	file_start_write(real.file);
    450
    451	ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
    452
    453	file_end_write(real.file);
    454	/* Update size */
    455	ovl_copyattr(inode);
    456	revert_creds(old_cred);
    457	fdput(real);
    458
    459out_unlock:
    460	inode_unlock(inode);
    461
    462	return ret;
    463}
    464
    465static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
    466{
    467	struct fd real;
    468	const struct cred *old_cred;
    469	int ret;
    470
    471	ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
    472	if (ret <= 0)
    473		return ret;
    474
    475	ret = ovl_real_fdget_meta(file, &real, !datasync);
    476	if (ret)
    477		return ret;
    478
    479	/* Don't sync lower file for fear of receiving EROFS error */
    480	if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
    481		old_cred = ovl_override_creds(file_inode(file)->i_sb);
    482		ret = vfs_fsync_range(real.file, start, end, datasync);
    483		revert_creds(old_cred);
    484	}
    485
    486	fdput(real);
    487
    488	return ret;
    489}
    490
    491static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
    492{
    493	struct file *realfile = file->private_data;
    494	const struct cred *old_cred;
    495	int ret;
    496
    497	if (!realfile->f_op->mmap)
    498		return -ENODEV;
    499
    500	if (WARN_ON(file != vma->vm_file))
    501		return -EIO;
    502
    503	vma_set_file(vma, realfile);
    504
    505	old_cred = ovl_override_creds(file_inode(file)->i_sb);
    506	ret = call_mmap(vma->vm_file, vma);
    507	revert_creds(old_cred);
    508	ovl_file_accessed(file);
    509
    510	return ret;
    511}
    512
    513static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
    514{
    515	struct inode *inode = file_inode(file);
    516	struct fd real;
    517	const struct cred *old_cred;
    518	int ret;
    519
    520	ret = ovl_real_fdget(file, &real);
    521	if (ret)
    522		return ret;
    523
    524	old_cred = ovl_override_creds(file_inode(file)->i_sb);
    525	ret = vfs_fallocate(real.file, mode, offset, len);
    526	revert_creds(old_cred);
    527
    528	/* Update size */
    529	ovl_copyattr(inode);
    530
    531	fdput(real);
    532
    533	return ret;
    534}
    535
    536static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
    537{
    538	struct fd real;
    539	const struct cred *old_cred;
    540	int ret;
    541
    542	ret = ovl_real_fdget(file, &real);
    543	if (ret)
    544		return ret;
    545
    546	old_cred = ovl_override_creds(file_inode(file)->i_sb);
    547	ret = vfs_fadvise(real.file, offset, len, advice);
    548	revert_creds(old_cred);
    549
    550	fdput(real);
    551
    552	return ret;
    553}
    554
/* Selects which VFS helper ovl_copyfile() calls on the real files */
enum ovl_copyop {
	OVL_COPY,	/* vfs_copy_file_range() */
	OVL_CLONE,	/* vfs_clone_file_range() */
	OVL_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
    560
    561static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
    562			    struct file *file_out, loff_t pos_out,
    563			    loff_t len, unsigned int flags, enum ovl_copyop op)
    564{
    565	struct inode *inode_out = file_inode(file_out);
    566	struct fd real_in, real_out;
    567	const struct cred *old_cred;
    568	loff_t ret;
    569
    570	ret = ovl_real_fdget(file_out, &real_out);
    571	if (ret)
    572		return ret;
    573
    574	ret = ovl_real_fdget(file_in, &real_in);
    575	if (ret) {
    576		fdput(real_out);
    577		return ret;
    578	}
    579
    580	old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
    581	switch (op) {
    582	case OVL_COPY:
    583		ret = vfs_copy_file_range(real_in.file, pos_in,
    584					  real_out.file, pos_out, len, flags);
    585		break;
    586
    587	case OVL_CLONE:
    588		ret = vfs_clone_file_range(real_in.file, pos_in,
    589					   real_out.file, pos_out, len, flags);
    590		break;
    591
    592	case OVL_DEDUPE:
    593		ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
    594						real_out.file, pos_out, len,
    595						flags);
    596		break;
    597	}
    598	revert_creds(old_cred);
    599
    600	/* Update size */
    601	ovl_copyattr(inode_out);
    602
    603	fdput(real_in);
    604	fdput(real_out);
    605
    606	return ret;
    607}
    608
    609static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
    610				   struct file *file_out, loff_t pos_out,
    611				   size_t len, unsigned int flags)
    612{
    613	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
    614			    OVL_COPY);
    615}
    616
    617static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
    618				   struct file *file_out, loff_t pos_out,
    619				   loff_t len, unsigned int remap_flags)
    620{
    621	enum ovl_copyop op;
    622
    623	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
    624		return -EINVAL;
    625
    626	if (remap_flags & REMAP_FILE_DEDUP)
    627		op = OVL_DEDUPE;
    628	else
    629		op = OVL_CLONE;
    630
    631	/*
    632	 * Don't copy up because of a dedupe request, this wouldn't make sense
    633	 * most of the time (data would be duplicated instead of deduplicated).
    634	 */
    635	if (op == OVL_DEDUPE &&
    636	    (!ovl_inode_upper(file_inode(file_in)) ||
    637	     !ovl_inode_upper(file_inode(file_out))))
    638		return -EPERM;
    639
    640	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
    641			    remap_flags, op);
    642}
    643
    644static int ovl_flush(struct file *file, fl_owner_t id)
    645{
    646	struct fd real;
    647	const struct cred *old_cred;
    648	int err;
    649
    650	err = ovl_real_fdget(file, &real);
    651	if (err)
    652		return err;
    653
    654	if (real.file->f_op->flush) {
    655		old_cred = ovl_override_creds(file_inode(file)->i_sb);
    656		err = real.file->f_op->flush(real.file, id);
    657		revert_creds(old_cred);
    658	}
    659	fdput(real);
    660
    661	return err;
    662}
    663
/*
 * File operations for overlay files.  Every handler except splice_read is
 * one of the ovl_* wrappers defined in this file.
 */
const struct file_operations ovl_file_operations = {
	.open		= ovl_open,
	.release	= ovl_release,
	.llseek		= ovl_llseek,
	.read_iter	= ovl_read_iter,
	.write_iter	= ovl_write_iter,
	.fsync		= ovl_fsync,
	.mmap		= ovl_mmap,
	.fallocate	= ovl_fallocate,
	.fadvise	= ovl_fadvise,
	.flush		= ovl_flush,
	.splice_read    = generic_file_splice_read,
	.splice_write   = ovl_splice_write,

	.copy_file_range	= ovl_copy_file_range,
	.remap_file_range	= ovl_remap_file_range,
};
    681
    682int __init ovl_aio_request_cache_init(void)
    683{
    684	ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
    685						   sizeof(struct ovl_aio_req),
    686						   0, SLAB_HWCACHE_ALIGN, NULL);
    687	if (!ovl_aio_request_cachep)
    688		return -ENOMEM;
    689
    690	return 0;
    691}
    692
/* Destroy the slab cache created by ovl_aio_request_cache_init(). */
void ovl_aio_request_cache_destroy(void)
{
	kmem_cache_destroy(ovl_aio_request_cachep);
}