cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

sync.c (10636B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * High-level sync()-related operations
      4 */
      5
      6#include <linux/blkdev.h>
      7#include <linux/kernel.h>
      8#include <linux/file.h>
      9#include <linux/fs.h>
     10#include <linux/slab.h>
     11#include <linux/export.h>
     12#include <linux/namei.h>
     13#include <linux/sched.h>
     14#include <linux/writeback.h>
     15#include <linux/syscalls.h>
     16#include <linux/linkage.h>
     17#include <linux/pagemap.h>
     18#include <linux/quotaops.h>
     19#include <linux/backing-dev.h>
     20#include "internal.h"
     21
     22#define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
     23			SYNC_FILE_RANGE_WAIT_AFTER)
     24
     25/*
     26 * Write out and wait upon all dirty data associated with this
     27 * superblock.  Filesystem data as well as the underlying block
     28 * device.  Takes the superblock lock.
     29 */
     30int sync_filesystem(struct super_block *sb)
     31{
     32	int ret = 0;
     33
     34	/*
     35	 * We need to be protected against the filesystem going from
     36	 * r/o to r/w or vice versa.
     37	 */
     38	WARN_ON(!rwsem_is_locked(&sb->s_umount));
     39
     40	/*
     41	 * No point in syncing out anything if the filesystem is read-only.
     42	 */
     43	if (sb_rdonly(sb))
     44		return 0;
     45
     46	/*
     47	 * Do the filesystem syncing work.  For simple filesystems
     48	 * writeback_inodes_sb(sb) just dirties buffers with inodes so we have
     49	 * to submit I/O for these buffers via sync_blockdev().  This also
     50	 * speeds up the wait == 1 case since in that case write_inode()
     51	 * methods call sync_dirty_buffer() and thus effectively write one block
     52	 * at a time.
     53	 */
     54	writeback_inodes_sb(sb, WB_REASON_SYNC);
     55	if (sb->s_op->sync_fs) {
     56		ret = sb->s_op->sync_fs(sb, 0);
     57		if (ret)
     58			return ret;
     59	}
     60	ret = sync_blockdev_nowait(sb->s_bdev);
     61	if (ret)
     62		return ret;
     63
     64	sync_inodes_sb(sb);
     65	if (sb->s_op->sync_fs) {
     66		ret = sb->s_op->sync_fs(sb, 1);
     67		if (ret)
     68			return ret;
     69	}
     70	return sync_blockdev(sb->s_bdev);
     71}
     72EXPORT_SYMBOL(sync_filesystem);
     73
     74static void sync_inodes_one_sb(struct super_block *sb, void *arg)
     75{
     76	if (!sb_rdonly(sb))
     77		sync_inodes_sb(sb);
     78}
     79
     80static void sync_fs_one_sb(struct super_block *sb, void *arg)
     81{
     82	if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC) &&
     83	    sb->s_op->sync_fs)
     84		sb->s_op->sync_fs(sb, *(int *)arg);
     85}
     86
     87/*
     88 * Sync everything. We start by waking flusher threads so that most of
     89 * writeback runs on all devices in parallel. Then we sync all inodes reliably
     90 * which effectively also waits for all flusher threads to finish doing
     91 * writeback. At this point all data is on disk so metadata should be stable
     92 * and we tell filesystems to sync their metadata via ->sync_fs() calls.
     93 * Finally, we writeout all block devices because some filesystems (e.g. ext2)
     94 * just write metadata (such as inodes or bitmaps) to block device page cache
     95 * and do not sync it on their own in ->sync_fs().
     96 */
     97void ksys_sync(void)
     98{
     99	int nowait = 0, wait = 1;
    100
    101	wakeup_flusher_threads(WB_REASON_SYNC);
    102	iterate_supers(sync_inodes_one_sb, NULL);
    103	iterate_supers(sync_fs_one_sb, &nowait);
    104	iterate_supers(sync_fs_one_sb, &wait);
    105	sync_bdevs(false);
    106	sync_bdevs(true);
    107	if (unlikely(laptop_mode))
    108		laptop_sync_completion();
    109}
    110
    111SYSCALL_DEFINE0(sync)
    112{
    113	ksys_sync();
    114	return 0;
    115}
    116
    117static void do_sync_work(struct work_struct *work)
    118{
    119	int nowait = 0;
    120
    121	/*
    122	 * Sync twice to reduce the possibility we skipped some inodes / pages
    123	 * because they were temporarily locked
    124	 */
    125	iterate_supers(sync_inodes_one_sb, &nowait);
    126	iterate_supers(sync_fs_one_sb, &nowait);
    127	sync_bdevs(false);
    128	iterate_supers(sync_inodes_one_sb, &nowait);
    129	iterate_supers(sync_fs_one_sb, &nowait);
    130	sync_bdevs(false);
    131	printk("Emergency Sync complete\n");
    132	kfree(work);
    133}
    134
    135void emergency_sync(void)
    136{
    137	struct work_struct *work;
    138
    139	work = kmalloc(sizeof(*work), GFP_ATOMIC);
    140	if (work) {
    141		INIT_WORK(work, do_sync_work);
    142		schedule_work(work);
    143	}
    144}
    145
    146/*
    147 * sync a single super
    148 */
    149SYSCALL_DEFINE1(syncfs, int, fd)
    150{
    151	struct fd f = fdget(fd);
    152	struct super_block *sb;
    153	int ret, ret2;
    154
    155	if (!f.file)
    156		return -EBADF;
    157	sb = f.file->f_path.dentry->d_sb;
    158
    159	down_read(&sb->s_umount);
    160	ret = sync_filesystem(sb);
    161	up_read(&sb->s_umount);
    162
    163	ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
    164
    165	fdput(f);
    166	return ret ? ret : ret2;
    167}
    168
    169/**
    170 * vfs_fsync_range - helper to sync a range of data & metadata to disk
    171 * @file:		file to sync
    172 * @start:		offset in bytes of the beginning of data range to sync
    173 * @end:		offset in bytes of the end of data range (inclusive)
    174 * @datasync:		perform only datasync
    175 *
    176 * Write back data in range @start..@end and metadata for @file to disk.  If
    177 * @datasync is set only metadata needed to access modified file data is
    178 * written.
    179 */
    180int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
    181{
    182	struct inode *inode = file->f_mapping->host;
    183
    184	if (!file->f_op->fsync)
    185		return -EINVAL;
    186	if (!datasync && (inode->i_state & I_DIRTY_TIME))
    187		mark_inode_dirty_sync(inode);
    188	return file->f_op->fsync(file, start, end, datasync);
    189}
    190EXPORT_SYMBOL(vfs_fsync_range);
    191
    192/**
    193 * vfs_fsync - perform a fsync or fdatasync on a file
    194 * @file:		file to sync
    195 * @datasync:		only perform a fdatasync operation
    196 *
    197 * Write back data and metadata for @file to disk.  If @datasync is
    198 * set only metadata needed to access modified file data is written.
    199 */
    200int vfs_fsync(struct file *file, int datasync)
    201{
    202	return vfs_fsync_range(file, 0, LLONG_MAX, datasync);
    203}
    204EXPORT_SYMBOL(vfs_fsync);
    205
    206static int do_fsync(unsigned int fd, int datasync)
    207{
    208	struct fd f = fdget(fd);
    209	int ret = -EBADF;
    210
    211	if (f.file) {
    212		ret = vfs_fsync(f.file, datasync);
    213		fdput(f);
    214	}
    215	return ret;
    216}
    217
    218SYSCALL_DEFINE1(fsync, unsigned int, fd)
    219{
    220	return do_fsync(fd, 0);
    221}
    222
    223SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
    224{
    225	return do_fsync(fd, 1);
    226}
    227
    228int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
    229		    unsigned int flags)
    230{
    231	int ret;
    232	struct address_space *mapping;
    233	loff_t endbyte;			/* inclusive */
    234	umode_t i_mode;
    235
    236	ret = -EINVAL;
    237	if (flags & ~VALID_FLAGS)
    238		goto out;
    239
    240	endbyte = offset + nbytes;
    241
    242	if ((s64)offset < 0)
    243		goto out;
    244	if ((s64)endbyte < 0)
    245		goto out;
    246	if (endbyte < offset)
    247		goto out;
    248
    249	if (sizeof(pgoff_t) == 4) {
    250		if (offset >= (0x100000000ULL << PAGE_SHIFT)) {
    251			/*
    252			 * The range starts outside a 32 bit machine's
    253			 * pagecache addressing capabilities.  Let it "succeed"
    254			 */
    255			ret = 0;
    256			goto out;
    257		}
    258		if (endbyte >= (0x100000000ULL << PAGE_SHIFT)) {
    259			/*
    260			 * Out to EOF
    261			 */
    262			nbytes = 0;
    263		}
    264	}
    265
    266	if (nbytes == 0)
    267		endbyte = LLONG_MAX;
    268	else
    269		endbyte--;		/* inclusive */
    270
    271	i_mode = file_inode(file)->i_mode;
    272	ret = -ESPIPE;
    273	if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
    274			!S_ISLNK(i_mode))
    275		goto out;
    276
    277	mapping = file->f_mapping;
    278	ret = 0;
    279	if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
    280		ret = file_fdatawait_range(file, offset, endbyte);
    281		if (ret < 0)
    282			goto out;
    283	}
    284
    285	if (flags & SYNC_FILE_RANGE_WRITE) {
    286		int sync_mode = WB_SYNC_NONE;
    287
    288		if ((flags & SYNC_FILE_RANGE_WRITE_AND_WAIT) ==
    289			     SYNC_FILE_RANGE_WRITE_AND_WAIT)
    290			sync_mode = WB_SYNC_ALL;
    291
    292		ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
    293						 sync_mode);
    294		if (ret < 0)
    295			goto out;
    296	}
    297
    298	if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
    299		ret = file_fdatawait_range(file, offset, endbyte);
    300
    301out:
    302	return ret;
    303}
    304
    305/*
    306 * ksys_sync_file_range() permits finely controlled syncing over a segment of
    307 * a file in the range offset .. (offset+nbytes-1) inclusive.  If nbytes is
    308 * zero then ksys_sync_file_range() will operate from offset out to EOF.
    309 *
    310 * The flag bits are:
    311 *
    312 * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range
    313 * before performing the write.
    314 *
    315 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
    316 * range which are not presently under writeback. Note that this may block for
    317 * significant periods due to exhaustion of disk request structures.
    318 *
    319 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
    320 * after performing the write.
    321 *
    322 * Useful combinations of the flag bits are:
    323 *
    324 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
    325 * in the range which were dirty on entry to ksys_sync_file_range() are placed
    326 * under writeout.  This is a start-write-for-data-integrity operation.
    327 *
    328 * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
    329 * are not presently under writeout.  This is an asynchronous flush-to-disk
    330 * operation.  Not suitable for data integrity operations.
    331 *
    332 * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for
    333 * completion of writeout of all pages in the range.  This will be used after an
    334 * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
    335 * for that operation to complete and to return the result.
    336 *
    337 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER
    338 * (a.k.a. SYNC_FILE_RANGE_WRITE_AND_WAIT):
    339 * a traditional sync() operation.  This is a write-for-data-integrity operation
    340 * which will ensure that all pages in the range which were dirty on entry to
    341 * ksys_sync_file_range() are written to disk.  It should be noted that disk
    342 * caches are not flushed by this call, so there are no guarantees here that the
    343 * data will be available on disk after a crash.
    344 *
    345 *
    346 * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
    347 * I/O errors or ENOSPC conditions and will return those to the caller, after
    348 * clearing the EIO and ENOSPC flags in the address_space.
    349 *
    350 * It should be noted that none of these operations write out the file's
    351 * metadata.  So unless the application is strictly performing overwrites of
    352 * already-instantiated disk blocks, there are no guarantees here that the data
    353 * will be available after a crash.
    354 */
    355int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
    356			 unsigned int flags)
    357{
    358	int ret;
    359	struct fd f;
    360
    361	ret = -EBADF;
    362	f = fdget(fd);
    363	if (f.file)
    364		ret = sync_file_range(f.file, offset, nbytes, flags);
    365
    366	fdput(f);
    367	return ret;
    368}
    369
    370SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
    371				unsigned int, flags)
    372{
    373	return ksys_sync_file_range(fd, offset, nbytes, flags);
    374}
    375
    376#if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_SYNC_FILE_RANGE)
    377COMPAT_SYSCALL_DEFINE6(sync_file_range, int, fd, compat_arg_u64_dual(offset),
    378		       compat_arg_u64_dual(nbytes), unsigned int, flags)
    379{
    380	return ksys_sync_file_range(fd, compat_arg_u64_glue(offset),
    381				    compat_arg_u64_glue(nbytes), flags);
    382}
    383#endif
    384
    385/* It would be nice if people remember that not all the world's an i386
    386   when they introduce new system calls */
    387SYSCALL_DEFINE4(sync_file_range2, int, fd, unsigned int, flags,
    388				 loff_t, offset, loff_t, nbytes)
    389{
    390	return ksys_sync_file_range(fd, offset, nbytes, flags);
    391}