cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

file.c (130392B)


      1// SPDX-License-Identifier: LGPL-2.1
      2/*
      3 *
      4 *   vfs operations that deal with files
      5 *
      6 *   Copyright (C) International Business Machines  Corp., 2002,2010
      7 *   Author(s): Steve French (sfrench@us.ibm.com)
      8 *              Jeremy Allison (jra@samba.org)
      9 *
     10 */
     11#include <linux/fs.h>
     12#include <linux/backing-dev.h>
     13#include <linux/stat.h>
     14#include <linux/fcntl.h>
     15#include <linux/pagemap.h>
     16#include <linux/pagevec.h>
     17#include <linux/writeback.h>
     18#include <linux/task_io_accounting_ops.h>
     19#include <linux/delay.h>
     20#include <linux/mount.h>
     21#include <linux/slab.h>
     22#include <linux/swap.h>
     23#include <linux/mm.h>
     24#include <asm/div64.h>
     25#include "cifsfs.h"
     26#include "cifspdu.h"
     27#include "cifsglob.h"
     28#include "cifsproto.h"
     29#include "cifs_unicode.h"
     30#include "cifs_debug.h"
     31#include "cifs_fs_sb.h"
     32#include "fscache.h"
     33#include "smbdirect.h"
     34#include "fs_context.h"
     35#include "cifs_ioctl.h"
     36
     37static inline int cifs_convert_flags(unsigned int flags)
     38{
     39	if ((flags & O_ACCMODE) == O_RDONLY)
     40		return GENERIC_READ;
     41	else if ((flags & O_ACCMODE) == O_WRONLY)
     42		return GENERIC_WRITE;
     43	else if ((flags & O_ACCMODE) == O_RDWR) {
      44	/* GENERIC_ALL is too much permission to request;
      45	   it can cause unnecessary access-denied errors on create */
     46		/* return GENERIC_ALL; */
     47		return (GENERIC_READ | GENERIC_WRITE);
     48	}
     49
     50	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
     51		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
     52		FILE_READ_DATA);
     53}
     54
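
A minimal userspace sketch of the same O_ACCMODE mapping, handy for checking which SMB access bits a given open() mode would request. The GENERIC_* values follow the usual SMB/Windows ACCESS_MASK layout; the kernel's real definitions live in cifspdu.h, so treat these constants as illustrative.

#include <fcntl.h>
#include <stdio.h>

#define GEN_READ  0x80000000u	/* assumed ACCESS_MASK values; see cifspdu.h */
#define GEN_WRITE 0x40000000u

static unsigned int convert_flags(unsigned int flags)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GEN_READ;
	if ((flags & O_ACCMODE) == O_WRONLY)
		return GEN_WRITE;
	return GEN_READ | GEN_WRITE;	/* O_RDWR, not GENERIC_ALL */
}

int main(void)
{
	printf("O_RDWR -> 0x%08x\n", convert_flags(O_RDWR));
	return 0;
}
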
     55static u32 cifs_posix_convert_flags(unsigned int flags)
     56{
     57	u32 posix_flags = 0;
     58
     59	if ((flags & O_ACCMODE) == O_RDONLY)
     60		posix_flags = SMB_O_RDONLY;
     61	else if ((flags & O_ACCMODE) == O_WRONLY)
     62		posix_flags = SMB_O_WRONLY;
     63	else if ((flags & O_ACCMODE) == O_RDWR)
     64		posix_flags = SMB_O_RDWR;
     65
     66	if (flags & O_CREAT) {
     67		posix_flags |= SMB_O_CREAT;
     68		if (flags & O_EXCL)
     69			posix_flags |= SMB_O_EXCL;
     70	} else if (flags & O_EXCL)
     71		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
     72			 current->comm, current->tgid);
     73
     74	if (flags & O_TRUNC)
     75		posix_flags |= SMB_O_TRUNC;
     76	/* be safe and imply O_SYNC for O_DSYNC */
     77	if (flags & O_DSYNC)
     78		posix_flags |= SMB_O_SYNC;
     79	if (flags & O_DIRECTORY)
     80		posix_flags |= SMB_O_DIRECTORY;
     81	if (flags & O_NOFOLLOW)
     82		posix_flags |= SMB_O_NOFOLLOW;
     83	if (flags & O_DIRECT)
     84		posix_flags |= SMB_O_DIRECT;
     85
     86	return posix_flags;
     87}
     88
     89static inline int cifs_get_disposition(unsigned int flags)
     90{
     91	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
     92		return FILE_CREATE;
     93	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
     94		return FILE_OVERWRITE_IF;
     95	else if ((flags & O_CREAT) == O_CREAT)
     96		return FILE_OPEN_IF;
     97	else if ((flags & O_TRUNC) == O_TRUNC)
     98		return FILE_OVERWRITE;
     99	else
    100		return FILE_OPEN;
    101}
    102
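
The order of the checks in cifs_get_disposition() matters: O_CREAT|O_EXCL must be tested before plain O_CREAT, and O_CREAT|O_TRUNC before either flag alone. A small standalone sketch that prints the chosen disposition for a few combinations (names only; the numeric FILE_* values are defined in the CIFS headers):

#include <fcntl.h>
#include <stdio.h>

static const char *disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return "FILE_CREATE";
	if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return "FILE_OVERWRITE_IF";
	if (flags & O_CREAT)
		return "FILE_OPEN_IF";
	if (flags & O_TRUNC)
		return "FILE_OVERWRITE";
	return "FILE_OPEN";
}

int main(void)
{
	printf("O_CREAT         -> %s\n", disposition(O_CREAT));
	printf("O_CREAT|O_EXCL  -> %s\n", disposition(O_CREAT | O_EXCL));
	printf("O_CREAT|O_TRUNC -> %s\n", disposition(O_CREAT | O_TRUNC));
	return 0;
}
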
    103int cifs_posix_open(const char *full_path, struct inode **pinode,
    104			struct super_block *sb, int mode, unsigned int f_flags,
    105			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
    106{
    107	int rc;
    108	FILE_UNIX_BASIC_INFO *presp_data;
    109	__u32 posix_flags = 0;
    110	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
    111	struct cifs_fattr fattr;
    112	struct tcon_link *tlink;
    113	struct cifs_tcon *tcon;
    114
    115	cifs_dbg(FYI, "posix open %s\n", full_path);
    116
    117	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
    118	if (presp_data == NULL)
    119		return -ENOMEM;
    120
    121	tlink = cifs_sb_tlink(cifs_sb);
    122	if (IS_ERR(tlink)) {
    123		rc = PTR_ERR(tlink);
    124		goto posix_open_ret;
    125	}
    126
    127	tcon = tlink_tcon(tlink);
    128	mode &= ~current_umask();
    129
    130	posix_flags = cifs_posix_convert_flags(f_flags);
    131	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
    132			     poplock, full_path, cifs_sb->local_nls,
    133			     cifs_remap(cifs_sb));
    134	cifs_put_tlink(tlink);
    135
    136	if (rc)
    137		goto posix_open_ret;
    138
    139	if (presp_data->Type == cpu_to_le32(-1))
    140		goto posix_open_ret; /* open ok, caller does qpathinfo */
    141
    142	if (!pinode)
    143		goto posix_open_ret; /* caller does not need info */
    144
    145	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
    146
    147	/* get new inode and set it up */
    148	if (*pinode == NULL) {
    149		cifs_fill_uniqueid(sb, &fattr);
    150		*pinode = cifs_iget(sb, &fattr);
    151		if (!*pinode) {
    152			rc = -ENOMEM;
    153			goto posix_open_ret;
    154		}
    155	} else {
    156		cifs_revalidate_mapping(*pinode);
    157		rc = cifs_fattr_to_inode(*pinode, &fattr);
    158	}
    159
    160posix_open_ret:
    161	kfree(presp_data);
    162	return rc;
    163}
    164
    165static int
    166cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
    167	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
    168	     struct cifs_fid *fid, unsigned int xid)
    169{
    170	int rc;
    171	int desired_access;
    172	int disposition;
    173	int create_options = CREATE_NOT_DIR;
    174	FILE_ALL_INFO *buf;
    175	struct TCP_Server_Info *server = tcon->ses->server;
    176	struct cifs_open_parms oparms;
    177
    178	if (!server->ops->open)
    179		return -ENOSYS;
    180
    181	desired_access = cifs_convert_flags(f_flags);
    182
    183/*********************************************************************
    184 *  open flag mapping table:
    185 *
    186 *	POSIX Flag            CIFS Disposition
    187 *	----------            ----------------
    188 *	O_CREAT               FILE_OPEN_IF
    189 *	O_CREAT | O_EXCL      FILE_CREATE
    190 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
    191 *	O_TRUNC               FILE_OVERWRITE
    192 *	none of the above     FILE_OPEN
    193 *
     194 *	Note that there is no direct match for the disposition
     195 *	FILE_SUPERSEDE (ie create whether or not the file exists):
     196 *	O_CREAT | O_TRUNC is similar, but it truncates the existing
     197 *	file rather than creating a new file as FILE_SUPERSEDE does
     198 *	(which uses the attributes / metadata passed in on the open call).
     199 *
     200 *	O_SYNC is a reasonable match to the CIFS writethrough flag
     201 *	and the read/write flags match reasonably.  O_LARGEFILE
     202 *	is irrelevant because largefile support is always used
     203 *	by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
     204 *	O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
    205 *********************************************************************/
    206
    207	disposition = cifs_get_disposition(f_flags);
    208
    209	/* BB pass O_SYNC flag through on file attributes .. BB */
    210
    211	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
    212	if (!buf)
    213		return -ENOMEM;
    214
     215	/* O_SYNC also has the bit for O_DSYNC, so the following check picks up either */
    216	if (f_flags & O_SYNC)
    217		create_options |= CREATE_WRITE_THROUGH;
    218
    219	if (f_flags & O_DIRECT)
    220		create_options |= CREATE_NO_BUFFER;
    221
    222	oparms.tcon = tcon;
    223	oparms.cifs_sb = cifs_sb;
    224	oparms.desired_access = desired_access;
    225	oparms.create_options = cifs_create_options(cifs_sb, create_options);
    226	oparms.disposition = disposition;
    227	oparms.path = full_path;
    228	oparms.fid = fid;
    229	oparms.reconnect = false;
    230
    231	rc = server->ops->open(xid, &oparms, oplock, buf);
    232
    233	if (rc)
    234		goto out;
    235
    236	/* TODO: Add support for calling posix query info but with passing in fid */
    237	if (tcon->unix_ext)
    238		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
    239					      xid);
    240	else
    241		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
    242					 xid, fid);
    243
    244	if (rc) {
    245		server->ops->close(xid, tcon, fid);
    246		if (rc == -ESTALE)
    247			rc = -EOPENSTALE;
    248	}
    249
    250out:
    251	kfree(buf);
    252	return rc;
    253}
    254
    255static bool
    256cifs_has_mand_locks(struct cifsInodeInfo *cinode)
    257{
    258	struct cifs_fid_locks *cur;
    259	bool has_locks = false;
    260
    261	down_read(&cinode->lock_sem);
    262	list_for_each_entry(cur, &cinode->llist, llist) {
    263		if (!list_empty(&cur->locks)) {
    264			has_locks = true;
    265			break;
    266		}
    267	}
    268	up_read(&cinode->lock_sem);
    269	return has_locks;
    270}
    271
    272void
    273cifs_down_write(struct rw_semaphore *sem)
    274{
    275	while (!down_write_trylock(sem))
    276		msleep(10);
    277}
    278
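
cifs_down_write() acquires lock_sem by polling down_write_trylock() with a 10 ms sleep between attempts instead of blocking in down_write(). A userspace analogue of the pattern with POSIX rwlocks, purely as a sketch:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t lock_sem = PTHREAD_RWLOCK_INITIALIZER;

static void poll_down_write(pthread_rwlock_t *sem)
{
	/* try, nap 10 ms, try again - mirrors down_write_trylock()/msleep(10) */
	while (pthread_rwlock_trywrlock(sem) != 0)
		usleep(10 * 1000);
}

int main(void)
{
	poll_down_write(&lock_sem);
	puts("write lock acquired");
	pthread_rwlock_unlock(&lock_sem);
	return 0;
}
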
    279static void cifsFileInfo_put_work(struct work_struct *work);
    280
    281struct cifsFileInfo *
    282cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
    283		  struct tcon_link *tlink, __u32 oplock)
    284{
    285	struct dentry *dentry = file_dentry(file);
    286	struct inode *inode = d_inode(dentry);
    287	struct cifsInodeInfo *cinode = CIFS_I(inode);
    288	struct cifsFileInfo *cfile;
    289	struct cifs_fid_locks *fdlocks;
    290	struct cifs_tcon *tcon = tlink_tcon(tlink);
    291	struct TCP_Server_Info *server = tcon->ses->server;
    292
    293	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
    294	if (cfile == NULL)
    295		return cfile;
    296
    297	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
    298	if (!fdlocks) {
    299		kfree(cfile);
    300		return NULL;
    301	}
    302
    303	INIT_LIST_HEAD(&fdlocks->locks);
    304	fdlocks->cfile = cfile;
    305	cfile->llist = fdlocks;
    306
    307	cfile->count = 1;
    308	cfile->pid = current->tgid;
    309	cfile->uid = current_fsuid();
    310	cfile->dentry = dget(dentry);
    311	cfile->f_flags = file->f_flags;
    312	cfile->invalidHandle = false;
    313	cfile->deferred_close_scheduled = false;
    314	cfile->tlink = cifs_get_tlink(tlink);
    315	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
    316	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
    317	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
    318	mutex_init(&cfile->fh_mutex);
    319	spin_lock_init(&cfile->file_info_lock);
    320
    321	cifs_sb_active(inode->i_sb);
    322
    323	/*
    324	 * If the server returned a read oplock and we have mandatory brlocks,
    325	 * set oplock level to None.
    326	 */
    327	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
    328		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
    329		oplock = 0;
    330	}
    331
    332	cifs_down_write(&cinode->lock_sem);
    333	list_add(&fdlocks->llist, &cinode->llist);
    334	up_write(&cinode->lock_sem);
    335
    336	spin_lock(&tcon->open_file_lock);
    337	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
    338		oplock = fid->pending_open->oplock;
    339	list_del(&fid->pending_open->olist);
    340
    341	fid->purge_cache = false;
    342	server->ops->set_fid(cfile, fid, oplock);
    343
    344	list_add(&cfile->tlist, &tcon->openFileList);
    345	atomic_inc(&tcon->num_local_opens);
    346
     347	/* if it is a readable file instance, put it first in the list */
    348	spin_lock(&cinode->open_file_lock);
    349	if (file->f_mode & FMODE_READ)
    350		list_add(&cfile->flist, &cinode->openFileList);
    351	else
    352		list_add_tail(&cfile->flist, &cinode->openFileList);
    353	spin_unlock(&cinode->open_file_lock);
    354	spin_unlock(&tcon->open_file_lock);
    355
    356	if (fid->purge_cache)
    357		cifs_zap_mapping(inode);
    358
    359	file->private_data = cfile;
    360	return cfile;
    361}
    362
    363struct cifsFileInfo *
    364cifsFileInfo_get(struct cifsFileInfo *cifs_file)
    365{
    366	spin_lock(&cifs_file->file_info_lock);
    367	cifsFileInfo_get_locked(cifs_file);
    368	spin_unlock(&cifs_file->file_info_lock);
    369	return cifs_file;
    370}
    371
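
The pairing contract is the usual one: every cifsFileInfo_get() must eventually be matched by a put, and the final put tears the structure down. A stripped-down sketch of that lifecycle using C11 atomics (the kernel version protects the count with file_info_lock rather than an atomic):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct fileinfo {
	atomic_int count;	/* plays the role of cifsFileInfo->count */
};

static struct fileinfo *fi_get(struct fileinfo *fi)
{
	atomic_fetch_add(&fi->count, 1);
	return fi;
}

static void fi_put(struct fileinfo *fi)
{
	if (atomic_fetch_sub(&fi->count, 1) == 1) {
		puts("last reference dropped: releasing");
		free(fi);	/* stands in for cifsFileInfo_put_final() */
	}
}

int main(void)
{
	struct fileinfo *fi = calloc(1, sizeof(*fi));

	atomic_store(&fi->count, 1);	/* creation ref, as in cifs_new_fileinfo() */
	fi_get(fi);			/* e.g. handed to deferred-close work */
	fi_put(fi);
	fi_put(fi);			/* count reaches zero, object is freed */
	return 0;
}
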
    372static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
    373{
    374	struct inode *inode = d_inode(cifs_file->dentry);
    375	struct cifsInodeInfo *cifsi = CIFS_I(inode);
    376	struct cifsLockInfo *li, *tmp;
    377	struct super_block *sb = inode->i_sb;
    378
    379	/*
    380	 * Delete any outstanding lock records. We'll lose them when the file
    381	 * is closed anyway.
    382	 */
    383	cifs_down_write(&cifsi->lock_sem);
    384	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
    385		list_del(&li->llist);
    386		cifs_del_lock_waiters(li);
    387		kfree(li);
    388	}
    389	list_del(&cifs_file->llist->llist);
    390	kfree(cifs_file->llist);
    391	up_write(&cifsi->lock_sem);
    392
    393	cifs_put_tlink(cifs_file->tlink);
    394	dput(cifs_file->dentry);
    395	cifs_sb_deactive(sb);
    396	kfree(cifs_file);
    397}
    398
    399static void cifsFileInfo_put_work(struct work_struct *work)
    400{
    401	struct cifsFileInfo *cifs_file = container_of(work,
    402			struct cifsFileInfo, put);
    403
    404	cifsFileInfo_put_final(cifs_file);
    405}
    406
    407/**
    408 * cifsFileInfo_put - release a reference of file priv data
    409 *
    410 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
    411 *
    412 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
    413 */
    414void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
    415{
    416	_cifsFileInfo_put(cifs_file, true, true);
    417}
    418
    419/**
    420 * _cifsFileInfo_put - release a reference of file priv data
    421 *
    422 * This may involve closing the filehandle @cifs_file out on the
    423 * server. Must be called without holding tcon->open_file_lock,
    424 * cinode->open_file_lock and cifs_file->file_info_lock.
    425 *
    426 * If @wait_for_oplock_handler is true and we are releasing the last
    427 * reference, wait for any running oplock break handler of the file
    428 * and cancel any pending one.
    429 *
    430 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
    431 * @wait_oplock_handler: must be false if called from oplock_break_handler
     432 * @offload:	if true, defer the final release to a workqueue;
     433 *		close and oplock-break paths pass false
    434 */
    435void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
    436		       bool wait_oplock_handler, bool offload)
    437{
    438	struct inode *inode = d_inode(cifs_file->dentry);
    439	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
    440	struct TCP_Server_Info *server = tcon->ses->server;
    441	struct cifsInodeInfo *cifsi = CIFS_I(inode);
    442	struct super_block *sb = inode->i_sb;
    443	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
    444	struct cifs_fid fid;
    445	struct cifs_pending_open open;
    446	bool oplock_break_cancelled;
    447
    448	spin_lock(&tcon->open_file_lock);
    449	spin_lock(&cifsi->open_file_lock);
    450	spin_lock(&cifs_file->file_info_lock);
    451	if (--cifs_file->count > 0) {
    452		spin_unlock(&cifs_file->file_info_lock);
    453		spin_unlock(&cifsi->open_file_lock);
    454		spin_unlock(&tcon->open_file_lock);
    455		return;
    456	}
    457	spin_unlock(&cifs_file->file_info_lock);
    458
    459	if (server->ops->get_lease_key)
    460		server->ops->get_lease_key(inode, &fid);
    461
    462	/* store open in pending opens to make sure we don't miss lease break */
    463	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
    464
    465	/* remove it from the lists */
    466	list_del(&cifs_file->flist);
    467	list_del(&cifs_file->tlist);
    468	atomic_dec(&tcon->num_local_opens);
    469
    470	if (list_empty(&cifsi->openFileList)) {
    471		cifs_dbg(FYI, "closing last open instance for inode %p\n",
    472			 d_inode(cifs_file->dentry));
    473		/*
     474		 * In strict cache mode we need to invalidate the mapping on the
     475		 * last close because it may cause an error when we open this
     476		 * file again and get at least a level II oplock.
    477		 */
    478		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
    479			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
    480		cifs_set_oplock_level(cifsi, 0);
    481	}
    482
    483	spin_unlock(&cifsi->open_file_lock);
    484	spin_unlock(&tcon->open_file_lock);
    485
    486	oplock_break_cancelled = wait_oplock_handler ?
    487		cancel_work_sync(&cifs_file->oplock_break) : false;
    488
    489	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
    490		struct TCP_Server_Info *server = tcon->ses->server;
    491		unsigned int xid;
    492
    493		xid = get_xid();
    494		if (server->ops->close_getattr)
    495			server->ops->close_getattr(xid, tcon, cifs_file);
    496		else if (server->ops->close)
    497			server->ops->close(xid, tcon, &cifs_file->fid);
    498		_free_xid(xid);
    499	}
    500
    501	if (oplock_break_cancelled)
    502		cifs_done_oplock_break(cifsi);
    503
    504	cifs_del_pending_open(&open);
    505
    506	if (offload)
    507		queue_work(fileinfo_put_wq, &cifs_file->put);
    508	else
    509		cifsFileInfo_put_final(cifs_file);
    510}
    511
    512int cifs_open(struct inode *inode, struct file *file)
    513
    514{
    515	int rc = -EACCES;
    516	unsigned int xid;
    517	__u32 oplock;
    518	struct cifs_sb_info *cifs_sb;
    519	struct TCP_Server_Info *server;
    520	struct cifs_tcon *tcon;
    521	struct tcon_link *tlink;
    522	struct cifsFileInfo *cfile = NULL;
    523	void *page;
    524	const char *full_path;
    525	bool posix_open_ok = false;
    526	struct cifs_fid fid;
    527	struct cifs_pending_open open;
    528
    529	xid = get_xid();
    530
    531	cifs_sb = CIFS_SB(inode->i_sb);
    532	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
    533		free_xid(xid);
    534		return -EIO;
    535	}
    536
    537	tlink = cifs_sb_tlink(cifs_sb);
    538	if (IS_ERR(tlink)) {
    539		free_xid(xid);
    540		return PTR_ERR(tlink);
    541	}
    542	tcon = tlink_tcon(tlink);
    543	server = tcon->ses->server;
    544
    545	page = alloc_dentry_path();
    546	full_path = build_path_from_dentry(file_dentry(file), page);
    547	if (IS_ERR(full_path)) {
    548		rc = PTR_ERR(full_path);
    549		goto out;
    550	}
    551
    552	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
    553		 inode, file->f_flags, full_path);
    554
    555	if (file->f_flags & O_DIRECT &&
    556	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
    557		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
    558			file->f_op = &cifs_file_direct_nobrl_ops;
    559		else
    560			file->f_op = &cifs_file_direct_ops;
    561	}
    562
    563	/* Get the cached handle as SMB2 close is deferred */
    564	rc = cifs_get_readable_path(tcon, full_path, &cfile);
    565	if (rc == 0) {
    566		if (file->f_flags == cfile->f_flags) {
    567			file->private_data = cfile;
    568			spin_lock(&CIFS_I(inode)->deferred_lock);
    569			cifs_del_deferred_close(cfile);
    570			spin_unlock(&CIFS_I(inode)->deferred_lock);
    571			goto use_cache;
    572		} else {
    573			_cifsFileInfo_put(cfile, true, false);
    574		}
    575	}
    576
    577	if (server->oplocks)
    578		oplock = REQ_OPLOCK;
    579	else
    580		oplock = 0;
    581
    582	if (!tcon->broken_posix_open && tcon->unix_ext &&
    583	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
    584				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
    585		/* can not refresh inode info since size could be stale */
    586		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
    587				cifs_sb->ctx->file_mode /* ignored */,
    588				file->f_flags, &oplock, &fid.netfid, xid);
    589		if (rc == 0) {
    590			cifs_dbg(FYI, "posix open succeeded\n");
    591			posix_open_ok = true;
    592		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
    593			if (tcon->ses->serverNOS)
    594				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
    595					 tcon->ses->ip_addr,
    596					 tcon->ses->serverNOS);
    597			tcon->broken_posix_open = true;
    598		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
    599			 (rc != -EOPNOTSUPP)) /* path not found or net err */
    600			goto out;
    601		/*
    602		 * Else fallthrough to retry open the old way on network i/o
    603		 * or DFS errors.
    604		 */
    605	}
    606
    607	if (server->ops->get_lease_key)
    608		server->ops->get_lease_key(inode, &fid);
    609
    610	cifs_add_pending_open(&fid, tlink, &open);
    611
    612	if (!posix_open_ok) {
    613		if (server->ops->get_lease_key)
    614			server->ops->get_lease_key(inode, &fid);
    615
    616		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
    617				  file->f_flags, &oplock, &fid, xid);
    618		if (rc) {
    619			cifs_del_pending_open(&open);
    620			goto out;
    621		}
    622	}
    623
    624	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
    625	if (cfile == NULL) {
    626		if (server->ops->close)
    627			server->ops->close(xid, tcon, &fid);
    628		cifs_del_pending_open(&open);
    629		rc = -ENOMEM;
    630		goto out;
    631	}
    632
    633	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
    634		/*
    635		 * Time to set mode which we can not set earlier due to
    636		 * problems creating new read-only files.
    637		 */
    638		struct cifs_unix_set_info_args args = {
    639			.mode	= inode->i_mode,
    640			.uid	= INVALID_UID, /* no change */
    641			.gid	= INVALID_GID, /* no change */
    642			.ctime	= NO_CHANGE_64,
    643			.atime	= NO_CHANGE_64,
    644			.mtime	= NO_CHANGE_64,
    645			.device	= 0,
    646		};
    647		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
    648				       cfile->pid);
    649	}
    650
    651use_cache:
    652	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
    653			   file->f_mode & FMODE_WRITE);
    654	if (file->f_flags & O_DIRECT &&
    655	    (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
    656	     file->f_flags & O_APPEND))
    657		cifs_invalidate_cache(file_inode(file),
    658				      FSCACHE_INVAL_DIO_WRITE);
    659
    660out:
    661	free_dentry_path(page);
    662	free_xid(xid);
    663	cifs_put_tlink(tlink);
    664	return rc;
    665}
    666
    667static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
    668
    669/*
    670 * Try to reacquire byte range locks that were released when session
    671 * to server was lost.
    672 */
    673static int
    674cifs_relock_file(struct cifsFileInfo *cfile)
    675{
    676	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
    677	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
    678	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
    679	int rc = 0;
    680
    681	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
    682	if (cinode->can_cache_brlcks) {
    683		/* can cache locks - no need to relock */
    684		up_read(&cinode->lock_sem);
    685		return rc;
    686	}
    687
    688	if (cap_unix(tcon->ses) &&
    689	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
    690	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
    691		rc = cifs_push_posix_locks(cfile);
    692	else
    693		rc = tcon->ses->server->ops->push_mand_locks(cfile);
    694
    695	up_read(&cinode->lock_sem);
    696	return rc;
    697}
    698
    699static int
    700cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
    701{
    702	int rc = -EACCES;
    703	unsigned int xid;
    704	__u32 oplock;
    705	struct cifs_sb_info *cifs_sb;
    706	struct cifs_tcon *tcon;
    707	struct TCP_Server_Info *server;
    708	struct cifsInodeInfo *cinode;
    709	struct inode *inode;
    710	void *page;
    711	const char *full_path;
    712	int desired_access;
    713	int disposition = FILE_OPEN;
    714	int create_options = CREATE_NOT_DIR;
    715	struct cifs_open_parms oparms;
    716
    717	xid = get_xid();
    718	mutex_lock(&cfile->fh_mutex);
    719	if (!cfile->invalidHandle) {
    720		mutex_unlock(&cfile->fh_mutex);
    721		free_xid(xid);
    722		return 0;
    723	}
    724
    725	inode = d_inode(cfile->dentry);
    726	cifs_sb = CIFS_SB(inode->i_sb);
    727	tcon = tlink_tcon(cfile->tlink);
    728	server = tcon->ses->server;
    729
    730	/*
     731	 * Can not grab the rename sem here, because various ops, including some
     732	 * that already have the rename sem, can end up causing writepage to get
     733	 * called. If the server was down, that means we end up here, and we can
     734	 * never tell if the caller already has the rename_sem.
    735	 */
    736	page = alloc_dentry_path();
    737	full_path = build_path_from_dentry(cfile->dentry, page);
    738	if (IS_ERR(full_path)) {
    739		mutex_unlock(&cfile->fh_mutex);
    740		free_dentry_path(page);
    741		free_xid(xid);
    742		return PTR_ERR(full_path);
    743	}
    744
    745	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
    746		 inode, cfile->f_flags, full_path);
    747
    748	if (tcon->ses->server->oplocks)
    749		oplock = REQ_OPLOCK;
    750	else
    751		oplock = 0;
    752
    753	if (tcon->unix_ext && cap_unix(tcon->ses) &&
    754	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
    755				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
    756		/*
    757		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
    758		 * original open. Must mask them off for a reopen.
    759		 */
    760		unsigned int oflags = cfile->f_flags &
    761						~(O_CREAT | O_EXCL | O_TRUNC);
    762
    763		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
    764				     cifs_sb->ctx->file_mode /* ignored */,
    765				     oflags, &oplock, &cfile->fid.netfid, xid);
    766		if (rc == 0) {
    767			cifs_dbg(FYI, "posix reopen succeeded\n");
    768			oparms.reconnect = true;
    769			goto reopen_success;
    770		}
    771		/*
		 772		 * fall through to retry the open the old way on errors;
		 773		 * especially in the reconnect path it is important to retry hard
    774		 */
    775	}
    776
    777	desired_access = cifs_convert_flags(cfile->f_flags);
    778
     779	/* O_SYNC also has the bit for O_DSYNC, so the following check picks up either */
    780	if (cfile->f_flags & O_SYNC)
    781		create_options |= CREATE_WRITE_THROUGH;
    782
    783	if (cfile->f_flags & O_DIRECT)
    784		create_options |= CREATE_NO_BUFFER;
    785
    786	if (server->ops->get_lease_key)
    787		server->ops->get_lease_key(inode, &cfile->fid);
    788
    789	oparms.tcon = tcon;
    790	oparms.cifs_sb = cifs_sb;
    791	oparms.desired_access = desired_access;
    792	oparms.create_options = cifs_create_options(cifs_sb, create_options);
    793	oparms.disposition = disposition;
    794	oparms.path = full_path;
    795	oparms.fid = &cfile->fid;
    796	oparms.reconnect = true;
    797
    798	/*
    799	 * Can not refresh inode by passing in file_info buf to be returned by
    800	 * ops->open and then calling get_inode_info with returned buf since
    801	 * file might have write behind data that needs to be flushed and server
    802	 * version of file size can be stale. If we knew for sure that inode was
    803	 * not dirty locally we could do this.
    804	 */
    805	rc = server->ops->open(xid, &oparms, &oplock, NULL);
    806	if (rc == -ENOENT && oparms.reconnect == false) {
    807		/* durable handle timeout is expired - open the file again */
    808		rc = server->ops->open(xid, &oparms, &oplock, NULL);
    809		/* indicate that we need to relock the file */
    810		oparms.reconnect = true;
    811	}
    812
    813	if (rc) {
    814		mutex_unlock(&cfile->fh_mutex);
    815		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
    816		cifs_dbg(FYI, "oplock: %d\n", oplock);
    817		goto reopen_error_exit;
    818	}
    819
    820reopen_success:
    821	cfile->invalidHandle = false;
    822	mutex_unlock(&cfile->fh_mutex);
    823	cinode = CIFS_I(inode);
    824
    825	if (can_flush) {
    826		rc = filemap_write_and_wait(inode->i_mapping);
    827		if (!is_interrupt_error(rc))
    828			mapping_set_error(inode->i_mapping, rc);
    829
    830		if (tcon->posix_extensions)
    831			rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
    832		else if (tcon->unix_ext)
    833			rc = cifs_get_inode_info_unix(&inode, full_path,
    834						      inode->i_sb, xid);
    835		else
    836			rc = cifs_get_inode_info(&inode, full_path, NULL,
    837						 inode->i_sb, xid, NULL);
    838	}
    839	/*
    840	 * Else we are writing out data to server already and could deadlock if
    841	 * we tried to flush data, and since we do not know if we have data that
    842	 * would invalidate the current end of file on the server we can not go
    843	 * to the server to get the new inode info.
    844	 */
    845
    846	/*
    847	 * If the server returned a read oplock and we have mandatory brlocks,
    848	 * set oplock level to None.
    849	 */
    850	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
    851		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
    852		oplock = 0;
    853	}
    854
    855	server->ops->set_fid(cfile, &cfile->fid, oplock);
    856	if (oparms.reconnect)
    857		cifs_relock_file(cfile);
    858
    859reopen_error_exit:
    860	free_dentry_path(page);
    861	free_xid(xid);
    862	return rc;
    863}
    864
    865void smb2_deferred_work_close(struct work_struct *work)
    866{
    867	struct cifsFileInfo *cfile = container_of(work,
    868			struct cifsFileInfo, deferred.work);
    869
    870	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
    871	cifs_del_deferred_close(cfile);
    872	cfile->deferred_close_scheduled = false;
    873	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
    874	_cifsFileInfo_put(cfile, true, false);
    875}
    876
    877int cifs_close(struct inode *inode, struct file *file)
    878{
    879	struct cifsFileInfo *cfile;
    880	struct cifsInodeInfo *cinode = CIFS_I(inode);
    881	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
    882	struct cifs_deferred_close *dclose;
    883
    884	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
    885
    886	if (file->private_data != NULL) {
    887		cfile = file->private_data;
    888		file->private_data = NULL;
    889		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
    890		if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
    891		    cinode->lease_granted &&
    892		    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
    893		    dclose) {
    894			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
    895				inode->i_ctime = inode->i_mtime = current_time(inode);
    896			}
    897			spin_lock(&cinode->deferred_lock);
    898			cifs_add_deferred_close(cfile, dclose);
    899			if (cfile->deferred_close_scheduled &&
    900			    delayed_work_pending(&cfile->deferred)) {
    901				/*
    902				 * If there is no pending work, mod_delayed_work queues new work.
     903				 * So, increase the ref count to avoid a use-after-free.
    904				 */
    905				if (!mod_delayed_work(deferredclose_wq,
    906						&cfile->deferred, cifs_sb->ctx->acregmax))
    907					cifsFileInfo_get(cfile);
    908			} else {
    909				/* Deferred close for files */
    910				queue_delayed_work(deferredclose_wq,
    911						&cfile->deferred, cifs_sb->ctx->acregmax);
    912				cfile->deferred_close_scheduled = true;
    913				spin_unlock(&cinode->deferred_lock);
    914				return 0;
    915			}
    916			spin_unlock(&cinode->deferred_lock);
    917			_cifsFileInfo_put(cfile, true, false);
    918		} else {
    919			_cifsFileInfo_put(cfile, true, false);
    920			kfree(dclose);
    921		}
    922	}
    923
    924	/* return code from the ->release op is always ignored */
    925	return 0;
    926}
    927
    928void
    929cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
    930{
    931	struct cifsFileInfo *open_file;
    932	struct list_head *tmp;
    933	struct list_head *tmp1;
    934	struct list_head tmp_list;
    935
    936	if (!tcon->use_persistent || !tcon->need_reopen_files)
    937		return;
    938
    939	tcon->need_reopen_files = false;
    940
    941	cifs_dbg(FYI, "Reopen persistent handles\n");
    942	INIT_LIST_HEAD(&tmp_list);
    943
    944	/* list all files open on tree connection, reopen resilient handles  */
    945	spin_lock(&tcon->open_file_lock);
    946	list_for_each(tmp, &tcon->openFileList) {
    947		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
    948		if (!open_file->invalidHandle)
    949			continue;
    950		cifsFileInfo_get(open_file);
    951		list_add_tail(&open_file->rlist, &tmp_list);
    952	}
    953	spin_unlock(&tcon->open_file_lock);
    954
    955	list_for_each_safe(tmp, tmp1, &tmp_list) {
    956		open_file = list_entry(tmp, struct cifsFileInfo, rlist);
    957		if (cifs_reopen_file(open_file, false /* do not flush */))
    958			tcon->need_reopen_files = true;
    959		list_del_init(&open_file->rlist);
    960		cifsFileInfo_put(open_file);
    961	}
    962}
    963
    964int cifs_closedir(struct inode *inode, struct file *file)
    965{
    966	int rc = 0;
    967	unsigned int xid;
    968	struct cifsFileInfo *cfile = file->private_data;
    969	struct cifs_tcon *tcon;
    970	struct TCP_Server_Info *server;
    971	char *buf;
    972
    973	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
    974
    975	if (cfile == NULL)
    976		return rc;
    977
    978	xid = get_xid();
    979	tcon = tlink_tcon(cfile->tlink);
    980	server = tcon->ses->server;
    981
    982	cifs_dbg(FYI, "Freeing private data in close dir\n");
    983	spin_lock(&cfile->file_info_lock);
    984	if (server->ops->dir_needs_close(cfile)) {
    985		cfile->invalidHandle = true;
    986		spin_unlock(&cfile->file_info_lock);
    987		if (server->ops->close_dir)
    988			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
    989		else
    990			rc = -ENOSYS;
    991		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
    992		/* not much we can do if it fails anyway, ignore rc */
    993		rc = 0;
    994	} else
    995		spin_unlock(&cfile->file_info_lock);
    996
    997	buf = cfile->srch_inf.ntwrk_buf_start;
    998	if (buf) {
    999		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
   1000		cfile->srch_inf.ntwrk_buf_start = NULL;
   1001		if (cfile->srch_inf.smallBuf)
   1002			cifs_small_buf_release(buf);
   1003		else
   1004			cifs_buf_release(buf);
   1005	}
   1006
   1007	cifs_put_tlink(cfile->tlink);
   1008	kfree(file->private_data);
   1009	file->private_data = NULL;
   1010	/* BB can we lock the filestruct while this is going on? */
   1011	free_xid(xid);
   1012	return rc;
   1013}
   1014
   1015static struct cifsLockInfo *
   1016cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
   1017{
   1018	struct cifsLockInfo *lock =
   1019		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
   1020	if (!lock)
   1021		return lock;
   1022	lock->offset = offset;
   1023	lock->length = length;
   1024	lock->type = type;
   1025	lock->pid = current->tgid;
   1026	lock->flags = flags;
   1027	INIT_LIST_HEAD(&lock->blist);
   1028	init_waitqueue_head(&lock->block_q);
   1029	return lock;
   1030}
   1031
   1032void
   1033cifs_del_lock_waiters(struct cifsLockInfo *lock)
   1034{
   1035	struct cifsLockInfo *li, *tmp;
   1036	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
   1037		list_del_init(&li->blist);
   1038		wake_up(&li->block_q);
   1039	}
   1040}
   1041
   1042#define CIFS_LOCK_OP	0
   1043#define CIFS_READ_OP	1
   1044#define CIFS_WRITE_OP	2
   1045
   1046/* @rw_check : 0 - no op, 1 - read, 2 - write */
   1047static bool
   1048cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
   1049			    __u64 length, __u8 type, __u16 flags,
   1050			    struct cifsFileInfo *cfile,
   1051			    struct cifsLockInfo **conf_lock, int rw_check)
   1052{
   1053	struct cifsLockInfo *li;
   1054	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
   1055	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
   1056
   1057	list_for_each_entry(li, &fdlocks->locks, llist) {
   1058		if (offset + length <= li->offset ||
   1059		    offset >= li->offset + li->length)
   1060			continue;
   1061		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
   1062		    server->ops->compare_fids(cfile, cur_cfile)) {
   1063			/* shared lock prevents write op through the same fid */
   1064			if (!(li->type & server->vals->shared_lock_type) ||
   1065			    rw_check != CIFS_WRITE_OP)
   1066				continue;
   1067		}
   1068		if ((type & server->vals->shared_lock_type) &&
   1069		    ((server->ops->compare_fids(cfile, cur_cfile) &&
   1070		     current->tgid == li->pid) || type == li->type))
   1071			continue;
   1072		if (rw_check == CIFS_LOCK_OP &&
   1073		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
   1074		    server->ops->compare_fids(cfile, cur_cfile))
   1075			continue;
   1076		if (conf_lock)
   1077			*conf_lock = li;
   1078		return true;
   1079	}
   1080	return false;
   1081}
   1082
   1083bool
   1084cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
   1085			__u8 type, __u16 flags,
   1086			struct cifsLockInfo **conf_lock, int rw_check)
   1087{
   1088	bool rc = false;
   1089	struct cifs_fid_locks *cur;
   1090	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
   1091
   1092	list_for_each_entry(cur, &cinode->llist, llist) {
   1093		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
   1094						 flags, cfile, conf_lock,
   1095						 rw_check);
   1096		if (rc)
   1097			break;
   1098	}
   1099
   1100	return rc;
   1101}
   1102
   1103/*
    1104 * Check if there is another lock that prevents us from setting the lock
    1105 * (mandatory style). If such a lock exists, update the flock structure with
    1106 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
    1107 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
    1108 * request to the server or 1 otherwise.
   1109 */
   1110static int
   1111cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
   1112	       __u8 type, struct file_lock *flock)
   1113{
   1114	int rc = 0;
   1115	struct cifsLockInfo *conf_lock;
   1116	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
   1117	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
   1118	bool exist;
   1119
   1120	down_read(&cinode->lock_sem);
   1121
   1122	exist = cifs_find_lock_conflict(cfile, offset, length, type,
   1123					flock->fl_flags, &conf_lock,
   1124					CIFS_LOCK_OP);
   1125	if (exist) {
   1126		flock->fl_start = conf_lock->offset;
   1127		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
   1128		flock->fl_pid = conf_lock->pid;
   1129		if (conf_lock->type & server->vals->shared_lock_type)
   1130			flock->fl_type = F_RDLCK;
   1131		else
   1132			flock->fl_type = F_WRLCK;
   1133	} else if (!cinode->can_cache_brlcks)
   1134		rc = 1;
   1135	else
   1136		flock->fl_type = F_UNLCK;
   1137
   1138	up_read(&cinode->lock_sem);
   1139	return rc;
   1140}
   1141
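
cifs_lock_test() is the backend of an F_GETLK-style probe: report the conflicting lock if one exists, otherwise answer F_UNLCK. The same behaviour can be observed from userspace against any filesystem; the file name below is arbitrary:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct flock fl = {
		.l_type = F_WRLCK, .l_whence = SEEK_SET,
		.l_start = 0, .l_len = 100,
	};
	int fd = open("/tmp/lock-probe", O_RDWR | O_CREAT, 0600);

	if (fd < 0 || fcntl(fd, F_GETLK, &fl) < 0)
		return 1;
	/* On conflict, fl now carries the blocker's type/start/len/pid. */
	printf("conflict: %s\n", fl.l_type == F_UNLCK ? "none" : "yes");
	close(fd);
	return 0;
}
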
   1142static void
   1143cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
   1144{
   1145	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
   1146	cifs_down_write(&cinode->lock_sem);
   1147	list_add_tail(&lock->llist, &cfile->llist->locks);
   1148	up_write(&cinode->lock_sem);
   1149}
   1150
   1151/*
   1152 * Set the byte-range lock (mandatory style). Returns:
   1153 * 1) 0, if we set the lock and don't need to request to the server;
   1154 * 2) 1, if no locks prevent us but we need to request to the server;
   1155 * 3) -EACCES, if there is a lock that prevents us and wait is false.
   1156 */
   1157static int
   1158cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
   1159		 bool wait)
   1160{
   1161	struct cifsLockInfo *conf_lock;
   1162	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
   1163	bool exist;
   1164	int rc = 0;
   1165
   1166try_again:
   1167	exist = false;
   1168	cifs_down_write(&cinode->lock_sem);
   1169
   1170	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
   1171					lock->type, lock->flags, &conf_lock,
   1172					CIFS_LOCK_OP);
   1173	if (!exist && cinode->can_cache_brlcks) {
   1174		list_add_tail(&lock->llist, &cfile->llist->locks);
   1175		up_write(&cinode->lock_sem);
   1176		return rc;
   1177	}
   1178
   1179	if (!exist)
   1180		rc = 1;
   1181	else if (!wait)
   1182		rc = -EACCES;
   1183	else {
   1184		list_add_tail(&lock->blist, &conf_lock->blist);
   1185		up_write(&cinode->lock_sem);
   1186		rc = wait_event_interruptible(lock->block_q,
   1187					(lock->blist.prev == &lock->blist) &&
   1188					(lock->blist.next == &lock->blist));
   1189		if (!rc)
   1190			goto try_again;
   1191		cifs_down_write(&cinode->lock_sem);
   1192		list_del_init(&lock->blist);
   1193	}
   1194
   1195	up_write(&cinode->lock_sem);
   1196	return rc;
   1197}
   1198
   1199/*
    1200 * Check if there is another lock that prevents us from setting the lock
    1201 * (posix style). If such a lock exists, update the flock structure with
    1202 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
    1203 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
    1204 * request to the server or 1 otherwise.
   1205 */
   1206static int
   1207cifs_posix_lock_test(struct file *file, struct file_lock *flock)
   1208{
   1209	int rc = 0;
   1210	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
   1211	unsigned char saved_type = flock->fl_type;
   1212
   1213	if ((flock->fl_flags & FL_POSIX) == 0)
   1214		return 1;
   1215
   1216	down_read(&cinode->lock_sem);
   1217	posix_test_lock(file, flock);
   1218
   1219	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
   1220		flock->fl_type = saved_type;
   1221		rc = 1;
   1222	}
   1223
   1224	up_read(&cinode->lock_sem);
   1225	return rc;
   1226}
   1227
   1228/*
   1229 * Set the byte-range lock (posix style). Returns:
   1230 * 1) <0, if the error occurs while setting the lock;
   1231 * 2) 0, if we set the lock and don't need to request to the server;
   1232 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
   1233 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
   1234 */
   1235static int
   1236cifs_posix_lock_set(struct file *file, struct file_lock *flock)
   1237{
   1238	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
   1239	int rc = FILE_LOCK_DEFERRED + 1;
   1240
   1241	if ((flock->fl_flags & FL_POSIX) == 0)
   1242		return rc;
   1243
   1244	cifs_down_write(&cinode->lock_sem);
   1245	if (!cinode->can_cache_brlcks) {
   1246		up_write(&cinode->lock_sem);
   1247		return rc;
   1248	}
   1249
   1250	rc = posix_lock_file(file, flock, NULL);
   1251	up_write(&cinode->lock_sem);
   1252	return rc;
   1253}
   1254
   1255int
   1256cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
   1257{
   1258	unsigned int xid;
   1259	int rc = 0, stored_rc;
   1260	struct cifsLockInfo *li, *tmp;
   1261	struct cifs_tcon *tcon;
   1262	unsigned int num, max_num, max_buf;
   1263	LOCKING_ANDX_RANGE *buf, *cur;
   1264	static const int types[] = {
   1265		LOCKING_ANDX_LARGE_FILES,
   1266		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
   1267	};
   1268	int i;
   1269
   1270	xid = get_xid();
   1271	tcon = tlink_tcon(cfile->tlink);
   1272
   1273	/*
   1274	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
   1275	 * and check it before using.
   1276	 */
   1277	max_buf = tcon->ses->server->maxBuf;
   1278	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
   1279		free_xid(xid);
   1280		return -EINVAL;
   1281	}
   1282
   1283	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
   1284		     PAGE_SIZE);
   1285	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
   1286			PAGE_SIZE);
   1287	max_num = (max_buf - sizeof(struct smb_hdr)) /
   1288						sizeof(LOCKING_ANDX_RANGE);
   1289	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
   1290	if (!buf) {
   1291		free_xid(xid);
   1292		return -ENOMEM;
   1293	}
   1294
   1295	for (i = 0; i < 2; i++) {
   1296		cur = buf;
   1297		num = 0;
   1298		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
   1299			if (li->type != types[i])
   1300				continue;
   1301			cur->Pid = cpu_to_le16(li->pid);
   1302			cur->LengthLow = cpu_to_le32((u32)li->length);
   1303			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
   1304			cur->OffsetLow = cpu_to_le32((u32)li->offset);
   1305			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
   1306			if (++num == max_num) {
   1307				stored_rc = cifs_lockv(xid, tcon,
   1308						       cfile->fid.netfid,
   1309						       (__u8)li->type, 0, num,
   1310						       buf);
   1311				if (stored_rc)
   1312					rc = stored_rc;
   1313				cur = buf;
   1314				num = 0;
   1315			} else
   1316				cur++;
   1317		}
   1318
   1319		if (num) {
   1320			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
   1321					       (__u8)types[i], 0, num, buf);
   1322			if (stored_rc)
   1323				rc = stored_rc;
   1324		}
   1325	}
   1326
   1327	kfree(buf);
   1328	free_xid(xid);
   1329	return rc;
   1330}
   1331
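
The batching arithmetic above caps each LOCKING_ANDX request at one page and divides the remaining room into fixed-size ranges. A worked example, assuming an illustrative 32-byte smb_hdr (the real size depends on the kernel's struct layout) and the 20-byte on-the-wire LOCKING_ANDX_RANGE:

#include <stdio.h>

int main(void)
{
	unsigned int max_buf = 16384;	/* e.g. server->maxBuf */
	unsigned int hdr = 32;		/* assumed sizeof(struct smb_hdr) */
	unsigned int range = 20;	/* LOCKING_ANDX_RANGE, large-file format */
	unsigned int page = 4096;

	if (max_buf - hdr > page)	/* min_t(..., max_buf - hdr, PAGE_SIZE) */
		max_buf = page;
	printf("locks per request: %u\n", (max_buf - hdr) / range); /* 203 */
	return 0;
}
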
   1332static __u32
   1333hash_lockowner(fl_owner_t owner)
   1334{
   1335	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
   1336}
   1337
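
hash_lockowner() folds the pointer-sized fl_owner_t into the 32-bit pid field the SMB lock request carries, XORing with a boot-time secret so on-the-wire values don't leak kernel pointers. A rough userspace analogue (hash32_ptr() in the kernel uses a multiplicative hash; the fold below is a simplified stand-in):

#include <stdint.h>
#include <stdio.h>

static uint32_t lock_secret = 0xdeadbeef;	/* kernel uses a random cifs_lock_secret */

static uint32_t hash_owner(const void *owner)
{
	uintptr_t p = (uintptr_t)owner;

	return lock_secret ^ (uint32_t)(p ^ (p >> 16));
}

int main(void)
{
	int token;

	printf("owner %p -> 0x%08x\n", (void *)&token, hash_owner(&token));
	return 0;
}
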
   1338struct lock_to_push {
   1339	struct list_head llist;
   1340	__u64 offset;
   1341	__u64 length;
   1342	__u32 pid;
   1343	__u16 netfid;
   1344	__u8 type;
   1345};
   1346
   1347static int
   1348cifs_push_posix_locks(struct cifsFileInfo *cfile)
   1349{
   1350	struct inode *inode = d_inode(cfile->dentry);
   1351	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
   1352	struct file_lock *flock;
   1353	struct file_lock_context *flctx = inode->i_flctx;
   1354	unsigned int count = 0, i;
   1355	int rc = 0, xid, type;
   1356	struct list_head locks_to_send, *el;
   1357	struct lock_to_push *lck, *tmp;
   1358	__u64 length;
   1359
   1360	xid = get_xid();
   1361
   1362	if (!flctx)
   1363		goto out;
   1364
   1365	spin_lock(&flctx->flc_lock);
   1366	list_for_each(el, &flctx->flc_posix) {
   1367		count++;
   1368	}
   1369	spin_unlock(&flctx->flc_lock);
   1370
   1371	INIT_LIST_HEAD(&locks_to_send);
   1372
   1373	/*
   1374	 * Allocating count locks is enough because no FL_POSIX locks can be
   1375	 * added to the list while we are holding cinode->lock_sem that
   1376	 * protects locking operations of this inode.
   1377	 */
   1378	for (i = 0; i < count; i++) {
   1379		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
   1380		if (!lck) {
   1381			rc = -ENOMEM;
   1382			goto err_out;
   1383		}
   1384		list_add_tail(&lck->llist, &locks_to_send);
   1385	}
   1386
   1387	el = locks_to_send.next;
   1388	spin_lock(&flctx->flc_lock);
   1389	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
   1390		if (el == &locks_to_send) {
   1391			/*
   1392			 * The list ended. We don't have enough allocated
   1393			 * structures - something is really wrong.
   1394			 */
   1395			cifs_dbg(VFS, "Can't push all brlocks!\n");
   1396			break;
   1397		}
   1398		length = cifs_flock_len(flock);
   1399		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
   1400			type = CIFS_RDLCK;
   1401		else
   1402			type = CIFS_WRLCK;
   1403		lck = list_entry(el, struct lock_to_push, llist);
   1404		lck->pid = hash_lockowner(flock->fl_owner);
   1405		lck->netfid = cfile->fid.netfid;
   1406		lck->length = length;
   1407		lck->type = type;
   1408		lck->offset = flock->fl_start;
   1409	}
   1410	spin_unlock(&flctx->flc_lock);
   1411
   1412	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
   1413		int stored_rc;
   1414
   1415		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
   1416					     lck->offset, lck->length, NULL,
   1417					     lck->type, 0);
   1418		if (stored_rc)
   1419			rc = stored_rc;
   1420		list_del(&lck->llist);
   1421		kfree(lck);
   1422	}
   1423
   1424out:
   1425	free_xid(xid);
   1426	return rc;
   1427err_out:
   1428	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
   1429		list_del(&lck->llist);
   1430		kfree(lck);
   1431	}
   1432	goto out;
   1433}
   1434
   1435static int
   1436cifs_push_locks(struct cifsFileInfo *cfile)
   1437{
   1438	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
   1439	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
   1440	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
   1441	int rc = 0;
   1442
   1443	/* we are going to update can_cache_brlcks here - need a write access */
   1444	cifs_down_write(&cinode->lock_sem);
   1445	if (!cinode->can_cache_brlcks) {
   1446		up_write(&cinode->lock_sem);
   1447		return rc;
   1448	}
   1449
   1450	if (cap_unix(tcon->ses) &&
   1451	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
   1452	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
   1453		rc = cifs_push_posix_locks(cfile);
   1454	else
   1455		rc = tcon->ses->server->ops->push_mand_locks(cfile);
   1456
   1457	cinode->can_cache_brlcks = false;
   1458	up_write(&cinode->lock_sem);
   1459	return rc;
   1460}
   1461
   1462static void
   1463cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
   1464		bool *wait_flag, struct TCP_Server_Info *server)
   1465{
   1466	if (flock->fl_flags & FL_POSIX)
   1467		cifs_dbg(FYI, "Posix\n");
   1468	if (flock->fl_flags & FL_FLOCK)
   1469		cifs_dbg(FYI, "Flock\n");
   1470	if (flock->fl_flags & FL_SLEEP) {
   1471		cifs_dbg(FYI, "Blocking lock\n");
   1472		*wait_flag = true;
   1473	}
   1474	if (flock->fl_flags & FL_ACCESS)
   1475		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
   1476	if (flock->fl_flags & FL_LEASE)
   1477		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
   1478	if (flock->fl_flags &
   1479	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
   1480	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
   1481		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
   1482
   1483	*type = server->vals->large_lock_type;
   1484	if (flock->fl_type == F_WRLCK) {
   1485		cifs_dbg(FYI, "F_WRLCK\n");
   1486		*type |= server->vals->exclusive_lock_type;
   1487		*lock = 1;
   1488	} else if (flock->fl_type == F_UNLCK) {
   1489		cifs_dbg(FYI, "F_UNLCK\n");
   1490		*type |= server->vals->unlock_lock_type;
   1491		*unlock = 1;
   1492		/* Check if unlock includes more than one lock range */
   1493	} else if (flock->fl_type == F_RDLCK) {
   1494		cifs_dbg(FYI, "F_RDLCK\n");
   1495		*type |= server->vals->shared_lock_type;
   1496		*lock = 1;
   1497	} else if (flock->fl_type == F_EXLCK) {
   1498		cifs_dbg(FYI, "F_EXLCK\n");
   1499		*type |= server->vals->exclusive_lock_type;
   1500		*lock = 1;
   1501	} else if (flock->fl_type == F_SHLCK) {
   1502		cifs_dbg(FYI, "F_SHLCK\n");
   1503		*type |= server->vals->shared_lock_type;
   1504		*lock = 1;
   1505	} else
   1506		cifs_dbg(FYI, "Unknown type of lock\n");
   1507}
   1508
   1509static int
   1510cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
   1511	   bool wait_flag, bool posix_lck, unsigned int xid)
   1512{
   1513	int rc = 0;
   1514	__u64 length = cifs_flock_len(flock);
   1515	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
   1516	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
   1517	struct TCP_Server_Info *server = tcon->ses->server;
   1518	__u16 netfid = cfile->fid.netfid;
   1519
   1520	if (posix_lck) {
   1521		int posix_lock_type;
   1522
   1523		rc = cifs_posix_lock_test(file, flock);
   1524		if (!rc)
   1525			return rc;
   1526
   1527		if (type & server->vals->shared_lock_type)
   1528			posix_lock_type = CIFS_RDLCK;
   1529		else
   1530			posix_lock_type = CIFS_WRLCK;
   1531		rc = CIFSSMBPosixLock(xid, tcon, netfid,
   1532				      hash_lockowner(flock->fl_owner),
   1533				      flock->fl_start, length, flock,
   1534				      posix_lock_type, wait_flag);
   1535		return rc;
   1536	}
   1537
   1538	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
   1539	if (!rc)
   1540		return rc;
   1541
   1542	/* BB we could chain these into one lock request BB */
   1543	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
   1544				    1, 0, false);
   1545	if (rc == 0) {
   1546		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
   1547					    type, 0, 1, false);
   1548		flock->fl_type = F_UNLCK;
   1549		if (rc != 0)
   1550			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
   1551				 rc);
   1552		return 0;
   1553	}
   1554
   1555	if (type & server->vals->shared_lock_type) {
   1556		flock->fl_type = F_WRLCK;
   1557		return 0;
   1558	}
   1559
   1560	type &= ~server->vals->exclusive_lock_type;
   1561
   1562	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
   1563				    type | server->vals->shared_lock_type,
   1564				    1, 0, false);
   1565	if (rc == 0) {
   1566		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
   1567			type | server->vals->shared_lock_type, 0, 1, false);
   1568		flock->fl_type = F_RDLCK;
   1569		if (rc != 0)
   1570			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
   1571				 rc);
   1572	} else
   1573		flock->fl_type = F_WRLCK;
   1574
   1575	return 0;
   1576}
   1577
   1578void
   1579cifs_move_llist(struct list_head *source, struct list_head *dest)
   1580{
   1581	struct list_head *li, *tmp;
   1582	list_for_each_safe(li, tmp, source)
   1583		list_move(li, dest);
   1584}
   1585
   1586void
   1587cifs_free_llist(struct list_head *llist)
   1588{
   1589	struct cifsLockInfo *li, *tmp;
   1590	list_for_each_entry_safe(li, tmp, llist, llist) {
   1591		cifs_del_lock_waiters(li);
   1592		list_del(&li->llist);
   1593		kfree(li);
   1594	}
   1595}
   1596
   1597int
   1598cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
   1599		  unsigned int xid)
   1600{
   1601	int rc = 0, stored_rc;
   1602	static const int types[] = {
   1603		LOCKING_ANDX_LARGE_FILES,
   1604		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
   1605	};
   1606	unsigned int i;
   1607	unsigned int max_num, num, max_buf;
   1608	LOCKING_ANDX_RANGE *buf, *cur;
   1609	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
   1610	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
   1611	struct cifsLockInfo *li, *tmp;
   1612	__u64 length = cifs_flock_len(flock);
   1613	struct list_head tmp_llist;
   1614
   1615	INIT_LIST_HEAD(&tmp_llist);
   1616
   1617	/*
   1618	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
   1619	 * and check it before using.
   1620	 */
   1621	max_buf = tcon->ses->server->maxBuf;
   1622	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
   1623		return -EINVAL;
   1624
   1625	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
   1626		     PAGE_SIZE);
   1627	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
   1628			PAGE_SIZE);
   1629	max_num = (max_buf - sizeof(struct smb_hdr)) /
   1630						sizeof(LOCKING_ANDX_RANGE);
   1631	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
   1632	if (!buf)
   1633		return -ENOMEM;
   1634
   1635	cifs_down_write(&cinode->lock_sem);
   1636	for (i = 0; i < 2; i++) {
   1637		cur = buf;
   1638		num = 0;
   1639		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
   1640			if (flock->fl_start > li->offset ||
   1641			    (flock->fl_start + length) <
   1642			    (li->offset + li->length))
   1643				continue;
   1644			if (current->tgid != li->pid)
   1645				continue;
   1646			if (types[i] != li->type)
   1647				continue;
   1648			if (cinode->can_cache_brlcks) {
   1649				/*
   1650				 * We can cache brlock requests - simply remove
   1651				 * a lock from the file's list.
   1652				 */
   1653				list_del(&li->llist);
   1654				cifs_del_lock_waiters(li);
   1655				kfree(li);
   1656				continue;
   1657			}
   1658			cur->Pid = cpu_to_le16(li->pid);
   1659			cur->LengthLow = cpu_to_le32((u32)li->length);
   1660			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
   1661			cur->OffsetLow = cpu_to_le32((u32)li->offset);
   1662			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
   1663			/*
   1664			 * We need to save a lock here to let us add it again to
   1665			 * the file's list if the unlock range request fails on
   1666			 * the server.
   1667			 */
   1668			list_move(&li->llist, &tmp_llist);
   1669			if (++num == max_num) {
   1670				stored_rc = cifs_lockv(xid, tcon,
   1671						       cfile->fid.netfid,
   1672						       li->type, num, 0, buf);
   1673				if (stored_rc) {
   1674					/*
   1675					 * We failed on the unlock range
   1676					 * request - add all locks from the tmp
   1677					 * list to the head of the file's list.
   1678					 */
   1679					cifs_move_llist(&tmp_llist,
   1680							&cfile->llist->locks);
   1681					rc = stored_rc;
   1682				} else
   1683					/*
    1684					 * The unlock range request succeeded -
   1685					 * free the tmp list.
   1686					 */
   1687					cifs_free_llist(&tmp_llist);
   1688				cur = buf;
   1689				num = 0;
   1690			} else
   1691				cur++;
   1692		}
   1693		if (num) {
   1694			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
   1695					       types[i], num, 0, buf);
   1696			if (stored_rc) {
   1697				cifs_move_llist(&tmp_llist,
   1698						&cfile->llist->locks);
   1699				rc = stored_rc;
   1700			} else
   1701				cifs_free_llist(&tmp_llist);
   1702		}
   1703	}
   1704
   1705	up_write(&cinode->lock_sem);
   1706	kfree(buf);
   1707	return rc;
   1708}
   1709
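/*
 * Set or clear a byte-range lock. With posix_lck set this maps directly to
 * CIFSSMBPosixLock. Otherwise, if the lock can be cached locally
 * (cifs_lock_add_if() returns 0) no request is sent; if it cannot, any
 * read-only lease is broken and the lock is pushed to the server with the
 * mandatory-lock op before being added to the local list.
 */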
   1710static int
   1711cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
   1712	   bool wait_flag, bool posix_lck, int lock, int unlock,
   1713	   unsigned int xid)
   1714{
   1715	int rc = 0;
   1716	__u64 length = cifs_flock_len(flock);
   1717	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
   1718	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
   1719	struct TCP_Server_Info *server = tcon->ses->server;
   1720	struct inode *inode = d_inode(cfile->dentry);
   1721
   1722	if (posix_lck) {
   1723		int posix_lock_type;
   1724
   1725		rc = cifs_posix_lock_set(file, flock);
   1726		if (rc <= FILE_LOCK_DEFERRED)
   1727			return rc;
   1728
   1729		if (type & server->vals->shared_lock_type)
   1730			posix_lock_type = CIFS_RDLCK;
   1731		else
   1732			posix_lock_type = CIFS_WRLCK;
   1733
   1734		if (unlock == 1)
   1735			posix_lock_type = CIFS_UNLCK;
   1736
   1737		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
   1738				      hash_lockowner(flock->fl_owner),
   1739				      flock->fl_start, length,
   1740				      NULL, posix_lock_type, wait_flag);
   1741		goto out;
   1742	}
   1743
   1744	if (lock) {
   1745		struct cifsLockInfo *lock;
   1746
   1747		lock = cifs_lock_init(flock->fl_start, length, type,
   1748				      flock->fl_flags);
   1749		if (!lock)
   1750			return -ENOMEM;
   1751
   1752		rc = cifs_lock_add_if(cfile, lock, wait_flag);
   1753		if (rc < 0) {
   1754			kfree(lock);
   1755			return rc;
   1756		}
   1757		if (!rc)
   1758			goto out;
   1759
   1760		/*
   1761		 * Windows 7 server can delay breaking lease from read to None
   1762		 * if we set a byte-range lock on a file - break it explicitly
   1763		 * before sending the lock to the server to be sure the next
    1764		 * read won't conflict with non-overlapping locks due to
    1765		 * page reading.
   1766		 */
   1767		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
   1768					CIFS_CACHE_READ(CIFS_I(inode))) {
   1769			cifs_zap_mapping(inode);
   1770			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
   1771				 inode);
   1772			CIFS_I(inode)->oplock = 0;
   1773		}
   1774
   1775		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
   1776					    type, 1, 0, wait_flag);
   1777		if (rc) {
   1778			kfree(lock);
   1779			return rc;
   1780		}
   1781
   1782		cifs_lock_add(cfile, lock);
   1783	} else if (unlock)
   1784		rc = server->ops->mand_unlock_range(cfile, flock, xid);
   1785
   1786out:
   1787	if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
   1788		/*
   1789		 * If this is a request to remove all locks because we
   1790		 * are closing the file, it doesn't matter if the
   1791		 * unlocking failed as both cifs.ko and the SMB server
   1792		 * remove the lock on file close
   1793		 */
   1794		if (rc) {
   1795			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
   1796			if (!(flock->fl_flags & FL_CLOSE))
   1797				return rc;
   1798		}
   1799		rc = locks_lock_file_wait(file, flock);
   1800	}
   1801	return rc;
   1802}
   1803
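/*
 * flock(2) entry point. Only FL_FLOCK requests are accepted; the request
 * is decoded by cifs_read_flock() and handed to cifs_setlk().
 */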
   1804int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
   1805{
   1806	int rc, xid;
   1807	int lock = 0, unlock = 0;
   1808	bool wait_flag = false;
   1809	bool posix_lck = false;
   1810	struct cifs_sb_info *cifs_sb;
   1811	struct cifs_tcon *tcon;
   1812	struct cifsFileInfo *cfile;
   1813	__u32 type;
   1814
   1815	rc = -EACCES;
   1816	xid = get_xid();
   1817
   1818	if (!(fl->fl_flags & FL_FLOCK))
   1819		return -ENOLCK;
   1820
   1821	cfile = (struct cifsFileInfo *)file->private_data;
   1822	tcon = tlink_tcon(cfile->tlink);
   1823
   1824	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
   1825			tcon->ses->server);
   1826	cifs_sb = CIFS_FILE_SB(file);
   1827
   1828	if (cap_unix(tcon->ses) &&
   1829	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
   1830	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
   1831		posix_lck = true;
   1832
   1833	if (!lock && !unlock) {
   1834		/*
   1835		 * if no lock or unlock then nothing to do since we do not
   1836		 * know what it is
   1837		 */
   1838		free_xid(xid);
   1839		return -EOPNOTSUPP;
   1840	}
   1841
   1842	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
   1843			xid);
   1844	free_xid(xid);
   1845	return rc;
   1848}
   1849
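/*
 * fcntl byte-range lock entry point. F_GETLK requests are answered by
 * cifs_getlk(); set requests are handed to cifs_setlk().
 */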
   1850int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
   1851{
   1852	int rc, xid;
   1853	int lock = 0, unlock = 0;
   1854	bool wait_flag = false;
   1855	bool posix_lck = false;
   1856	struct cifs_sb_info *cifs_sb;
   1857	struct cifs_tcon *tcon;
   1858	struct cifsFileInfo *cfile;
   1859	__u32 type;
   1860
   1861	rc = -EACCES;
   1862	xid = get_xid();
   1863
   1864	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
   1865		 cmd, flock->fl_flags, flock->fl_type,
   1866		 flock->fl_start, flock->fl_end);
   1867
   1868	cfile = (struct cifsFileInfo *)file->private_data;
   1869	tcon = tlink_tcon(cfile->tlink);
   1870
   1871	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
   1872			tcon->ses->server);
   1873	cifs_sb = CIFS_FILE_SB(file);
   1874	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
   1875
   1876	if (cap_unix(tcon->ses) &&
   1877	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
   1878	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
   1879		posix_lck = true;
   1880	/*
   1881	 * BB add code here to normalize offset and length to account for
    1882	 * negative length which we cannot accept over the wire.
   1883	 */
   1884	if (IS_GETLK(cmd)) {
   1885		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
   1886		free_xid(xid);
   1887		return rc;
   1888	}
   1889
   1890	if (!lock && !unlock) {
   1891		/*
   1892		 * if no lock or unlock then nothing to do since we do not
   1893		 * know what it is
   1894		 */
   1895		free_xid(xid);
   1896		return -EOPNOTSUPP;
   1897	}
   1898
   1899	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
   1900			xid);
   1901	free_xid(xid);
   1902	return rc;
   1903}
   1904
   1905/*
   1906 * update the file size (if needed) after a write. Should be called with
   1907 * the inode->i_lock held
   1908 */
   1909void
   1910cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
   1911		      unsigned int bytes_written)
   1912{
   1913	loff_t end_of_write = offset + bytes_written;
   1914
   1915	if (end_of_write > cifsi->server_eof)
   1916		cifsi->server_eof = end_of_write;
   1917}
   1918
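/*
 * Write @write_size bytes from @write_data to the server at *@offset, in
 * chunks of at most wp_retry_size() bytes, retrying -EAGAIN and reopening
 * an invalidated handle as needed. On success *@offset is advanced and the
 * cached EOF/i_size are updated.
 */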
   1919static ssize_t
   1920cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
   1921	   size_t write_size, loff_t *offset)
   1922{
   1923	int rc = 0;
   1924	unsigned int bytes_written = 0;
   1925	unsigned int total_written;
   1926	struct cifs_tcon *tcon;
   1927	struct TCP_Server_Info *server;
   1928	unsigned int xid;
   1929	struct dentry *dentry = open_file->dentry;
   1930	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
   1931	struct cifs_io_parms io_parms = {0};
   1932
   1933	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
   1934		 write_size, *offset, dentry);
   1935
   1936	tcon = tlink_tcon(open_file->tlink);
   1937	server = tcon->ses->server;
   1938
   1939	if (!server->ops->sync_write)
   1940		return -ENOSYS;
   1941
   1942	xid = get_xid();
   1943
   1944	for (total_written = 0; write_size > total_written;
   1945	     total_written += bytes_written) {
   1946		rc = -EAGAIN;
   1947		while (rc == -EAGAIN) {
   1948			struct kvec iov[2];
   1949			unsigned int len;
   1950
   1951			if (open_file->invalidHandle) {
   1952				/* we could deadlock if we called
   1953				   filemap_fdatawait from here so tell
   1954				   reopen_file not to flush data to
   1955				   server now */
   1956				rc = cifs_reopen_file(open_file, false);
   1957				if (rc != 0)
   1958					break;
   1959			}
   1960
   1961			len = min(server->ops->wp_retry_size(d_inode(dentry)),
   1962				  (unsigned int)write_size - total_written);
   1963			/* iov[0] is reserved for smb header */
   1964			iov[1].iov_base = (char *)write_data + total_written;
   1965			iov[1].iov_len = len;
   1966			io_parms.pid = pid;
   1967			io_parms.tcon = tcon;
   1968			io_parms.offset = *offset;
   1969			io_parms.length = len;
   1970			rc = server->ops->sync_write(xid, &open_file->fid,
   1971					&io_parms, &bytes_written, iov, 1);
   1972		}
   1973		if (rc || (bytes_written == 0)) {
   1974			if (total_written)
   1975				break;
   1976			else {
   1977				free_xid(xid);
   1978				return rc;
   1979			}
   1980		} else {
   1981			spin_lock(&d_inode(dentry)->i_lock);
   1982			cifs_update_eof(cifsi, *offset, bytes_written);
   1983			spin_unlock(&d_inode(dentry)->i_lock);
   1984			*offset += bytes_written;
   1985		}
   1986	}
   1987
   1988	cifs_stats_bytes_written(tcon, total_written);
   1989
   1990	if (total_written > 0) {
   1991		spin_lock(&d_inode(dentry)->i_lock);
   1992		if (*offset > d_inode(dentry)->i_size) {
   1993			i_size_write(d_inode(dentry), *offset);
   1994			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
   1995		}
   1996		spin_unlock(&d_inode(dentry)->i_lock);
   1997	}
   1998	mark_inode_dirty_sync(d_inode(dentry));
   1999	free_xid(xid);
   2000	return total_written;
   2001}
   2002
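/*
 * Find an open handle on this inode that is usable for reading and take a
 * reference on it. Returns NULL if only write-only or invalidated handles
 * remain.
 */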
   2003struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
   2004					bool fsuid_only)
   2005{
   2006	struct cifsFileInfo *open_file = NULL;
   2007	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
   2008
   2009	/* only filter by fsuid on multiuser mounts */
   2010	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
   2011		fsuid_only = false;
   2012
   2013	spin_lock(&cifs_inode->open_file_lock);
   2014	/* we could simply get the first_list_entry since write-only entries
    2015	   are always at the end of the list, but since the first entry might
   2016	   have a close pending, we go through the whole list */
   2017	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
   2018		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
   2019			continue;
   2020		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
    2021			if (!open_file->invalidHandle) {
   2022				/* found a good file */
   2023				/* lock it so it will not be closed on us */
   2024				cifsFileInfo_get(open_file);
   2025				spin_unlock(&cifs_inode->open_file_lock);
   2026				return open_file;
   2027			} /* else might as well continue, and look for
   2028			     another, or simply have the caller reopen it
   2029			     again rather than trying to fix this handle */
   2030		} else /* write only file */
   2031			break; /* write only files are last so must be done */
   2032	}
   2033	spin_unlock(&cifs_inode->open_file_lock);
   2034	return NULL;
   2035}
   2036
   2037/* Return -EBADF if no handle is found and general rc otherwise */
   2038int
   2039cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
   2040		       struct cifsFileInfo **ret_file)
   2041{
   2042	struct cifsFileInfo *open_file, *inv_file = NULL;
   2043	struct cifs_sb_info *cifs_sb;
   2044	bool any_available = false;
   2045	int rc = -EBADF;
   2046	unsigned int refind = 0;
   2047	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
   2048	bool with_delete = flags & FIND_WR_WITH_DELETE;
   2049	*ret_file = NULL;
   2050
   2051	/*
   2052	 * Having a null inode here (because mapping->host was set to zero by
    2053	 * the VFS or MM) should not happen but we had reports of an oops (due
   2054	 * to it being zero) during stress testcases so we need to check for it
   2055	 */
   2056
   2057	if (cifs_inode == NULL) {
    2058		cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
   2059		dump_stack();
   2060		return rc;
   2061	}
   2062
   2063	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
   2064
   2065	/* only filter by fsuid on multiuser mounts */
   2066	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
   2067		fsuid_only = false;
   2068
   2069	spin_lock(&cifs_inode->open_file_lock);
   2070refind_writable:
   2071	if (refind > MAX_REOPEN_ATT) {
   2072		spin_unlock(&cifs_inode->open_file_lock);
   2073		return rc;
   2074	}
   2075	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
   2076		if (!any_available && open_file->pid != current->tgid)
   2077			continue;
   2078		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
   2079			continue;
   2080		if (with_delete && !(open_file->fid.access & DELETE))
   2081			continue;
   2082		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
   2083			if (!open_file->invalidHandle) {
   2084				/* found a good writable file */
   2085				cifsFileInfo_get(open_file);
   2086				spin_unlock(&cifs_inode->open_file_lock);
   2087				*ret_file = open_file;
   2088				return 0;
   2089			} else {
   2090				if (!inv_file)
   2091					inv_file = open_file;
   2092			}
   2093		}
   2094	}
    2095	/* couldn't find usable FH with same pid, try any available */
   2096	if (!any_available) {
   2097		any_available = true;
   2098		goto refind_writable;
   2099	}
   2100
   2101	if (inv_file) {
   2102		any_available = false;
   2103		cifsFileInfo_get(inv_file);
   2104	}
   2105
   2106	spin_unlock(&cifs_inode->open_file_lock);
   2107
   2108	if (inv_file) {
   2109		rc = cifs_reopen_file(inv_file, false);
   2110		if (!rc) {
   2111			*ret_file = inv_file;
   2112			return 0;
   2113		}
   2114
   2115		spin_lock(&cifs_inode->open_file_lock);
   2116		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
   2117		spin_unlock(&cifs_inode->open_file_lock);
   2118		cifsFileInfo_put(inv_file);
   2119		++refind;
   2120		inv_file = NULL;
   2121		spin_lock(&cifs_inode->open_file_lock);
   2122		goto refind_writable;
   2123	}
   2124
   2125	return rc;
   2126}
   2127
   2128struct cifsFileInfo *
   2129find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
   2130{
   2131	struct cifsFileInfo *cfile;
   2132	int rc;
   2133
   2134	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
   2135	if (rc)
   2136		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
   2137
   2138	return cfile;
   2139}
   2140
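/*
 * Look up an open file on @tcon by full path name and return a writable
 * handle for it via cifs_get_writable_file(), or -ENOENT if the path is
 * not open.
 */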
   2141int
   2142cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
   2143		       int flags,
   2144		       struct cifsFileInfo **ret_file)
   2145{
   2146	struct cifsFileInfo *cfile;
   2147	void *page = alloc_dentry_path();
   2148
   2149	*ret_file = NULL;
   2150
   2151	spin_lock(&tcon->open_file_lock);
   2152	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
   2153		struct cifsInodeInfo *cinode;
   2154		const char *full_path = build_path_from_dentry(cfile->dentry, page);
   2155		if (IS_ERR(full_path)) {
   2156			spin_unlock(&tcon->open_file_lock);
   2157			free_dentry_path(page);
   2158			return PTR_ERR(full_path);
   2159		}
   2160		if (strcmp(full_path, name))
   2161			continue;
   2162
   2163		cinode = CIFS_I(d_inode(cfile->dentry));
   2164		spin_unlock(&tcon->open_file_lock);
   2165		free_dentry_path(page);
   2166		return cifs_get_writable_file(cinode, flags, ret_file);
   2167	}
   2168
   2169	spin_unlock(&tcon->open_file_lock);
   2170	free_dentry_path(page);
   2171	return -ENOENT;
   2172}
   2173
   2174int
   2175cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
   2176		       struct cifsFileInfo **ret_file)
   2177{
   2178	struct cifsFileInfo *cfile;
   2179	void *page = alloc_dentry_path();
   2180
   2181	*ret_file = NULL;
   2182
   2183	spin_lock(&tcon->open_file_lock);
   2184	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
   2185		struct cifsInodeInfo *cinode;
   2186		const char *full_path = build_path_from_dentry(cfile->dentry, page);
   2187		if (IS_ERR(full_path)) {
   2188			spin_unlock(&tcon->open_file_lock);
   2189			free_dentry_path(page);
   2190			return PTR_ERR(full_path);
   2191		}
   2192		if (strcmp(full_path, name))
   2193			continue;
   2194
   2195		cinode = CIFS_I(d_inode(cfile->dentry));
   2196		spin_unlock(&tcon->open_file_lock);
   2197		free_dentry_path(page);
   2198		*ret_file = find_readable_file(cinode, 0);
   2199		return *ret_file ? 0 : -ENOENT;
   2200	}
   2201
   2202	spin_unlock(&tcon->open_file_lock);
   2203	free_dentry_path(page);
   2204	return -ENOENT;
   2205}
   2206
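/*
 * Write the [from, to) range of a single page cache page back to the
 * server using any writable handle on the inode. The range is clamped so
 * that the write never extends the file.
 */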
   2207static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
   2208{
   2209	struct address_space *mapping = page->mapping;
   2210	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
   2211	char *write_data;
   2212	int rc = -EFAULT;
   2213	int bytes_written = 0;
   2214	struct inode *inode;
   2215	struct cifsFileInfo *open_file;
   2216
   2217	if (!mapping || !mapping->host)
   2218		return -EFAULT;
   2219
   2220	inode = page->mapping->host;
   2221
   2222	offset += (loff_t)from;
   2223	write_data = kmap(page);
   2224	write_data += from;
   2225
   2226	if ((to > PAGE_SIZE) || (from > to)) {
   2227		kunmap(page);
   2228		return -EIO;
   2229	}
   2230
   2231	/* racing with truncate? */
   2232	if (offset > mapping->host->i_size) {
   2233		kunmap(page);
   2234		return 0; /* don't care */
   2235	}
   2236
   2237	/* check to make sure that we are not extending the file */
   2238	if (mapping->host->i_size - offset < (loff_t)to)
   2239		to = (unsigned)(mapping->host->i_size - offset);
   2240
   2241	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
   2242				    &open_file);
   2243	if (!rc) {
   2244		bytes_written = cifs_write(open_file, open_file->pid,
   2245					   write_data, to - from, &offset);
   2246		cifsFileInfo_put(open_file);
   2247		/* Does mm or vfs already set times? */
   2248		inode->i_atime = inode->i_mtime = current_time(inode);
   2249		if ((bytes_written > 0) && (offset))
   2250			rc = 0;
   2251		else if (bytes_written < 0)
   2252			rc = bytes_written;
   2253		else
   2254			rc = -EFAULT;
   2255	} else {
   2256		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
   2257		if (!is_retryable_error(rc))
   2258			rc = -EIO;
   2259	}
   2260
   2261	kunmap(page);
   2262	return rc;
   2263}
   2264
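/*
 * Allocate a cifs_writedata and fill its page array with up to @tofind
 * dirty pages found in the mapping starting at *@index.
 */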
   2265static struct cifs_writedata *
   2266wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
   2267			  pgoff_t end, pgoff_t *index,
   2268			  unsigned int *found_pages)
   2269{
   2270	struct cifs_writedata *wdata;
   2271
   2272	wdata = cifs_writedata_alloc((unsigned int)tofind,
   2273				     cifs_writev_complete);
   2274	if (!wdata)
   2275		return NULL;
   2276
   2277	*found_pages = find_get_pages_range_tag(mapping, index, end,
   2278				PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
   2279	return wdata;
   2280}
   2281
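/*
 * Lock, tag for writeback, and keep the longest run of consecutive dirty
 * pages gathered by wdata_alloc_and_fillpages(), dropping references on
 * the pages that are not used. Returns the number of pages ready to send.
 */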
   2282static unsigned int
   2283wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
   2284		    struct address_space *mapping,
   2285		    struct writeback_control *wbc,
   2286		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
   2287{
   2288	unsigned int nr_pages = 0, i;
   2289	struct page *page;
   2290
   2291	for (i = 0; i < found_pages; i++) {
   2292		page = wdata->pages[i];
   2293		/*
   2294		 * At this point we hold neither the i_pages lock nor the
   2295		 * page lock: the page may be truncated or invalidated
   2296		 * (changing page->mapping to NULL), or even swizzled
   2297		 * back from swapper_space to tmpfs file mapping
   2298		 */
   2299
   2300		if (nr_pages == 0)
   2301			lock_page(page);
   2302		else if (!trylock_page(page))
   2303			break;
   2304
   2305		if (unlikely(page->mapping != mapping)) {
   2306			unlock_page(page);
   2307			break;
   2308		}
   2309
   2310		if (!wbc->range_cyclic && page->index > end) {
   2311			*done = true;
   2312			unlock_page(page);
   2313			break;
   2314		}
   2315
   2316		if (*next && (page->index != *next)) {
   2317			/* Not next consecutive page */
   2318			unlock_page(page);
   2319			break;
   2320		}
   2321
   2322		if (wbc->sync_mode != WB_SYNC_NONE)
   2323			wait_on_page_writeback(page);
   2324
   2325		if (PageWriteback(page) ||
   2326				!clear_page_dirty_for_io(page)) {
   2327			unlock_page(page);
   2328			break;
   2329		}
   2330
   2331		/*
   2332		 * This actually clears the dirty bit in the radix tree.
   2333		 * See cifs_writepage() for more commentary.
   2334		 */
   2335		set_page_writeback(page);
   2336		if (page_offset(page) >= i_size_read(mapping->host)) {
   2337			*done = true;
   2338			unlock_page(page);
   2339			end_page_writeback(page);
   2340			break;
   2341		}
   2342
   2343		wdata->pages[i] = page;
   2344		*next = page->index + 1;
   2345		++nr_pages;
   2346	}
   2347
   2348	/* reset index to refind any pages skipped */
   2349	if (nr_pages == 0)
   2350		*index = wdata->pages[0]->index + 1;
   2351
   2352	/* put any pages we aren't going to use */
   2353	for (i = nr_pages; i < found_pages; i++) {
   2354		put_page(wdata->pages[i]);
   2355		wdata->pages[i] = NULL;
   2356	}
   2357
   2358	return nr_pages;
   2359}
   2360
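/*
 * Fill in the remaining wdata fields (offset, length, tail size) from the
 * prepared pages, adjust the credits, and hand the request to
 * async_writev.
 */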
   2361static int
   2362wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
   2363		 struct address_space *mapping, struct writeback_control *wbc)
   2364{
   2365	int rc;
   2366
   2367	wdata->sync_mode = wbc->sync_mode;
   2368	wdata->nr_pages = nr_pages;
   2369	wdata->offset = page_offset(wdata->pages[0]);
   2370	wdata->pagesz = PAGE_SIZE;
   2371	wdata->tailsz = min(i_size_read(mapping->host) -
   2372			page_offset(wdata->pages[nr_pages - 1]),
   2373			(loff_t)PAGE_SIZE);
   2374	wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
   2375	wdata->pid = wdata->cfile->pid;
   2376
   2377	rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
   2378	if (rc)
   2379		return rc;
   2380
   2381	if (wdata->cfile->invalidHandle)
   2382		rc = -EAGAIN;
   2383	else
   2384		rc = wdata->server->ops->async_writev(wdata,
   2385						      cifs_writedata_release);
   2386
   2387	return rc;
   2388}
   2389
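/*
 * address_space writepages method: gather runs of dirty pages into wdata
 * requests of at most wsize bytes each and send them with async_writev,
 * waiting on server credits before each request.
 */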
   2390static int cifs_writepages(struct address_space *mapping,
   2391			   struct writeback_control *wbc)
   2392{
   2393	struct inode *inode = mapping->host;
   2394	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
   2395	struct TCP_Server_Info *server;
   2396	bool done = false, scanned = false, range_whole = false;
   2397	pgoff_t end, index;
   2398	struct cifs_writedata *wdata;
   2399	struct cifsFileInfo *cfile = NULL;
   2400	int rc = 0;
   2401	int saved_rc = 0;
   2402	unsigned int xid;
   2403
   2404	/*
   2405	 * If wsize is smaller than the page cache size, default to writing
   2406	 * one page at a time via cifs_writepage
   2407	 */
   2408	if (cifs_sb->ctx->wsize < PAGE_SIZE)
   2409		return generic_writepages(mapping, wbc);
   2410
   2411	xid = get_xid();
   2412	if (wbc->range_cyclic) {
   2413		index = mapping->writeback_index; /* Start from prev offset */
   2414		end = -1;
   2415	} else {
   2416		index = wbc->range_start >> PAGE_SHIFT;
   2417		end = wbc->range_end >> PAGE_SHIFT;
   2418		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
   2419			range_whole = true;
   2420		scanned = true;
   2421	}
   2422	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
   2423
   2424retry:
   2425	while (!done && index <= end) {
   2426		unsigned int i, nr_pages, found_pages, wsize;
   2427		pgoff_t next = 0, tofind, saved_index = index;
   2428		struct cifs_credits credits_on_stack;
   2429		struct cifs_credits *credits = &credits_on_stack;
   2430		int get_file_rc = 0;
   2431
   2432		if (cfile)
   2433			cifsFileInfo_put(cfile);
   2434
   2435		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
   2436
   2437		/* in case of an error store it to return later */
   2438		if (rc)
   2439			get_file_rc = rc;
   2440
   2441		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
   2442						   &wsize, credits);
   2443		if (rc != 0) {
   2444			done = true;
   2445			break;
   2446		}
   2447
   2448		tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
   2449
   2450		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
   2451						  &found_pages);
   2452		if (!wdata) {
   2453			rc = -ENOMEM;
   2454			done = true;
   2455			add_credits_and_wake_if(server, credits, 0);
   2456			break;
   2457		}
   2458
   2459		if (found_pages == 0) {
   2460			kref_put(&wdata->refcount, cifs_writedata_release);
   2461			add_credits_and_wake_if(server, credits, 0);
   2462			break;
   2463		}
   2464
   2465		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
   2466					       end, &index, &next, &done);
   2467
   2468		/* nothing to write? */
   2469		if (nr_pages == 0) {
   2470			kref_put(&wdata->refcount, cifs_writedata_release);
   2471			add_credits_and_wake_if(server, credits, 0);
   2472			continue;
   2473		}
   2474
   2475		wdata->credits = credits_on_stack;
   2476		wdata->cfile = cfile;
   2477		wdata->server = server;
   2478		cfile = NULL;
   2479
   2480		if (!wdata->cfile) {
   2481			cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
   2482				 get_file_rc);
   2483			if (is_retryable_error(get_file_rc))
   2484				rc = get_file_rc;
   2485			else
   2486				rc = -EBADF;
   2487		} else
   2488			rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
   2489
   2490		for (i = 0; i < nr_pages; ++i)
   2491			unlock_page(wdata->pages[i]);
   2492
   2493		/* send failure -- clean up the mess */
   2494		if (rc != 0) {
   2495			add_credits_and_wake_if(server, &wdata->credits, 0);
   2496			for (i = 0; i < nr_pages; ++i) {
   2497				if (is_retryable_error(rc))
   2498					redirty_page_for_writepage(wbc,
   2499							   wdata->pages[i]);
   2500				else
   2501					SetPageError(wdata->pages[i]);
   2502				end_page_writeback(wdata->pages[i]);
   2503				put_page(wdata->pages[i]);
   2504			}
   2505			if (!is_retryable_error(rc))
   2506				mapping_set_error(mapping, rc);
   2507		}
   2508		kref_put(&wdata->refcount, cifs_writedata_release);
   2509
   2510		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
   2511			index = saved_index;
   2512			continue;
   2513		}
   2514
   2515		/* Return immediately if we received a signal during writing */
   2516		if (is_interrupt_error(rc)) {
   2517			done = true;
   2518			break;
   2519		}
   2520
   2521		if (rc != 0 && saved_rc == 0)
   2522			saved_rc = rc;
   2523
   2524		wbc->nr_to_write -= nr_pages;
   2525		if (wbc->nr_to_write <= 0)
   2526			done = true;
   2527
   2528		index = next;
   2529	}
   2530
   2531	if (!scanned && !done) {
   2532		/*
   2533		 * We hit the last page and there is more work to be done: wrap
   2534		 * back to the start of the file
   2535		 */
   2536		scanned = true;
   2537		index = 0;
   2538		goto retry;
   2539	}
   2540
   2541	if (saved_rc != 0)
   2542		rc = saved_rc;
   2543
   2544	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
   2545		mapping->writeback_index = index;
   2546
   2547	if (cfile)
   2548		cifsFileInfo_put(cfile);
   2549	free_xid(xid);
   2550	/* Indication to update ctime and mtime as close is deferred */
   2551	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
   2552	return rc;
   2553}
   2554
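/*
 * Write a single locked page back to the server, retrying -EAGAIN
 * internally for WB_SYNC_ALL writeback and redirtying the page on other
 * retryable errors.
 */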
   2555static int
   2556cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
   2557{
   2558	int rc;
   2559	unsigned int xid;
   2560
   2561	xid = get_xid();
   2562/* BB add check for wbc flags */
   2563	get_page(page);
   2564	if (!PageUptodate(page))
   2565		cifs_dbg(FYI, "ppw - page not up to date\n");
   2566
   2567	/*
   2568	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
   2569	 *
   2570	 * A writepage() implementation always needs to do either this,
   2571	 * or re-dirty the page with "redirty_page_for_writepage()" in
   2572	 * the case of a failure.
   2573	 *
   2574	 * Just unlocking the page will cause the radix tree tag-bits
   2575	 * to fail to update with the state of the page correctly.
   2576	 */
   2577	set_page_writeback(page);
   2578retry_write:
   2579	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
   2580	if (is_retryable_error(rc)) {
   2581		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
   2582			goto retry_write;
   2583		redirty_page_for_writepage(wbc, page);
   2584	} else if (rc != 0) {
   2585		SetPageError(page);
   2586		mapping_set_error(page->mapping, rc);
   2587	} else {
   2588		SetPageUptodate(page);
   2589	}
   2590	end_page_writeback(page);
   2591	put_page(page);
   2592	free_xid(xid);
   2593	return rc;
   2594}
   2595
   2596static int cifs_writepage(struct page *page, struct writeback_control *wbc)
   2597{
   2598	int rc = cifs_writepage_locked(page, wbc);
   2599	unlock_page(page);
   2600	return rc;
   2601}
   2602
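/*
 * .write_end for buffered writes: mark the page up to date and dirty when
 * the copy covered it, or push the partial data synchronously through
 * cifs_write() when the page is not up to date. Extends i_size as needed.
 */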
   2603static int cifs_write_end(struct file *file, struct address_space *mapping,
   2604			loff_t pos, unsigned len, unsigned copied,
   2605			struct page *page, void *fsdata)
   2606{
   2607	int rc;
   2608	struct inode *inode = mapping->host;
   2609	struct cifsFileInfo *cfile = file->private_data;
   2610	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
   2611	__u32 pid;
   2612
   2613	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
   2614		pid = cfile->pid;
   2615	else
   2616		pid = current->tgid;
   2617
   2618	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
   2619		 page, pos, copied);
   2620
   2621	if (PageChecked(page)) {
   2622		if (copied == len)
   2623			SetPageUptodate(page);
   2624		ClearPageChecked(page);
   2625	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
   2626		SetPageUptodate(page);
   2627
   2628	if (!PageUptodate(page)) {
   2629		char *page_data;
   2630		unsigned offset = pos & (PAGE_SIZE - 1);
   2631		unsigned int xid;
   2632
   2633		xid = get_xid();
   2634		/* this is probably better than directly calling
   2635		   partialpage_write since in this function the file handle is
    2636		   known, which we might as well leverage */
   2637		/* BB check if anything else missing out of ppw
   2638		   such as updating last write time */
   2639		page_data = kmap(page);
   2640		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
   2641		/* if (rc < 0) should we set writebehind rc? */
   2642		kunmap(page);
   2643
   2644		free_xid(xid);
   2645	} else {
   2646		rc = copied;
   2647		pos += copied;
   2648		set_page_dirty(page);
   2649	}
   2650
   2651	if (rc > 0) {
   2652		spin_lock(&inode->i_lock);
   2653		if (pos > inode->i_size) {
   2654			i_size_write(inode, pos);
   2655			inode->i_blocks = (512 - 1 + pos) >> 9;
   2656		}
   2657		spin_unlock(&inode->i_lock);
   2658	}
   2659
   2660	unlock_page(page);
   2661	put_page(page);
   2662	/* Indication to update ctime and mtime as close is deferred */
   2663	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
   2664
   2665	return rc;
   2666}
   2667
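/*
 * fsync for strict cache mode: flush dirty pages, invalidate the page
 * cache if we hold no read lease, then ask the server to flush the handle
 * (unless the mount disables server-side sync).
 */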
   2668int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
   2669		      int datasync)
   2670{
   2671	unsigned int xid;
   2672	int rc = 0;
   2673	struct cifs_tcon *tcon;
   2674	struct TCP_Server_Info *server;
   2675	struct cifsFileInfo *smbfile = file->private_data;
   2676	struct inode *inode = file_inode(file);
   2677	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
   2678
   2679	rc = file_write_and_wait_range(file, start, end);
   2680	if (rc) {
   2681		trace_cifs_fsync_err(inode->i_ino, rc);
   2682		return rc;
   2683	}
   2684
   2685	xid = get_xid();
   2686
   2687	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
   2688		 file, datasync);
   2689
   2690	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
   2691		rc = cifs_zap_mapping(inode);
   2692		if (rc) {
   2693			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
   2694			rc = 0; /* don't care about it in fsync */
   2695		}
   2696	}
   2697
   2698	tcon = tlink_tcon(smbfile->tlink);
   2699	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
   2700		server = tcon->ses->server;
   2701		if (server->ops->flush == NULL) {
   2702			rc = -ENOSYS;
   2703			goto strict_fsync_exit;
   2704		}
   2705
   2706		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
   2707			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
   2708			if (smbfile) {
   2709				rc = server->ops->flush(xid, tcon, &smbfile->fid);
   2710				cifsFileInfo_put(smbfile);
   2711			} else
   2712				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
   2713		} else
   2714			rc = server->ops->flush(xid, tcon, &smbfile->fid);
   2715	}
   2716
   2717strict_fsync_exit:
   2718	free_xid(xid);
   2719	return rc;
   2720}
   2721
   2722int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
   2723{
   2724	unsigned int xid;
   2725	int rc = 0;
   2726	struct cifs_tcon *tcon;
   2727	struct TCP_Server_Info *server;
   2728	struct cifsFileInfo *smbfile = file->private_data;
   2729	struct inode *inode = file_inode(file);
   2730	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
   2731
   2732	rc = file_write_and_wait_range(file, start, end);
   2733	if (rc) {
   2734		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
   2735		return rc;
   2736	}
   2737
   2738	xid = get_xid();
   2739
   2740	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
   2741		 file, datasync);
   2742
   2743	tcon = tlink_tcon(smbfile->tlink);
   2744	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
   2745		server = tcon->ses->server;
   2746		if (server->ops->flush == NULL) {
   2747			rc = -ENOSYS;
   2748			goto fsync_exit;
   2749		}
   2750
   2751		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
   2752			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
   2753			if (smbfile) {
   2754				rc = server->ops->flush(xid, tcon, &smbfile->fid);
   2755				cifsFileInfo_put(smbfile);
   2756			} else
   2757				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
   2758		} else
   2759			rc = server->ops->flush(xid, tcon, &smbfile->fid);
   2760	}
   2761
   2762fsync_exit:
   2763	free_xid(xid);
   2764	return rc;
   2765}
   2766
   2767/*
    2768 * As the file closes, flush all cached write data for this inode,
    2769 * checking for write-behind errors.
   2770 */
   2771int cifs_flush(struct file *file, fl_owner_t id)
   2772{
   2773	struct inode *inode = file_inode(file);
   2774	int rc = 0;
   2775
   2776	if (file->f_mode & FMODE_WRITE)
   2777		rc = filemap_write_and_wait(inode->i_mapping);
   2778
   2779	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
   2780	if (rc) {
   2781		/* get more nuanced writeback errors */
   2782		rc = filemap_check_wb_err(file->f_mapping, 0);
   2783		trace_cifs_flush_err(inode->i_ino, rc);
   2784	}
   2785	return rc;
   2786}
   2787
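/*
 * Allocate @num_pages pages for an uncached write, releasing everything
 * already allocated if any allocation fails.
 */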
   2788static int
   2789cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
   2790{
   2791	int rc = 0;
   2792	unsigned long i;
   2793
   2794	for (i = 0; i < num_pages; i++) {
   2795		pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
   2796		if (!pages[i]) {
   2797			/*
   2798			 * save number of pages we have already allocated and
   2799			 * return with ENOMEM error
   2800			 */
   2801			num_pages = i;
   2802			rc = -ENOMEM;
   2803			break;
   2804		}
   2805	}
   2806
   2807	if (rc) {
   2808		for (i = 0; i < num_pages; i++)
   2809			put_page(pages[i]);
   2810	}
   2811	return rc;
   2812}
   2813
   2814static inline
   2815size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
   2816{
   2817	size_t num_pages;
   2818	size_t clen;
   2819
   2820	clen = min_t(const size_t, len, wsize);
   2821	num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
   2822
   2823	if (cur_len)
   2824		*cur_len = clen;
   2825
   2826	return num_pages;
   2827}
   2828
   2829static void
   2830cifs_uncached_writedata_release(struct kref *refcount)
   2831{
   2832	int i;
   2833	struct cifs_writedata *wdata = container_of(refcount,
   2834					struct cifs_writedata, refcount);
   2835
   2836	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
   2837	for (i = 0; i < wdata->nr_pages; i++)
   2838		put_page(wdata->pages[i]);
   2839	cifs_writedata_release(refcount);
   2840}
   2841
   2842static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
   2843
   2844static void
   2845cifs_uncached_writev_complete(struct work_struct *work)
   2846{
   2847	struct cifs_writedata *wdata = container_of(work,
   2848					struct cifs_writedata, work);
   2849	struct inode *inode = d_inode(wdata->cfile->dentry);
   2850	struct cifsInodeInfo *cifsi = CIFS_I(inode);
   2851
   2852	spin_lock(&inode->i_lock);
   2853	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
   2854	if (cifsi->server_eof > inode->i_size)
   2855		i_size_write(inode, cifsi->server_eof);
   2856	spin_unlock(&inode->i_lock);
   2857
   2858	complete(&wdata->done);
   2859	collect_uncached_write_data(wdata->ctx);
   2860	/* the below call can possibly free the last ref to aio ctx */
   2861	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
   2862}
   2863
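/*
 * Copy up to *@len bytes from @from into the wdata's pages, stopping early
 * if the source iterator hits an unmapped area. On return *@len and
 * *@num_pages reflect what was actually copied; -EFAULT means nothing
 * could be copied at all.
 */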
   2864static int
   2865wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
   2866		      size_t *len, unsigned long *num_pages)
   2867{
   2868	size_t save_len, copied, bytes, cur_len = *len;
   2869	unsigned long i, nr_pages = *num_pages;
   2870
   2871	save_len = cur_len;
   2872	for (i = 0; i < nr_pages; i++) {
   2873		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
   2874		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
   2875		cur_len -= copied;
   2876		/*
   2877		 * If we didn't copy as much as we expected, then that
   2878		 * may mean we trod into an unmapped area. Stop copying
   2879		 * at that point. On the next pass through the big
   2880		 * loop, we'll likely end up getting a zero-length
   2881		 * write and bailing out of it.
   2882		 */
   2883		if (copied < bytes)
   2884			break;
   2885	}
   2886	cur_len = save_len - cur_len;
   2887	*len = cur_len;
   2888
   2889	/*
   2890	 * If we have no data to send, then that probably means that
   2891	 * the copy above failed altogether. That's most likely because
   2892	 * the address in the iovec was bogus. Return -EFAULT and let
   2893	 * the caller free anything we allocated and bail out.
   2894	 */
   2895	if (!cur_len)
   2896		return -EFAULT;
   2897
   2898	/*
   2899	 * i + 1 now represents the number of pages we actually used in
   2900	 * the copy phase above.
   2901	 */
   2902	*num_pages = i + 1;
   2903	return 0;
   2904}
   2905
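/*
 * Resend a wdata whose first attempt failed with -EAGAIN: reopen the
 * handle if needed, wait until enough credits are available for the whole
 * request, and reissue it with async_writev.
 */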
   2906static int
   2907cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
   2908	struct cifs_aio_ctx *ctx)
   2909{
   2910	unsigned int wsize;
   2911	struct cifs_credits credits;
   2912	int rc;
   2913	struct TCP_Server_Info *server = wdata->server;
   2914
   2915	do {
   2916		if (wdata->cfile->invalidHandle) {
   2917			rc = cifs_reopen_file(wdata->cfile, false);
   2918			if (rc == -EAGAIN)
   2919				continue;
   2920			else if (rc)
   2921				break;
   2922		}
   2923
   2925		/*
   2926		 * Wait for credits to resend this wdata.
    2927		 * Note: we are attempting to resend the whole wdata, not in
    2928		 * segments.
   2929		 */
   2930		do {
   2931			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
   2932						&wsize, &credits);
   2933			if (rc)
   2934				goto fail;
   2935
   2936			if (wsize < wdata->bytes) {
   2937				add_credits_and_wake_if(server, &credits, 0);
   2938				msleep(1000);
   2939			}
   2940		} while (wsize < wdata->bytes);
   2941		wdata->credits = credits;
   2942
   2943		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
   2944
   2945		if (!rc) {
   2946			if (wdata->cfile->invalidHandle)
   2947				rc = -EAGAIN;
   2948			else {
   2949#ifdef CONFIG_CIFS_SMB_DIRECT
   2950				if (wdata->mr) {
   2951					wdata->mr->need_invalidate = true;
   2952					smbd_deregister_mr(wdata->mr);
   2953					wdata->mr = NULL;
   2954				}
   2955#endif
   2956				rc = server->ops->async_writev(wdata,
   2957					cifs_uncached_writedata_release);
   2958			}
   2959		}
   2960
   2961		/* If the write was successfully sent, we are done */
   2962		if (!rc) {
   2963			list_add_tail(&wdata->list, wdata_list);
   2964			return 0;
   2965		}
   2966
   2967		/* Roll back credits and retry if needed */
   2968		add_credits_and_wake_if(server, &wdata->credits, 0);
   2969	} while (rc == -EAGAIN);
   2970
   2971fail:
   2972	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
   2973	return rc;
   2974}
   2975
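/*
 * Split the data in @from into wsize-bounded wdata requests and send each
 * with async_writev. For direct I/O the user pages are pinned in place;
 * otherwise the data is copied into freshly allocated pages first.
 */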
   2976static int
   2977cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
   2978		     struct cifsFileInfo *open_file,
   2979		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
   2980		     struct cifs_aio_ctx *ctx)
   2981{
   2982	int rc = 0;
   2983	size_t cur_len;
   2984	unsigned long nr_pages, num_pages, i;
   2985	struct cifs_writedata *wdata;
   2986	struct iov_iter saved_from = *from;
   2987	loff_t saved_offset = offset;
   2988	pid_t pid;
   2989	struct TCP_Server_Info *server;
   2990	struct page **pagevec;
   2991	size_t start;
   2992	unsigned int xid;
   2993
   2994	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
   2995		pid = open_file->pid;
   2996	else
   2997		pid = current->tgid;
   2998
   2999	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
   3000	xid = get_xid();
   3001
   3002	do {
   3003		unsigned int wsize;
   3004		struct cifs_credits credits_on_stack;
   3005		struct cifs_credits *credits = &credits_on_stack;
   3006
   3007		if (open_file->invalidHandle) {
   3008			rc = cifs_reopen_file(open_file, false);
   3009			if (rc == -EAGAIN)
   3010				continue;
   3011			else if (rc)
   3012				break;
   3013		}
   3014
   3015		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
   3016						   &wsize, credits);
   3017		if (rc)
   3018			break;
   3019
   3020		cur_len = min_t(const size_t, len, wsize);
   3021
   3022		if (ctx->direct_io) {
   3023			ssize_t result;
   3024
   3025			result = iov_iter_get_pages_alloc(
   3026				from, &pagevec, cur_len, &start);
   3027			if (result < 0) {
   3028				cifs_dbg(VFS,
   3029					 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
   3030					 result, iov_iter_type(from),
   3031					 from->iov_offset, from->count);
   3032				dump_stack();
   3033
   3034				rc = result;
   3035				add_credits_and_wake_if(server, credits, 0);
   3036				break;
   3037			}
   3038			cur_len = (size_t)result;
   3039			iov_iter_advance(from, cur_len);
   3040
   3041			nr_pages =
   3042				(cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
   3043
   3044			wdata = cifs_writedata_direct_alloc(pagevec,
   3045					     cifs_uncached_writev_complete);
   3046			if (!wdata) {
   3047				rc = -ENOMEM;
   3048				add_credits_and_wake_if(server, credits, 0);
   3049				break;
   3050			}
   3051
   3053			wdata->page_offset = start;
   3054			wdata->tailsz =
   3055				nr_pages > 1 ?
   3056					cur_len - (PAGE_SIZE - start) -
   3057					(nr_pages - 2) * PAGE_SIZE :
   3058					cur_len;
   3059		} else {
   3060			nr_pages = get_numpages(wsize, len, &cur_len);
   3061			wdata = cifs_writedata_alloc(nr_pages,
   3062					     cifs_uncached_writev_complete);
   3063			if (!wdata) {
   3064				rc = -ENOMEM;
   3065				add_credits_and_wake_if(server, credits, 0);
   3066				break;
   3067			}
   3068
   3069			rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
   3070			if (rc) {
   3071				kvfree(wdata->pages);
   3072				kfree(wdata);
   3073				add_credits_and_wake_if(server, credits, 0);
   3074				break;
   3075			}
   3076
   3077			num_pages = nr_pages;
   3078			rc = wdata_fill_from_iovec(
   3079				wdata, from, &cur_len, &num_pages);
   3080			if (rc) {
   3081				for (i = 0; i < nr_pages; i++)
   3082					put_page(wdata->pages[i]);
   3083				kvfree(wdata->pages);
   3084				kfree(wdata);
   3085				add_credits_and_wake_if(server, credits, 0);
   3086				break;
   3087			}
   3088
   3089			/*
   3090			 * Bring nr_pages down to the number of pages we
   3091			 * actually used, and free any pages that we didn't use.
   3092			 */
   3093			for ( ; nr_pages > num_pages; nr_pages--)
   3094				put_page(wdata->pages[nr_pages - 1]);
   3095
   3096			wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
   3097		}
   3098
   3099		wdata->sync_mode = WB_SYNC_ALL;
   3100		wdata->nr_pages = nr_pages;
   3101		wdata->offset = (__u64)offset;
   3102		wdata->cfile = cifsFileInfo_get(open_file);
   3103		wdata->server = server;
   3104		wdata->pid = pid;
   3105		wdata->bytes = cur_len;
   3106		wdata->pagesz = PAGE_SIZE;
   3107		wdata->credits = credits_on_stack;
   3108		wdata->ctx = ctx;
   3109		kref_get(&ctx->refcount);
   3110
   3111		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
   3112
   3113		if (!rc) {
   3114			if (wdata->cfile->invalidHandle)
   3115				rc = -EAGAIN;
   3116			else
   3117				rc = server->ops->async_writev(wdata,
   3118					cifs_uncached_writedata_release);
   3119		}
   3120
   3121		if (rc) {
   3122			add_credits_and_wake_if(server, &wdata->credits, 0);
   3123			kref_put(&wdata->refcount,
   3124				 cifs_uncached_writedata_release);
   3125			if (rc == -EAGAIN) {
   3126				*from = saved_from;
   3127				iov_iter_advance(from, offset - saved_offset);
   3128				continue;
   3129			}
   3130			break;
   3131		}
   3132
   3133		list_add_tail(&wdata->list, wdata_list);
   3134		offset += cur_len;
   3135		len -= cur_len;
   3136	} while (len > 0);
   3137
   3138	free_xid(xid);
   3139	return rc;
   3140}
   3141
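/*
 * Reap completed wdata requests of an aio context in order of increasing
 * offset, resending any that failed with -EAGAIN, then report the final
 * byte count (or error) through the kiocb or the ctx completion.
 */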
   3142static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
   3143{
   3144	struct cifs_writedata *wdata, *tmp;
   3145	struct cifs_tcon *tcon;
   3146	struct cifs_sb_info *cifs_sb;
   3147	struct dentry *dentry = ctx->cfile->dentry;
   3148	ssize_t rc;
   3149
   3150	tcon = tlink_tcon(ctx->cfile->tlink);
   3151	cifs_sb = CIFS_SB(dentry->d_sb);
   3152
   3153	mutex_lock(&ctx->aio_mutex);
   3154
   3155	if (list_empty(&ctx->list)) {
   3156		mutex_unlock(&ctx->aio_mutex);
   3157		return;
   3158	}
   3159
   3160	rc = ctx->rc;
   3161	/*
   3162	 * Wait for and collect replies for any successful sends in order of
    3163	 * increasing offset. Once an error is hit, return without waiting
   3164	 * for any more replies.
   3165	 */
   3166restart_loop:
   3167	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
   3168		if (!rc) {
   3169			if (!try_wait_for_completion(&wdata->done)) {
   3170				mutex_unlock(&ctx->aio_mutex);
   3171				return;
   3172			}
   3173
   3174			if (wdata->result)
   3175				rc = wdata->result;
   3176			else
   3177				ctx->total_len += wdata->bytes;
   3178
   3179			/* resend call if it's a retryable error */
   3180			if (rc == -EAGAIN) {
   3181				struct list_head tmp_list;
   3182				struct iov_iter tmp_from = ctx->iter;
   3183
   3184				INIT_LIST_HEAD(&tmp_list);
   3185				list_del_init(&wdata->list);
   3186
   3187				if (ctx->direct_io)
   3188					rc = cifs_resend_wdata(
   3189						wdata, &tmp_list, ctx);
   3190				else {
   3191					iov_iter_advance(&tmp_from,
   3192						 wdata->offset - ctx->pos);
   3193
   3194					rc = cifs_write_from_iter(wdata->offset,
   3195						wdata->bytes, &tmp_from,
   3196						ctx->cfile, cifs_sb, &tmp_list,
   3197						ctx);
   3198
   3199					kref_put(&wdata->refcount,
   3200						cifs_uncached_writedata_release);
   3201				}
   3202
   3203				list_splice(&tmp_list, &ctx->list);
   3204				goto restart_loop;
   3205			}
   3206		}
   3207		list_del_init(&wdata->list);
   3208		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
   3209	}
   3210
   3211	cifs_stats_bytes_written(tcon, ctx->total_len);
   3212	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
   3213
   3214	ctx->rc = (rc == 0) ? ctx->total_len : rc;
   3215
   3216	mutex_unlock(&ctx->aio_mutex);
   3217
   3218	if (ctx->iocb && ctx->iocb->ki_complete)
   3219		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
   3220	else
   3221		complete(&ctx->done);
   3222}
   3223
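/*
 * Common implementation behind cifs_direct_writev() and cifs_user_writev():
 * set up an aio context, fan the iterator out via cifs_write_from_iter(),
 * then either return -EIOCBQUEUED for async kiocbs or wait for completion.
 */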
   3224static ssize_t __cifs_writev(
   3225	struct kiocb *iocb, struct iov_iter *from, bool direct)
   3226{
   3227	struct file *file = iocb->ki_filp;
   3228	ssize_t total_written = 0;
   3229	struct cifsFileInfo *cfile;
   3230	struct cifs_tcon *tcon;
   3231	struct cifs_sb_info *cifs_sb;
   3232	struct cifs_aio_ctx *ctx;
   3233	struct iov_iter saved_from = *from;
   3234	size_t len = iov_iter_count(from);
   3235	int rc;
   3236
   3237	/*
   3238	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
   3239	 * In this case, fall back to non-direct write function.
    3240	 * This could be improved by getting pages directly in ITER_KVEC
   3241	 */
   3242	if (direct && iov_iter_is_kvec(from)) {
   3243		cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
   3244		direct = false;
   3245	}
   3246
   3247	rc = generic_write_checks(iocb, from);
   3248	if (rc <= 0)
   3249		return rc;
   3250
   3251	cifs_sb = CIFS_FILE_SB(file);
   3252	cfile = file->private_data;
   3253	tcon = tlink_tcon(cfile->tlink);
   3254
   3255	if (!tcon->ses->server->ops->async_writev)
   3256		return -ENOSYS;
   3257
   3258	ctx = cifs_aio_ctx_alloc();
   3259	if (!ctx)
   3260		return -ENOMEM;
   3261
   3262	ctx->cfile = cifsFileInfo_get(cfile);
   3263
   3264	if (!is_sync_kiocb(iocb))
   3265		ctx->iocb = iocb;
   3266
   3267	ctx->pos = iocb->ki_pos;
   3268
   3269	if (direct) {
   3270		ctx->direct_io = true;
   3271		ctx->iter = *from;
   3272		ctx->len = len;
   3273	} else {
   3274		rc = setup_aio_ctx_iter(ctx, from, WRITE);
   3275		if (rc) {
   3276			kref_put(&ctx->refcount, cifs_aio_ctx_release);
   3277			return rc;
   3278		}
   3279	}
   3280
    3281	/* grab a lock here because the response handlers can access ctx */
   3282	mutex_lock(&ctx->aio_mutex);
   3283
   3284	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
   3285				  cfile, cifs_sb, &ctx->list, ctx);
   3286
   3287	/*
   3288	 * If at least one write was successfully sent, then discard any rc
    3289	 * value from the later writes. If those writes succeed, then
    3290	 * we'll end up returning whatever was written. If one fails, then
   3291	 * we'll get a new rc value from that.
   3292	 */
   3293	if (!list_empty(&ctx->list))
   3294		rc = 0;
   3295
   3296	mutex_unlock(&ctx->aio_mutex);
   3297
   3298	if (rc) {
   3299		kref_put(&ctx->refcount, cifs_aio_ctx_release);
   3300		return rc;
   3301	}
   3302
   3303	if (!is_sync_kiocb(iocb)) {
   3304		kref_put(&ctx->refcount, cifs_aio_ctx_release);
   3305		return -EIOCBQUEUED;
   3306	}
   3307
   3308	rc = wait_for_completion_killable(&ctx->done);
   3309	if (rc) {
   3310		mutex_lock(&ctx->aio_mutex);
   3311		ctx->rc = rc = -EINTR;
   3312		total_written = ctx->total_len;
   3313		mutex_unlock(&ctx->aio_mutex);
   3314	} else {
   3315		rc = ctx->rc;
   3316		total_written = ctx->total_len;
   3317	}
   3318
   3319	kref_put(&ctx->refcount, cifs_aio_ctx_release);
   3320
   3321	if (unlikely(!total_written))
   3322		return rc;
   3323
   3324	iocb->ki_pos += total_written;
   3325	return total_written;
   3326}
   3327
   3328ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
   3329{
   3330	return __cifs_writev(iocb, from, true);
   3331}
   3332
   3333ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
   3334{
   3335	return __cifs_writev(iocb, from, false);
   3336}
   3337
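/*
 * Buffered write used while we hold a write-caching oplock: take lock_sem
 * to keep the brlock list stable, refuse the write if it conflicts with a
 * mandatory lock, and otherwise go through the generic write path.
 */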
   3338static ssize_t
   3339cifs_writev(struct kiocb *iocb, struct iov_iter *from)
   3340{
   3341	struct file *file = iocb->ki_filp;
   3342	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
   3343	struct inode *inode = file->f_mapping->host;
   3344	struct cifsInodeInfo *cinode = CIFS_I(inode);
   3345	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
   3346	ssize_t rc;
   3347
   3348	inode_lock(inode);
   3349	/*
    3350	 * We need to hold the sem to be sure nobody modifies the lock list
   3351	 * with a brlock that prevents writing.
   3352	 */
   3353	down_read(&cinode->lock_sem);
   3354
   3355	rc = generic_write_checks(iocb, from);
   3356	if (rc <= 0)
   3357		goto out;
   3358
   3359	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
   3360				     server->vals->exclusive_lock_type, 0,
   3361				     NULL, CIFS_WRITE_OP))
   3362		rc = __generic_file_write_iter(iocb, from);
   3363	else
   3364		rc = -EACCES;
   3365out:
   3366	up_read(&cinode->lock_sem);
   3367	inode_unlock(inode);
   3368
   3369	if (rc > 0)
   3370		rc = generic_write_sync(iocb, rc);
   3371	return rc;
   3372}
   3373
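/*
 * Strict cache mode write entry point: use the cached write paths only
 * while we hold a write lease/oplock; otherwise write through to the
 * server and, if we still cache reads, zap the now-stale page cache.
 */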
   3374ssize_t
   3375cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
   3376{
   3377	struct inode *inode = file_inode(iocb->ki_filp);
   3378	struct cifsInodeInfo *cinode = CIFS_I(inode);
   3379	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
   3380	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
   3381						iocb->ki_filp->private_data;
   3382	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
   3383	ssize_t written;
   3384
   3385	written = cifs_get_writer(cinode);
   3386	if (written)
   3387		return written;
   3388
   3389	if (CIFS_CACHE_WRITE(cinode)) {
   3390		if (cap_unix(tcon->ses) &&
   3391		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
   3392		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
   3393			written = generic_file_write_iter(iocb, from);
   3394			goto out;
   3395		}
   3396		written = cifs_writev(iocb, from);
   3397		goto out;
   3398	}
   3399	/*
   3400	 * For non-oplocked files in strict cache mode we need to write the data
   3401	 * to the server exactly from the pos to pos+len-1 rather than flush all
    3402	 * affected pages because it may cause an error with mandatory locks on
    3403	 * these pages but not on the region from pos to pos+len-1.
   3404	 */
   3405	written = cifs_user_writev(iocb, from);
   3406	if (CIFS_CACHE_READ(cinode)) {
   3407		/*
   3408		 * We have read level caching and we have just sent a write
   3409		 * request to the server thus making data in the cache stale.
   3410		 * Zap the cache and set oplock/lease level to NONE to avoid
   3411		 * reading stale data from the cache. All subsequent read
   3412		 * operations will read new data from the server.
   3413		 */
   3414		cifs_zap_mapping(inode);
   3415		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
   3416			 inode);
   3417		cinode->oplock = 0;
   3418	}
   3419out:
   3420	cifs_put_writer(cinode);
   3421	return written;
   3422}
   3423
   3424static struct cifs_readdata *
   3425cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
   3426{
   3427	struct cifs_readdata *rdata;
   3428
   3429	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
   3430	if (rdata != NULL) {
   3431		rdata->pages = pages;
   3432		kref_init(&rdata->refcount);
   3433		INIT_LIST_HEAD(&rdata->list);
   3434		init_completion(&rdata->done);
   3435		INIT_WORK(&rdata->work, complete);
   3436	}
   3437
   3438	return rdata;
   3439}
   3440
   3441static struct cifs_readdata *
   3442cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
   3443{
   3444	struct page **pages =
   3445		kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
   3446	struct cifs_readdata *ret = NULL;
   3447
   3448	if (pages) {
   3449		ret = cifs_readdata_direct_alloc(pages, complete);
   3450		if (!ret)
   3451			kfree(pages);
   3452	}
   3453
   3454	return ret;
   3455}
   3456
   3457void
   3458cifs_readdata_release(struct kref *refcount)
   3459{
   3460	struct cifs_readdata *rdata = container_of(refcount,
   3461					struct cifs_readdata, refcount);
   3462#ifdef CONFIG_CIFS_SMB_DIRECT
   3463	if (rdata->mr) {
   3464		smbd_deregister_mr(rdata->mr);
   3465		rdata->mr = NULL;
   3466	}
   3467#endif
   3468	if (rdata->cfile)
   3469		cifsFileInfo_put(rdata->cfile);
   3470
   3471	kvfree(rdata->pages);
   3472	kfree(rdata);
   3473}
   3474
   3475static int
   3476cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
   3477{
   3478	int rc = 0;
   3479	struct page *page;
   3480	unsigned int i;
   3481
   3482	for (i = 0; i < nr_pages; i++) {
   3483		page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
   3484		if (!page) {
   3485			rc = -ENOMEM;
   3486			break;
   3487		}
   3488		rdata->pages[i] = page;
   3489	}
   3490
   3491	if (rc) {
   3492		unsigned int nr_page_failed = i;
   3493
   3494		for (i = 0; i < nr_page_failed; i++) {
   3495			put_page(rdata->pages[i]);
   3496			rdata->pages[i] = NULL;
   3497		}
   3498	}
   3499	return rc;
   3500}
   3501
   3502static void
   3503cifs_uncached_readdata_release(struct kref *refcount)
   3504{
   3505	struct cifs_readdata *rdata = container_of(refcount,
   3506					struct cifs_readdata, refcount);
   3507	unsigned int i;
   3508
   3509	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
   3510	for (i = 0; i < rdata->nr_pages; i++) {
   3511		put_page(rdata->pages[i]);
   3512	}
   3513	cifs_readdata_release(refcount);
   3514}
   3515
   3516/**
   3517 * cifs_readdata_to_iov - copy data from pages in response to an iovec
   3518 * @rdata:	the readdata response with list of pages holding data
   3519 * @iter:	destination for our data
   3520 *
   3521 * This function copies data from a list of pages in a readdata response into
   3522 * an array of iovecs. It will first calculate where the data should go
   3523 * based on the info in the readdata and then copy the data into that spot.
   3524 */
   3525static int
   3526cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
   3527{
   3528	size_t remaining = rdata->got_bytes;
   3529	unsigned int i;
   3530
   3531	for (i = 0; i < rdata->nr_pages; i++) {
   3532		struct page *page = rdata->pages[i];
   3533		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
   3534		size_t written;
   3535
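		/*
		 * For a pipe destination, copy the bytes out of a kmapped
		 * page; copy_page_to_iter() would instead attach the page
		 * itself to the pipe.
		 */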
   3536		if (unlikely(iov_iter_is_pipe(iter))) {
   3537			void *addr = kmap_atomic(page);
   3538
   3539			written = copy_to_iter(addr, copy, iter);
   3540			kunmap_atomic(addr);
   3541		} else
   3542			written = copy_page_to_iter(page, 0, copy, iter);
   3543		remaining -= written;
   3544		if (written < copy && iov_iter_count(iter) > 0)
   3545			break;
   3546	}
   3547	return remaining ? -EFAULT : 0;
   3548}
   3549
   3550static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
   3551
   3552static void
   3553cifs_uncached_readv_complete(struct work_struct *work)
   3554{
   3555	struct cifs_readdata *rdata = container_of(work,
   3556						struct cifs_readdata, work);
   3557
   3558	complete(&rdata->done);
   3559	collect_uncached_read_data(rdata->ctx);
   3560	/* the call below can possibly free the last ref to the aio ctx */
   3561	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
   3562}
   3563
   3564static int
   3565uncached_fill_pages(struct TCP_Server_Info *server,
   3566		    struct cifs_readdata *rdata, struct iov_iter *iter,
   3567		    unsigned int len)
   3568{
   3569	int result = 0;
   3570	unsigned int i;
   3571	unsigned int nr_pages = rdata->nr_pages;
   3572	unsigned int page_offset = rdata->page_offset;
   3573
   3574	rdata->got_bytes = 0;
   3575	rdata->tailsz = PAGE_SIZE;
   3576	for (i = 0; i < nr_pages; i++) {
   3577		struct page *page = rdata->pages[i];
   3578		size_t n;
   3579		unsigned int segment_size = rdata->pagesz;
   3580
   3581		if (i == 0)
   3582			segment_size -= page_offset;
   3583		else
   3584			page_offset = 0;
   3585
   3587		if (len <= 0) {
   3588			/* no need to hold page hostage */
   3589			rdata->pages[i] = NULL;
   3590			rdata->nr_pages--;
   3591			put_page(page);
   3592			continue;
   3593		}
   3594
   3595		n = len;
   3596		if (len >= segment_size)
   3597			/* enough data to fill the page */
   3598			n = segment_size;
   3599		else
   3600			rdata->tailsz = len;
   3601		len -= n;
   3602
   3603		if (iter)
   3604			result = copy_page_from_iter(
   3605					page, page_offset, n, iter);
   3606#ifdef CONFIG_CIFS_SMB_DIRECT
   3607		else if (rdata->mr)
   3608			result = n;
   3609#endif
   3610		else
   3611			result = cifs_read_page_from_socket(
   3612					server, page, page_offset, n);
   3613		if (result < 0)
   3614			break;
   3615
   3616		rdata->got_bytes += result;
   3617	}
   3618
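	/* report partial progress unless the connection was aborted */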
   3619	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
   3620						rdata->got_bytes : result;
   3621}
   3622
   3623static int
   3624cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
   3625			      struct cifs_readdata *rdata, unsigned int len)
   3626{
   3627	return uncached_fill_pages(server, rdata, NULL, len);
   3628}
   3629
   3630static int
   3631cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
   3632			      struct cifs_readdata *rdata,
   3633			      struct iov_iter *iter)
   3634{
   3635	return uncached_fill_pages(server, rdata, iter, iter->count);
   3636}
   3637
   3638static int cifs_resend_rdata(struct cifs_readdata *rdata,
   3639			struct list_head *rdata_list,
   3640			struct cifs_aio_ctx *ctx)
   3641{
   3642	unsigned int rsize;
   3643	struct cifs_credits credits;
   3644	int rc;
   3645	struct TCP_Server_Info *server;
   3646
   3647	/* XXX: should we pick a new channel here? */
   3648	server = rdata->server;
   3649
   3650	do {
   3651		if (rdata->cfile->invalidHandle) {
   3652			rc = cifs_reopen_file(rdata->cfile, true);
   3653			if (rc == -EAGAIN)
   3654				continue;
   3655			else if (rc)
   3656				break;
   3657		}
   3658
   3659		/*
   3660		 * Wait for credits to resend this rdata.
   3661		 * Note: we are attempting to resend the whole rdata, not in
   3662		 * segments.
   3663		 */
   3664		do {
   3665			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
   3666						&rsize, &credits);
   3667
   3668			if (rc)
   3669				goto fail;
   3670
   3671			if (rsize < rdata->bytes) {
   3672				add_credits_and_wake_if(server, &credits, 0);
   3673				msleep(1000);
   3674			}
   3675		} while (rsize < rdata->bytes);
   3676		rdata->credits = credits;
   3677
   3678		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
   3679		if (!rc) {
   3680			if (rdata->cfile->invalidHandle)
   3681				rc = -EAGAIN;
   3682			else {
   3683#ifdef CONFIG_CIFS_SMB_DIRECT
   3684				if (rdata->mr) {
   3685					rdata->mr->need_invalidate = true;
   3686					smbd_deregister_mr(rdata->mr);
   3687					rdata->mr = NULL;
   3688				}
   3689#endif
   3690				rc = server->ops->async_readv(rdata);
   3691			}
   3692		}
   3693
   3694		/* If the read was successfully sent, we are done */
   3695		if (!rc) {
   3696			/* Add to aio pending list */
   3697			list_add_tail(&rdata->list, rdata_list);
   3698			return 0;
   3699		}
   3700
   3701		/* Roll back credits and retry if needed */
   3702		add_credits_and_wake_if(server, &rdata->credits, 0);
   3703	} while (rc == -EAGAIN);
   3704
   3705fail:
   3706	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
   3707	return rc;
   3708}
   3709
   3710static int
   3711cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
   3712		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
   3713		     struct cifs_aio_ctx *ctx)
   3714{
   3715	struct cifs_readdata *rdata;
   3716	unsigned int npages, rsize;
   3717	struct cifs_credits credits_on_stack;
   3718	struct cifs_credits *credits = &credits_on_stack;
   3719	size_t cur_len;
   3720	int rc;
   3721	pid_t pid;
   3722	struct TCP_Server_Info *server;
   3723	struct page **pagevec;
   3724	size_t start;
   3725	struct iov_iter direct_iov = ctx->iter;
   3726
   3727	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
   3728
   3729	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
   3730		pid = open_file->pid;
   3731	else
   3732		pid = current->tgid;
   3733
   3734	if (ctx->direct_io)
   3735		iov_iter_advance(&direct_iov, offset - ctx->pos);
   3736
   3737	do {
   3738		if (open_file->invalidHandle) {
   3739			rc = cifs_reopen_file(open_file, true);
   3740			if (rc == -EAGAIN)
   3741				continue;
   3742			else if (rc)
   3743				break;
   3744		}
   3745
   3746		if (cifs_sb->ctx->rsize == 0)
   3747			cifs_sb->ctx->rsize =
   3748				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
   3749							     cifs_sb->ctx);
   3750
   3751		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
   3752						   &rsize, credits);
   3753		if (rc)
   3754			break;
   3755
   3756		cur_len = min_t(const size_t, len, rsize);
   3757
   3758		if (ctx->direct_io) {
   3759			ssize_t result;
   3760
   3761			result = iov_iter_get_pages_alloc(
   3762					&direct_iov, &pagevec,
   3763					cur_len, &start);
   3764			if (result < 0) {
   3765				cifs_dbg(VFS,
   3766					 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
   3767					 result, iov_iter_type(&direct_iov),
   3768					 direct_iov.iov_offset,
   3769					 direct_iov.count);
   3770				dump_stack();
   3771
   3772				rc = result;
   3773				add_credits_and_wake_if(server, credits, 0);
   3774				break;
   3775			}
   3776			cur_len = (size_t)result;
   3777			iov_iter_advance(&direct_iov, cur_len);
   3778
   3779			rdata = cifs_readdata_direct_alloc(
   3780					pagevec, cifs_uncached_readv_complete);
   3781			if (!rdata) {
   3782				add_credits_and_wake_if(server, credits, 0);
   3783				rc = -ENOMEM;
   3784				break;
   3785			}
   3786
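			/*
			 * Worked example with 4K pages: cur_len = 10000 and
			 * start = 512 give npages = (10000 + 512 + 4095) / 4096 = 3
			 * and tailsz = 10000 - (4096 - 512) - 1 * 4096 = 2320 bytes
			 * in the final page (3584 + 4096 + 2320 == 10000).
			 */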
   3787			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
   3788			rdata->page_offset = start;
   3789			rdata->tailsz = npages > 1 ?
   3790				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
   3791				cur_len;
   3792
   3793		} else {
   3795			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
   3796			/* allocate a readdata struct */
   3797			rdata = cifs_readdata_alloc(npages,
   3798					    cifs_uncached_readv_complete);
   3799			if (!rdata) {
   3800				add_credits_and_wake_if(server, credits, 0);
   3801				rc = -ENOMEM;
   3802				break;
   3803			}
   3804
   3805			rc = cifs_read_allocate_pages(rdata, npages);
   3806			if (rc) {
   3807				kvfree(rdata->pages);
   3808				kfree(rdata);
   3809				add_credits_and_wake_if(server, credits, 0);
   3810				break;
   3811			}
   3812
   3813			rdata->tailsz = PAGE_SIZE;
   3814		}
   3815
   3816		rdata->server = server;
   3817		rdata->cfile = cifsFileInfo_get(open_file);
   3818		rdata->nr_pages = npages;
   3819		rdata->offset = offset;
   3820		rdata->bytes = cur_len;
   3821		rdata->pid = pid;
   3822		rdata->pagesz = PAGE_SIZE;
   3823		rdata->read_into_pages = cifs_uncached_read_into_pages;
   3824		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
   3825		rdata->credits = credits_on_stack;
   3826		rdata->ctx = ctx;
   3827		kref_get(&ctx->refcount);
   3828
   3829		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
   3830
   3831		if (!rc) {
   3832			if (rdata->cfile->invalidHandle)
   3833				rc = -EAGAIN;
   3834			else
   3835				rc = server->ops->async_readv(rdata);
   3836		}
   3837
   3838		if (rc) {
   3839			add_credits_and_wake_if(server, &rdata->credits, 0);
   3840			kref_put(&rdata->refcount,
   3841				cifs_uncached_readdata_release);
   3842			if (rc == -EAGAIN) {
   3843				iov_iter_revert(&direct_iov, cur_len);
   3844				continue;
   3845			}
   3846			break;
   3847		}
   3848
   3849		list_add_tail(&rdata->list, rdata_list);
   3850		offset += cur_len;
   3851		len -= cur_len;
   3852	} while (len > 0);
   3853
   3854	return rc;
   3855}
   3856
   3857static void
   3858collect_uncached_read_data(struct cifs_aio_ctx *ctx)
   3859{
   3860	struct cifs_readdata *rdata, *tmp;
   3861	struct iov_iter *to = &ctx->iter;
   3862	struct cifs_sb_info *cifs_sb;
   3863	int rc;
   3864
   3865	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
   3866
   3867	mutex_lock(&ctx->aio_mutex);
   3868
   3869	if (list_empty(&ctx->list)) {
   3870		mutex_unlock(&ctx->aio_mutex);
   3871		return;
   3872	}
   3873
   3874	rc = ctx->rc;
   3875	/* the loop below should proceed in the order of increasing offsets */
   3876again:
   3877	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
   3878		if (!rc) {
   3879			if (!try_wait_for_completion(&rdata->done)) {
   3880				mutex_unlock(&ctx->aio_mutex);
   3881				return;
   3882			}
   3883
   3884			if (rdata->result == -EAGAIN) {
   3885				/* resend call if it's a retryable error */
   3886				struct list_head tmp_list;
   3887				unsigned int got_bytes = rdata->got_bytes;
   3888
   3889				list_del_init(&rdata->list);
   3890				INIT_LIST_HEAD(&tmp_list);
   3891
   3892				/*
   3893				 * Got a part of data and then reconnect has
   3894				 * happened -- fill the buffer and continue
   3895				 * reading.
   3896				 */
   3897				if (got_bytes && got_bytes < rdata->bytes) {
   3898					rc = 0;
   3899					if (!ctx->direct_io)
   3900						rc = cifs_readdata_to_iov(rdata, to);
   3901					if (rc) {
   3902						kref_put(&rdata->refcount,
   3903							cifs_uncached_readdata_release);
   3904						continue;
   3905					}
   3906				}
   3907
   3908				if (ctx->direct_io) {
   3909					/*
   3910					 * Re-use rdata as this is a
   3911					 * direct I/O
   3912					 */
   3913					rc = cifs_resend_rdata(
   3914						rdata,
   3915						&tmp_list, ctx);
   3916				} else {
   3917					rc = cifs_send_async_read(
   3918						rdata->offset + got_bytes,
   3919						rdata->bytes - got_bytes,
   3920						rdata->cfile, cifs_sb,
   3921						&tmp_list, ctx);
   3922
   3923					kref_put(&rdata->refcount,
   3924						cifs_uncached_readdata_release);
   3925				}
   3926
   3927				list_splice(&tmp_list, &ctx->list);
   3928
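				/*
				 * Restart the scan: the resent requests were
				 * spliced at the head of the list and must be
				 * collected in offset order.
				 */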
   3929				goto again;
   3930			} else if (rdata->result)
   3931				rc = rdata->result;
   3932			else if (!ctx->direct_io)
   3933				rc = cifs_readdata_to_iov(rdata, to);
   3934
   3935			/* if there was a short read -- discard anything left */
   3936			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
   3937				rc = -ENODATA;
   3938
   3939			ctx->total_len += rdata->got_bytes;
   3940		}
   3941		list_del_init(&rdata->list);
   3942		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
   3943	}
   3944
   3945	if (!ctx->direct_io)
   3946		ctx->total_len = ctx->len - iov_iter_count(to);
   3947
   3948	/* mask nodata case */
   3949	if (rc == -ENODATA)
   3950		rc = 0;
   3951
   3952	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
   3953
   3954	mutex_unlock(&ctx->aio_mutex);
   3955
   3956	if (ctx->iocb && ctx->iocb->ki_complete)
   3957		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
   3958	else
   3959		complete(&ctx->done);
   3960}
   3961
   3962static ssize_t __cifs_readv(
   3963	struct kiocb *iocb, struct iov_iter *to, bool direct)
   3964{
   3965	size_t len;
   3966	struct file *file = iocb->ki_filp;
   3967	struct cifs_sb_info *cifs_sb;
   3968	struct cifsFileInfo *cfile;
   3969	struct cifs_tcon *tcon;
   3970	ssize_t rc, total_read = 0;
   3971	loff_t offset = iocb->ki_pos;
   3972	struct cifs_aio_ctx *ctx;
   3973
   3974	/*
   3975	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, so
   3976	 * fall back to the data copy read path. This could be improved
   3977	 * by getting pages directly in ITER_KVEC.
   3978	 */
   3979	if (direct && iov_iter_is_kvec(to)) {
   3980		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
   3981		direct = false;
   3982	}
   3983
   3984	len = iov_iter_count(to);
   3985	if (!len)
   3986		return 0;
   3987
   3988	cifs_sb = CIFS_FILE_SB(file);
   3989	cfile = file->private_data;
   3990	tcon = tlink_tcon(cfile->tlink);
   3991
   3992	if (!tcon->ses->server->ops->async_readv)
   3993		return -ENOSYS;
   3994
   3995	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
   3996		cifs_dbg(FYI, "attempting read on write only file instance\n");
   3997
   3998	ctx = cifs_aio_ctx_alloc();
   3999	if (!ctx)
   4000		return -ENOMEM;
   4001
   4002	ctx->cfile = cifsFileInfo_get(cfile);
   4003
   4004	if (!is_sync_kiocb(iocb))
   4005		ctx->iocb = iocb;
   4006
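	/*
	 * Pages backing a user iovec are written to by this read;
	 * cifs_aio_ctx_release() marks them dirty when the ctx is freed.
	 */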
   4007	if (iter_is_iovec(to))
   4008		ctx->should_dirty = true;
   4009
   4010	if (direct) {
   4011		ctx->pos = offset;
   4012		ctx->direct_io = true;
   4013		ctx->iter = *to;
   4014		ctx->len = len;
   4015	} else {
   4016		rc = setup_aio_ctx_iter(ctx, to, READ);
   4017		if (rc) {
   4018			kref_put(&ctx->refcount, cifs_aio_ctx_release);
   4019			return rc;
   4020		}
   4021		len = ctx->len;
   4022	}
   4023
   4024	/* grab the lock here because read response handlers can access ctx */
   4025	mutex_lock(&ctx->aio_mutex);
   4026
   4027	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
   4028
   4029	/* if at least one read request was sent successfully, reset rc */
   4030	if (!list_empty(&ctx->list))
   4031		rc = 0;
   4032
   4033	mutex_unlock(&ctx->aio_mutex);
   4034
   4035	if (rc) {
   4036		kref_put(&ctx->refcount, cifs_aio_ctx_release);
   4037		return rc;
   4038	}
   4039
   4040	if (!is_sync_kiocb(iocb)) {
   4041		kref_put(&ctx->refcount, cifs_aio_ctx_release);
   4042		return -EIOCBQUEUED;
   4043	}
   4044
   4045	rc = wait_for_completion_killable(&ctx->done);
   4046	if (rc) {
   4047		mutex_lock(&ctx->aio_mutex);
   4048		ctx->rc = rc = -EINTR;
   4049		total_read = ctx->total_len;
   4050		mutex_unlock(&ctx->aio_mutex);
   4051	} else {
   4052		rc = ctx->rc;
   4053		total_read = ctx->total_len;
   4054	}
   4055
   4056	kref_put(&ctx->refcount, cifs_aio_ctx_release);
   4057
   4058	if (total_read) {
   4059		iocb->ki_pos += total_read;
   4060		return total_read;
   4061	}
   4062	return rc;
   4063}
   4064
   4065ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
   4066{
   4067	return __cifs_readv(iocb, to, true);
   4068}
   4069
   4070ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
   4071{
   4072	return __cifs_readv(iocb, to, false);
   4073}
   4074
   4075ssize_t
   4076cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
   4077{
   4078	struct inode *inode = file_inode(iocb->ki_filp);
   4079	struct cifsInodeInfo *cinode = CIFS_I(inode);
   4080	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
   4081	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
   4082						iocb->ki_filp->private_data;
   4083	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
   4084	int rc = -EACCES;
   4085
   4086	/*
   4087	 * In strict cache mode we need to read from the server all the time
   4088	 * if we don't have level II oplock because the server can delay mtime
   4089	 * change - so we can't make a decision about inode invalidating.
   4090	 * And we can also fail with pagereading if there are mandatory locks
   4091	 * on pages affected by this read but not on the region from pos to
   4092	 * pos+len-1.
   4093	 */
   4094	if (!CIFS_CACHE_READ(cinode))
   4095		return cifs_user_readv(iocb, to);
   4096
   4097	if (cap_unix(tcon->ses) &&
   4098	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
   4099	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
   4100		return generic_file_read_iter(iocb, to);
   4101
   4102	/*
   4103	 * We need to hold the sem to be sure nobody modifies the lock list
   4104	 * with a brlock that prevents reading.
   4105	 */
   4106	down_read(&cinode->lock_sem);
   4107	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
   4108				     tcon->ses->server->vals->shared_lock_type,
   4109				     0, NULL, CIFS_READ_OP))
   4110		rc = generic_file_read_iter(iocb, to);
   4111	up_read(&cinode->lock_sem);
   4112	return rc;
   4113}
   4114
   4115static ssize_t
   4116cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
   4117{
   4118	int rc = -EACCES;
   4119	unsigned int bytes_read = 0;
   4120	unsigned int total_read;
   4121	unsigned int current_read_size;
   4122	unsigned int rsize;
   4123	struct cifs_sb_info *cifs_sb;
   4124	struct cifs_tcon *tcon;
   4125	struct TCP_Server_Info *server;
   4126	unsigned int xid;
   4127	char *cur_offset;
   4128	struct cifsFileInfo *open_file;
   4129	struct cifs_io_parms io_parms = {0};
   4130	int buf_type = CIFS_NO_BUFFER;
   4131	__u32 pid;
   4132
   4133	xid = get_xid();
   4134	cifs_sb = CIFS_FILE_SB(file);
   4135
   4136	/* FIXME: set up handlers for larger reads and/or convert to async */
   4137	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
   4138
   4139	if (file->private_data == NULL) {
   4140		rc = -EBADF;
   4141		free_xid(xid);
   4142		return rc;
   4143	}
   4144	open_file = file->private_data;
   4145	tcon = tlink_tcon(open_file->tlink);
   4146	server = cifs_pick_channel(tcon->ses);
   4147
   4148	if (!server->ops->sync_read) {
   4149		free_xid(xid);
   4150		return -ENOSYS;
   4151	}
   4152
   4153	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
   4154		pid = open_file->pid;
   4155	else
   4156		pid = current->tgid;
   4157
   4158	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
   4159		cifs_dbg(FYI, "attempting read on write only file instance\n");
   4160
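	/* issue reads of at most rsize bytes, retrying each chunk on -EAGAIN */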
   4161	for (total_read = 0, cur_offset = read_data; read_size > total_read;
   4162	     total_read += bytes_read, cur_offset += bytes_read) {
   4163		do {
   4164			current_read_size = min_t(uint, read_size - total_read,
   4165						  rsize);
   4166			/*
   4167			 * For Windows ME and 9x we do not want to request more
   4168			 * than was negotiated, since the server will then refuse
   4169			 * the read.
   4170			 */
   4171			if (!(tcon->ses->capabilities &
   4172				tcon->ses->server->vals->cap_large_files)) {
   4173				current_read_size = min_t(uint,
   4174					current_read_size, CIFSMaxBufSize);
   4175			}
   4176			if (open_file->invalidHandle) {
   4177				rc = cifs_reopen_file(open_file, true);
   4178				if (rc != 0)
   4179					break;
   4180			}
   4181			io_parms.pid = pid;
   4182			io_parms.tcon = tcon;
   4183			io_parms.offset = *offset;
   4184			io_parms.length = current_read_size;
   4185			io_parms.server = server;
   4186			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
   4187						    &bytes_read, &cur_offset,
   4188						    &buf_type);
   4189		} while (rc == -EAGAIN);
   4190
   4191		if (rc || (bytes_read == 0)) {
   4192			if (total_read) {
   4193				break;
   4194			} else {
   4195				free_xid(xid);
   4196				return rc;
   4197			}
   4198		} else {
   4199			cifs_stats_bytes_read(tcon, total_read);
   4200			*offset += bytes_read;
   4201		}
   4202	}
   4203	free_xid(xid);
   4204	return total_read;
   4205}
   4206
   4207/*
   4208 * If the page is mmap'ed into a process' page tables, then we need to make
   4209 * sure that it doesn't change while being written back.
   4210 */
   4211static vm_fault_t
   4212cifs_page_mkwrite(struct vm_fault *vmf)
   4213{
   4214	struct page *page = vmf->page;
   4215
   4216	/* Wait for the page to be written to the cache before we allow it to
   4217	 * be modified.  We then assume the entire page will need writing back.
   4218	 */
   4219#ifdef CONFIG_CIFS_FSCACHE
   4220	if (PageFsCache(page) &&
   4221	    wait_on_page_fscache_killable(page) < 0)
   4222		return VM_FAULT_RETRY;
   4223#endif
   4224
   4225	wait_on_page_writeback(page);
   4226
   4227	if (lock_page_killable(page) < 0)
   4228		return VM_FAULT_RETRY;
   4229	return VM_FAULT_LOCKED;
   4230}
   4231
   4232static const struct vm_operations_struct cifs_file_vm_ops = {
   4233	.fault = filemap_fault,
   4234	.map_pages = filemap_map_pages,
   4235	.page_mkwrite = cifs_page_mkwrite,
   4236};
   4237
   4238int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
   4239{
   4240	int xid, rc = 0;
   4241	struct inode *inode = file_inode(file);
   4242
   4243	xid = get_xid();
   4244
   4245	if (!CIFS_CACHE_READ(CIFS_I(inode)))
   4246		rc = cifs_zap_mapping(inode);
   4247	if (!rc)
   4248		rc = generic_file_mmap(file, vma);
   4249	if (!rc)
   4250		vma->vm_ops = &cifs_file_vm_ops;
   4251
   4252	free_xid(xid);
   4253	return rc;
   4254}
   4255
   4256int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
   4257{
   4258	int rc, xid;
   4259
   4260	xid = get_xid();
   4261
   4262	rc = cifs_revalidate_file(file);
   4263	if (rc)
   4264		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
   4265			 rc);
   4266	if (!rc)
   4267		rc = generic_file_mmap(file, vma);
   4268	if (!rc)
   4269		vma->vm_ops = &cifs_file_vm_ops;
   4270
   4271	free_xid(xid);
   4272	return rc;
   4273}
   4274
   4275static void
   4276cifs_readv_complete(struct work_struct *work)
   4277{
   4278	unsigned int i, got_bytes;
   4279	struct cifs_readdata *rdata = container_of(work,
   4280						struct cifs_readdata, work);
   4281
   4282	got_bytes = rdata->got_bytes;
   4283	for (i = 0; i < rdata->nr_pages; i++) {
   4284		struct page *page = rdata->pages[i];
   4285
   4286		if (rdata->result == 0 ||
   4287		    (rdata->result == -EAGAIN && got_bytes)) {
   4288			flush_dcache_page(page);
   4289			SetPageUptodate(page);
   4290		} else
   4291			SetPageError(page);
   4292
   4293		if (rdata->result == 0 ||
   4294		    (rdata->result == -EAGAIN && got_bytes))
   4295			cifs_readpage_to_fscache(rdata->mapping->host, page);
   4296
   4297		unlock_page(page);
   4298
   4299		got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
   4300
   4301		put_page(page);
   4302		rdata->pages[i] = NULL;
   4303	}
   4304	kref_put(&rdata->refcount, cifs_readdata_release);
   4305}
   4306
   4307static int
   4308readpages_fill_pages(struct TCP_Server_Info *server,
   4309		     struct cifs_readdata *rdata, struct iov_iter *iter,
   4310		     unsigned int len)
   4311{
   4312	int result = 0;
   4313	unsigned int i;
   4314	u64 eof;
   4315	pgoff_t eof_index;
   4316	unsigned int nr_pages = rdata->nr_pages;
   4317	unsigned int page_offset = rdata->page_offset;
   4318
   4319	/* determine the eof that the server (probably) has */
   4320	eof = CIFS_I(rdata->mapping->host)->server_eof;
   4321	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
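	/* e.g. with 4K pages, eof = 4097 gives eof_index = 1 */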
   4322	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
   4323
   4324	rdata->got_bytes = 0;
   4325	rdata->tailsz = PAGE_SIZE;
   4326	for (i = 0; i < nr_pages; i++) {
   4327		struct page *page = rdata->pages[i];
   4328		unsigned int to_read = rdata->pagesz;
   4329		size_t n;
   4330
   4331		if (i == 0)
   4332			to_read -= page_offset;
   4333		else
   4334			page_offset = 0;
   4335
   4336		n = to_read;
   4337
   4338		if (len >= to_read) {
   4339			len -= to_read;
   4340		} else if (len > 0) {
   4341			/* enough for partial page, fill and zero the rest */
   4342			zero_user(page, len + page_offset, to_read - len);
   4343			n = rdata->tailsz = len;
   4344			len = 0;
   4345		} else if (page->index > eof_index) {
   4346			/*
   4347			 * The VFS will not try to do readahead past the
   4348			 * i_size, but it's possible that we have outstanding
   4349			 * writes with gaps in the middle and the i_size hasn't
   4350			 * caught up yet. Populate those with zeroed out pages
   4351			 * to prevent the VFS from repeatedly attempting to
   4352			 * fill them until the writes are flushed.
   4353			 */
   4354			zero_user(page, 0, PAGE_SIZE);
   4355			flush_dcache_page(page);
   4356			SetPageUptodate(page);
   4357			unlock_page(page);
   4358			put_page(page);
   4359			rdata->pages[i] = NULL;
   4360			rdata->nr_pages--;
   4361			continue;
   4362		} else {
   4363			/* no need to hold page hostage */
   4364			unlock_page(page);
   4365			put_page(page);
   4366			rdata->pages[i] = NULL;
   4367			rdata->nr_pages--;
   4368			continue;
   4369		}
   4370
   4371		if (iter)
   4372			result = copy_page_from_iter(
   4373					page, page_offset, n, iter);
   4374#ifdef CONFIG_CIFS_SMB_DIRECT
   4375		else if (rdata->mr)
   4376			result = n;
   4377#endif
   4378		else
   4379			result = cifs_read_page_from_socket(
   4380					server, page, page_offset, n);
   4381		if (result < 0)
   4382			break;
   4383
   4384		rdata->got_bytes += result;
   4385	}
   4386
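	/* report partial progress unless the connection was aborted */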
   4387	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
   4388						rdata->got_bytes : result;
   4389}
   4390
   4391static int
   4392cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
   4393			       struct cifs_readdata *rdata, unsigned int len)
   4394{
   4395	return readpages_fill_pages(server, rdata, NULL, len);
   4396}
   4397
   4398static int
   4399cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
   4400			       struct cifs_readdata *rdata,
   4401			       struct iov_iter *iter)
   4402{
   4403	return readpages_fill_pages(server, rdata, iter, iter->count);
   4404}
   4405
   4406static void cifs_readahead(struct readahead_control *ractl)
   4407{
   4408	int rc;
   4409	struct cifsFileInfo *open_file = ractl->file->private_data;
   4410	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
   4411	struct TCP_Server_Info *server;
   4412	pid_t pid;
   4413	unsigned int xid, nr_pages, last_batch_size = 0, cache_nr_pages = 0;
   4414	pgoff_t next_cached = ULONG_MAX;
   4415	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
   4416		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
   4417	bool check_cache = caching;
   4418
   4419	xid = get_xid();
   4420
   4421	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
   4422		pid = open_file->pid;
   4423	else
   4424		pid = current->tgid;
   4425
   4426	rc = 0;
   4427	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
   4428
   4429	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
   4430		 __func__, ractl->file, ractl->mapping, readahead_count(ractl));
   4431
   4432	/*
   4433	 * Chop the readahead request up into rsize-sized read requests.
   4434	 */
   4435	while ((nr_pages = readahead_count(ractl) - last_batch_size)) {
   4436		unsigned int i, got, rsize;
   4437		struct page *page;
   4438		struct cifs_readdata *rdata;
   4439		struct cifs_credits credits_on_stack;
   4440		struct cifs_credits *credits = &credits_on_stack;
   4441		pgoff_t index = readahead_index(ractl) + last_batch_size;
   4442
   4443		/*
   4444		 * Find out if we have anything cached in the range of
   4445		 * interest, and if so, where the next chunk of cached data is.
   4446		 */
   4447		if (caching) {
   4448			if (check_cache) {
   4449				rc = cifs_fscache_query_occupancy(
   4450					ractl->mapping->host, index, nr_pages,
   4451					&next_cached, &cache_nr_pages);
   4452				if (rc < 0)
   4453					caching = false;
   4454				check_cache = false;
   4455			}
   4456
   4457			if (index == next_cached) {
   4458				/*
   4459				 * TODO: Send a whole batch of pages to be read
   4460				 * by the cache.
   4461				 */
   4462				page = readahead_page(ractl);
   4463				last_batch_size = 1 << thp_order(page);
   4464				if (cifs_readpage_from_fscache(ractl->mapping->host,
   4465							       page) < 0) {
   4466					/*
   4467					 * TODO: Deal with cache read failure
   4468					 * here, but for the moment, delegate
   4469					 * that to readpage.
   4470					 */
   4471					caching = false;
   4472				}
   4473				unlock_page(page);
   4474				next_cached++;
   4475				cache_nr_pages--;
   4476				if (cache_nr_pages == 0)
   4477					check_cache = true;
   4478				continue;
   4479			}
   4480		}
   4481
   4482		if (open_file->invalidHandle) {
   4483			rc = cifs_reopen_file(open_file, true);
   4484			if (rc) {
   4485				if (rc == -EAGAIN)
   4486					continue;
   4487				break;
   4488			}
   4489		}
   4490
   4491		if (cifs_sb->ctx->rsize == 0)
   4492			cifs_sb->ctx->rsize =
   4493				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
   4494							     cifs_sb->ctx);
   4495
   4496		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
   4497						   &rsize, credits);
   4498		if (rc)
   4499			break;
   4500		nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl));
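		/*
		 * Do not read past the start of the next cached chunk; those
		 * pages will be served from fscache instead.
		 */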
   4501		nr_pages = min_t(size_t, nr_pages, next_cached - index);
   4502
   4503		/*
   4504		 * Give up immediately if rsize is too small to read an entire
   4505		 * page. The VFS will fall back to readpage. We should never
   4506		 * reach this point, however, since we set ra_pages to 0 when
   4507		 * the rsize is smaller than a cache page.
   4508		 */
   4509		if (unlikely(!nr_pages)) {
   4510			add_credits_and_wake_if(server, credits, 0);
   4511			break;
   4512		}
   4513
   4514		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
   4515		if (!rdata) {
   4516			/* best to give up if we're out of mem */
   4517			add_credits_and_wake_if(server, credits, 0);
   4518			break;
   4519		}
   4520
   4521		got = __readahead_batch(ractl, rdata->pages, nr_pages);
   4522		if (got != nr_pages) {
   4523			pr_warn("__readahead_batch() returned %u/%u\n",
   4524				got, nr_pages);
   4525			nr_pages = got;
   4526		}
   4527
   4528		rdata->nr_pages = nr_pages;
   4529		rdata->bytes	= readahead_batch_length(ractl);
   4530		rdata->cfile	= cifsFileInfo_get(open_file);
   4531		rdata->server	= server;
   4532		rdata->mapping	= ractl->mapping;
   4533		rdata->offset	= readahead_pos(ractl);
   4534		rdata->pid	= pid;
   4535		rdata->pagesz	= PAGE_SIZE;
   4536		rdata->tailsz	= PAGE_SIZE;
   4537		rdata->read_into_pages = cifs_readpages_read_into_pages;
   4538		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
   4539		rdata->credits	= credits_on_stack;
   4540
   4541		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
   4542		if (!rc) {
   4543			if (rdata->cfile->invalidHandle)
   4544				rc = -EAGAIN;
   4545			else
   4546				rc = server->ops->async_readv(rdata);
   4547		}
   4548
   4549		if (rc) {
   4550			add_credits_and_wake_if(server, &rdata->credits, 0);
   4551			for (i = 0; i < rdata->nr_pages; i++) {
   4552				page = rdata->pages[i];
   4553				unlock_page(page);
   4554				put_page(page);
   4555			}
   4556			/* Fall back to readpage in error/reconnect cases */
   4557			kref_put(&rdata->refcount, cifs_readdata_release);
   4558			break;
   4559		}
   4560
   4561		kref_put(&rdata->refcount, cifs_readdata_release);
   4562		last_batch_size = nr_pages;
   4563	}
   4564
   4565	free_xid(xid);
   4566}
   4567
   4568/*
   4569 * cifs_readpage_worker must be called with the page pinned
   4570 */
   4571static int cifs_readpage_worker(struct file *file, struct page *page,
   4572	loff_t *poffset)
   4573{
   4574	char *read_data;
   4575	int rc;
   4576
   4577	/* Is the page cached? */
   4578	rc = cifs_readpage_from_fscache(file_inode(file), page);
   4579	if (rc == 0)
   4580		goto read_complete;
   4581
   4582	read_data = kmap(page);
   4583	/* for reads over a certain size we could initiate async read-ahead */
   4584
   4585	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
   4586
   4587	if (rc < 0)
   4588		goto io_error;
   4589	else
   4590		cifs_dbg(FYI, "Bytes read %d\n", rc);
   4591
   4592	/* we do not want atime to be less than mtime; it broke some apps */
   4593	file_inode(file)->i_atime = current_time(file_inode(file));
   4594	if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
   4595		file_inode(file)->i_atime = file_inode(file)->i_mtime;
   4596	else
   4597		file_inode(file)->i_atime = current_time(file_inode(file));
   4598
   4599	if (PAGE_SIZE > rc)
   4600		memset(read_data + rc, 0, PAGE_SIZE - rc);
   4601
   4602	flush_dcache_page(page);
   4603	SetPageUptodate(page);
   4604
   4605	/* send this page to the cache */
   4606	cifs_readpage_to_fscache(file_inode(file), page);
   4607
   4608	rc = 0;
   4609
   4610io_error:
   4611	kunmap(page);
   4612	unlock_page(page);
   4613
   4614read_complete:
   4615	return rc;
   4616}
   4617
   4618static int cifs_read_folio(struct file *file, struct folio *folio)
   4619{
   4620	struct page *page = &folio->page;
   4621	loff_t offset = page_file_offset(page);
   4622	int rc = -EACCES;
   4623	unsigned int xid;
   4624
   4625	xid = get_xid();
   4626
   4627	if (file->private_data == NULL) {
   4628		rc = -EBADF;
   4629		free_xid(xid);
   4630		return rc;
   4631	}
   4632
   4633	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
   4634		 page, (int)offset, (int)offset);
   4635
   4636	rc = cifs_readpage_worker(file, page, &offset);
   4637
   4638	free_xid(xid);
   4639	return rc;
   4640}
   4641
   4642static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
   4643{
   4644	struct cifsFileInfo *open_file;
   4645
   4646	spin_lock(&cifs_inode->open_file_lock);
   4647	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
   4648		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
   4649			spin_unlock(&cifs_inode->open_file_lock);
   4650			return 1;
   4651		}
   4652	}
   4653	spin_unlock(&cifs_inode->open_file_lock);
   4654	return 0;
   4655}
   4656
   4657	/* We do not want to update the file size from the server for inodes
   4658	   open for write, to avoid races with writepage extending the file.
   4659	   In the future we could consider allowing a refresh of the inode
   4660	   only on increases in the file size, but this is tricky to do
   4661	   without racing with writebehind page caching in the current
   4662	   Linux kernel design. */
   4663bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
   4664{
   4665	if (!cifsInode)
   4666		return true;
   4667
   4668	if (is_inode_writable(cifsInode)) {
   4669		/* This inode is open for write at least once */
   4670		struct cifs_sb_info *cifs_sb;
   4671
   4672		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
   4673		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
   4674			/* since there is no page cache to corrupt on directio,
   4675			   we can change the size safely */
   4676			return true;
   4677		}
   4678
   4679		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
   4680			return true;
   4681
   4682		return false;
   4683	} else
   4684		return true;
   4685}
   4686
   4687static int cifs_write_begin(struct file *file, struct address_space *mapping,
   4688			loff_t pos, unsigned len,
   4689			struct page **pagep, void **fsdata)
   4690{
   4691	int oncethru = 0;
   4692	pgoff_t index = pos >> PAGE_SHIFT;
   4693	loff_t offset = pos & (PAGE_SIZE - 1);
   4694	loff_t page_start = pos & PAGE_MASK;
   4695	loff_t i_size;
   4696	struct page *page;
   4697	int rc = 0;
   4698
   4699	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
   4700
   4701start:
   4702	page = grab_cache_page_write_begin(mapping, index);
   4703	if (!page) {
   4704		rc = -ENOMEM;
   4705		goto out;
   4706	}
   4707
   4708	if (PageUptodate(page))
   4709		goto out;
   4710
   4711	/*
   4712	 * If we write a full page it will be up to date, no need to read from
   4713	 * the server. If the write is short, we'll end up doing a sync write
   4714	 * instead.
   4715	 */
   4716	if (len == PAGE_SIZE)
   4717		goto out;
   4718
   4719	/*
   4720	 * optimize away the read when we have an oplock, and we're not
   4721	 * expecting to use any of the data we'd be reading in. That
   4722	 * is, when the page lies beyond the EOF, or straddles the EOF
   4723	 * and the write will cover all of the existing data.
   4724	 */
   4725	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
   4726		i_size = i_size_read(mapping->host);
   4727		if (page_start >= i_size ||
   4728		    (offset == 0 && (pos + len) >= i_size)) {
   4729			zero_user_segments(page, 0, offset,
   4730					   offset + len,
   4731					   PAGE_SIZE);
   4732			/*
   4733			 * PageChecked means that the parts of the page
   4734			 * to which we're not writing are considered up
   4735			 * to date. Once the data is copied to the
   4736			 * page, it can be set uptodate.
   4737			 */
   4738			SetPageChecked(page);
   4739			goto out;
   4740		}
   4741	}
   4742
   4743	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
   4744		/*
   4745		 * might as well read a page; it is fast enough. If we get
   4746		 * an error, we don't need to return it. cifs_write_end will
   4747		 * do a sync write instead since PG_uptodate isn't set.
   4748		 */
   4749		cifs_readpage_worker(file, page, &page_start);
   4750		put_page(page);
   4751		oncethru = 1;
   4752		goto start;
   4753	} else {
   4754		/* we could try using another file handle if there is one -
   4755		   but how would we lock it to prevent the close of that handle
   4756		   racing with this read? In any case this will be written out
   4757		   by write_end, so it is fine */
   4758	}
   4759out:
   4760	*pagep = page;
   4761	return rc;
   4762}
   4763
   4764static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
   4765{
   4766	if (folio_test_private(folio))
   4767		return 0;
   4768	if (folio_test_fscache(folio)) {
   4769		if (current_is_kswapd() || !(gfp & __GFP_FS))
   4770			return false;
   4771		folio_wait_fscache(folio);
   4772	}
   4773	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
   4774	return true;
   4775}
   4776
   4777static void cifs_invalidate_folio(struct folio *folio, size_t offset,
   4778				 size_t length)
   4779{
   4780	folio_wait_fscache(folio);
   4781}
   4782
   4783static int cifs_launder_folio(struct folio *folio)
   4784{
   4785	int rc = 0;
   4786	loff_t range_start = folio_pos(folio);
   4787	loff_t range_end = range_start + folio_size(folio);
   4788	struct writeback_control wbc = {
   4789		.sync_mode = WB_SYNC_ALL,
   4790		.nr_to_write = 0,
   4791		.range_start = range_start,
   4792		.range_end = range_end,
   4793	};
   4794
   4795	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
   4796
   4797	if (folio_clear_dirty_for_io(folio))
   4798		rc = cifs_writepage_locked(&folio->page, &wbc);
   4799
   4800	folio_wait_fscache(folio);
   4801	return rc;
   4802}
   4803
   4804void cifs_oplock_break(struct work_struct *work)
   4805{
   4806	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
   4807						  oplock_break);
   4808	struct inode *inode = d_inode(cfile->dentry);
   4809	struct cifsInodeInfo *cinode = CIFS_I(inode);
   4810	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
   4811	struct TCP_Server_Info *server = tcon->ses->server;
   4812	int rc = 0;
   4813	bool purge_cache = false;
   4814	bool is_deferred = false;
   4815	struct cifs_deferred_close *dclose;
   4816
   4817	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
   4818			TASK_UNINTERRUPTIBLE);
   4819
   4820	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
   4821				      cfile->oplock_epoch, &purge_cache);
   4822
   4823	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
   4824						cifs_has_mand_locks(cinode)) {
   4825		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
   4826			 inode);
   4827		cinode->oplock = 0;
   4828	}
   4829
   4830	if (inode && S_ISREG(inode->i_mode)) {
   4831		if (CIFS_CACHE_READ(cinode))
   4832			break_lease(inode, O_RDONLY);
   4833		else
   4834			break_lease(inode, O_WRONLY);
   4835		rc = filemap_fdatawrite(inode->i_mapping);
   4836		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
   4837			rc = filemap_fdatawait(inode->i_mapping);
   4838			mapping_set_error(inode->i_mapping, rc);
   4839			cifs_zap_mapping(inode);
   4840		}
   4841		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
   4842		if (CIFS_CACHE_WRITE(cinode))
   4843			goto oplock_break_ack;
   4844	}
   4845
   4846	rc = cifs_push_locks(cfile);
   4847	if (rc)
   4848		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
   4849
   4850oplock_break_ack:
   4851	/*
   4852	 * When an oplock break is received and there are no active
   4853	 * file handles, only cached ones, schedule the deferred close
   4854	 * immediately so that a new open will not use the cached handle.
   4855	 */
   4856	spin_lock(&CIFS_I(inode)->deferred_lock);
   4857	is_deferred = cifs_is_deferred_close(cfile, &dclose);
   4858	spin_unlock(&CIFS_I(inode)->deferred_lock);
   4859	if (is_deferred &&
   4860	    cfile->deferred_close_scheduled &&
   4861	    delayed_work_pending(&cfile->deferred)) {
   4862		if (cancel_delayed_work(&cfile->deferred)) {
   4863			_cifsFileInfo_put(cfile, false, false);
   4864			goto oplock_break_done;
   4865		}
   4866	}
   4867	/*
   4868	 * Releasing a stale oplock after a recent reconnect of the smb session,
   4869	 * using a now incorrect file handle, is not a data integrity issue, but
   4870	 * do not bother sending an oplock release if the session is still
   4871	 * disconnected, since the oplock was already released by the server.
   4872	 */
   4873	if (!cfile->oplock_break_cancelled) {
   4874		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
   4875							     cinode);
   4876		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
   4877	}
   4878oplock_break_done:
   4879	_cifsFileInfo_put(cfile, false /* do not wait for ourselves */, false);
   4880	cifs_done_oplock_break(cinode);
   4881}
   4882
   4883/*
   4884 * The presence of cifs_direct_io() in the address space ops vector
   4885 * allows open() O_DIRECT flags which would have failed otherwise.
   4886 *
   4887 * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
   4888 * so this method should never be called.
   4889 *
   4890 * Direct IO is not yet supported in the cached mode.
   4891 */
   4892static ssize_t
   4893cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
   4894{
   4895	/*
   4896	 * FIXME
   4897	 * Eventually need to support direct IO for non forcedirectio mounts
   4898	 */
   4899	return -EINVAL;
   4900}
   4901
   4902static int cifs_swap_activate(struct swap_info_struct *sis,
   4903			      struct file *swap_file, sector_t *span)
   4904{
   4905	struct cifsFileInfo *cfile = swap_file->private_data;
   4906	struct inode *inode = swap_file->f_mapping->host;
   4907	unsigned long blocks;
   4908	long long isize;
   4909
   4910	cifs_dbg(FYI, "swap activate\n");
   4911
   4912	if (!swap_file->f_mapping->a_ops->swap_rw)
   4913		/* Cannot support swap */
   4914		return -EINVAL;
   4915
   4916	spin_lock(&inode->i_lock);
   4917	blocks = inode->i_blocks;
   4918	isize = inode->i_size;
   4919	spin_unlock(&inode->i_lock);
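	/*
	 * i_blocks counts 512-byte units; fewer blocks than the file size
	 * implies means the file has holes.
	 */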
   4920	if (blocks*512 < isize) {
   4921		pr_warn("swap activate: swapfile has holes\n");
   4922		return -EINVAL;
   4923	}
   4924	*span = sis->pages;
   4925
   4926	pr_warn_once("Swap support over SMB3 is experimental\n");
   4927
   4928	/*
   4929	 * TODO: consider adding ACL (or documenting how) to prevent other
   4930	 * users (on this or other systems) from reading it
   4931	 */
   4932
   4934	/* TODO: add sk_set_memalloc(inet) or similar */
   4935
   4936	if (cfile)
   4937		cfile->swapfile = true;
   4938	/*
   4939	 * TODO: Since file already open, we can't open with DENY_ALL here
   4940	 * but we could add call to grab a byte range lock to prevent others
   4941	 * from reading or writing the file
   4942	 */
   4943
   4944	sis->flags |= SWP_FS_OPS;
   4945	return add_swap_extent(sis, 0, sis->max, 0);
   4946}
   4947
   4948static void cifs_swap_deactivate(struct file *file)
   4949{
   4950	struct cifsFileInfo *cfile = file->private_data;
   4951
   4952	cifs_dbg(FYI, "swap deactivate\n");
   4953
   4954	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
   4955
   4956	if (cfile)
   4957		cfile->swapfile = false;
   4958
   4959	/* do we need to unpin (or unlock) the file? */
   4960}
   4961
   4962/*
   4963 * Mark a page as having been made dirty and thus needing writeback.  We also
   4964 * need to pin the cache object to write back to.
   4965 */
   4966#ifdef CONFIG_CIFS_FSCACHE
   4967static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
   4968{
   4969	return fscache_dirty_folio(mapping, folio,
   4970					cifs_inode_cookie(mapping->host));
   4971}
   4972#else
   4973#define cifs_dirty_folio filemap_dirty_folio
   4974#endif
   4975
   4976const struct address_space_operations cifs_addr_ops = {
   4977	.read_folio = cifs_read_folio,
   4978	.readahead = cifs_readahead,
   4979	.writepage = cifs_writepage,
   4980	.writepages = cifs_writepages,
   4981	.write_begin = cifs_write_begin,
   4982	.write_end = cifs_write_end,
   4983	.dirty_folio = cifs_dirty_folio,
   4984	.release_folio = cifs_release_folio,
   4985	.direct_IO = cifs_direct_io,
   4986	.invalidate_folio = cifs_invalidate_folio,
   4987	.launder_folio = cifs_launder_folio,
   4988	/*
   4989	 * TODO: investigate and, if useful, add a cifs_migratePage
   4990	 * helper (under CONFIG_MIGRATION) in the future, and also
   4991	 * investigate and add an is_dirty_writeback helper if needed
   4992	 */
   4993	.swap_activate = cifs_swap_activate,
   4994	.swap_deactivate = cifs_swap_deactivate,
   4995};
   4996
   4997/*
   4998 * cifs_readahead requires the server to support a buffer large enough to
   4999 * contain the header plus one complete page of data.  Otherwise, we need
   5000 * to leave cifs_readahead out of the address space operations.
   5001 */
   5002const struct address_space_operations cifs_addr_ops_smallbuf = {
   5003	.read_folio = cifs_read_folio,
   5004	.writepage = cifs_writepage,
   5005	.writepages = cifs_writepages,
   5006	.write_begin = cifs_write_begin,
   5007	.write_end = cifs_write_end,
   5008	.dirty_folio = cifs_dirty_folio,
   5009	.release_folio = cifs_release_folio,
   5010	.invalidate_folio = cifs_invalidate_folio,
   5011	.launder_folio = cifs_launder_folio,
   5012};