locks.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
locks.c (13123B)
      1// SPDX-License-Identifier: GPL-2.0
      2#include <linux/ceph/ceph_debug.h>
      3
      4#include <linux/file.h>
      5#include <linux/namei.h>
      6#include <linux/random.h>
      7
      8#include "super.h"
      9#include "mds_client.h"
     10#include <linux/ceph/pagelist.h>
     11
     12static u64 lock_secret;
     13static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
     14                                         struct ceph_mds_request *req);
     15
     16static inline u64 secure_addr(void *addr)
     17{
     18	u64 v = lock_secret ^ (u64)(unsigned long)addr;
     19	/*
     20	 * Set the most significant bit, so that MDS knows the 'owner'
     21	 * is sufficient to identify the owner of lock. (old code uses
     22	 * both 'owner' and 'pid')
     23	 */
     24	v |= (1ULL << 63);
     25	return v;
     26}
     27
     28void __init ceph_flock_init(void)
     29{
     30	get_random_bytes(&lock_secret, sizeof(lock_secret));
     31}
     32
     33static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
     34{
     35	struct ceph_file_info *fi = dst->fl_file->private_data;
     36	struct inode *inode = file_inode(dst->fl_file);
     37	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
     38	atomic_inc(&fi->num_locks);
     39}
     40
     41static void ceph_fl_release_lock(struct file_lock *fl)
     42{
     43	struct ceph_file_info *fi = fl->fl_file->private_data;
     44	struct inode *inode = file_inode(fl->fl_file);
     45	struct ceph_inode_info *ci = ceph_inode(inode);
     46	atomic_dec(&fi->num_locks);
     47	if (atomic_dec_and_test(&ci->i_filelock_ref)) {
     48		/* clear error when all locks are released */
     49		spin_lock(&ci->i_ceph_lock);
     50		ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
     51		spin_unlock(&ci->i_ceph_lock);
     52	}
     53}
     54
     55static const struct file_lock_operations ceph_fl_lock_ops = {
     56	.fl_copy_lock = ceph_fl_copy_lock,
     57	.fl_release_private = ceph_fl_release_lock,
     58};
     59
     60/*
     61 * Implement fcntl and flock locking functions.
     62 */
     63static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
     64			     int cmd, u8 wait, struct file_lock *fl)
     65{
     66	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
     67	struct ceph_mds_request *req;
     68	int err;
     69	u64 length = 0;
     70	u64 owner;
     71
     72	if (operation == CEPH_MDS_OP_SETFILELOCK) {
     73		/*
     74		 * increasing i_filelock_ref closes race window between
     75		 * handling request reply and adding file_lock struct to
     76		 * inode. Otherwise, auth caps may get trimmed in the
     77		 * window. Caller function will decrease the counter.
     78		 */
     79		fl->fl_ops = &ceph_fl_lock_ops;
     80		fl->fl_ops->fl_copy_lock(fl, NULL);
     81	}
     82
     83	if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
     84		wait = 0;
     85
     86	req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
     87	if (IS_ERR(req))
     88		return PTR_ERR(req);
     89	req->r_inode = inode;
     90	ihold(inode);
     91	req->r_num_caps = 1;
     92
     93	/* mds requires start and length rather than start and end */
     94	if (LLONG_MAX == fl->fl_end)
     95		length = 0;
     96	else
     97		length = fl->fl_end - fl->fl_start + 1;
     98
     99	owner = secure_addr(fl->fl_owner);
    100
    101	dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
    102	     "start: %llu, length: %llu, wait: %d, type: %d\n", (int)lock_type,
    103	     (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
    104	     wait, fl->fl_type);
    105
    106	req->r_args.filelock_change.rule = lock_type;
    107	req->r_args.filelock_change.type = cmd;
    108	req->r_args.filelock_change.owner = cpu_to_le64(owner);
    109	req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
    110	req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
    111	req->r_args.filelock_change.length = cpu_to_le64(length);
    112	req->r_args.filelock_change.wait = wait;
    113
    114	err = ceph_mdsc_submit_request(mdsc, inode, req);
    115	if (!err)
    116		err = ceph_mdsc_wait_request(mdsc, req, wait ?
    117					ceph_lock_wait_for_completion : NULL);
    118	if (!err && operation == CEPH_MDS_OP_GETFILELOCK) {
    119		fl->fl_pid = -le64_to_cpu(req->r_reply_info.filelock_reply->pid);
    120		if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
    121			fl->fl_type = F_RDLCK;
    122		else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
    123			fl->fl_type = F_WRLCK;
    124		else
    125			fl->fl_type = F_UNLCK;
    126
    127		fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
    128		length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
    129						 le64_to_cpu(req->r_reply_info.filelock_reply->length);
    130		if (length >= 1)
    131			fl->fl_end = length -1;
    132		else
    133			fl->fl_end = 0;
    134
    135	}
    136	ceph_mdsc_put_request(req);
    137	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
    138	     "length: %llu, wait: %d, type: %d, err code %d\n", (int)lock_type,
    139	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
    140	     length, wait, fl->fl_type, err);
    141	return err;
    142}
    143
    144static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
    145                                         struct ceph_mds_request *req)
    146{
    147	struct ceph_mds_request *intr_req;
    148	struct inode *inode = req->r_inode;
    149	int err, lock_type;
    150
    151	BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK);
    152	if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL)
    153		lock_type = CEPH_LOCK_FCNTL_INTR;
    154	else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK)
    155		lock_type = CEPH_LOCK_FLOCK_INTR;
    156	else
    157		BUG_ON(1);
    158	BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK);
    159
    160	err = wait_for_completion_interruptible(&req->r_completion);
    161	if (!err)
    162		return 0;
    163
    164	dout("ceph_lock_wait_for_completion: request %llu was interrupted\n",
    165	     req->r_tid);
    166
    167	mutex_lock(&mdsc->mutex);
    168	if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
    169		err = 0;
    170	} else {
    171		/*
    172		 * ensure we aren't running concurrently with
    173		 * ceph_fill_trace or ceph_readdir_prepopulate, which
    174		 * rely on locks (dir mutex) held by our caller.
    175		 */
    176		mutex_lock(&req->r_fill_mutex);
    177		req->r_err = err;
    178		set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
    179		mutex_unlock(&req->r_fill_mutex);
    180
    181		if (!req->r_session) {
    182			// haven't sent the request
    183			err = 0;
    184		}
    185	}
    186	mutex_unlock(&mdsc->mutex);
    187	if (!err)
    188		return 0;
    189
    190	intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
    191					    USE_AUTH_MDS);
    192	if (IS_ERR(intr_req))
    193		return PTR_ERR(intr_req);
    194
    195	intr_req->r_inode = inode;
    196	ihold(inode);
    197	intr_req->r_num_caps = 1;
    198
    199	intr_req->r_args.filelock_change = req->r_args.filelock_change;
    200	intr_req->r_args.filelock_change.rule = lock_type;
    201	intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK;
    202
    203	err = ceph_mdsc_do_request(mdsc, inode, intr_req);
    204	ceph_mdsc_put_request(intr_req);
    205
    206	if (err && err != -ERESTARTSYS)
    207		return err;
    208
    209	wait_for_completion_killable(&req->r_safe_completion);
    210	return 0;
    211}
    212
    213static int try_unlock_file(struct file *file, struct file_lock *fl)
    214{
    215	int err;
    216	unsigned int orig_flags = fl->fl_flags;
    217	fl->fl_flags |= FL_EXISTS;
    218	err = locks_lock_file_wait(file, fl);
    219	fl->fl_flags = orig_flags;
    220	if (err == -ENOENT) {
    221		if (!(orig_flags & FL_EXISTS))
    222			err = 0;
    223		return err;
    224	}
    225	return 1;
    226}
    227
    228/*
    229 * Attempt to set an fcntl lock.
    230 * For now, this just goes away to the server. Later it may be more awesome.
    231 */
    232int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
    233{
    234	struct inode *inode = file_inode(file);
    235	struct ceph_inode_info *ci = ceph_inode(inode);
    236	int err = 0;
    237	u16 op = CEPH_MDS_OP_SETFILELOCK;
    238	u8 wait = 0;
    239	u8 lock_cmd;
    240
    241	if (!(fl->fl_flags & FL_POSIX))
    242		return -ENOLCK;
    243
    244	if (ceph_inode_is_shutdown(inode))
    245		return -ESTALE;
    246
    247	dout("ceph_lock, fl_owner: %p\n", fl->fl_owner);
    248
    249	/* set wait bit as appropriate, then make command as Ceph expects it*/
    250	if (IS_GETLK(cmd))
    251		op = CEPH_MDS_OP_GETFILELOCK;
    252	else if (IS_SETLKW(cmd))
    253		wait = 1;
    254
    255	spin_lock(&ci->i_ceph_lock);
    256	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
    257		err = -EIO;
    258	}
    259	spin_unlock(&ci->i_ceph_lock);
    260	if (err < 0) {
    261		if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type)
    262			posix_lock_file(file, fl, NULL);
    263		return err;
    264	}
    265
    266	if (F_RDLCK == fl->fl_type)
    267		lock_cmd = CEPH_LOCK_SHARED;
    268	else if (F_WRLCK == fl->fl_type)
    269		lock_cmd = CEPH_LOCK_EXCL;
    270	else
    271		lock_cmd = CEPH_LOCK_UNLOCK;
    272
    273	if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK == fl->fl_type) {
    274		err = try_unlock_file(file, fl);
    275		if (err <= 0)
    276			return err;
    277	}
    278
    279	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
    280	if (!err) {
    281		if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK != fl->fl_type) {
    282			dout("mds locked, locking locally\n");
    283			err = posix_lock_file(file, fl, NULL);
    284			if (err) {
    285				/* undo! This should only happen if
    286				 * the kernel detects local
    287				 * deadlock. */
    288				ceph_lock_message(CEPH_LOCK_FCNTL, op, inode,
    289						  CEPH_LOCK_UNLOCK, 0, fl);
    290				dout("got %d on posix_lock_file, undid lock\n",
    291				     err);
    292			}
    293		}
    294	}
    295	return err;
    296}
    297
    298int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
    299{
    300	struct inode *inode = file_inode(file);
    301	struct ceph_inode_info *ci = ceph_inode(inode);
    302	int err = 0;
    303	u8 wait = 0;
    304	u8 lock_cmd;
    305
    306	if (!(fl->fl_flags & FL_FLOCK))
    307		return -ENOLCK;
    308
    309	if (ceph_inode_is_shutdown(inode))
    310		return -ESTALE;
    311
    312	dout("ceph_flock, fl_file: %p\n", fl->fl_file);
    313
    314	spin_lock(&ci->i_ceph_lock);
    315	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
    316		err = -EIO;
    317	}
    318	spin_unlock(&ci->i_ceph_lock);
    319	if (err < 0) {
    320		if (F_UNLCK == fl->fl_type)
    321			locks_lock_file_wait(file, fl);
    322		return err;
    323	}
    324
    325	if (IS_SETLKW(cmd))
    326		wait = 1;
    327
    328	if (F_RDLCK == fl->fl_type)
    329		lock_cmd = CEPH_LOCK_SHARED;
    330	else if (F_WRLCK == fl->fl_type)
    331		lock_cmd = CEPH_LOCK_EXCL;
    332	else
    333		lock_cmd = CEPH_LOCK_UNLOCK;
    334
    335	if (F_UNLCK == fl->fl_type) {
    336		err = try_unlock_file(file, fl);
    337		if (err <= 0)
    338			return err;
    339	}
    340
    341	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
    342				inode, lock_cmd, wait, fl);
    343	if (!err && F_UNLCK != fl->fl_type) {
    344		err = locks_lock_file_wait(file, fl);
    345		if (err) {
    346			ceph_lock_message(CEPH_LOCK_FLOCK,
    347					  CEPH_MDS_OP_SETFILELOCK,
    348					  inode, CEPH_LOCK_UNLOCK, 0, fl);
    349			dout("got %d on locks_lock_file_wait, undid lock\n", err);
    350		}
    351	}
    352	return err;
    353}
    354
    355/*
    356 * Fills in the passed counter variables, so you can prepare pagelist metadata
    357 * before calling ceph_encode_locks.
    358 */
    359void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
    360{
    361	struct file_lock *lock;
    362	struct file_lock_context *ctx;
    363
    364	*fcntl_count = 0;
    365	*flock_count = 0;
    366
    367	ctx = inode->i_flctx;
    368	if (ctx) {
    369		spin_lock(&ctx->flc_lock);
    370		list_for_each_entry(lock, &ctx->flc_posix, fl_list)
    371			++(*fcntl_count);
    372		list_for_each_entry(lock, &ctx->flc_flock, fl_list)
    373			++(*flock_count);
    374		spin_unlock(&ctx->flc_lock);
    375	}
    376	dout("counted %d flock locks and %d fcntl locks\n",
    377	     *flock_count, *fcntl_count);
    378}
    379
    380/*
    381 * Given a pointer to a lock, convert it to a ceph filelock
    382 */
    383static int lock_to_ceph_filelock(struct file_lock *lock,
    384				 struct ceph_filelock *cephlock)
    385{
    386	int err = 0;
    387	cephlock->start = cpu_to_le64(lock->fl_start);
    388	cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
    389	cephlock->client = cpu_to_le64(0);
    390	cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
    391	cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
    392
    393	switch (lock->fl_type) {
    394	case F_RDLCK:
    395		cephlock->type = CEPH_LOCK_SHARED;
    396		break;
    397	case F_WRLCK:
    398		cephlock->type = CEPH_LOCK_EXCL;
    399		break;
    400	case F_UNLCK:
    401		cephlock->type = CEPH_LOCK_UNLOCK;
    402		break;
    403	default:
    404		dout("Have unknown lock type %d\n", lock->fl_type);
    405		err = -EINVAL;
    406	}
    407
    408	return err;
    409}
    410
    411/*
    412 * Encode the flock and fcntl locks for the given inode into the ceph_filelock
    413 * array. Must be called with inode->i_lock already held.
    414 * If we encounter more of a specific lock type than expected, return -ENOSPC.
    415 */
    416int ceph_encode_locks_to_buffer(struct inode *inode,
    417				struct ceph_filelock *flocks,
    418				int num_fcntl_locks, int num_flock_locks)
    419{
    420	struct file_lock *lock;
    421	struct file_lock_context *ctx = inode->i_flctx;
    422	int err = 0;
    423	int seen_fcntl = 0;
    424	int seen_flock = 0;
    425	int l = 0;
    426
    427	dout("encoding %d flock and %d fcntl locks\n", num_flock_locks,
    428	     num_fcntl_locks);
    429
    430	if (!ctx)
    431		return 0;
    432
    433	spin_lock(&ctx->flc_lock);
    434	list_for_each_entry(lock, &ctx->flc_posix, fl_list) {
    435		++seen_fcntl;
    436		if (seen_fcntl > num_fcntl_locks) {
    437			err = -ENOSPC;
    438			goto fail;
    439		}
    440		err = lock_to_ceph_filelock(lock, &flocks[l]);
    441		if (err)
    442			goto fail;
    443		++l;
    444	}
    445	list_for_each_entry(lock, &ctx->flc_flock, fl_list) {
    446		++seen_flock;
    447		if (seen_flock > num_flock_locks) {
    448			err = -ENOSPC;
    449			goto fail;
    450		}
    451		err = lock_to_ceph_filelock(lock, &flocks[l]);
    452		if (err)
    453			goto fail;
    454		++l;
    455	}
    456fail:
    457	spin_unlock(&ctx->flc_lock);
    458	return err;
    459}
    460
    461/*
    462 * Copy the encoded flock and fcntl locks into the pagelist.
    463 * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
    464 * sequential flock locks.
    465 * Returns zero on success.
    466 */
    467int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
    468			   struct ceph_pagelist *pagelist,
    469			   int num_fcntl_locks, int num_flock_locks)
    470{
    471	int err = 0;
    472	__le32 nlocks;
    473
    474	nlocks = cpu_to_le32(num_fcntl_locks);
    475	err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
    476	if (err)
    477		goto out_fail;
    478
    479	if (num_fcntl_locks > 0) {
    480		err = ceph_pagelist_append(pagelist, flocks,
    481					   num_fcntl_locks * sizeof(*flocks));
    482		if (err)
    483			goto out_fail;
    484	}
    485
    486	nlocks = cpu_to_le32(num_flock_locks);
    487	err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
    488	if (err)
    489		goto out_fail;
    490
    491	if (num_flock_locks > 0) {
    492		err = ceph_pagelist_append(pagelist, &flocks[num_fcntl_locks],
    493					   num_flock_locks * sizeof(*flocks));
    494	}
    495out_fail:
    496	return err;
    497}