cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

glock.c (73876B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
      4 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
      5 */
      6
      7#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
      8
      9#include <linux/sched.h>
     10#include <linux/slab.h>
     11#include <linux/spinlock.h>
     12#include <linux/buffer_head.h>
     13#include <linux/delay.h>
     14#include <linux/sort.h>
     15#include <linux/hash.h>
     16#include <linux/jhash.h>
     17#include <linux/kallsyms.h>
     18#include <linux/gfs2_ondisk.h>
     19#include <linux/list.h>
     20#include <linux/wait.h>
     21#include <linux/module.h>
     22#include <linux/uaccess.h>
     23#include <linux/seq_file.h>
     24#include <linux/debugfs.h>
     25#include <linux/kthread.h>
     26#include <linux/freezer.h>
     27#include <linux/workqueue.h>
     28#include <linux/jiffies.h>
     29#include <linux/rcupdate.h>
     30#include <linux/rculist_bl.h>
     31#include <linux/bit_spinlock.h>
     32#include <linux/percpu.h>
     33#include <linux/list_sort.h>
     34#include <linux/lockref.h>
     35#include <linux/rhashtable.h>
     36
     37#include "gfs2.h"
     38#include "incore.h"
     39#include "glock.h"
     40#include "glops.h"
     41#include "inode.h"
     42#include "lops.h"
     43#include "meta_io.h"
     44#include "quota.h"
     45#include "super.h"
     46#include "util.h"
     47#include "bmap.h"
     48#define CREATE_TRACE_POINTS
     49#include "trace_gfs2.h"
     50
     51struct gfs2_glock_iter {
     52	struct gfs2_sbd *sdp;		/* incore superblock           */
     53	struct rhashtable_iter hti;	/* rhashtable iterator         */
     54	struct gfs2_glock *gl;		/* current glock struct        */
     55	loff_t last_pos;		/* last position               */
     56};
     57
     58typedef void (*glock_examiner) (struct gfs2_glock * gl);
     59
     60static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
     61static void __gfs2_glock_dq(struct gfs2_holder *gh);
     62
     63static struct dentry *gfs2_root;
     64static struct workqueue_struct *glock_workqueue;
     65struct workqueue_struct *gfs2_delete_workqueue;
     66static LIST_HEAD(lru_list);
     67static atomic_t lru_count = ATOMIC_INIT(0);
     68static DEFINE_SPINLOCK(lru_lock);
     69
     70#define GFS2_GL_HASH_SHIFT      15
     71#define GFS2_GL_HASH_SIZE       BIT(GFS2_GL_HASH_SHIFT)
     72
     73static const struct rhashtable_params ht_parms = {
     74	.nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
     75	.key_len = offsetofend(struct lm_lockname, ln_type),
     76	.key_offset = offsetof(struct gfs2_glock, gl_name),
     77	.head_offset = offsetof(struct gfs2_glock, gl_node),
     78};
     79
     80static struct rhashtable gl_hash_table;
     81
     82#define GLOCK_WAIT_TABLE_BITS 12
     83#define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS)
     84static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned;
     85
     86struct wait_glock_queue {
     87	struct lm_lockname *name;
     88	wait_queue_entry_t wait;
     89};
     90
     91static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode,
     92			       int sync, void *key)
     93{
     94	struct wait_glock_queue *wait_glock =
     95		container_of(wait, struct wait_glock_queue, wait);
     96	struct lm_lockname *wait_name = wait_glock->name;
     97	struct lm_lockname *wake_name = key;
     98
     99	if (wake_name->ln_sbd != wait_name->ln_sbd ||
    100	    wake_name->ln_number != wait_name->ln_number ||
    101	    wake_name->ln_type != wait_name->ln_type)
    102		return 0;
    103	return autoremove_wake_function(wait, mode, sync, key);
    104}
    105
    106static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name)
    107{
    108	u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0);
    109
    110	return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS);
    111}
    112
    113/**
    114 * wake_up_glock  -  Wake up waiters on a glock
    115 * @gl: the glock
    116 */
    117static void wake_up_glock(struct gfs2_glock *gl)
    118{
    119	wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name);
    120
    121	if (waitqueue_active(wq))
    122		__wake_up(wq, TASK_NORMAL, 1, &gl->gl_name);
    123}
    124
    125static void gfs2_glock_dealloc(struct rcu_head *rcu)
    126{
    127	struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
    128
    129	kfree(gl->gl_lksb.sb_lvbptr);
    130	if (gl->gl_ops->go_flags & GLOF_ASPACE) {
    131		struct gfs2_glock_aspace *gla =
    132			container_of(gl, struct gfs2_glock_aspace, glock);
    133		kmem_cache_free(gfs2_glock_aspace_cachep, gla);
    134	} else
    135		kmem_cache_free(gfs2_glock_cachep, gl);
    136}
    137
    138/**
    139 * glock_blocked_by_withdraw - determine if we can still use a glock
    140 * @gl: the glock
    141 *
    142 * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted
    143 * when we're withdrawn. For example, to maintain metadata integrity, we should
    144 * disallow the use of inode and rgrp glocks when withdrawn. Other glocks, like
    145 * iopen or the transaction glocks may be safely used because none of their
    146 * metadata goes through the journal. So in general, we should disallow all
    147 * glocks that are journaled, and allow all the others. One exception is:
    148 * we need to allow our active journal to be promoted and demoted so others
    149 * may recover it and we can reacquire it when they're done.
    150 */
    151static bool glock_blocked_by_withdraw(struct gfs2_glock *gl)
    152{
    153	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
    154
    155	if (likely(!gfs2_withdrawn(sdp)))
    156		return false;
    157	if (gl->gl_ops->go_flags & GLOF_NONDISK)
    158		return false;
    159	if (!sdp->sd_jdesc ||
    160	    gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr)
    161		return false;
    162	return true;
    163}
    164
    165void gfs2_glock_free(struct gfs2_glock *gl)
    166{
    167	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
    168
    169	gfs2_glock_assert_withdraw(gl, atomic_read(&gl->gl_revokes) == 0);
    170	rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
    171	smp_mb();
    172	wake_up_glock(gl);
    173	call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
    174	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
    175		wake_up(&sdp->sd_glock_wait);
    176}
    177
    178/**
    179 * gfs2_glock_hold() - increment reference count on glock
    180 * @gl: The glock to hold
    181 *
    182 */
    183
    184void gfs2_glock_hold(struct gfs2_glock *gl)
    185{
    186	GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
    187	lockref_get(&gl->gl_lockref);
    188}
    189
    190/**
    191 * demote_ok - Check to see if it's ok to unlock a glock
    192 * @gl: the glock
    193 *
    194 * Returns: 1 if it's ok
    195 */
    196
    197static int demote_ok(const struct gfs2_glock *gl)
    198{
    199	const struct gfs2_glock_operations *glops = gl->gl_ops;
    200
    201	if (gl->gl_state == LM_ST_UNLOCKED)
    202		return 0;
    203	/*
    204	 * Note that demote_ok is used for the lru process of disposing of
    205	 * glocks. For this purpose, we don't care if the glock's holders
    206	 * have the HIF_MAY_DEMOTE flag set or not. If someone is using
    207	 * them, don't demote.
    208	 */
    209	if (!list_empty(&gl->gl_holders))
    210		return 0;
    211	if (glops->go_demote_ok)
    212		return glops->go_demote_ok(gl);
    213	return 1;
    214}
    215
    216
    217void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
    218{
    219	if (!(gl->gl_ops->go_flags & GLOF_LRU))
    220		return;
    221
    222	spin_lock(&lru_lock);
    223
    224	list_move_tail(&gl->gl_lru, &lru_list);
    225
    226	if (!test_bit(GLF_LRU, &gl->gl_flags)) {
    227		set_bit(GLF_LRU, &gl->gl_flags);
    228		atomic_inc(&lru_count);
    229	}
    230
    231	spin_unlock(&lru_lock);
    232}
    233
    234static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
    235{
    236	if (!(gl->gl_ops->go_flags & GLOF_LRU))
    237		return;
    238
    239	spin_lock(&lru_lock);
    240	if (test_bit(GLF_LRU, &gl->gl_flags)) {
    241		list_del_init(&gl->gl_lru);
    242		atomic_dec(&lru_count);
    243		clear_bit(GLF_LRU, &gl->gl_flags);
    244	}
    245	spin_unlock(&lru_lock);
    246}
    247
    248/*
    249 * Enqueue the glock on the work queue.  Passes one glock reference on to the
    250 * work queue.
    251 */
    252static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
    253	if (!queue_delayed_work(glock_workqueue, &gl->gl_work, delay)) {
    254		/*
    255		 * We are holding the lockref spinlock, and the work was still
    256		 * queued above.  The queued work (glock_work_func) takes that
    257		 * spinlock before dropping its glock reference(s), so it
    258		 * cannot have dropped them in the meantime.
    259		 */
    260		GLOCK_BUG_ON(gl, gl->gl_lockref.count < 2);
    261		gl->gl_lockref.count--;
    262	}
    263}
    264
    265static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
    266	spin_lock(&gl->gl_lockref.lock);
    267	__gfs2_glock_queue_work(gl, delay);
    268	spin_unlock(&gl->gl_lockref.lock);
    269}
    270
    271static void __gfs2_glock_put(struct gfs2_glock *gl)
    272{
    273	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
    274	struct address_space *mapping = gfs2_glock2aspace(gl);
    275
    276	lockref_mark_dead(&gl->gl_lockref);
    277
    278	gfs2_glock_remove_from_lru(gl);
    279	spin_unlock(&gl->gl_lockref.lock);
    280	GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
    281	if (mapping) {
    282		truncate_inode_pages_final(mapping);
    283		if (!gfs2_withdrawn(sdp))
    284			GLOCK_BUG_ON(gl, !mapping_empty(mapping));
    285	}
    286	trace_gfs2_glock_put(gl);
    287	sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
    288}
    289
    290/*
    291 * Cause the glock to be put in work queue context.
    292 */
    293void gfs2_glock_queue_put(struct gfs2_glock *gl)
    294{
    295	gfs2_glock_queue_work(gl, 0);
    296}
    297
    298/**
    299 * gfs2_glock_put() - Decrement reference count on glock
    300 * @gl: The glock to put
    301 *
    302 */
    303
    304void gfs2_glock_put(struct gfs2_glock *gl)
    305{
    306	if (lockref_put_or_lock(&gl->gl_lockref))
    307		return;
    308
    309	__gfs2_glock_put(gl);
    310}
    311
    312/**
    313 * may_grant - check if it's ok to grant a new lock
    314 * @gl: The glock
    315 * @current_gh: One of the current holders of @gl
    316 * @gh: The lock request which we wish to grant
    317 *
    318 * With our current compatibility rules, if a glock has one or more active
    319 * holders (HIF_HOLDER flag set), any of those holders can be passed in as
    320 * @current_gh; they are all the same as far as compatibility with the new @gh
    321 * goes.
    322 *
    323 * Returns true if it's ok to grant the lock.
    324 */
    325
    326static inline bool may_grant(struct gfs2_glock *gl,
    327			     struct gfs2_holder *current_gh,
    328			     struct gfs2_holder *gh)
    329{
    330	if (current_gh) {
    331		GLOCK_BUG_ON(gl, !test_bit(HIF_HOLDER, &current_gh->gh_iflags));
    332
    333		switch(current_gh->gh_state) {
    334		case LM_ST_EXCLUSIVE:
     335			/*
     336			 * Here we make a special exception for holders who
     337			 * agree to share the EX lock with other holders: if
     338			 * the original holder has the LM_FLAG_NODE_SCOPE bit
     339			 * set, we grant additional holders that also have
     340			 * the bit set.
     341			 */
    342			return gh->gh_state == LM_ST_EXCLUSIVE &&
    343			       (current_gh->gh_flags & LM_FLAG_NODE_SCOPE) &&
    344			       (gh->gh_flags & LM_FLAG_NODE_SCOPE);
    345
    346		case LM_ST_SHARED:
    347		case LM_ST_DEFERRED:
    348			return gh->gh_state == current_gh->gh_state;
    349
    350		default:
    351			return false;
    352		}
    353	}
    354
    355	if (gl->gl_state == gh->gh_state)
    356		return true;
    357	if (gh->gh_flags & GL_EXACT)
    358		return false;
    359	if (gl->gl_state == LM_ST_EXCLUSIVE) {
    360		return gh->gh_state == LM_ST_SHARED ||
    361		       gh->gh_state == LM_ST_DEFERRED;
    362	}
    363	if (gh->gh_flags & LM_FLAG_ANY)
    364		return gl->gl_state != LM_ST_UNLOCKED;
    365	return false;
    366}
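
        /*
         * For quick reference, the compatibility rules encoded above are:
         *  - with an active holder in EX, a new request is granted only if it
         *    also asks for EX and both the holder and the request carry
         *    LM_FLAG_NODE_SCOPE;
         *  - with an active holder in SH or DF, a new request is granted only
         *    if it asks for the same state;
         *  - with no active holder, the request is checked against the glock
         *    state itself: an exact match is granted, GL_EXACT refuses
         *    anything else, a glock held in EX also satisfies SH and DF
         *    requests, and LM_FLAG_ANY accepts any state except unlocked.
         */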
    367
    368static void gfs2_holder_wake(struct gfs2_holder *gh)
    369{
    370	clear_bit(HIF_WAIT, &gh->gh_iflags);
    371	smp_mb__after_atomic();
    372	wake_up_bit(&gh->gh_iflags, HIF_WAIT);
    373	if (gh->gh_flags & GL_ASYNC) {
    374		struct gfs2_sbd *sdp = gh->gh_gl->gl_name.ln_sbd;
    375
    376		wake_up(&sdp->sd_async_glock_wait);
    377	}
    378}
    379
    380/**
    381 * do_error - Something unexpected has happened during a lock request
    382 * @gl: The glock
    383 * @ret: The status from the DLM
    384 */
    385
    386static void do_error(struct gfs2_glock *gl, const int ret)
    387{
    388	struct gfs2_holder *gh, *tmp;
    389
    390	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
    391		if (!test_bit(HIF_WAIT, &gh->gh_iflags))
    392			continue;
    393		if (ret & LM_OUT_ERROR)
    394			gh->gh_error = -EIO;
    395		else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
    396			gh->gh_error = GLR_TRYFAILED;
    397		else
    398			continue;
    399		list_del_init(&gh->gh_list);
    400		trace_gfs2_glock_queue(gh, 0);
    401		gfs2_holder_wake(gh);
    402	}
    403}
    404
    405/**
    406 * demote_incompat_holders - demote incompatible demoteable holders
    407 * @gl: the glock we want to promote
    408 * @new_gh: the new holder to be promoted
    409 */
    410static void demote_incompat_holders(struct gfs2_glock *gl,
    411				    struct gfs2_holder *new_gh)
    412{
    413	struct gfs2_holder *gh, *tmp;
    414
    415	/*
    416	 * Demote incompatible holders before we make ourselves eligible.
    417	 * (This holder may or may not allow auto-demoting, but we don't want
    418	 * to demote the new holder before it's even granted.)
    419	 */
    420	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
    421		/*
    422		 * Since holders are at the front of the list, we stop when we
    423		 * find the first non-holder.
    424		 */
    425		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
    426			return;
    427		if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags) &&
    428		    !may_grant(gl, new_gh, gh)) {
    429			/*
    430			 * We should not recurse into do_promote because
    431			 * __gfs2_glock_dq only calls handle_callback,
    432			 * gfs2_glock_add_to_lru and __gfs2_glock_queue_work.
    433			 */
    434			__gfs2_glock_dq(gh);
    435		}
    436	}
    437}
    438
    439/**
    440 * find_first_holder - find the first "holder" gh
    441 * @gl: the glock
    442 */
    443
    444static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
    445{
    446	struct gfs2_holder *gh;
    447
    448	if (!list_empty(&gl->gl_holders)) {
    449		gh = list_first_entry(&gl->gl_holders, struct gfs2_holder,
    450				      gh_list);
    451		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
    452			return gh;
    453	}
    454	return NULL;
    455}
    456
    457/**
    458 * find_first_strong_holder - find the first non-demoteable holder
    459 * @gl: the glock
    460 *
    461 * Find the first holder that doesn't have the HIF_MAY_DEMOTE flag set.
    462 */
    463static inline struct gfs2_holder *
    464find_first_strong_holder(struct gfs2_glock *gl)
    465{
    466	struct gfs2_holder *gh;
    467
    468	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
    469		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
    470			return NULL;
    471		if (!test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
    472			return gh;
    473	}
    474	return NULL;
    475}
    476
    477/*
    478 * gfs2_instantiate - Call the glops instantiate function
    479 * @gh: The glock holder
    480 *
    481 * Returns: 0 if instantiate was successful, 2 if type specific operation is
    482 * underway, or error.
    483 */
    484int gfs2_instantiate(struct gfs2_holder *gh)
    485{
    486	struct gfs2_glock *gl = gh->gh_gl;
    487	const struct gfs2_glock_operations *glops = gl->gl_ops;
    488	int ret;
    489
    490again:
    491	if (!test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags))
    492		return 0;
    493
    494	/*
    495	 * Since we unlock the lockref lock, we set a flag to indicate
    496	 * instantiate is in progress.
    497	 */
    498	if (test_and_set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) {
    499		wait_on_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG,
    500			    TASK_UNINTERRUPTIBLE);
    501		/*
    502		 * Here we just waited for a different instantiate to finish.
    503		 * But that may not have been successful, as when a process
    504		 * locks an inode glock _before_ it has an actual inode to
    505		 * instantiate into. So we check again. This process might
    506		 * have an inode to instantiate, so might be successful.
    507		 */
    508		goto again;
    509	}
    510
    511	ret = glops->go_instantiate(gh);
    512	if (!ret)
    513		clear_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags);
    514	clear_and_wake_up_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
    515	return ret;
    516}
    517
    518/**
    519 * do_promote - promote as many requests as possible on the current queue
    520 * @gl: The glock
    521 * 
    522 * Returns: 1 if there is a blocked holder at the head of the list, or 2
    523 *          if a type specific operation is underway.
    524 */
    525
    526static int do_promote(struct gfs2_glock *gl)
    527__releases(&gl->gl_lockref.lock)
    528__acquires(&gl->gl_lockref.lock)
    529{
    530	struct gfs2_holder *gh, *tmp, *first_gh;
    531	bool incompat_holders_demoted = false;
    532	bool lock_released;
    533	int ret;
    534
    535restart:
    536	first_gh = find_first_strong_holder(gl);
    537	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
    538		lock_released = false;
    539		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
    540			continue;
    541		if (!may_grant(gl, first_gh, gh)) {
    542			/*
    543			 * If we get here, it means we may not grant this holder for
    544			 * some reason. If this holder is the head of the list, it
    545			 * means we have a blocked holder at the head, so return 1.
    546			 */
    547			if (list_is_first(&gh->gh_list, &gl->gl_holders))
    548				return 1;
    549			do_error(gl, 0);
    550			break;
    551		}
    552		if (!incompat_holders_demoted) {
    553			demote_incompat_holders(gl, first_gh);
    554			incompat_holders_demoted = true;
    555			first_gh = gh;
    556		}
    557		if (test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags) &&
    558		    !(gh->gh_flags & GL_SKIP) && gl->gl_ops->go_instantiate) {
    559			lock_released = true;
    560			spin_unlock(&gl->gl_lockref.lock);
    561			ret = gfs2_instantiate(gh);
    562			spin_lock(&gl->gl_lockref.lock);
    563			if (ret) {
    564				if (ret == 1)
    565					return 2;
    566				gh->gh_error = ret;
    567				list_del_init(&gh->gh_list);
    568				trace_gfs2_glock_queue(gh, 0);
    569				gfs2_holder_wake(gh);
    570				goto restart;
    571			}
    572		}
    573		set_bit(HIF_HOLDER, &gh->gh_iflags);
    574		trace_gfs2_promote(gh);
    575		gfs2_holder_wake(gh);
    576		/*
    577		 * If we released the gl_lockref.lock the holders list may have
    578		 * changed. For that reason, we start again at the start of
    579		 * the holders queue.
    580		 */
    581		if (lock_released)
    582			goto restart;
    583	}
    584	return 0;
    585}
    586
    587/**
    588 * find_first_waiter - find the first gh that's waiting for the glock
    589 * @gl: the glock
    590 */
    591
    592static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl)
    593{
    594	struct gfs2_holder *gh;
    595
    596	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
    597		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
    598			return gh;
    599	}
    600	return NULL;
    601}
    602
    603/**
    604 * state_change - record that the glock is now in a different state
    605 * @gl: the glock
    606 * @new_state: the new state
    607 */
    608
    609static void state_change(struct gfs2_glock *gl, unsigned int new_state)
    610{
    611	int held1, held2;
    612
    613	held1 = (gl->gl_state != LM_ST_UNLOCKED);
    614	held2 = (new_state != LM_ST_UNLOCKED);
    615
    616	if (held1 != held2) {
    617		GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
    618		if (held2)
    619			gl->gl_lockref.count++;
    620		else
    621			gl->gl_lockref.count--;
    622	}
    623	if (new_state != gl->gl_target)
    624		/* shorten our minimum hold time */
    625		gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
    626				       GL_GLOCK_MIN_HOLD);
    627	gl->gl_state = new_state;
    628	gl->gl_tchange = jiffies;
    629}
    630
    631static void gfs2_set_demote(struct gfs2_glock *gl)
    632{
    633	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
    634
    635	set_bit(GLF_DEMOTE, &gl->gl_flags);
    636	smp_mb();
    637	wake_up(&sdp->sd_async_glock_wait);
    638}
    639
    640static void gfs2_demote_wake(struct gfs2_glock *gl)
    641{
    642	gl->gl_demote_state = LM_ST_EXCLUSIVE;
    643	clear_bit(GLF_DEMOTE, &gl->gl_flags);
    644	smp_mb__after_atomic();
    645	wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
    646}
    647
    648/**
    649 * finish_xmote - The DLM has replied to one of our lock requests
    650 * @gl: The glock
    651 * @ret: The status from the DLM
    652 *
    653 */
    654
    655static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
    656{
    657	const struct gfs2_glock_operations *glops = gl->gl_ops;
    658	struct gfs2_holder *gh;
    659	unsigned state = ret & LM_OUT_ST_MASK;
    660	int rv;
    661
    662	spin_lock(&gl->gl_lockref.lock);
    663	trace_gfs2_glock_state_change(gl, state);
    664	state_change(gl, state);
    665	gh = find_first_waiter(gl);
    666
    667	/* Demote to UN request arrived during demote to SH or DF */
    668	if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
    669	    state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED)
    670		gl->gl_target = LM_ST_UNLOCKED;
    671
    672	/* Check for state != intended state */
    673	if (unlikely(state != gl->gl_target)) {
    674		if (gh && (ret & LM_OUT_CANCELED))
    675			gfs2_holder_wake(gh);
    676		if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
    677			/* move to back of queue and try next entry */
    678			if (ret & LM_OUT_CANCELED) {
    679				if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0)
    680					list_move_tail(&gh->gh_list, &gl->gl_holders);
    681				gh = find_first_waiter(gl);
    682				gl->gl_target = gh->gh_state;
    683				goto retry;
    684			}
    685			/* Some error or failed "try lock" - report it */
    686			if ((ret & LM_OUT_ERROR) ||
    687			    (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
    688				gl->gl_target = gl->gl_state;
    689				do_error(gl, ret);
    690				goto out;
    691			}
    692		}
    693		switch(state) {
    694		/* Unlocked due to conversion deadlock, try again */
    695		case LM_ST_UNLOCKED:
    696retry:
    697			do_xmote(gl, gh, gl->gl_target);
    698			break;
    699		/* Conversion fails, unlock and try again */
    700		case LM_ST_SHARED:
    701		case LM_ST_DEFERRED:
    702			do_xmote(gl, gh, LM_ST_UNLOCKED);
    703			break;
    704		default: /* Everything else */
    705			fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n",
    706			       gl->gl_target, state);
    707			GLOCK_BUG_ON(gl, 1);
    708		}
    709		spin_unlock(&gl->gl_lockref.lock);
    710		return;
    711	}
    712
    713	/* Fast path - we got what we asked for */
    714	if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
    715		gfs2_demote_wake(gl);
    716	if (state != LM_ST_UNLOCKED) {
    717		if (glops->go_xmote_bh) {
    718			spin_unlock(&gl->gl_lockref.lock);
    719			rv = glops->go_xmote_bh(gl);
    720			spin_lock(&gl->gl_lockref.lock);
    721			if (rv) {
    722				do_error(gl, rv);
    723				goto out;
    724			}
    725		}
    726		rv = do_promote(gl);
    727		if (rv == 2)
    728			goto out_locked;
    729	}
    730out:
    731	clear_bit(GLF_LOCK, &gl->gl_flags);
    732out_locked:
    733	spin_unlock(&gl->gl_lockref.lock);
    734}
    735
    736static bool is_system_glock(struct gfs2_glock *gl)
    737{
    738	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
    739	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
    740
    741	if (gl == m_ip->i_gl)
    742		return true;
    743	return false;
    744}
    745
    746/**
    747 * do_xmote - Calls the DLM to change the state of a lock
    748 * @gl: The lock state
    749 * @gh: The holder (only for promotes)
    750 * @target: The target lock state
    751 *
    752 */
    753
    754static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target)
    755__releases(&gl->gl_lockref.lock)
    756__acquires(&gl->gl_lockref.lock)
    757{
    758	const struct gfs2_glock_operations *glops = gl->gl_ops;
    759	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
    760	unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
    761	int ret;
    762
    763	if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) &&
    764	    gh && !(gh->gh_flags & LM_FLAG_NOEXP))
    765		return;
    766	lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
    767		      LM_FLAG_PRIORITY);
    768	GLOCK_BUG_ON(gl, gl->gl_state == target);
    769	GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
    770	if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
    771	    glops->go_inval) {
    772		/*
    773		 * If another process is already doing the invalidate, let that
    774		 * finish first.  The glock state machine will get back to this
    775		 * holder again later.
    776		 */
    777		if (test_and_set_bit(GLF_INVALIDATE_IN_PROGRESS,
    778				     &gl->gl_flags))
    779			return;
    780		do_error(gl, 0); /* Fail queued try locks */
    781	}
    782	gl->gl_req = target;
    783	set_bit(GLF_BLOCKING, &gl->gl_flags);
    784	if ((gl->gl_req == LM_ST_UNLOCKED) ||
    785	    (gl->gl_state == LM_ST_EXCLUSIVE) ||
    786	    (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
    787		clear_bit(GLF_BLOCKING, &gl->gl_flags);
    788	spin_unlock(&gl->gl_lockref.lock);
    789	if (glops->go_sync) {
    790		ret = glops->go_sync(gl);
     791		/* If we had a problem syncing (due to io errors or whatever),
    792		 * we should not invalidate the metadata or tell dlm to
    793		 * release the glock to other nodes.
    794		 */
    795		if (ret) {
    796			if (cmpxchg(&sdp->sd_log_error, 0, ret)) {
    797				fs_err(sdp, "Error %d syncing glock \n", ret);
    798				gfs2_dump_glock(NULL, gl, true);
    799			}
    800			goto skip_inval;
    801		}
    802	}
    803	if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) {
    804		/*
    805		 * The call to go_sync should have cleared out the ail list.
    806		 * If there are still items, we have a problem. We ought to
    807		 * withdraw, but we can't because the withdraw code also uses
    808		 * glocks. Warn about the error, dump the glock, then fall
    809		 * through and wait for logd to do the withdraw for us.
    810		 */
    811		if ((atomic_read(&gl->gl_ail_count) != 0) &&
    812		    (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) {
    813			gfs2_glock_assert_warn(gl,
    814					       !atomic_read(&gl->gl_ail_count));
    815			gfs2_dump_glock(NULL, gl, true);
    816		}
    817		glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
    818		clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
    819	}
    820
    821skip_inval:
    822	gfs2_glock_hold(gl);
    823	/*
    824	 * Check for an error encountered since we called go_sync and go_inval.
    825	 * If so, we can't withdraw from the glock code because the withdraw
    826	 * code itself uses glocks (see function signal_our_withdraw) to
    827	 * change the mount to read-only. Most importantly, we must not call
    828	 * dlm to unlock the glock until the journal is in a known good state
    829	 * (after journal replay) otherwise other nodes may use the object
    830	 * (rgrp or dinode) and then later, journal replay will corrupt the
    831	 * file system. The best we can do here is wait for the logd daemon
    832	 * to see sd_log_error and withdraw, and in the meantime, requeue the
    833	 * work for later.
    834	 *
    835	 * We make a special exception for some system glocks, such as the
    836	 * system statfs inode glock, which needs to be granted before the
    837	 * gfs2_quotad daemon can exit, and that exit needs to finish before
    838	 * we can unmount the withdrawn file system.
    839	 *
    840	 * However, if we're just unlocking the lock (say, for unmount, when
    841	 * gfs2_gl_hash_clear calls clear_glock) and recovery is complete
    842	 * then it's okay to tell dlm to unlock it.
    843	 */
    844	if (unlikely(sdp->sd_log_error && !gfs2_withdrawn(sdp)))
    845		gfs2_withdraw_delayed(sdp);
    846	if (glock_blocked_by_withdraw(gl) &&
    847	    (target != LM_ST_UNLOCKED ||
    848	     test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) {
    849		if (!is_system_glock(gl)) {
    850			gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
    851			goto out;
    852		} else {
    853			clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
    854		}
    855	}
    856
    857	if (sdp->sd_lockstruct.ls_ops->lm_lock)	{
    858		/* lock_dlm */
    859		ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
    860		if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED &&
    861		    target == LM_ST_UNLOCKED &&
    862		    test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) {
    863			finish_xmote(gl, target);
    864			gfs2_glock_queue_work(gl, 0);
    865		} else if (ret) {
    866			fs_err(sdp, "lm_lock ret %d\n", ret);
    867			GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp));
    868		}
    869	} else { /* lock_nolock */
    870		finish_xmote(gl, target);
    871		gfs2_glock_queue_work(gl, 0);
    872	}
    873out:
    874	spin_lock(&gl->gl_lockref.lock);
    875}
    876
    877/**
    878 * run_queue - do all outstanding tasks related to a glock
    879 * @gl: The glock in question
    880 * @nonblock: True if we must not block in run_queue
    881 *
    882 */
    883
    884static void run_queue(struct gfs2_glock *gl, const int nonblock)
    885__releases(&gl->gl_lockref.lock)
    886__acquires(&gl->gl_lockref.lock)
    887{
    888	struct gfs2_holder *gh = NULL;
    889	int ret;
    890
    891	if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
    892		return;
    893
    894	GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
    895
    896	if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
    897	    gl->gl_demote_state != gl->gl_state) {
    898		if (find_first_holder(gl))
    899			goto out_unlock;
    900		if (nonblock)
    901			goto out_sched;
    902		set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
    903		GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
    904		gl->gl_target = gl->gl_demote_state;
    905	} else {
    906		if (test_bit(GLF_DEMOTE, &gl->gl_flags))
    907			gfs2_demote_wake(gl);
    908		ret = do_promote(gl);
    909		if (ret == 0)
    910			goto out_unlock;
    911		if (ret == 2)
    912			goto out;
    913		gh = find_first_waiter(gl);
    914		gl->gl_target = gh->gh_state;
    915		if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
    916			do_error(gl, 0); /* Fail queued try locks */
    917	}
    918	do_xmote(gl, gh, gl->gl_target);
    919out:
    920	return;
    921
    922out_sched:
    923	clear_bit(GLF_LOCK, &gl->gl_flags);
    924	smp_mb__after_atomic();
    925	gl->gl_lockref.count++;
    926	__gfs2_glock_queue_work(gl, 0);
    927	return;
    928
    929out_unlock:
    930	clear_bit(GLF_LOCK, &gl->gl_flags);
    931	smp_mb__after_atomic();
    932	return;
    933}
    934
    935void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation)
    936{
    937	struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;
    938
    939	if (ri->ri_magic == 0)
    940		ri->ri_magic = cpu_to_be32(GFS2_MAGIC);
    941	if (ri->ri_magic == cpu_to_be32(GFS2_MAGIC))
    942		ri->ri_generation_deleted = cpu_to_be64(generation);
    943}
    944
    945bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation)
    946{
    947	struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;
    948
    949	if (ri->ri_magic != cpu_to_be32(GFS2_MAGIC))
    950		return false;
    951	return generation <= be64_to_cpu(ri->ri_generation_deleted);
    952}
    953
    954static void gfs2_glock_poke(struct gfs2_glock *gl)
    955{
    956	int flags = LM_FLAG_TRY_1CB | LM_FLAG_ANY | GL_SKIP;
    957	struct gfs2_holder gh;
    958	int error;
    959
    960	__gfs2_holder_init(gl, LM_ST_SHARED, flags, &gh, _RET_IP_);
    961	error = gfs2_glock_nq(&gh);
    962	if (!error)
    963		gfs2_glock_dq(&gh);
    964	gfs2_holder_uninit(&gh);
    965}
    966
    967static bool gfs2_try_evict(struct gfs2_glock *gl)
    968{
    969	struct gfs2_inode *ip;
    970	bool evicted = false;
    971
    972	/*
    973	 * If there is contention on the iopen glock and we have an inode, try
    974	 * to grab and release the inode so that it can be evicted.  This will
    975	 * allow the remote node to go ahead and delete the inode without us
    976	 * having to do it, which will avoid rgrp glock thrashing.
    977	 *
    978	 * The remote node is likely still holding the corresponding inode
    979	 * glock, so it will run before we get to verify that the delete has
    980	 * happened below.
    981	 */
    982	spin_lock(&gl->gl_lockref.lock);
    983	ip = gl->gl_object;
    984	if (ip && !igrab(&ip->i_inode))
    985		ip = NULL;
    986	spin_unlock(&gl->gl_lockref.lock);
    987	if (ip) {
    988		struct gfs2_glock *inode_gl = NULL;
    989
    990		gl->gl_no_formal_ino = ip->i_no_formal_ino;
    991		set_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
    992		d_prune_aliases(&ip->i_inode);
    993		iput(&ip->i_inode);
    994
    995		/* If the inode was evicted, gl->gl_object will now be NULL. */
    996		spin_lock(&gl->gl_lockref.lock);
    997		ip = gl->gl_object;
    998		if (ip) {
    999			inode_gl = ip->i_gl;
   1000			lockref_get(&inode_gl->gl_lockref);
   1001			clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
   1002		}
   1003		spin_unlock(&gl->gl_lockref.lock);
   1004		if (inode_gl) {
   1005			gfs2_glock_poke(inode_gl);
   1006			gfs2_glock_put(inode_gl);
   1007		}
   1008		evicted = !ip;
   1009	}
   1010	return evicted;
   1011}
   1012
   1013static void delete_work_func(struct work_struct *work)
   1014{
   1015	struct delayed_work *dwork = to_delayed_work(work);
   1016	struct gfs2_glock *gl = container_of(dwork, struct gfs2_glock, gl_delete);
   1017	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
   1018	struct inode *inode;
   1019	u64 no_addr = gl->gl_name.ln_number;
   1020
   1021	spin_lock(&gl->gl_lockref.lock);
   1022	clear_bit(GLF_PENDING_DELETE, &gl->gl_flags);
   1023	spin_unlock(&gl->gl_lockref.lock);
   1024
   1025	if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
   1026		/*
   1027		 * If we can evict the inode, give the remote node trying to
   1028		 * delete the inode some time before verifying that the delete
   1029		 * has happened.  Otherwise, if we cause contention on the inode glock
   1030		 * immediately, the remote node will think that we still have
   1031		 * the inode in use, and so it will give up waiting.
   1032		 *
   1033		 * If we can't evict the inode, signal to the remote node that
   1034		 * the inode is still in use.  We'll later try to delete the
   1035		 * inode locally in gfs2_evict_inode.
   1036		 *
   1037		 * FIXME: We only need to verify that the remote node has
   1038		 * deleted the inode because nodes before this remote delete
   1039		 * rework won't cooperate.  At a later time, when we no longer
   1040		 * care about compatibility with such nodes, we can skip this
   1041		 * step entirely.
   1042		 */
   1043		if (gfs2_try_evict(gl)) {
   1044			if (gfs2_queue_delete_work(gl, 5 * HZ))
   1045				return;
   1046		}
   1047		goto out;
   1048	}
   1049
   1050	inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
   1051				    GFS2_BLKST_UNLINKED);
   1052	if (!IS_ERR_OR_NULL(inode)) {
   1053		d_prune_aliases(inode);
   1054		iput(inode);
   1055	}
   1056out:
   1057	gfs2_glock_put(gl);
   1058}
   1059
   1060static void glock_work_func(struct work_struct *work)
   1061{
   1062	unsigned long delay = 0;
   1063	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
   1064	unsigned int drop_refs = 1;
   1065
   1066	if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
   1067		finish_xmote(gl, gl->gl_reply);
   1068		drop_refs++;
   1069	}
   1070	spin_lock(&gl->gl_lockref.lock);
   1071	if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
   1072	    gl->gl_state != LM_ST_UNLOCKED &&
   1073	    gl->gl_demote_state != LM_ST_EXCLUSIVE) {
   1074		unsigned long holdtime, now = jiffies;
   1075
   1076		holdtime = gl->gl_tchange + gl->gl_hold_time;
   1077		if (time_before(now, holdtime))
   1078			delay = holdtime - now;
   1079
   1080		if (!delay) {
   1081			clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
   1082			gfs2_set_demote(gl);
   1083		}
   1084	}
   1085	run_queue(gl, 0);
   1086	if (delay) {
   1087		/* Keep one glock reference for the work we requeue. */
   1088		drop_refs--;
   1089		if (gl->gl_name.ln_type != LM_TYPE_INODE)
   1090			delay = 0;
   1091		__gfs2_glock_queue_work(gl, delay);
   1092	}
   1093
   1094	/*
   1095	 * Drop the remaining glock references manually here. (Mind that
    1096	 * __gfs2_glock_queue_work depends on the lockref spinlock being held
   1097	 * here as well.)
   1098	 */
   1099	gl->gl_lockref.count -= drop_refs;
   1100	if (!gl->gl_lockref.count) {
   1101		__gfs2_glock_put(gl);
   1102		return;
   1103	}
   1104	spin_unlock(&gl->gl_lockref.lock);
   1105}
   1106
   1107static struct gfs2_glock *find_insert_glock(struct lm_lockname *name,
   1108					    struct gfs2_glock *new)
   1109{
   1110	struct wait_glock_queue wait;
   1111	wait_queue_head_t *wq = glock_waitqueue(name);
   1112	struct gfs2_glock *gl;
   1113
   1114	wait.name = name;
   1115	init_wait(&wait.wait);
   1116	wait.wait.func = glock_wake_function;
   1117
   1118again:
   1119	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
   1120	rcu_read_lock();
   1121	if (new) {
   1122		gl = rhashtable_lookup_get_insert_fast(&gl_hash_table,
   1123			&new->gl_node, ht_parms);
   1124		if (IS_ERR(gl))
   1125			goto out;
   1126	} else {
   1127		gl = rhashtable_lookup_fast(&gl_hash_table,
   1128			name, ht_parms);
   1129	}
   1130	if (gl && !lockref_get_not_dead(&gl->gl_lockref)) {
   1131		rcu_read_unlock();
   1132		schedule();
   1133		goto again;
   1134	}
   1135out:
   1136	rcu_read_unlock();
   1137	finish_wait(wq, &wait.wait);
   1138	return gl;
   1139}
   1140
   1141/**
   1142 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
   1143 * @sdp: The GFS2 superblock
   1144 * @number: the lock number
   1145 * @glops: The glock_operations to use
   1146 * @create: If 0, don't create the glock if it doesn't exist
   1147 * @glp: the glock is returned here
   1148 *
   1149 * This does not lock a glock, just finds/creates structures for one.
   1150 *
   1151 * Returns: errno
   1152 */
   1153
   1154int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
   1155		   const struct gfs2_glock_operations *glops, int create,
   1156		   struct gfs2_glock **glp)
   1157{
   1158	struct super_block *s = sdp->sd_vfs;
   1159	struct lm_lockname name = { .ln_number = number,
   1160				    .ln_type = glops->go_type,
   1161				    .ln_sbd = sdp };
   1162	struct gfs2_glock *gl, *tmp;
   1163	struct address_space *mapping;
   1164	int ret = 0;
   1165
   1166	gl = find_insert_glock(&name, NULL);
   1167	if (gl) {
   1168		*glp = gl;
   1169		return 0;
   1170	}
   1171	if (!create)
   1172		return -ENOENT;
   1173
   1174	if (glops->go_flags & GLOF_ASPACE) {
   1175		struct gfs2_glock_aspace *gla =
   1176			kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_NOFS);
   1177		if (!gla)
   1178			return -ENOMEM;
   1179		gl = &gla->glock;
   1180	} else {
   1181		gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_NOFS);
   1182		if (!gl)
   1183			return -ENOMEM;
   1184	}
   1185	memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
   1186	gl->gl_ops = glops;
   1187
   1188	if (glops->go_flags & GLOF_LVB) {
   1189		gl->gl_lksb.sb_lvbptr = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
   1190		if (!gl->gl_lksb.sb_lvbptr) {
   1191			gfs2_glock_dealloc(&gl->gl_rcu);
   1192			return -ENOMEM;
   1193		}
   1194	}
   1195
   1196	atomic_inc(&sdp->sd_glock_disposal);
   1197	gl->gl_node.next = NULL;
   1198	gl->gl_flags = glops->go_instantiate ? BIT(GLF_INSTANTIATE_NEEDED) : 0;
   1199	gl->gl_name = name;
   1200	lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass);
   1201	gl->gl_lockref.count = 1;
   1202	gl->gl_state = LM_ST_UNLOCKED;
   1203	gl->gl_target = LM_ST_UNLOCKED;
   1204	gl->gl_demote_state = LM_ST_EXCLUSIVE;
   1205	gl->gl_dstamp = 0;
   1206	preempt_disable();
   1207	/* We use the global stats to estimate the initial per-glock stats */
   1208	gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
   1209	preempt_enable();
   1210	gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
   1211	gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
   1212	gl->gl_tchange = jiffies;
   1213	gl->gl_object = NULL;
   1214	gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
   1215	INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
   1216	if (gl->gl_name.ln_type == LM_TYPE_IOPEN)
   1217		INIT_DELAYED_WORK(&gl->gl_delete, delete_work_func);
   1218
   1219	mapping = gfs2_glock2aspace(gl);
   1220	if (mapping) {
    1221		mapping->a_ops = &gfs2_meta_aops;
   1222		mapping->host = s->s_bdev->bd_inode;
   1223		mapping->flags = 0;
   1224		mapping_set_gfp_mask(mapping, GFP_NOFS);
   1225		mapping->private_data = NULL;
   1226		mapping->writeback_index = 0;
   1227	}
   1228
   1229	tmp = find_insert_glock(&name, gl);
   1230	if (!tmp) {
   1231		*glp = gl;
   1232		goto out;
   1233	}
   1234	if (IS_ERR(tmp)) {
   1235		ret = PTR_ERR(tmp);
   1236		goto out_free;
   1237	}
   1238	*glp = tmp;
   1239
   1240out_free:
   1241	gfs2_glock_dealloc(&gl->gl_rcu);
   1242	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
   1243		wake_up(&sdp->sd_glock_wait);
   1244
   1245out:
   1246	return ret;
   1247}
   1248
   1249/**
   1250 * __gfs2_holder_init - initialize a struct gfs2_holder in the default way
   1251 * @gl: the glock
   1252 * @state: the state we're requesting
   1253 * @flags: the modifier flags
   1254 * @gh: the holder structure
   1255 *
   1256 */
   1257
   1258void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
   1259			struct gfs2_holder *gh, unsigned long ip)
   1260{
   1261	INIT_LIST_HEAD(&gh->gh_list);
   1262	gh->gh_gl = gl;
   1263	gh->gh_ip = ip;
   1264	gh->gh_owner_pid = get_pid(task_pid(current));
   1265	gh->gh_state = state;
   1266	gh->gh_flags = flags;
   1267	gh->gh_iflags = 0;
   1268	gfs2_glock_hold(gl);
   1269}
   1270
   1271/**
   1272 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
   1273 * @state: the state we're requesting
   1274 * @flags: the modifier flags
   1275 * @gh: the holder structure
   1276 *
   1277 * Don't mess with the glock.
   1278 *
   1279 */
   1280
   1281void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh)
   1282{
   1283	gh->gh_state = state;
   1284	gh->gh_flags = flags;
   1285	gh->gh_iflags = 0;
   1286	gh->gh_ip = _RET_IP_;
   1287	put_pid(gh->gh_owner_pid);
   1288	gh->gh_owner_pid = get_pid(task_pid(current));
   1289}
   1290
   1291/**
   1292 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
   1293 * @gh: the holder structure
   1294 *
   1295 */
   1296
   1297void gfs2_holder_uninit(struct gfs2_holder *gh)
   1298{
   1299	put_pid(gh->gh_owner_pid);
   1300	gfs2_glock_put(gh->gh_gl);
   1301	gfs2_holder_mark_uninitialized(gh);
   1302	gh->gh_ip = 0;
   1303}
   1304
   1305static void gfs2_glock_update_hold_time(struct gfs2_glock *gl,
   1306					unsigned long start_time)
   1307{
    1308	/* Have we waited longer than a second? */
   1309	if (time_after(jiffies, start_time + HZ)) {
   1310		/* Lengthen the minimum hold time. */
   1311		gl->gl_hold_time = min(gl->gl_hold_time + GL_GLOCK_HOLD_INCR,
   1312				       GL_GLOCK_MAX_HOLD);
   1313	}
   1314}
   1315
   1316/**
   1317 * gfs2_glock_wait - wait on a glock acquisition
   1318 * @gh: the glock holder
   1319 *
   1320 * Returns: 0 on success
   1321 */
   1322
   1323int gfs2_glock_wait(struct gfs2_holder *gh)
   1324{
   1325	unsigned long start_time = jiffies;
   1326
   1327	might_sleep();
   1328	wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
   1329	gfs2_glock_update_hold_time(gh->gh_gl, start_time);
   1330	return gh->gh_error;
   1331}
   1332
   1333static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs)
   1334{
   1335	int i;
   1336
   1337	for (i = 0; i < num_gh; i++)
   1338		if (test_bit(HIF_WAIT, &ghs[i].gh_iflags))
   1339			return 1;
   1340	return 0;
   1341}
   1342
   1343/**
   1344 * gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions
   1345 * @num_gh: the number of holders in the array
   1346 * @ghs: the glock holder array
   1347 *
   1348 * Returns: 0 on success, meaning all glocks have been granted and are held.
   1349 *          -ESTALE if the request timed out, meaning all glocks were released,
   1350 *          and the caller should retry the operation.
   1351 */
   1352
   1353int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
   1354{
   1355	struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd;
   1356	int i, ret = 0, timeout = 0;
   1357	unsigned long start_time = jiffies;
   1358	bool keep_waiting;
   1359
   1360	might_sleep();
   1361	/*
   1362	 * Total up the (minimum hold time * 2) of all glocks and use that to
   1363	 * determine the max amount of time we should wait.
   1364	 */
   1365	for (i = 0; i < num_gh; i++)
   1366		timeout += ghs[i].gh_gl->gl_hold_time << 1;
   1367
   1368wait_for_dlm:
   1369	if (!wait_event_timeout(sdp->sd_async_glock_wait,
   1370				!glocks_pending(num_gh, ghs), timeout))
   1371		ret = -ESTALE; /* request timed out. */
   1372
   1373	/*
   1374	 * If dlm granted all our requests, we need to adjust the glock
   1375	 * minimum hold time values according to how long we waited.
   1376	 *
   1377	 * If our request timed out, we need to repeatedly release any held
   1378	 * glocks we acquired thus far to allow dlm to acquire the remaining
   1379	 * glocks without deadlocking.  We cannot currently cancel outstanding
   1380	 * glock acquisitions.
   1381	 *
   1382	 * The HIF_WAIT bit tells us which requests still need a response from
   1383	 * dlm.
   1384	 *
   1385	 * If dlm sent us any errors, we return the first error we find.
   1386	 */
   1387	keep_waiting = false;
   1388	for (i = 0; i < num_gh; i++) {
   1389		/* Skip holders we have already dequeued below. */
   1390		if (!gfs2_holder_queued(&ghs[i]))
   1391			continue;
   1392		/* Skip holders with a pending DLM response. */
   1393		if (test_bit(HIF_WAIT, &ghs[i].gh_iflags)) {
   1394			keep_waiting = true;
   1395			continue;
   1396		}
   1397
   1398		if (test_bit(HIF_HOLDER, &ghs[i].gh_iflags)) {
   1399			if (ret == -ESTALE)
   1400				gfs2_glock_dq(&ghs[i]);
   1401			else
   1402				gfs2_glock_update_hold_time(ghs[i].gh_gl,
   1403							    start_time);
   1404		}
   1405		if (!ret)
   1406			ret = ghs[i].gh_error;
   1407	}
   1408
   1409	if (keep_waiting)
   1410		goto wait_for_dlm;
   1411
   1412	/*
   1413	 * At this point, we've either acquired all locks or released them all.
   1414	 */
   1415	return ret;
   1416}
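
        /*
         * A minimal usage sketch for the asynchronous path (illustrative
         * only; "gl_a" and "gl_b" stand in for glocks the caller has already
         * looked up, and gfs2_holder_init() is the glock.h wrapper around the
         * __gfs2_holder_init() used in this file): each holder is queued with
         * GL_ASYNC, which makes gfs2_glock_nq() return without waiting, and
         * the whole batch is then waited on together.
         *
         *	struct gfs2_holder ghs[2];
         *	int error;
         *
         *	gfs2_holder_init(gl_a, LM_ST_EXCLUSIVE, GL_ASYNC, &ghs[0]);
         *	gfs2_holder_init(gl_b, LM_ST_EXCLUSIVE, GL_ASYNC, &ghs[1]);
         *	gfs2_glock_nq(&ghs[0]);
         *	gfs2_glock_nq(&ghs[1]);
         *	error = gfs2_glock_async_wait(2, ghs);
         *	// 0: both holders granted and held
         *	// -ESTALE: everything was released, the caller should retry
         */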
   1417
   1418/**
   1419 * handle_callback - process a demote request
   1420 * @gl: the glock
   1421 * @state: the state the caller wants us to change to
   1422 * @delay: zero to demote immediately; otherwise pending demote
   1423 * @remote: true if this came from a different cluster node
   1424 *
   1425 * There are only two requests that we are going to see in actual
    1426 * practice: LM_ST_SHARED and LM_ST_UNLOCKED
   1427 */
   1428
   1429static void handle_callback(struct gfs2_glock *gl, unsigned int state,
   1430			    unsigned long delay, bool remote)
   1431{
   1432	if (delay)
   1433		set_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
   1434	else
   1435		gfs2_set_demote(gl);
   1436	if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
   1437		gl->gl_demote_state = state;
   1438		gl->gl_demote_time = jiffies;
   1439	} else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
   1440			gl->gl_demote_state != state) {
   1441		gl->gl_demote_state = LM_ST_UNLOCKED;
   1442	}
   1443	if (gl->gl_ops->go_callback)
   1444		gl->gl_ops->go_callback(gl, remote);
   1445	trace_gfs2_demote_rq(gl, remote);
   1446}
   1447
   1448void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
   1449{
   1450	struct va_format vaf;
   1451	va_list args;
   1452
   1453	va_start(args, fmt);
   1454
   1455	if (seq) {
   1456		seq_vprintf(seq, fmt, args);
   1457	} else {
   1458		vaf.fmt = fmt;
   1459		vaf.va = &args;
   1460
   1461		pr_err("%pV", &vaf);
   1462	}
   1463
   1464	va_end(args);
   1465}
   1466
   1467/**
   1468 * add_to_queue - Add a holder to the wait queue (but look for recursion)
   1469 * @gh: the holder structure to add
   1470 *
   1471 * Eventually we should move the recursive locking trap to a
   1472 * debugging option or something like that. This is the fast
   1473 * path and needs to have the minimum number of distractions.
   1474 * 
   1475 */
   1476
   1477static inline void add_to_queue(struct gfs2_holder *gh)
   1478__releases(&gl->gl_lockref.lock)
   1479__acquires(&gl->gl_lockref.lock)
   1480{
   1481	struct gfs2_glock *gl = gh->gh_gl;
   1482	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
   1483	struct list_head *insert_pt = NULL;
   1484	struct gfs2_holder *gh2;
   1485	int try_futile = 0;
   1486
   1487	GLOCK_BUG_ON(gl, gh->gh_owner_pid == NULL);
   1488	if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
   1489		GLOCK_BUG_ON(gl, true);
   1490
   1491	if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
   1492		if (test_bit(GLF_LOCK, &gl->gl_flags)) {
   1493			struct gfs2_holder *first_gh;
   1494
   1495			first_gh = find_first_strong_holder(gl);
   1496			try_futile = !may_grant(gl, first_gh, gh);
   1497		}
   1498		if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
   1499			goto fail;
   1500	}
   1501
   1502	list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
   1503		if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
   1504		    (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK) &&
   1505		    !test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags)))
   1506			goto trap_recursive;
   1507		if (try_futile &&
   1508		    !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
   1509fail:
   1510			gh->gh_error = GLR_TRYFAILED;
   1511			gfs2_holder_wake(gh);
   1512			return;
   1513		}
   1514		if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
   1515			continue;
   1516		if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
   1517			insert_pt = &gh2->gh_list;
   1518	}
   1519	trace_gfs2_glock_queue(gh, 1);
   1520	gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
   1521	gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
   1522	if (likely(insert_pt == NULL)) {
   1523		list_add_tail(&gh->gh_list, &gl->gl_holders);
   1524		if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
   1525			goto do_cancel;
   1526		return;
   1527	}
   1528	list_add_tail(&gh->gh_list, insert_pt);
   1529do_cancel:
   1530	gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
   1531	if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
   1532		spin_unlock(&gl->gl_lockref.lock);
   1533		if (sdp->sd_lockstruct.ls_ops->lm_cancel)
   1534			sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
   1535		spin_lock(&gl->gl_lockref.lock);
   1536	}
   1537	return;
   1538
   1539trap_recursive:
   1540	fs_err(sdp, "original: %pSR\n", (void *)gh2->gh_ip);
   1541	fs_err(sdp, "pid: %d\n", pid_nr(gh2->gh_owner_pid));
   1542	fs_err(sdp, "lock type: %d req lock state : %d\n",
   1543	       gh2->gh_gl->gl_name.ln_type, gh2->gh_state);
   1544	fs_err(sdp, "new: %pSR\n", (void *)gh->gh_ip);
   1545	fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid));
   1546	fs_err(sdp, "lock type: %d req lock state : %d\n",
   1547	       gh->gh_gl->gl_name.ln_type, gh->gh_state);
   1548	gfs2_dump_glock(NULL, gl, true);
   1549	BUG();
   1550}
   1551
   1552/**
   1553 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
   1554 * @gh: the holder structure
   1555 *
   1556 * if (gh->gh_flags & GL_ASYNC), this never returns an error
   1557 *
   1558 * Returns: 0, GLR_TRYFAILED, or errno on failure
   1559 */
   1560
   1561int gfs2_glock_nq(struct gfs2_holder *gh)
   1562{
   1563	struct gfs2_glock *gl = gh->gh_gl;
   1564	int error = 0;
   1565
   1566	if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP))
   1567		return -EIO;
   1568
   1569	if (test_bit(GLF_LRU, &gl->gl_flags))
   1570		gfs2_glock_remove_from_lru(gl);
   1571
   1572	gh->gh_error = 0;
   1573	spin_lock(&gl->gl_lockref.lock);
   1574	add_to_queue(gh);
   1575	if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) &&
   1576		     test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) {
   1577		set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
   1578		gl->gl_lockref.count++;
   1579		__gfs2_glock_queue_work(gl, 0);
   1580	}
   1581	run_queue(gl, 1);
   1582	spin_unlock(&gl->gl_lockref.lock);
   1583
   1584	if (!(gh->gh_flags & GL_ASYNC))
   1585		error = gfs2_glock_wait(gh);
   1586
   1587	return error;
   1588}
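
        /*
         * The common synchronous pattern, as also used by gfs2_glock_poke()
         * above (a sketch; "gl" is whatever glock the caller obtained):
         * without GL_ASYNC, gfs2_glock_nq() queues the holder and then waits
         * in gfs2_glock_wait() itself, so a zero return means the glock is
         * held.
         *
         *	struct gfs2_holder gh;
         *	int error;
         *
         *	gfs2_holder_init(gl, LM_ST_SHARED, 0, &gh);
         *	error = gfs2_glock_nq(&gh);
         *	if (!error) {
         *		// glock held in LM_ST_SHARED here
         *		gfs2_glock_dq(&gh);
         *	}
         *	gfs2_holder_uninit(&gh);
         */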
   1589
   1590/**
   1591 * gfs2_glock_poll - poll to see if an async request has been completed
   1592 * @gh: the holder
   1593 *
   1594 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
   1595 */
   1596
   1597int gfs2_glock_poll(struct gfs2_holder *gh)
   1598{
   1599	return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
   1600}
   1601
   1602static inline bool needs_demote(struct gfs2_glock *gl)
   1603{
   1604	return (test_bit(GLF_DEMOTE, &gl->gl_flags) ||
   1605		test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags));
   1606}
   1607
   1608static void __gfs2_glock_dq(struct gfs2_holder *gh)
   1609{
   1610	struct gfs2_glock *gl = gh->gh_gl;
   1611	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
   1612	unsigned delay = 0;
   1613	int fast_path = 0;
   1614
   1615	/*
   1616	 * This while loop is similar to function demote_incompat_holders:
   1617	 * If the glock is due to be demoted (which may be from another node
   1618	 * or even if this holder is GL_NOCACHE), the weak holders are
   1619	 * demoted as well, allowing the glock to be demoted.
   1620	 */
   1621	while (gh) {
   1622		/*
   1623		 * If we're in the process of file system withdraw, we cannot
   1624		 * just dequeue any glocks until our journal is recovered, lest
   1625		 * we introduce file system corruption. We need two exceptions
   1626		 * to this rule: We need to allow unlocking of nondisk glocks
   1627		 * and the glock for our own journal that needs recovery.
   1628		 */
   1629		if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
   1630		    glock_blocked_by_withdraw(gl) &&
   1631		    gh->gh_gl != sdp->sd_jinode_gl) {
   1632			sdp->sd_glock_dqs_held++;
   1633			spin_unlock(&gl->gl_lockref.lock);
   1634			might_sleep();
   1635			wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
   1636				    TASK_UNINTERRUPTIBLE);
   1637			spin_lock(&gl->gl_lockref.lock);
   1638		}
   1639
   1640		/*
   1641		 * This holder should not be cached, so mark it for demote.
   1642		 * Note: this should be done before the check for needs_demote
   1643		 * below.
   1644		 */
   1645		if (gh->gh_flags & GL_NOCACHE)
   1646			handle_callback(gl, LM_ST_UNLOCKED, 0, false);
   1647
   1648		list_del_init(&gh->gh_list);
   1649		clear_bit(HIF_HOLDER, &gh->gh_iflags);
   1650		trace_gfs2_glock_queue(gh, 0);
   1651
   1652		/*
   1653		 * If there hasn't been a demote request we are done.
   1654		 * (Let the remaining holders, if any, keep holding it.)
   1655		 */
   1656		if (!needs_demote(gl)) {
   1657			if (list_empty(&gl->gl_holders))
   1658				fast_path = 1;
   1659			break;
   1660		}
   1661		/*
    1662	 * If there is another strong holder (which we cannot auto-demote),
    1663	 * we are done; that holder keeps the glock until it dequeues.
   1664		 */
   1665		if (find_first_strong_holder(gl))
   1666			break;
   1667
   1668		/*
   1669		 * If we have a weak holder at the head of the list, it
   1670		 * (and all others like it) must be auto-demoted. If there
   1671		 * are no more weak holders, we exit the while loop.
   1672		 */
   1673		gh = find_first_holder(gl);
   1674	}
   1675
   1676	if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
   1677		gfs2_glock_add_to_lru(gl);
   1678
   1679	if (unlikely(!fast_path)) {
   1680		gl->gl_lockref.count++;
   1681		if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
   1682		    !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
   1683		    gl->gl_name.ln_type == LM_TYPE_INODE)
   1684			delay = gl->gl_hold_time;
   1685		__gfs2_glock_queue_work(gl, delay);
   1686	}
   1687}
   1688
   1689/**
   1690 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
   1691 * @gh: the glock holder
   1692 *
   1693 */
   1694void gfs2_glock_dq(struct gfs2_holder *gh)
   1695{
   1696	struct gfs2_glock *gl = gh->gh_gl;
   1697
   1698	spin_lock(&gl->gl_lockref.lock);
   1699	if (list_is_first(&gh->gh_list, &gl->gl_holders) &&
   1700	    !test_bit(HIF_HOLDER, &gh->gh_iflags)) {
   1701		spin_unlock(&gl->gl_lockref.lock);
   1702		gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl);
   1703		wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
   1704		spin_lock(&gl->gl_lockref.lock);
   1705	}
   1706
   1707	__gfs2_glock_dq(gh);
   1708	spin_unlock(&gl->gl_lockref.lock);
   1709}
   1710
   1711void gfs2_glock_dq_wait(struct gfs2_holder *gh)
   1712{
   1713	struct gfs2_glock *gl = gh->gh_gl;
   1714	gfs2_glock_dq(gh);
   1715	might_sleep();
   1716	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
   1717}
   1718
   1719/**
    1720 * gfs2_glock_dq_uninit - dequeue a holder from a glock and uninitialize it
   1721 * @gh: the holder structure
   1722 *
   1723 */
   1724
   1725void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
   1726{
   1727	gfs2_glock_dq(gh);
   1728	gfs2_holder_uninit(gh);
   1729}
   1730
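/*
 * Illustrative sketch (editorial): the common synchronous holder lifecycle
 * that gfs2_glock_dq_uninit() pairs with. The lock state is an example only:
 *
 *	struct gfs2_holder gh;
 *	int error;
 *
 *	gfs2_holder_init(gl, LM_ST_EXCLUSIVE, 0, &gh);
 *	error = gfs2_glock_nq(&gh);
 *	if (error) {
 *		gfs2_holder_uninit(&gh);
 *		return error;
 *	}
 *	... access the object protected by the glock ...
 *	gfs2_glock_dq_uninit(&gh);
 */
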
   1731/**
   1732 * gfs2_glock_nq_num - acquire a glock based on lock number
   1733 * @sdp: the filesystem
   1734 * @number: the lock number
   1735 * @glops: the glock operations for the type of glock
   1736 * @state: the state to acquire the glock in
   1737 * @flags: modifier flags for the acquisition
   1738 * @gh: the struct gfs2_holder
   1739 *
   1740 * Returns: errno
   1741 */
   1742
   1743int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
   1744		      const struct gfs2_glock_operations *glops,
   1745		      unsigned int state, u16 flags, struct gfs2_holder *gh)
   1746{
   1747	struct gfs2_glock *gl;
   1748	int error;
   1749
   1750	error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
   1751	if (!error) {
   1752		error = gfs2_glock_nq_init(gl, state, flags, gh);
   1753		gfs2_glock_put(gl);
   1754	}
   1755
   1756	return error;
   1757}
   1758
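/*
 * Illustrative sketch (editorial): acquiring a glock by lock number when no
 * struct gfs2_glock pointer is at hand. The glops and state used here are
 * examples only:
 *
 *	error = gfs2_glock_nq_num(sdp, ip->i_no_addr, &gfs2_inode_glops,
 *				  LM_ST_SHARED, 0, &gh);
 *	if (!error) {
 *		...
 *		gfs2_glock_dq_uninit(&gh);
 *	}
 */
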
   1759/**
   1760 * glock_compare - Compare two struct gfs2_glock structures for sorting
   1761 * @arg_a: the first structure
   1762 * @arg_b: the second structure
   1763 *
   1764 */
   1765
   1766static int glock_compare(const void *arg_a, const void *arg_b)
   1767{
   1768	const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
   1769	const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
   1770	const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
   1771	const struct lm_lockname *b = &gh_b->gh_gl->gl_name;
   1772
   1773	if (a->ln_number > b->ln_number)
   1774		return 1;
   1775	if (a->ln_number < b->ln_number)
   1776		return -1;
   1777	BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type);
   1778	return 0;
   1779}
   1780
   1781/**
    1782 * nq_m_sync - synchronously acquire more than one glock in deadlock-free order
   1783 * @num_gh: the number of structures
   1784 * @ghs: an array of struct gfs2_holder structures
   1785 * @p: placeholder for the holder structure to pass back
   1786 *
   1787 * Returns: 0 on success (all glocks acquired),
   1788 *          errno on failure (no glocks acquired)
   1789 */
   1790
   1791static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
   1792		     struct gfs2_holder **p)
   1793{
   1794	unsigned int x;
   1795	int error = 0;
   1796
   1797	for (x = 0; x < num_gh; x++)
   1798		p[x] = &ghs[x];
   1799
   1800	sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);
   1801
   1802	for (x = 0; x < num_gh; x++) {
   1803		p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
   1804
   1805		error = gfs2_glock_nq(p[x]);
   1806		if (error) {
   1807			while (x--)
   1808				gfs2_glock_dq(p[x]);
   1809			break;
   1810		}
   1811	}
   1812
   1813	return error;
   1814}
   1815
   1816/**
   1817 * gfs2_glock_nq_m - acquire multiple glocks
   1818 * @num_gh: the number of structures
   1819 * @ghs: an array of struct gfs2_holder structures
   1820 *
   1821 *
   1822 * Returns: 0 on success (all glocks acquired),
   1823 *          errno on failure (no glocks acquired)
   1824 */
   1825
   1826int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
   1827{
   1828	struct gfs2_holder *tmp[4];
   1829	struct gfs2_holder **pph = tmp;
   1830	int error = 0;
   1831
    1832	switch (num_gh) {
   1833	case 0:
   1834		return 0;
   1835	case 1:
   1836		ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
   1837		return gfs2_glock_nq(ghs);
   1838	default:
   1839		if (num_gh <= 4)
   1840			break;
   1841		pph = kmalloc_array(num_gh, sizeof(struct gfs2_holder *),
   1842				    GFP_NOFS);
   1843		if (!pph)
   1844			return -ENOMEM;
   1845	}
   1846
   1847	error = nq_m_sync(num_gh, ghs, pph);
   1848
   1849	if (pph != tmp)
   1850		kfree(pph);
   1851
   1852	return error;
   1853}
   1854
   1855/**
   1856 * gfs2_glock_dq_m - release multiple glocks
   1857 * @num_gh: the number of structures
   1858 * @ghs: an array of struct gfs2_holder structures
   1859 *
   1860 */
   1861
   1862void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
   1863{
   1864	while (num_gh--)
   1865		gfs2_glock_dq(&ghs[num_gh]);
   1866}
   1867
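/*
 * Illustrative sketch (editorial): acquiring and releasing several glocks as
 * a group; gfs2_glock_nq_m() sorts the holders internally, so callers need
 * not worry about ordering. The glocks and states are examples only:
 *
 *	struct gfs2_holder ghs[2];
 *
 *	gfs2_holder_init(gl_a, LM_ST_EXCLUSIVE, 0, &ghs[0]);
 *	gfs2_holder_init(gl_b, LM_ST_EXCLUSIVE, 0, &ghs[1]);
 *	error = gfs2_glock_nq_m(2, ghs);
 *	if (!error) {
 *		...
 *		gfs2_glock_dq_m(2, ghs);
 *	}
 *	gfs2_holder_uninit(&ghs[0]);
 *	gfs2_holder_uninit(&ghs[1]);
 */
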
   1868void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
   1869{
   1870	unsigned long delay = 0;
   1871	unsigned long holdtime;
   1872	unsigned long now = jiffies;
   1873
   1874	gfs2_glock_hold(gl);
   1875	spin_lock(&gl->gl_lockref.lock);
   1876	holdtime = gl->gl_tchange + gl->gl_hold_time;
   1877	if (!list_empty(&gl->gl_holders) &&
   1878	    gl->gl_name.ln_type == LM_TYPE_INODE) {
   1879		if (time_before(now, holdtime))
   1880			delay = holdtime - now;
   1881		if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
   1882			delay = gl->gl_hold_time;
   1883	}
   1884	/*
   1885	 * Note 1: We cannot call demote_incompat_holders from handle_callback
   1886	 * or gfs2_set_demote due to recursion problems like: gfs2_glock_dq ->
   1887	 * handle_callback -> demote_incompat_holders -> gfs2_glock_dq
   1888	 * Plus, we only want to demote the holders if the request comes from
   1889	 * a remote cluster node because local holder conflicts are resolved
   1890	 * elsewhere.
   1891	 *
   1892	 * Note 2: if a remote node wants this glock in EX mode, lock_dlm will
   1893	 * request that we set our state to UNLOCKED. Here we mock up a holder
   1894	 * to make it look like someone wants the lock EX locally. Any SH
   1895	 * and DF requests should be able to share the lock without demoting.
   1896	 *
   1897	 * Note 3: We only want to demote the demoteable holders when there
   1898	 * are no more strong holders. The demoteable holders might as well
   1899	 * keep the glock until the last strong holder is done with it.
   1900	 */
   1901	if (!find_first_strong_holder(gl)) {
   1902		struct gfs2_holder mock_gh = {
   1903			.gh_gl = gl,
   1904			.gh_state = (state == LM_ST_UNLOCKED) ?
   1905				    LM_ST_EXCLUSIVE : state,
   1906			.gh_iflags = BIT(HIF_HOLDER)
   1907		};
   1908
   1909		demote_incompat_holders(gl, &mock_gh);
   1910	}
   1911	handle_callback(gl, state, delay, true);
   1912	__gfs2_glock_queue_work(gl, delay);
   1913	spin_unlock(&gl->gl_lockref.lock);
   1914}
   1915
   1916/**
   1917 * gfs2_should_freeze - Figure out if glock should be frozen
   1918 * @gl: The glock in question
   1919 *
   1920 * Glocks are not frozen if (a) the result of the dlm operation is
   1921 * an error, (b) the locking operation was an unlock operation or
    1922 * (c) there is a "noexp" flagged request anywhere in the queue
   1923 *
   1924 * Returns: 1 if freezing should occur, 0 otherwise
   1925 */
   1926
   1927static int gfs2_should_freeze(const struct gfs2_glock *gl)
   1928{
   1929	const struct gfs2_holder *gh;
   1930
   1931	if (gl->gl_reply & ~LM_OUT_ST_MASK)
   1932		return 0;
   1933	if (gl->gl_target == LM_ST_UNLOCKED)
   1934		return 0;
   1935
   1936	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
   1937		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
   1938			continue;
   1939		if (LM_FLAG_NOEXP & gh->gh_flags)
   1940			return 0;
   1941	}
   1942
   1943	return 1;
   1944}
   1945
   1946/**
   1947 * gfs2_glock_complete - Callback used by locking
   1948 * @gl: Pointer to the glock
   1949 * @ret: The return value from the dlm
   1950 *
    1951 * The gl_reply field is protected by gl_lockref.lock so that it is ok
   1952 * to use a bitfield shared with other glock state fields.
   1953 */
   1954
   1955void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
   1956{
   1957	struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
   1958
   1959	spin_lock(&gl->gl_lockref.lock);
   1960	gl->gl_reply = ret;
   1961
   1962	if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
   1963		if (gfs2_should_freeze(gl)) {
   1964			set_bit(GLF_FROZEN, &gl->gl_flags);
   1965			spin_unlock(&gl->gl_lockref.lock);
   1966			return;
   1967		}
   1968	}
   1969
   1970	gl->gl_lockref.count++;
   1971	set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
   1972	__gfs2_glock_queue_work(gl, 0);
   1973	spin_unlock(&gl->gl_lockref.lock);
   1974}
   1975
   1976static int glock_cmp(void *priv, const struct list_head *a,
   1977		     const struct list_head *b)
   1978{
   1979	struct gfs2_glock *gla, *glb;
   1980
   1981	gla = list_entry(a, struct gfs2_glock, gl_lru);
   1982	glb = list_entry(b, struct gfs2_glock, gl_lru);
   1983
   1984	if (gla->gl_name.ln_number > glb->gl_name.ln_number)
   1985		return 1;
   1986	if (gla->gl_name.ln_number < glb->gl_name.ln_number)
   1987		return -1;
   1988
   1989	return 0;
   1990}
   1991
   1992/**
   1993 * gfs2_dispose_glock_lru - Demote a list of glocks
   1994 * @list: The list to dispose of
   1995 *
    1996 * Disposing of glocks may involve disk accesses, so here we sort the
    1997 * glocks by number (i.e. disk location of the inodes) so that any such
    1998 * accesses are mostly sent in order.
   1999 *
   2000 * Must be called under the lru_lock, but may drop and retake this
   2001 * lock. While the lru_lock is dropped, entries may vanish from the
   2002 * list, but no new entries will appear on the list (since it is
   2003 * private)
   2004 */
   2005
   2006static void gfs2_dispose_glock_lru(struct list_head *list)
   2007__releases(&lru_lock)
   2008__acquires(&lru_lock)
   2009{
   2010	struct gfs2_glock *gl;
   2011
   2012	list_sort(NULL, list, glock_cmp);
   2013
    2014	while (!list_empty(list)) {
   2015		gl = list_first_entry(list, struct gfs2_glock, gl_lru);
   2016		list_del_init(&gl->gl_lru);
   2017		clear_bit(GLF_LRU, &gl->gl_flags);
   2018		if (!spin_trylock(&gl->gl_lockref.lock)) {
   2019add_back_to_lru:
   2020			list_add(&gl->gl_lru, &lru_list);
   2021			set_bit(GLF_LRU, &gl->gl_flags);
   2022			atomic_inc(&lru_count);
   2023			continue;
   2024		}
   2025		if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
   2026			spin_unlock(&gl->gl_lockref.lock);
   2027			goto add_back_to_lru;
   2028		}
   2029		gl->gl_lockref.count++;
   2030		if (demote_ok(gl))
   2031			handle_callback(gl, LM_ST_UNLOCKED, 0, false);
   2032		WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
   2033		__gfs2_glock_queue_work(gl, 0);
   2034		spin_unlock(&gl->gl_lockref.lock);
   2035		cond_resched_lock(&lru_lock);
   2036	}
   2037}
   2038
   2039/**
   2040 * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote
   2041 * @nr: The number of entries to scan
   2042 *
   2043 * This function selects the entries on the LRU which are able to
   2044 * be demoted, and then kicks off the process by calling
   2045 * gfs2_dispose_glock_lru() above.
   2046 */
   2047
   2048static long gfs2_scan_glock_lru(int nr)
   2049{
   2050	struct gfs2_glock *gl;
   2051	LIST_HEAD(skipped);
   2052	LIST_HEAD(dispose);
   2053	long freed = 0;
   2054
   2055	spin_lock(&lru_lock);
   2056	while ((nr-- >= 0) && !list_empty(&lru_list)) {
   2057		gl = list_first_entry(&lru_list, struct gfs2_glock, gl_lru);
   2058
   2059		/* Test for being demotable */
   2060		if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
   2061			list_move(&gl->gl_lru, &dispose);
   2062			atomic_dec(&lru_count);
   2063			freed++;
   2064			continue;
   2065		}
   2066
   2067		list_move(&gl->gl_lru, &skipped);
   2068	}
   2069	list_splice(&skipped, &lru_list);
   2070	if (!list_empty(&dispose))
   2071		gfs2_dispose_glock_lru(&dispose);
   2072	spin_unlock(&lru_lock);
   2073
   2074	return freed;
   2075}
   2076
   2077static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink,
   2078					    struct shrink_control *sc)
   2079{
   2080	if (!(sc->gfp_mask & __GFP_FS))
   2081		return SHRINK_STOP;
   2082	return gfs2_scan_glock_lru(sc->nr_to_scan);
   2083}
   2084
   2085static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink,
   2086					     struct shrink_control *sc)
   2087{
   2088	return vfs_pressure_ratio(atomic_read(&lru_count));
   2089}
   2090
   2091static struct shrinker glock_shrinker = {
   2092	.seeks = DEFAULT_SEEKS,
   2093	.count_objects = gfs2_glock_shrink_count,
   2094	.scan_objects = gfs2_glock_shrink_scan,
   2095};
   2096
   2097/**
    2098 * glock_hash_walk - Call a function for each glock in the hash table
   2099 * @examiner: the function
   2100 * @sdp: the filesystem
   2101 *
   2102 * Note that the function can be called multiple times on the same
   2103 * object.  So the user must ensure that the function can cope with
   2104 * that.
   2105 */
   2106
   2107static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
   2108{
   2109	struct gfs2_glock *gl;
   2110	struct rhashtable_iter iter;
   2111
   2112	rhashtable_walk_enter(&gl_hash_table, &iter);
   2113
   2114	do {
   2115		rhashtable_walk_start(&iter);
   2116
   2117		while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) {
   2118			if (gl->gl_name.ln_sbd == sdp)
   2119				examiner(gl);
   2120		}
   2121
   2122		rhashtable_walk_stop(&iter);
   2123	} while (cond_resched(), gl == ERR_PTR(-EAGAIN));
   2124
   2125	rhashtable_walk_exit(&iter);
   2126}
   2127
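/*
 * Illustrative sketch (editorial): an examiner is simply a
 * void (*)(struct gfs2_glock *); thaw_glock(), clear_glock() and
 * dump_glock_func() below are the in-file users. A hypothetical examiner
 * that counts the filesystem's glocks might look like:
 *
 *	static atomic_t glock_count;		(hypothetical counter)
 *
 *	static void count_glock(struct gfs2_glock *gl)
 *	{
 *		atomic_inc(&glock_count);
 *	}
 *	...
 *	glock_hash_walk(count_glock, sdp);
 *
 * Because the walk restarts on -EAGAIN, the same glock may be seen more than
 * once, so examiners must tolerate duplicates, as noted above.
 */
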
   2128bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay)
   2129{
   2130	bool queued;
   2131
   2132	spin_lock(&gl->gl_lockref.lock);
   2133	queued = queue_delayed_work(gfs2_delete_workqueue,
   2134				    &gl->gl_delete, delay);
   2135	if (queued)
   2136		set_bit(GLF_PENDING_DELETE, &gl->gl_flags);
   2137	spin_unlock(&gl->gl_lockref.lock);
   2138	return queued;
   2139}
   2140
   2141void gfs2_cancel_delete_work(struct gfs2_glock *gl)
   2142{
   2143	if (cancel_delayed_work(&gl->gl_delete)) {
   2144		clear_bit(GLF_PENDING_DELETE, &gl->gl_flags);
   2145		gfs2_glock_put(gl);
   2146	}
   2147}
   2148
   2149bool gfs2_delete_work_queued(const struct gfs2_glock *gl)
   2150{
   2151	return test_bit(GLF_PENDING_DELETE, &gl->gl_flags);
   2152}
   2153
   2154static void flush_delete_work(struct gfs2_glock *gl)
   2155{
   2156	if (gl->gl_name.ln_type == LM_TYPE_IOPEN) {
   2157		if (cancel_delayed_work(&gl->gl_delete)) {
   2158			queue_delayed_work(gfs2_delete_workqueue,
   2159					   &gl->gl_delete, 0);
   2160		}
   2161	}
   2162}
   2163
   2164void gfs2_flush_delete_work(struct gfs2_sbd *sdp)
   2165{
   2166	glock_hash_walk(flush_delete_work, sdp);
   2167	flush_workqueue(gfs2_delete_workqueue);
   2168}
   2169
   2170/**
   2171 * thaw_glock - thaw out a glock which has an unprocessed reply waiting
   2172 * @gl: The glock to thaw
   2173 *
   2174 */
   2175
   2176static void thaw_glock(struct gfs2_glock *gl)
   2177{
   2178	if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
   2179		return;
   2180	if (!lockref_get_not_dead(&gl->gl_lockref))
   2181		return;
   2182	set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
   2183	gfs2_glock_queue_work(gl, 0);
   2184}
   2185
   2186/**
   2187 * clear_glock - look at a glock and see if we can free it from glock cache
   2188 * @gl: the glock to look at
   2189 *
   2190 */
   2191
   2192static void clear_glock(struct gfs2_glock *gl)
   2193{
   2194	gfs2_glock_remove_from_lru(gl);
   2195
   2196	spin_lock(&gl->gl_lockref.lock);
   2197	if (!__lockref_is_dead(&gl->gl_lockref)) {
   2198		gl->gl_lockref.count++;
   2199		if (gl->gl_state != LM_ST_UNLOCKED)
   2200			handle_callback(gl, LM_ST_UNLOCKED, 0, false);
   2201		__gfs2_glock_queue_work(gl, 0);
   2202	}
   2203	spin_unlock(&gl->gl_lockref.lock);
   2204}
   2205
   2206/**
   2207 * gfs2_glock_thaw - Thaw any frozen glocks
   2208 * @sdp: The super block
   2209 *
   2210 */
   2211
   2212void gfs2_glock_thaw(struct gfs2_sbd *sdp)
   2213{
   2214	glock_hash_walk(thaw_glock, sdp);
   2215}
   2216
   2217static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
   2218{
   2219	spin_lock(&gl->gl_lockref.lock);
   2220	gfs2_dump_glock(seq, gl, fsid);
   2221	spin_unlock(&gl->gl_lockref.lock);
   2222}
   2223
   2224static void dump_glock_func(struct gfs2_glock *gl)
   2225{
   2226	dump_glock(NULL, gl, true);
   2227}
   2228
   2229/**
   2230 * gfs2_gl_hash_clear - Empty out the glock hash table
   2231 * @sdp: the filesystem
   2232 *
   2233 * Called when unmounting the filesystem.
   2234 */
   2235
   2236void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
   2237{
   2238	set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
   2239	flush_workqueue(glock_workqueue);
   2240	glock_hash_walk(clear_glock, sdp);
   2241	flush_workqueue(glock_workqueue);
   2242	wait_event_timeout(sdp->sd_glock_wait,
   2243			   atomic_read(&sdp->sd_glock_disposal) == 0,
   2244			   HZ * 600);
   2245	glock_hash_walk(dump_glock_func, sdp);
   2246}
   2247
   2248void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
   2249{
   2250	struct gfs2_glock *gl = ip->i_gl;
   2251	int ret;
   2252
   2253	ret = gfs2_truncatei_resume(ip);
   2254	gfs2_glock_assert_withdraw(gl, ret == 0);
   2255
   2256	spin_lock(&gl->gl_lockref.lock);
   2257	clear_bit(GLF_LOCK, &gl->gl_flags);
   2258	run_queue(gl, 1);
   2259	spin_unlock(&gl->gl_lockref.lock);
   2260}
   2261
   2262static const char *state2str(unsigned state)
   2263{
    2264	switch (state) {
   2265	case LM_ST_UNLOCKED:
   2266		return "UN";
   2267	case LM_ST_SHARED:
   2268		return "SH";
   2269	case LM_ST_DEFERRED:
   2270		return "DF";
   2271	case LM_ST_EXCLUSIVE:
   2272		return "EX";
   2273	}
   2274	return "??";
   2275}
   2276
   2277static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
   2278{
   2279	char *p = buf;
   2280	if (flags & LM_FLAG_TRY)
   2281		*p++ = 't';
   2282	if (flags & LM_FLAG_TRY_1CB)
   2283		*p++ = 'T';
   2284	if (flags & LM_FLAG_NOEXP)
   2285		*p++ = 'e';
   2286	if (flags & LM_FLAG_ANY)
   2287		*p++ = 'A';
   2288	if (flags & LM_FLAG_PRIORITY)
   2289		*p++ = 'p';
   2290	if (flags & LM_FLAG_NODE_SCOPE)
   2291		*p++ = 'n';
   2292	if (flags & GL_ASYNC)
   2293		*p++ = 'a';
   2294	if (flags & GL_EXACT)
   2295		*p++ = 'E';
   2296	if (flags & GL_NOCACHE)
   2297		*p++ = 'c';
   2298	if (test_bit(HIF_HOLDER, &iflags))
   2299		*p++ = 'H';
   2300	if (test_bit(HIF_WAIT, &iflags))
   2301		*p++ = 'W';
   2302	if (test_bit(HIF_MAY_DEMOTE, &iflags))
   2303		*p++ = 'D';
   2304	if (flags & GL_SKIP)
   2305		*p++ = 's';
   2306	*p = 0;
   2307	return buf;
   2308}
   2309
   2310/**
   2311 * dump_holder - print information about a glock holder
   2312 * @seq: the seq_file struct
   2313 * @gh: the glock holder
   2314 * @fs_id_buf: pointer to file system id (if requested)
   2315 *
   2316 */
   2317
   2318static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh,
   2319			const char *fs_id_buf)
   2320{
   2321	struct task_struct *gh_owner = NULL;
   2322	char flags_buf[32];
   2323
   2324	rcu_read_lock();
   2325	if (gh->gh_owner_pid)
   2326		gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
   2327	gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
   2328		       fs_id_buf, state2str(gh->gh_state),
   2329		       hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
   2330		       gh->gh_error,
   2331		       gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
   2332		       gh_owner ? gh_owner->comm : "(ended)",
   2333		       (void *)gh->gh_ip);
   2334	rcu_read_unlock();
   2335}
   2336
   2337static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
   2338{
   2339	const unsigned long *gflags = &gl->gl_flags;
   2340	char *p = buf;
   2341
   2342	if (test_bit(GLF_LOCK, gflags))
   2343		*p++ = 'l';
   2344	if (test_bit(GLF_DEMOTE, gflags))
   2345		*p++ = 'D';
   2346	if (test_bit(GLF_PENDING_DEMOTE, gflags))
   2347		*p++ = 'd';
   2348	if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags))
   2349		*p++ = 'p';
   2350	if (test_bit(GLF_DIRTY, gflags))
   2351		*p++ = 'y';
   2352	if (test_bit(GLF_LFLUSH, gflags))
   2353		*p++ = 'f';
   2354	if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags))
   2355		*p++ = 'i';
   2356	if (test_bit(GLF_REPLY_PENDING, gflags))
   2357		*p++ = 'r';
   2358	if (test_bit(GLF_INITIAL, gflags))
   2359		*p++ = 'I';
   2360	if (test_bit(GLF_FROZEN, gflags))
   2361		*p++ = 'F';
   2362	if (!list_empty(&gl->gl_holders))
   2363		*p++ = 'q';
   2364	if (test_bit(GLF_LRU, gflags))
   2365		*p++ = 'L';
   2366	if (gl->gl_object)
   2367		*p++ = 'o';
   2368	if (test_bit(GLF_BLOCKING, gflags))
   2369		*p++ = 'b';
   2370	if (test_bit(GLF_PENDING_DELETE, gflags))
   2371		*p++ = 'P';
   2372	if (test_bit(GLF_FREEING, gflags))
   2373		*p++ = 'x';
   2374	if (test_bit(GLF_INSTANTIATE_NEEDED, gflags))
   2375		*p++ = 'n';
   2376	if (test_bit(GLF_INSTANTIATE_IN_PROG, gflags))
   2377		*p++ = 'N';
   2378	*p = 0;
   2379	return buf;
   2380}
   2381
   2382/**
   2383 * gfs2_dump_glock - print information about a glock
   2384 * @seq: The seq_file struct
   2385 * @gl: the glock
   2386 * @fsid: If true, also dump the file system id
   2387 *
   2388 * The file format is as follows:
   2389 * One line per object, capital letters are used to indicate objects
   2390 * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented,
   2391 * other objects are indented by a single space and follow the glock to
   2392 * which they are related. Fields are indicated by lower case letters
   2393 * followed by a colon and the field value, except for strings which are in
    2394 * [] so that it's possible to see if they are composed of spaces for
    2395 * example. The fields are n = number (id of the object), f = flags,
   2396 * t = type, s = state, r = refcount, e = error, p = pid.
   2397 *
   2398 */
   2399
   2400void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
   2401{
   2402	const struct gfs2_glock_operations *glops = gl->gl_ops;
   2403	unsigned long long dtime;
   2404	const struct gfs2_holder *gh;
   2405	char gflags_buf[32];
   2406	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
   2407	char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
   2408	unsigned long nrpages = 0;
   2409
   2410	if (gl->gl_ops->go_flags & GLOF_ASPACE) {
   2411		struct address_space *mapping = gfs2_glock2aspace(gl);
   2412
   2413		nrpages = mapping->nrpages;
   2414	}
   2415	memset(fs_id_buf, 0, sizeof(fs_id_buf));
   2416	if (fsid && sdp) /* safety precaution */
   2417		sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
   2418	dtime = jiffies - gl->gl_demote_time;
   2419	dtime *= 1000000/HZ; /* demote time in uSec */
   2420	if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
   2421		dtime = 0;
   2422	gfs2_print_dbg(seq, "%sG:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d "
   2423		       "v:%d r:%d m:%ld p:%lu\n",
   2424		       fs_id_buf, state2str(gl->gl_state),
   2425		       gl->gl_name.ln_type,
   2426		       (unsigned long long)gl->gl_name.ln_number,
   2427		       gflags2str(gflags_buf, gl),
   2428		       state2str(gl->gl_target),
   2429		       state2str(gl->gl_demote_state), dtime,
   2430		       atomic_read(&gl->gl_ail_count),
   2431		       atomic_read(&gl->gl_revokes),
   2432		       (int)gl->gl_lockref.count, gl->gl_hold_time, nrpages);
   2433
   2434	list_for_each_entry(gh, &gl->gl_holders, gh_list)
   2435		dump_holder(seq, gh, fs_id_buf);
   2436
   2437	if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
   2438		glops->go_dump(seq, gl, fs_id_buf);
   2439}
   2440
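/*
 * Illustrative (approximate) example of the format described above, as it
 * appears in the "glocks" debugfs file; the values shown are made up:
 *
 *	G:  s:SH n:2/27bc f:qL t:SH d:EX/0 a:0 v:0 r:3 m:200 p:1
 *	 H: s:SH f:EH e:0 p:1234 [cat] gfs2_open_common+0x41/0x100
 *
 * i.e. a shared inode glock (type 2, number 0x27bc) with one holder owned by
 * pid 1234.
 */
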
   2441static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
   2442{
   2443	struct gfs2_glock *gl = iter_ptr;
   2444
   2445	seq_printf(seq, "G: n:%u/%llx rtt:%llu/%llu rttb:%llu/%llu irt:%llu/%llu dcnt: %llu qcnt: %llu\n",
   2446		   gl->gl_name.ln_type,
   2447		   (unsigned long long)gl->gl_name.ln_number,
   2448		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
   2449		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
   2450		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
   2451		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
   2452		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
   2453		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
   2454		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
   2455		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
   2456	return 0;
   2457}
   2458
   2459static const char *gfs2_gltype[] = {
   2460	"type",
   2461	"reserved",
   2462	"nondisk",
   2463	"inode",
   2464	"rgrp",
   2465	"meta",
   2466	"iopen",
   2467	"flock",
   2468	"plock",
   2469	"quota",
   2470	"journal",
   2471};
   2472
   2473static const char *gfs2_stype[] = {
   2474	[GFS2_LKS_SRTT]		= "srtt",
   2475	[GFS2_LKS_SRTTVAR]	= "srttvar",
   2476	[GFS2_LKS_SRTTB]	= "srttb",
   2477	[GFS2_LKS_SRTTVARB]	= "srttvarb",
   2478	[GFS2_LKS_SIRT]		= "sirt",
   2479	[GFS2_LKS_SIRTVAR]	= "sirtvar",
   2480	[GFS2_LKS_DCOUNT]	= "dlm",
   2481	[GFS2_LKS_QCOUNT]	= "queue",
   2482};
   2483
   2484#define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype))
   2485
   2486static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
   2487{
   2488	struct gfs2_sbd *sdp = seq->private;
   2489	loff_t pos = *(loff_t *)iter_ptr;
   2490	unsigned index = pos >> 3;
   2491	unsigned subindex = pos & 0x07;
   2492	int i;
   2493
   2494	if (index == 0 && subindex != 0)
   2495		return 0;
   2496
   2497	seq_printf(seq, "%-10s %8s:", gfs2_gltype[index],
   2498		   (index == 0) ? "cpu": gfs2_stype[subindex]);
   2499
   2500	for_each_possible_cpu(i) {
    2501		const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i);
   2502
   2503		if (index == 0)
   2504			seq_printf(seq, " %15u", i);
   2505		else
   2506			seq_printf(seq, " %15llu", (unsigned long long)lkstats->
   2507				   lkstats[index - 1].stats[subindex]);
   2508	}
   2509	seq_putc(seq, '\n');
   2510	return 0;
   2511}
   2512
   2513int __init gfs2_glock_init(void)
   2514{
   2515	int i, ret;
   2516
   2517	ret = rhashtable_init(&gl_hash_table, &ht_parms);
   2518	if (ret < 0)
   2519		return ret;
   2520
   2521	glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
   2522					  WQ_HIGHPRI | WQ_FREEZABLE, 0);
   2523	if (!glock_workqueue) {
   2524		rhashtable_destroy(&gl_hash_table);
   2525		return -ENOMEM;
   2526	}
   2527	gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
   2528						WQ_MEM_RECLAIM | WQ_FREEZABLE,
   2529						0);
   2530	if (!gfs2_delete_workqueue) {
   2531		destroy_workqueue(glock_workqueue);
   2532		rhashtable_destroy(&gl_hash_table);
   2533		return -ENOMEM;
   2534	}
   2535
   2536	ret = register_shrinker(&glock_shrinker);
   2537	if (ret) {
   2538		destroy_workqueue(gfs2_delete_workqueue);
   2539		destroy_workqueue(glock_workqueue);
   2540		rhashtable_destroy(&gl_hash_table);
   2541		return ret;
   2542	}
   2543
   2544	for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++)
   2545		init_waitqueue_head(glock_wait_table + i);
   2546
   2547	return 0;
   2548}
   2549
   2550void gfs2_glock_exit(void)
   2551{
   2552	unregister_shrinker(&glock_shrinker);
   2553	rhashtable_destroy(&gl_hash_table);
   2554	destroy_workqueue(glock_workqueue);
   2555	destroy_workqueue(gfs2_delete_workqueue);
   2556}
   2557
   2558static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n)
   2559{
   2560	struct gfs2_glock *gl = gi->gl;
   2561
   2562	if (gl) {
   2563		if (n == 0)
   2564			return;
   2565		if (!lockref_put_not_zero(&gl->gl_lockref))
   2566			gfs2_glock_queue_put(gl);
   2567	}
   2568	for (;;) {
   2569		gl = rhashtable_walk_next(&gi->hti);
   2570		if (IS_ERR_OR_NULL(gl)) {
   2571			if (gl == ERR_PTR(-EAGAIN)) {
   2572				n = 1;
   2573				continue;
   2574			}
   2575			gl = NULL;
   2576			break;
   2577		}
   2578		if (gl->gl_name.ln_sbd != gi->sdp)
   2579			continue;
   2580		if (n <= 1) {
   2581			if (!lockref_get_not_dead(&gl->gl_lockref))
   2582				continue;
   2583			break;
   2584		} else {
   2585			if (__lockref_is_dead(&gl->gl_lockref))
   2586				continue;
   2587			n--;
   2588		}
   2589	}
   2590	gi->gl = gl;
   2591}
   2592
   2593static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
   2594	__acquires(RCU)
   2595{
   2596	struct gfs2_glock_iter *gi = seq->private;
   2597	loff_t n;
   2598
   2599	/*
   2600	 * We can either stay where we are, skip to the next hash table
   2601	 * entry, or start from the beginning.
   2602	 */
   2603	if (*pos < gi->last_pos) {
   2604		rhashtable_walk_exit(&gi->hti);
   2605		rhashtable_walk_enter(&gl_hash_table, &gi->hti);
   2606		n = *pos + 1;
   2607	} else {
   2608		n = *pos - gi->last_pos;
   2609	}
   2610
   2611	rhashtable_walk_start(&gi->hti);
   2612
   2613	gfs2_glock_iter_next(gi, n);
   2614	gi->last_pos = *pos;
   2615	return gi->gl;
   2616}
   2617
   2618static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
   2619				 loff_t *pos)
   2620{
   2621	struct gfs2_glock_iter *gi = seq->private;
   2622
   2623	(*pos)++;
   2624	gi->last_pos = *pos;
   2625	gfs2_glock_iter_next(gi, 1);
   2626	return gi->gl;
   2627}
   2628
   2629static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
   2630	__releases(RCU)
   2631{
   2632	struct gfs2_glock_iter *gi = seq->private;
   2633
   2634	rhashtable_walk_stop(&gi->hti);
   2635}
   2636
   2637static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
   2638{
   2639	dump_glock(seq, iter_ptr, false);
   2640	return 0;
   2641}
   2642
   2643static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos)
   2644{
   2645	preempt_disable();
   2646	if (*pos >= GFS2_NR_SBSTATS)
   2647		return NULL;
   2648	return pos;
   2649}
   2650
   2651static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr,
   2652				   loff_t *pos)
   2653{
   2654	(*pos)++;
   2655	if (*pos >= GFS2_NR_SBSTATS)
   2656		return NULL;
   2657	return pos;
   2658}
   2659
   2660static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr)
   2661{
   2662	preempt_enable();
   2663}
   2664
   2665static const struct seq_operations gfs2_glock_seq_ops = {
   2666	.start = gfs2_glock_seq_start,
   2667	.next  = gfs2_glock_seq_next,
   2668	.stop  = gfs2_glock_seq_stop,
   2669	.show  = gfs2_glock_seq_show,
   2670};
   2671
   2672static const struct seq_operations gfs2_glstats_seq_ops = {
   2673	.start = gfs2_glock_seq_start,
   2674	.next  = gfs2_glock_seq_next,
   2675	.stop  = gfs2_glock_seq_stop,
   2676	.show  = gfs2_glstats_seq_show,
   2677};
   2678
   2679static const struct seq_operations gfs2_sbstats_sops = {
   2680	.start = gfs2_sbstats_seq_start,
   2681	.next  = gfs2_sbstats_seq_next,
   2682	.stop  = gfs2_sbstats_seq_stop,
   2683	.show  = gfs2_sbstats_seq_show,
   2684};
   2685
   2686#define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL)
   2687
   2688static int __gfs2_glocks_open(struct inode *inode, struct file *file,
   2689			      const struct seq_operations *ops)
   2690{
   2691	int ret = seq_open_private(file, ops, sizeof(struct gfs2_glock_iter));
   2692	if (ret == 0) {
   2693		struct seq_file *seq = file->private_data;
   2694		struct gfs2_glock_iter *gi = seq->private;
   2695
   2696		gi->sdp = inode->i_private;
   2697		seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
   2698		if (seq->buf)
   2699			seq->size = GFS2_SEQ_GOODSIZE;
   2700		/*
   2701		 * Initially, we are "before" the first hash table entry; the
   2702		 * first call to rhashtable_walk_next gets us the first entry.
   2703		 */
   2704		gi->last_pos = -1;
   2705		gi->gl = NULL;
   2706		rhashtable_walk_enter(&gl_hash_table, &gi->hti);
   2707	}
   2708	return ret;
   2709}
   2710
   2711static int gfs2_glocks_open(struct inode *inode, struct file *file)
   2712{
   2713	return __gfs2_glocks_open(inode, file, &gfs2_glock_seq_ops);
   2714}
   2715
   2716static int gfs2_glocks_release(struct inode *inode, struct file *file)
   2717{
   2718	struct seq_file *seq = file->private_data;
   2719	struct gfs2_glock_iter *gi = seq->private;
   2720
   2721	if (gi->gl)
   2722		gfs2_glock_put(gi->gl);
   2723	rhashtable_walk_exit(&gi->hti);
   2724	return seq_release_private(inode, file);
   2725}
   2726
   2727static int gfs2_glstats_open(struct inode *inode, struct file *file)
   2728{
   2729	return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops);
   2730}
   2731
   2732static const struct file_operations gfs2_glocks_fops = {
   2733	.owner   = THIS_MODULE,
   2734	.open    = gfs2_glocks_open,
   2735	.read    = seq_read,
   2736	.llseek  = seq_lseek,
   2737	.release = gfs2_glocks_release,
   2738};
   2739
   2740static const struct file_operations gfs2_glstats_fops = {
   2741	.owner   = THIS_MODULE,
   2742	.open    = gfs2_glstats_open,
   2743	.read    = seq_read,
   2744	.llseek  = seq_lseek,
   2745	.release = gfs2_glocks_release,
   2746};
   2747
   2748DEFINE_SEQ_ATTRIBUTE(gfs2_sbstats);
   2749
   2750void gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
   2751{
   2752	sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
   2753
   2754	debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
   2755			    &gfs2_glocks_fops);
   2756
   2757	debugfs_create_file("glstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
   2758			    &gfs2_glstats_fops);
   2759
   2760	debugfs_create_file("sbstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
   2761			    &gfs2_sbstats_fops);
   2762}
   2763
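/*
 * Editorial note: with debugfs mounted in the usual place, the files created
 * above appear as
 *
 *	/sys/kernel/debug/gfs2/<table name>/glocks
 *	/sys/kernel/debug/gfs2/<table name>/glstats
 *	/sys/kernel/debug/gfs2/<table name>/sbstats
 *
 * where <table name> is sdp->sd_table_name (typically "cluster:fsname").
 */
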
   2764void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
   2765{
   2766	debugfs_remove_recursive(sdp->debugfs_dir);
   2767	sdp->debugfs_dir = NULL;
   2768}
   2769
   2770void gfs2_register_debugfs(void)
   2771{
   2772	gfs2_root = debugfs_create_dir("gfs2", NULL);
   2773}
   2774
   2775void gfs2_unregister_debugfs(void)
   2776{
   2777	debugfs_remove(gfs2_root);
   2778	gfs2_root = NULL;
   2779}