cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

xenbus_xs.c (22320B)


      1/******************************************************************************
      2 * xenbus_xs.c
      3 *
      4 * This is the kernel equivalent of the "xs" library.  We don't need everything
      5 * and we use xenbus_comms for communication.
      6 *
      7 * Copyright (C) 2005 Rusty Russell, IBM Corporation
      8 *
      9 * This program is free software; you can redistribute it and/or
     10 * modify it under the terms of the GNU General Public License version 2
     11 * as published by the Free Software Foundation; or, when distributed
     12 * separately from the Linux kernel or incorporated into other
     13 * software packages, subject to the following license:
     14 *
     15 * Permission is hereby granted, free of charge, to any person obtaining a copy
     16 * of this source file (the "Software"), to deal in the Software without
     17 * restriction, including without limitation the rights to use, copy, modify,
     18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
     19 * and to permit persons to whom the Software is furnished to do so, subject to
     20 * the following conditions:
     21 *
     22 * The above copyright notice and this permission notice shall be included in
     23 * all copies or substantial portions of the Software.
     24 *
     25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     31 * IN THE SOFTWARE.
     32 */
     33
     34#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
     35
     36#include <linux/unistd.h>
     37#include <linux/errno.h>
     38#include <linux/types.h>
     39#include <linux/uio.h>
     40#include <linux/kernel.h>
     41#include <linux/string.h>
     42#include <linux/err.h>
     43#include <linux/slab.h>
     44#include <linux/fcntl.h>
     45#include <linux/kthread.h>
     46#include <linux/reboot.h>
     47#include <linux/rwsem.h>
     48#include <linux/mutex.h>
     49#include <asm/xen/hypervisor.h>
     50#include <xen/xenbus.h>
     51#include <xen/xen.h>
     52#include "xenbus.h"
     53
     54/*
     55 * Framework to protect suspend/resume handling against normal Xenstore
     56 * message handling:
     57 * While a suspend/resume is in progress there must be neither an open
     58 * transaction nor a pending Xenstore request.
     59 * Watch events arriving during that time may be ignored, because all watches
     60 * are fired again after resume.
     61 */
     62
     63/* Lock serializing entry to and exit from the critical region. */
     64static DEFINE_SPINLOCK(xs_state_lock);
     65/* Number of users inside the critical region (protected by xs_state_lock). */
     66static unsigned int xs_state_users;
     67/* Non-zero while a suspend handler waits or runs (protected by xs_state_lock). */
     68static int xs_suspend_active;
     69/* Next unique Xenstore request id (protected by xs_state_lock). */
     70static uint32_t xs_request_id;
     71
     72/* Requesters sleep here until the critical region may be entered again. */
     73static DECLARE_WAIT_QUEUE_HEAD(xs_state_enter_wq);
     74/* The suspend handler sleeps here until the critical region is empty. */
     75static DECLARE_WAIT_QUEUE_HEAD(xs_state_exit_wq);
     76
     77/* All registered watches; the list is protected by watches_lock. */
     78static LIST_HEAD(watches);
     79static DEFINE_SPINLOCK(watches_lock);
     80
     81/* Watch events queued for the xenwatch thread; protected by watch_events_lock. */
     82static LIST_HEAD(watch_events);
     83static DEFINE_SPINLOCK(watch_events_lock);
     84
     85/* Read-held by watch (un)registration, write-held from xs_suspend() until resume/cancel. */
     86static DECLARE_RWSEM(xs_watch_rwsem);
     87
     88/*
     89 * State of the xenwatch callback kernel thread. The thread sleeps on
     90 * watch_events_waitq until work is queued on the watch_events list. After
     91 * waking up it takes xenwatch_mutex before it reads the list and runs the
     92 * callback.
     93 */
     94static pid_t xenwatch_pid;	/* pid recorded by xenwatch_thread() at start */
     95static DEFINE_MUTEX(xenwatch_mutex);	/* held while a watch callback runs */
     96static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq);
     97
     98static void xs_suspend_enter(void)
     99{
    100	spin_lock(&xs_state_lock);
    101	xs_suspend_active++;
    102	spin_unlock(&xs_state_lock);
    103	wait_event(xs_state_exit_wq, xs_state_users == 0);
    104}
    105
    106static void xs_suspend_exit(void)
    107{
    108	xb_dev_generation_id++;
    109	spin_lock(&xs_state_lock);
    110	xs_suspend_active--;
    111	spin_unlock(&xs_state_lock);
    112	wake_up_all(&xs_state_enter_wq);
    113}
    114
    115static uint32_t xs_request_enter(struct xb_req_data *req)
    116{
    117	uint32_t rq_id;
    118
    119	req->type = req->msg.type;
    120
    121	spin_lock(&xs_state_lock);
    122
    123	while (!xs_state_users && xs_suspend_active) {
    124		spin_unlock(&xs_state_lock);
    125		wait_event(xs_state_enter_wq, xs_suspend_active == 0);
    126		spin_lock(&xs_state_lock);
    127	}
    128
    129	if (req->type == XS_TRANSACTION_START && !req->user_req)
    130		xs_state_users++;
    131	xs_state_users++;
    132	rq_id = xs_request_id++;
    133
    134	spin_unlock(&xs_state_lock);
    135
    136	return rq_id;
    137}
    138
    139void xs_request_exit(struct xb_req_data *req)
    140{
    141	spin_lock(&xs_state_lock);
    142	xs_state_users--;
    143	if ((req->type == XS_TRANSACTION_START && req->msg.type == XS_ERROR) ||
    144	    (req->type == XS_TRANSACTION_END && !req->user_req &&
    145	     !WARN_ON_ONCE(req->msg.type == XS_ERROR &&
    146			   !strcmp(req->body, "ENOENT"))))
    147		xs_state_users--;
    148	spin_unlock(&xs_state_lock);
    149
    150	if (xs_suspend_active && !xs_state_users)
    151		wake_up(&xs_state_exit_wq);
    152}
    153
    154static int get_error(const char *errorstring)
    155{
    156	unsigned int i;
    157
    158	for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) {
    159		if (i == ARRAY_SIZE(xsd_errors) - 1) {
    160			pr_warn("xen store gave: unknown error %s\n",
    161				errorstring);
    162			return EINVAL;
    163		}
    164	}
    165	return xsd_errors[i].errnum;
    166}
    167
    168static bool xenbus_ok(void)
    169{
    170	switch (xen_store_domain_type) {
    171	case XS_LOCAL:
    172		switch (system_state) {
    173		case SYSTEM_POWER_OFF:
    174		case SYSTEM_RESTART:
    175		case SYSTEM_HALT:
    176			return false;
    177		default:
    178			break;
    179		}
    180		return true;
    181	case XS_PV:
    182	case XS_HVM:
    183		/* FIXME: Could check that the remote domain is alive,
    184		 * but it is normally initial domain. */
    185		return true;
    186	default:
    187		break;
    188	}
    189	return false;
    190}
    191
    192static bool test_reply(struct xb_req_data *req)
    193{
    194	if (req->state == xb_req_state_got_reply || !xenbus_ok()) {
    195		/* read req->state before all other fields */
    196		virt_rmb();
    197		return true;
    198	}
    199
    200	/* Make sure to reread req->state each time. */
    201	barrier();
    202
    203	return false;
    204}
    205
    206static void *read_reply(struct xb_req_data *req)
    207{
    208	do {
    209		wait_event(req->wq, test_reply(req));
    210
    211		if (!xenbus_ok())
    212			/*
    213			 * If we are in the process of being shut-down there is
    214			 * no point of trying to contact XenBus - it is either
    215			 * killed (xenstored application) or the other domain
    216			 * has been killed or is unreachable.
    217			 */
    218			return ERR_PTR(-EIO);
    219		if (req->err)
    220			return ERR_PTR(req->err);
    221
    222	} while (req->state != xb_req_state_got_reply);
    223
    224	return req->body;
    225}
    226
    227static void xs_send(struct xb_req_data *req, struct xsd_sockmsg *msg)
    228{
    229	bool notify;
    230
    231	req->msg = *msg;
    232	req->err = 0;
    233	req->state = xb_req_state_queued;
    234	init_waitqueue_head(&req->wq);
    235
    236	/* Save the caller req_id and restore it later in the reply */
    237	req->caller_req_id = req->msg.req_id;
    238	req->msg.req_id = xs_request_enter(req);
    239
    240	mutex_lock(&xb_write_mutex);
    241	list_add_tail(&req->list, &xb_write_list);
    242	notify = list_is_singular(&xb_write_list);
    243	mutex_unlock(&xb_write_mutex);
    244
    245	if (notify)
    246		wake_up(&xb_waitq);
    247}
    248
    249static void *xs_wait_for_reply(struct xb_req_data *req, struct xsd_sockmsg *msg)
    250{
    251	void *ret;
    252
    253	ret = read_reply(req);
    254
    255	xs_request_exit(req);
    256
    257	msg->type = req->msg.type;
    258	msg->len = req->msg.len;
    259
    260	mutex_lock(&xb_write_mutex);
    261	if (req->state == xb_req_state_queued ||
    262	    req->state == xb_req_state_wait_reply)
    263		req->state = xb_req_state_aborted;
    264	else
    265		kfree(req);
    266	mutex_unlock(&xb_write_mutex);
    267
    268	return ret;
    269}
    270
    271static void xs_wake_up(struct xb_req_data *req)
    272{
    273	wake_up(&req->wq);
    274}
    275
    276int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par)
    277{
    278	struct xb_req_data *req;
    279	struct kvec *vec;
    280
    281	req = kmalloc(sizeof(*req) + sizeof(*vec), GFP_KERNEL);
    282	if (!req)
    283		return -ENOMEM;
    284
    285	vec = (struct kvec *)(req + 1);
    286	vec->iov_len = msg->len;
    287	vec->iov_base = msg + 1;
    288
    289	req->vec = vec;
    290	req->num_vecs = 1;
    291	req->cb = xenbus_dev_queue_reply;
    292	req->par = par;
    293	req->user_req = true;
    294
    295	xs_send(req, msg);
    296
    297	return 0;
    298}
    299EXPORT_SYMBOL(xenbus_dev_request_and_reply);
    300
    301/* Send message to xs, get kmalloc'ed reply.  ERR_PTR() on error. */
    302static void *xs_talkv(struct xenbus_transaction t,
    303		      enum xsd_sockmsg_type type,
    304		      const struct kvec *iovec,
    305		      unsigned int num_vecs,
    306		      unsigned int *len)
    307{
    308	struct xb_req_data *req;
    309	struct xsd_sockmsg msg;
    310	void *ret = NULL;
    311	unsigned int i;
    312	int err;
    313
    314	req = kmalloc(sizeof(*req), GFP_NOIO | __GFP_HIGH);
    315	if (!req)
    316		return ERR_PTR(-ENOMEM);
    317
    318	req->vec = iovec;
    319	req->num_vecs = num_vecs;
    320	req->cb = xs_wake_up;
    321	req->user_req = false;
    322
    323	msg.req_id = 0;
    324	msg.tx_id = t.id;
    325	msg.type = type;
    326	msg.len = 0;
    327	for (i = 0; i < num_vecs; i++)
    328		msg.len += iovec[i].iov_len;
    329
    330	xs_send(req, &msg);
    331
    332	ret = xs_wait_for_reply(req, &msg);
    333	if (len)
    334		*len = msg.len;
    335
    336	if (IS_ERR(ret))
    337		return ret;
    338
    339	if (msg.type == XS_ERROR) {
    340		err = get_error(ret);
    341		kfree(ret);
    342		return ERR_PTR(-err);
    343	}
    344
    345	if (msg.type != type) {
    346		pr_warn_ratelimited("unexpected type [%d], expected [%d]\n",
    347				    msg.type, type);
    348		kfree(ret);
    349		return ERR_PTR(-EINVAL);
    350	}
    351	return ret;
    352}
    353
    354/* Simplified version of xs_talkv: single message. */
    355static void *xs_single(struct xenbus_transaction t,
    356		       enum xsd_sockmsg_type type,
    357		       const char *string,
    358		       unsigned int *len)
    359{
    360	struct kvec iovec;
    361
    362	iovec.iov_base = (void *)string;
    363	iovec.iov_len = strlen(string) + 1;
    364	return xs_talkv(t, type, &iovec, 1, len);
    365}
    366
    /*
 * Reduce a reply to a status code for commands that only need an ack:
 * an ERR_PTR() reply yields its error code, anything else is freed and
 * reported as 0.
 */
static int xs_error(char *reply)
{
	if (!IS_ERR(reply)) {
		kfree(reply);
		return 0;
	}

	return PTR_ERR(reply);
}
    375
    /*
 * Count the NUL-terminated strings packed back to back in the first len
 * bytes of strings.  A string that starts inside the range is counted even
 * if its terminator is searched beyond it, so the buffer must be terminated.
 */
static unsigned int count_strings(const char *strings, unsigned int len)
{
	const char *end = strings + len;
	const char *cur = strings;
	unsigned int count = 0;

	while (cur < end) {
		count++;
		cur += strlen(cur) + 1;
	}

	return count;
}
    386
    387/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */
    388static char *join(const char *dir, const char *name)
    389{
    390	char *buffer;
    391
    392	if (strlen(name) == 0)
    393		buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s", dir);
    394	else
    395		buffer = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/%s", dir, name);
    396	return (!buffer) ? ERR_PTR(-ENOMEM) : buffer;
    397}
    398
    399static char **split(char *strings, unsigned int len, unsigned int *num)
    400{
    401	char *p, **ret;
    402
    403	/* Count the strings. */
    404	*num = count_strings(strings, len);
    405
    406	/* Transfer to one big alloc for easy freeing. */
    407	ret = kmalloc(*num * sizeof(char *) + len, GFP_NOIO | __GFP_HIGH);
    408	if (!ret) {
    409		kfree(strings);
    410		return ERR_PTR(-ENOMEM);
    411	}
    412	memcpy(&ret[*num], strings, len);
    413	kfree(strings);
    414
    415	strings = (char *)&ret[*num];
    416	for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
    417		ret[(*num)++] = p;
    418
    419	return ret;
    420}
    421
    422char **xenbus_directory(struct xenbus_transaction t,
    423			const char *dir, const char *node, unsigned int *num)
    424{
    425	char *strings, *path;
    426	unsigned int len;
    427
    428	path = join(dir, node);
    429	if (IS_ERR(path))
    430		return (char **)path;
    431
    432	strings = xs_single(t, XS_DIRECTORY, path, &len);
    433	kfree(path);
    434	if (IS_ERR(strings))
    435		return (char **)strings;
    436
    437	return split(strings, len, num);
    438}
    439EXPORT_SYMBOL_GPL(xenbus_directory);
    440
    441/* Check if a path exists. Return 1 if it does. */
    442int xenbus_exists(struct xenbus_transaction t,
    443		  const char *dir, const char *node)
    444{
    445	char **d;
    446	int dir_n;
    447
    448	d = xenbus_directory(t, dir, node, &dir_n);
    449	if (IS_ERR(d))
    450		return 0;
    451	kfree(d);
    452	return 1;
    453}
    454EXPORT_SYMBOL_GPL(xenbus_exists);
    455
    456/* Get the value of a single file.
    457 * Returns a kmalloced value: call free() on it after use.
    458 * len indicates length in bytes.
    459 */
    460void *xenbus_read(struct xenbus_transaction t,
    461		  const char *dir, const char *node, unsigned int *len)
    462{
    463	char *path;
    464	void *ret;
    465
    466	path = join(dir, node);
    467	if (IS_ERR(path))
    468		return (void *)path;
    469
    470	ret = xs_single(t, XS_READ, path, len);
    471	kfree(path);
    472	return ret;
    473}
    474EXPORT_SYMBOL_GPL(xenbus_read);
    475
    476/* Write the value of a single file.
    477 * Returns -err on failure.
    478 */
    479int xenbus_write(struct xenbus_transaction t,
    480		 const char *dir, const char *node, const char *string)
    481{
    482	const char *path;
    483	struct kvec iovec[2];
    484	int ret;
    485
    486	path = join(dir, node);
    487	if (IS_ERR(path))
    488		return PTR_ERR(path);
    489
    490	iovec[0].iov_base = (void *)path;
    491	iovec[0].iov_len = strlen(path) + 1;
    492	iovec[1].iov_base = (void *)string;
    493	iovec[1].iov_len = strlen(string);
    494
    495	ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
    496	kfree(path);
    497	return ret;
    498}
    499EXPORT_SYMBOL_GPL(xenbus_write);
    500
    501/* Create a new directory. */
    502int xenbus_mkdir(struct xenbus_transaction t,
    503		 const char *dir, const char *node)
    504{
    505	char *path;
    506	int ret;
    507
    508	path = join(dir, node);
    509	if (IS_ERR(path))
    510		return PTR_ERR(path);
    511
    512	ret = xs_error(xs_single(t, XS_MKDIR, path, NULL));
    513	kfree(path);
    514	return ret;
    515}
    516EXPORT_SYMBOL_GPL(xenbus_mkdir);
    517
    518/* Destroy a file or directory (directories must be empty). */
    519int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node)
    520{
    521	char *path;
    522	int ret;
    523
    524	path = join(dir, node);
    525	if (IS_ERR(path))
    526		return PTR_ERR(path);
    527
    528	ret = xs_error(xs_single(t, XS_RM, path, NULL));
    529	kfree(path);
    530	return ret;
    531}
    532EXPORT_SYMBOL_GPL(xenbus_rm);
    533
    534/* Start a transaction: changes by others will not be seen during this
    535 * transaction, and changes will not be visible to others until end.
    536 */
    537int xenbus_transaction_start(struct xenbus_transaction *t)
    538{
    539	char *id_str;
    540
    541	id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL);
    542	if (IS_ERR(id_str))
    543		return PTR_ERR(id_str);
    544
    545	t->id = simple_strtoul(id_str, NULL, 0);
    546	kfree(id_str);
    547	return 0;
    548}
    549EXPORT_SYMBOL_GPL(xenbus_transaction_start);
    550
    551/* End a transaction.
    552 * If abandon is true, transaction is discarded instead of committed.
    553 */
    554int xenbus_transaction_end(struct xenbus_transaction t, int abort)
    555{
    556	char abortstr[2];
    557
    558	if (abort)
    559		strcpy(abortstr, "F");
    560	else
    561		strcpy(abortstr, "T");
    562
    563	return xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
    564}
    565EXPORT_SYMBOL_GPL(xenbus_transaction_end);
    566
    567/* Single read and scanf: returns -errno or num scanned. */
    568int xenbus_scanf(struct xenbus_transaction t,
    569		 const char *dir, const char *node, const char *fmt, ...)
    570{
    571	va_list ap;
    572	int ret;
    573	char *val;
    574
    575	val = xenbus_read(t, dir, node, NULL);
    576	if (IS_ERR(val))
    577		return PTR_ERR(val);
    578
    579	va_start(ap, fmt);
    580	ret = vsscanf(val, fmt, ap);
    581	va_end(ap);
    582	kfree(val);
    583	/* Distinctive errno. */
    584	if (ret == 0)
    585		return -ERANGE;
    586	return ret;
    587}
    588EXPORT_SYMBOL_GPL(xenbus_scanf);
    589
    590/* Read an (optional) unsigned value. */
    591unsigned int xenbus_read_unsigned(const char *dir, const char *node,
    592				  unsigned int default_val)
    593{
    594	unsigned int val;
    595	int ret;
    596
    597	ret = xenbus_scanf(XBT_NIL, dir, node, "%u", &val);
    598	if (ret <= 0)
    599		val = default_val;
    600
    601	return val;
    602}
    603EXPORT_SYMBOL_GPL(xenbus_read_unsigned);
    604
    605/* Single printf and write: returns -errno or 0. */
    606int xenbus_printf(struct xenbus_transaction t,
    607		  const char *dir, const char *node, const char *fmt, ...)
    608{
    609	va_list ap;
    610	int ret;
    611	char *buf;
    612
    613	va_start(ap, fmt);
    614	buf = kvasprintf(GFP_NOIO | __GFP_HIGH, fmt, ap);
    615	va_end(ap);
    616
    617	if (!buf)
    618		return -ENOMEM;
    619
    620	ret = xenbus_write(t, dir, node, buf);
    621
    622	kfree(buf);
    623
    624	return ret;
    625}
    626EXPORT_SYMBOL_GPL(xenbus_printf);
    627
    628/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
    629int xenbus_gather(struct xenbus_transaction t, const char *dir, ...)
    630{
    631	va_list ap;
    632	const char *name;
    633	int ret = 0;
    634
    635	va_start(ap, dir);
    636	while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
    637		const char *fmt = va_arg(ap, char *);
    638		void *result = va_arg(ap, void *);
    639		char *p;
    640
    641		p = xenbus_read(t, dir, name, NULL);
    642		if (IS_ERR(p)) {
    643			ret = PTR_ERR(p);
    644			break;
    645		}
    646		if (fmt) {
    647			if (sscanf(p, fmt, result) == 0)
    648				ret = -EINVAL;
    649			kfree(p);
    650		} else
    651			*(char **)result = p;
    652	}
    653	va_end(ap);
    654	return ret;
    655}
    656EXPORT_SYMBOL_GPL(xenbus_gather);
    657
    658static int xs_watch(const char *path, const char *token)
    659{
    660	struct kvec iov[2];
    661
    662	iov[0].iov_base = (void *)path;
    663	iov[0].iov_len = strlen(path) + 1;
    664	iov[1].iov_base = (void *)token;
    665	iov[1].iov_len = strlen(token) + 1;
    666
    667	return xs_error(xs_talkv(XBT_NIL, XS_WATCH, iov,
    668				 ARRAY_SIZE(iov), NULL));
    669}
    670
    671static int xs_unwatch(const char *path, const char *token)
    672{
    673	struct kvec iov[2];
    674
    675	iov[0].iov_base = (char *)path;
    676	iov[0].iov_len = strlen(path) + 1;
    677	iov[1].iov_base = (char *)token;
    678	iov[1].iov_len = strlen(token) + 1;
    679
    680	return xs_error(xs_talkv(XBT_NIL, XS_UNWATCH, iov,
    681				 ARRAY_SIZE(iov), NULL));
    682}
    683
    684static struct xenbus_watch *find_watch(const char *token)
    685{
    686	struct xenbus_watch *i, *cmp;
    687
    688	cmp = (void *)simple_strtoul(token, NULL, 16);
    689
    690	list_for_each_entry(i, &watches, list)
    691		if (i == cmp)
    692			return i;
    693
    694	return NULL;
    695}
    696
    697int xs_watch_msg(struct xs_watch_event *event)
    698{
    699	if (count_strings(event->body, event->len) != 2) {
    700		kfree(event);
    701		return -EINVAL;
    702	}
    703	event->path = (const char *)event->body;
    704	event->token = (const char *)strchr(event->body, '\0') + 1;
    705
    706	spin_lock(&watches_lock);
    707	event->handle = find_watch(event->token);
    708	if (event->handle != NULL &&
    709			(!event->handle->will_handle ||
    710			 event->handle->will_handle(event->handle,
    711				 event->path, event->token))) {
    712		spin_lock(&watch_events_lock);
    713		list_add_tail(&event->list, &watch_events);
    714		event->handle->nr_pending++;
    715		wake_up(&watch_events_waitq);
    716		spin_unlock(&watch_events_lock);
    717	} else
    718		kfree(event);
    719	spin_unlock(&watches_lock);
    720
    721	return 0;
    722}
    723
    /*
 * Certain older XenBus toolstacks cannot handle reading values that are
 * not populated.  Some Xen 3.4 installations are incapable of doing this,
 * so if we are running on anything older than 4 do not attempt to read
 * control/platform-feature-xs_reset_watches.
 *
 * Returns true when the quirk applies.  Without CONFIG_X86 the result is
 * always false.
 */
static bool xen_strict_xenbus_quirk(void)
{
#ifdef CONFIG_X86
	uint32_t eax, ebx, ecx, edx;
	uint32_t base = xen_cpuid_base();

	cpuid(base + 1, &eax, &ebx, &ecx, &edx);

	/* The upper half of eax is compared against version 4. */
	if ((eax >> 16) < 4)
		return true;
#endif
	return false;
}
    744static void xs_reset_watches(void)
    745{
    746	int err;
    747
    748	if (!xen_hvm_domain() || xen_initial_domain())
    749		return;
    750
    751	if (xen_strict_xenbus_quirk())
    752		return;
    753
    754	if (!xenbus_read_unsigned("control",
    755				  "platform-feature-xs_reset_watches", 0))
    756		return;
    757
    758	err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL));
    759	if (err && err != -EEXIST)
    760		pr_warn("xs_reset_watches failed: %d\n", err);
    761}
    762
    763/* Register callback to watch this node. */
    764int register_xenbus_watch(struct xenbus_watch *watch)
    765{
    766	/* Pointer in ascii is the token. */
    767	char token[sizeof(watch) * 2 + 1];
    768	int err;
    769
    770	sprintf(token, "%lX", (long)watch);
    771
    772	watch->nr_pending = 0;
    773
    774	down_read(&xs_watch_rwsem);
    775
    776	spin_lock(&watches_lock);
    777	BUG_ON(find_watch(token));
    778	list_add(&watch->list, &watches);
    779	spin_unlock(&watches_lock);
    780
    781	err = xs_watch(watch->node, token);
    782
    783	if (err) {
    784		spin_lock(&watches_lock);
    785		list_del(&watch->list);
    786		spin_unlock(&watches_lock);
    787	}
    788
    789	up_read(&xs_watch_rwsem);
    790
    791	return err;
    792}
    793EXPORT_SYMBOL_GPL(register_xenbus_watch);
    794
    795void unregister_xenbus_watch(struct xenbus_watch *watch)
    796{
    797	struct xs_watch_event *event, *tmp;
    798	char token[sizeof(watch) * 2 + 1];
    799	int err;
    800
    801	sprintf(token, "%lX", (long)watch);
    802
    803	down_read(&xs_watch_rwsem);
    804
    805	spin_lock(&watches_lock);
    806	BUG_ON(!find_watch(token));
    807	list_del(&watch->list);
    808	spin_unlock(&watches_lock);
    809
    810	err = xs_unwatch(watch->node, token);
    811	if (err)
    812		pr_warn("Failed to release watch %s: %i\n", watch->node, err);
    813
    814	up_read(&xs_watch_rwsem);
    815
    816	/* Make sure there are no callbacks running currently (unless
    817	   its us) */
    818	if (current->pid != xenwatch_pid)
    819		mutex_lock(&xenwatch_mutex);
    820
    821	/* Cancel pending watch events. */
    822	spin_lock(&watch_events_lock);
    823	if (watch->nr_pending) {
    824		list_for_each_entry_safe(event, tmp, &watch_events, list) {
    825			if (event->handle != watch)
    826				continue;
    827			list_del(&event->list);
    828			kfree(event);
    829		}
    830		watch->nr_pending = 0;
    831	}
    832	spin_unlock(&watch_events_lock);
    833
    834	if (current->pid != xenwatch_pid)
    835		mutex_unlock(&xenwatch_mutex);
    836}
    837EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
    838
    839void xs_suspend(void)
    840{
    841	xs_suspend_enter();
    842
    843	down_write(&xs_watch_rwsem);
    844	mutex_lock(&xs_response_mutex);
    845}
    846
    847void xs_resume(void)
    848{
    849	struct xenbus_watch *watch;
    850	char token[sizeof(watch) * 2 + 1];
    851
    852	xb_init_comms();
    853
    854	mutex_unlock(&xs_response_mutex);
    855
    856	xs_suspend_exit();
    857
    858	/* No need for watches_lock: the xs_watch_rwsem is sufficient. */
    859	list_for_each_entry(watch, &watches, list) {
    860		sprintf(token, "%lX", (long)watch);
    861		xs_watch(watch->node, token);
    862	}
    863
    864	up_write(&xs_watch_rwsem);
    865}
    866
    867void xs_suspend_cancel(void)
    868{
    869	mutex_unlock(&xs_response_mutex);
    870	up_write(&xs_watch_rwsem);
    871
    872	xs_suspend_exit();
    873}
    874
    875static int xenwatch_thread(void *unused)
    876{
    877	struct xs_watch_event *event;
    878
    879	xenwatch_pid = current->pid;
    880
    881	for (;;) {
    882		wait_event_interruptible(watch_events_waitq,
    883					 !list_empty(&watch_events));
    884
    885		if (kthread_should_stop())
    886			break;
    887
    888		mutex_lock(&xenwatch_mutex);
    889
    890		spin_lock(&watch_events_lock);
    891		event = list_first_entry_or_null(&watch_events,
    892				struct xs_watch_event, list);
    893		if (event) {
    894			list_del(&event->list);
    895			event->handle->nr_pending--;
    896		}
    897		spin_unlock(&watch_events_lock);
    898
    899		if (event) {
    900			event->handle->callback(event->handle, event->path,
    901						event->token);
    902			kfree(event);
    903		}
    904
    905		mutex_unlock(&xenwatch_mutex);
    906	}
    907
    908	return 0;
    909}
    910
    911/*
    912 * Wake up all threads waiting for a xenstore reply. In case of shutdown all
    913 * pending replies will be marked as "aborted" in order to let the waiters
    914 * return in spite of xenstore possibly no longer being able to reply. This
    915 * will avoid blocking shutdown by a thread waiting for xenstore but being
    916 * necessary for shutdown processing to proceed.
    917 */
    918static int xs_reboot_notify(struct notifier_block *nb,
    919			    unsigned long code, void *unused)
    920{
    921	struct xb_req_data *req;
    922
    923	mutex_lock(&xb_write_mutex);
    924	list_for_each_entry(req, &xs_reply_list, list)
    925		wake_up(&req->wq);
    926	list_for_each_entry(req, &xb_write_list, list)
    927		wake_up(&req->wq);
    928	mutex_unlock(&xb_write_mutex);
    929	return NOTIFY_DONE;
    930}
    931
    932static struct notifier_block xs_reboot_nb = {
    933	.notifier_call = xs_reboot_notify,
    934};
    935
    936int xs_init(void)
    937{
    938	int err;
    939	struct task_struct *task;
    940
    941	register_reboot_notifier(&xs_reboot_nb);
    942
    943	/* Initialize the shared memory rings to talk to xenstored */
    944	err = xb_init_comms();
    945	if (err)
    946		return err;
    947
    948	task = kthread_run(xenwatch_thread, NULL, "xenwatch");
    949	if (IS_ERR(task))
    950		return PTR_ERR(task);
    951
    952	/* shutdown watches for kexec boot */
    953	xs_reset_watches();
    954
    955	return 0;
    956}