xenbus_comms.c
/******************************************************************************
 * xenbus_comms.c
 *
 * Low level code to talk to Xen Store: ringbuffer and event channel.
 *
 * Copyright (C) 2005 Rusty Russell, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <xen/xenbus.h>
#include <asm/xen/hypervisor.h>
#include <xen/events.h>
#include <xen/page.h>
#include "xenbus.h"

/* A list of replies. Currently only one will ever be outstanding. */
LIST_HEAD(xs_reply_list);

/* A list of write requests. */
LIST_HEAD(xb_write_list);
DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
DEFINE_MUTEX(xb_write_mutex);

/* Protect xenbus reader thread against save/restore. */
DEFINE_MUTEX(xs_response_mutex);

static int xenbus_irq;
static struct task_struct *xenbus_task;

static irqreturn_t wake_waiting(int irq, void *unused)
{
	wake_up(&xb_waitq);
	return IRQ_HANDLED;
}
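/*
 * Note on the ring helpers below: cons and prod are free-running 32-bit
 * counters that are only reduced modulo the ring size (via
 * MASK_XENSTORE_IDX) when addressing the buffer, so the unsigned
 * difference (prod - cons) is the number of bytes pending and must never
 * exceed the ring size. A worked example, assuming the usual
 * XENSTORE_RING_SIZE of 1024: cons = 0xfffffc00 and prod = 0xfffffe00
 * give prod - cons = 0x200 (512 bytes pending, valid even though prod is
 * numerically smaller after wraparound), whereas prod = 0x00000200 gives
 * prod - cons = 0x600 (1536 > 1024), which check_indexes() rejects and
 * the callers treat as ring corruption.
 */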
static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
{
	return ((prod - cons) <= XENSTORE_RING_SIZE);
}

static void *get_output_chunk(XENSTORE_RING_IDX cons,
			      XENSTORE_RING_IDX prod,
			      char *buf, uint32_t *len)
{
	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
		*len = XENSTORE_RING_SIZE - (prod - cons);
	return buf + MASK_XENSTORE_IDX(prod);
}

static const void *get_input_chunk(XENSTORE_RING_IDX cons,
				   XENSTORE_RING_IDX prod,
				   const char *buf, uint32_t *len)
{
	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
	if ((prod - cons) < *len)
		*len = prod - cons;
	return buf + MASK_XENSTORE_IDX(cons);
}

static int xb_data_to_write(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;

	return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
		!list_empty(&xb_write_list);
}

/**
 * xb_write - low level write
 * @data: buffer to send
 * @len: length of buffer
 *
 * Returns number of bytes written or -err.
 */
static int xb_write(const void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		void *dst;
		unsigned int avail;

		/* Read indexes, then verify. */
		cons = intf->req_cons;
		prod = intf->req_prod;
		if (!check_indexes(cons, prod)) {
			intf->req_cons = intf->req_prod = 0;
			return -EIO;
		}
		if (!xb_data_to_write())
			return bytes;

		/* Must write data /after/ reading the consumer index. */
		virt_mb();

		dst = get_output_chunk(cons, prod, intf->req, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see new producer until data is there. */
		virt_wmb();
		intf->req_prod += avail;

		/* Implies mb(): other side will see the updated producer. */
		if (prod <= intf->req_cons)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}

static int xb_data_to_read(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	return (intf->rsp_cons != intf->rsp_prod);
}

static int xb_read(void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		unsigned int avail;
		const char *src;

		/* Read indexes, then verify. */
		cons = intf->rsp_cons;
		prod = intf->rsp_prod;
		if (cons == prod)
			return bytes;

		if (!check_indexes(cons, prod)) {
			intf->rsp_cons = intf->rsp_prod = 0;
			return -EIO;
		}

		src = get_input_chunk(cons, prod, intf->rsp, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		/* Must read data /after/ reading the producer index. */
		virt_rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see free space until we've copied out */
		virt_mb();
		intf->rsp_cons += avail;

		/* Implies mb(): other side will see the updated consumer. */
		if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}
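/*
 * Note on the reader state machine below: xb_read() may return fewer
 * bytes than requested, so process_msg() must be resumable. All of its
 * parsing state therefore lives in a static struct: in_hdr and read
 * record how much of the struct xsd_sockmsg header or of the message
 * body has arrived so far, and on a short read the function simply
 * returns 0 and continues from the same offset when xenbus_thread()
 * calls it again.
 */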
static int process_msg(void)
{
	static struct {
		struct xsd_sockmsg msg;
		char *body;
		union {
			void *alloc;
			struct xs_watch_event *watch;
		};
		bool in_msg;
		bool in_hdr;
		unsigned int read;
	} state;
	struct xb_req_data *req;
	int err;
	unsigned int len;

	if (!state.in_msg) {
		state.in_msg = true;
		state.in_hdr = true;
		state.read = 0;

		/*
		 * We must disallow save/restore while reading a message.
		 * A partial read across s/r leaves us out of sync with
		 * xenstored.
		 * xs_response_mutex is locked as long as we are processing one
		 * message. state.in_msg will be true as long as we are holding
		 * the lock here.
		 */
		mutex_lock(&xs_response_mutex);

		if (!xb_data_to_read()) {
			/* We raced with save/restore: pending data 'gone'. */
			mutex_unlock(&xs_response_mutex);
			state.in_msg = false;
			return 0;
		}
	}

	if (state.in_hdr) {
		if (state.read != sizeof(state.msg)) {
			err = xb_read((void *)&state.msg + state.read,
				      sizeof(state.msg) - state.read);
			if (err < 0)
				goto out;
			state.read += err;
			if (state.read != sizeof(state.msg))
				return 0;
			if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
				err = -EINVAL;
				goto out;
			}
		}

		len = state.msg.len + 1;
		if (state.msg.type == XS_WATCH_EVENT)
			len += sizeof(*state.watch);

		state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
		if (!state.alloc)
			return -ENOMEM;

		if (state.msg.type == XS_WATCH_EVENT)
			state.body = state.watch->body;
		else
			state.body = state.alloc;
		state.in_hdr = false;
		state.read = 0;
	}

	err = xb_read(state.body + state.read, state.msg.len - state.read);
	if (err < 0)
		goto out;

	state.read += err;
	if (state.read != state.msg.len)
		return 0;

	state.body[state.msg.len] = '\0';

	if (state.msg.type == XS_WATCH_EVENT) {
		state.watch->len = state.msg.len;
		err = xs_watch_msg(state.watch);
	} else {
		err = -ENOENT;
		mutex_lock(&xb_write_mutex);
		list_for_each_entry(req, &xs_reply_list, list) {
			if (req->msg.req_id == state.msg.req_id) {
				list_del(&req->list);
				err = 0;
				break;
			}
		}
		mutex_unlock(&xb_write_mutex);
		if (err)
			goto out;

		if (req->state == xb_req_state_wait_reply) {
			req->msg.req_id = req->caller_req_id;
			req->msg.type = state.msg.type;
			req->msg.len = state.msg.len;
			req->body = state.body;
			/* write body, then update state */
			virt_wmb();
			req->state = xb_req_state_got_reply;
			req->cb(req);
		} else
			kfree(req);
	}

	mutex_unlock(&xs_response_mutex);

	state.in_msg = false;
	state.alloc = NULL;
	return err;

out:
	mutex_unlock(&xs_response_mutex);
	state.in_msg = false;
	kfree(state.alloc);
	state.alloc = NULL;
	return err;
}
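/*
 * process_writes() mirrors process_msg(): a request may not fit into the
 * ring in one go, so progress is kept in a static struct and the function
 * is re-entered once space frees up. state.idx == -1 means the struct
 * xsd_sockmsg header is being sent, state.idx >= 0 indexes the request's
 * payload iovec, and state.written counts the bytes of the current
 * element already pushed into the ring.
 */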
static int process_writes(void)
{
	static struct {
		struct xb_req_data *req;
		int idx;
		unsigned int written;
	} state;
	void *base;
	unsigned int len;
	int err = 0;

	if (!xb_data_to_write())
		return 0;

	mutex_lock(&xb_write_mutex);

	if (!state.req) {
		state.req = list_first_entry(&xb_write_list,
					     struct xb_req_data, list);
		state.idx = -1;
		state.written = 0;
	}

	if (state.req->state == xb_req_state_aborted)
		goto out_err;

	while (state.idx < state.req->num_vecs) {
		if (state.idx < 0) {
			base = &state.req->msg;
			len = sizeof(state.req->msg);
		} else {
			base = state.req->vec[state.idx].iov_base;
			len = state.req->vec[state.idx].iov_len;
		}
		err = xb_write(base + state.written, len - state.written);
		if (err < 0)
			goto out_err;
		state.written += err;
		if (state.written != len)
			goto out;

		state.idx++;
		state.written = 0;
	}

	list_del(&state.req->list);
	state.req->state = xb_req_state_wait_reply;
	list_add_tail(&state.req->list, &xs_reply_list);
	state.req = NULL;

out:
	mutex_unlock(&xb_write_mutex);

	return 0;

out_err:
	state.req->msg.type = XS_ERROR;
	state.req->err = err;
	list_del(&state.req->list);
	if (state.req->state == xb_req_state_aborted)
		kfree(state.req);
	else {
		/* write err, then update state */
		virt_wmb();
		state.req->state = xb_req_state_got_reply;
		wake_up(&state.req->wq);
	}

	mutex_unlock(&xb_write_mutex);

	state.req = NULL;

	return err;
}

static int xb_thread_work(void)
{
	return xb_data_to_read() || xb_data_to_write();
}

static int xenbus_thread(void *unused)
{
	int err;

	while (!kthread_should_stop()) {
		if (wait_event_interruptible(xb_waitq, xb_thread_work()))
			continue;

		err = process_msg();
		if (err == -ENOMEM)
			schedule();
		else if (err)
			pr_warn_ratelimited("error %d while reading message\n",
					    err);

		err = process_writes();
		if (err)
			pr_warn_ratelimited("error %d while writing message\n",
					    err);
	}

	xenbus_task = NULL;
	return 0;
}

/**
 * xb_init_comms - Set up interrupt handler off store event channel.
 */
int xb_init_comms(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;

	if (intf->req_prod != intf->req_cons)
		pr_err("request ring is not quiescent (%08x:%08x)!\n",
		       intf->req_cons, intf->req_prod);

	if (intf->rsp_prod != intf->rsp_cons) {
		pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n",
			intf->rsp_cons, intf->rsp_prod);
		/* breaks kdump */
		if (!reset_devices)
			intf->rsp_cons = intf->rsp_prod;
	}

	if (xenbus_irq) {
		/* Already have an irq; assume we're resuming */
		rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
	} else {
		int err;

		err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
						0, "xenbus", &xb_waitq);
		if (err < 0) {
			pr_err("request irq failed %i\n", err);
			return err;
		}

		xenbus_irq = err;

		if (!xenbus_task) {
			xenbus_task = kthread_run(xenbus_thread, NULL,
						  "xenbus");
			if (IS_ERR(xenbus_task))
				return PTR_ERR(xenbus_task);
		}
	}

	return 0;
}

void xb_deinit_comms(void)
{
	unbind_from_irqhandler(xenbus_irq, &xb_waitq);
	xenbus_irq = 0;
}