1 /******************************************************************************
2  * xenstore.c
3  *
4  * Low-level kernel interface to the XenStore.
5  *
6  * Copyright (C) 2005 Rusty Russell, IBM Corporation
7  * Copyright (C) 2009,2010 Spectra Logic Corporation
8  *
9  * This file may be distributed separately from the Linux kernel, or
10  * incorporated into other software packages, subject to the following license:
11  *
12  * Permission is hereby granted, free of charge, to any person obtaining a copy
13  * of this source file (the "Software"), to deal in the Software without
14  * restriction, including without limitation the rights to use, copy, modify,
15  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16  * and to permit persons to whom the Software is furnished to do so, subject to
17  * the following conditions:
18  *
19  * The above copyright notice and this permission notice shall be included in
20  * all copies or substantial portions of the Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28  * IN THE SOFTWARE.
29  */
30 
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD: stable/9/sys/xen/xenstore/xenstore.c 251973 2013-06-18 23:14:21Z gibbs $");
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/module.h>
40 #include <sys/mutex.h>
41 #include <sys/sx.h>
42 #include <sys/syslog.h>
43 #include <sys/malloc.h>
44 #include <sys/systm.h>
45 #include <sys/proc.h>
46 #include <sys/kthread.h>
47 #include <sys/sbuf.h>
48 #include <sys/sysctl.h>
49 #include <sys/uio.h>
50 #include <sys/unistd.h>
51 
52 #include <machine/xen/xen-os.h>
53 #include <machine/stdarg.h>
54 
55 #include <xen/evtchn.h>
56 #include <xen/gnttab.h>
57 #include <xen/hypervisor.h>
58 #include <xen/xen_intr.h>
59 
60 #include <xen/interface/hvm/params.h>
61 
62 #include <xen/xenstore/xenstorevar.h>
63 #include <xen/xenstore/xenstore_internal.h>
64 
65 #include <vm/vm.h>
66 #include <vm/pmap.h>
67 
68 /**
69  * \file xenstore.c
70  * \brief XenStore interface
71  *
72  * The XenStore interface is a simple storage system that is a means of
73  * communicating state and configuration data between the Xen Domain 0
74  * and the various guest domains.  All configuration data other than
75  * a small amount of essential information required during the early
76  * boot process of launching a Xen aware guest, is managed using the
77  * XenStore.
78  *
79  * The XenStore is ASCII string based, and has a structure and semantics
80  * similar to a filesystem.  There are files and directories, the directories
 * able to contain files or other directories.  The depth of the hierarchy
82  * is only limited by the XenStore's maximum path length.
83  *
84  * The communication channel between the XenStore service and other
85  * domains is via two, guest specific, ring buffers in a shared memory
86  * area.  One ring buffer is used for communicating in each direction.
87  * The grant table references for this shared memory are given to the
88  * guest either via the xen_start_info structure for a fully para-
89  * virtualized guest, or via HVM hypercalls for a hardware virtualized
90  * guest.
91  *
92  * The XenStore communication relies on an event channel and thus
93  * interrupts.  For this reason, the attachment of the XenStore
94  * relies on an interrupt driven configuration hook to hold off
95  * boot processing until communication with the XenStore service
96  * can be established.
97  *
98  * Several Xen services depend on the XenStore, most notably the
99  * XenBus used to discover and manage Xen devices.  These services
100  * are implemented as NewBus child attachments to a bus exported
101  * by this XenStore driver.
102  */
103 
/*
 * Forward declaration: find_watch() is called from xs_process_msg()
 * ahead of its definition later in this file.
 */
static struct xs_watch *find_watch(const char *token);

/* Allocation type tag passed to the malloc()/free() calls in this file. */
MALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results");

/**
 * Pointer to shared memory communication structures allowing us
 * to communicate with the XenStore service.
 *
 * When operating in full PV mode, this pointer is set early in kernel
 * startup from within xen_machdep.c.  In HVM mode, we use hypercalls
 * to get the guest frame number for the shared page and then map it
 * into kva.  See xs_init() for details.
 *
 * The address of this structure also serves as the sleep/wakeup
 * channel for threads blocked on ring space or ring data (see
 * xs_intr(), xs_write_store() and xs_read_store()).
 */
struct xenstore_domain_interface *xen_store;
118 
119 /*-------------------------- Private Data Structures ------------------------*/
120 
121 /**
122  * Structure capturing messages received from the XenStore service.
123  */
struct xs_stored_msg {
	/** Linkage on either the reply list or the watch event list. */
	TAILQ_ENTRY(xs_stored_msg) list;

	/** Message header as read from the response ring. */
	struct xsd_sockmsg hdr;

	/** Payload; which member is valid depends on hdr.type. */
	union {
		/* Queued replies. */
		struct {
			/** Malloced (M_XENSTORE), NUL terminated reply body. */
			char *body;
		} reply;

		/* Queued watch events. */
		struct {
			/** Registered watch matching the event's token. */
			struct xs_watch *handle;
			/** String vector produced by split(). */
			const char **vec;
			/** Number of strings in vec. */
			u_int vec_size;
		} watch;
	} u;
};
TAILQ_HEAD(xs_stored_msg_list, xs_stored_msg);
144 
145 /**
146  * Container for all XenStore related state.
147  */
struct xs_softc {
	/** Newbus device for the XenStore. */
	device_t xs_dev;

	/**
	 * Lock serializing access to ring producer/consumer
	 * indexes.  Use of this lock guarantees that wakeups
	 * of blocking readers/writers are not missed due to
	 * races with the XenStore service.
	 */
	struct mtx ring_lock;

	/**
	 * Mutex used to ensure exclusive access to the outgoing
	 * communication ring.  We use a lock type that can be
	 * held while sleeping so that xs_write_store() can block
	 * waiting for space in the ring to free up, without allowing
	 * another writer to come in and corrupt a partial message write.
	 */
	struct sx request_mutex;

	/**
	 * A list of replies to our requests.
	 *
	 * The reply list is filled by xs_rcv_thread().  It
	 * is consumed by the context that issued the request
	 * to which a reply is made.  The requester blocks in
	 * xs_read_reply().
	 *
	 * \note Only one requesting context can be active at a time.
	 *       This is guaranteed by the request_mutex and ensures
	 *	 that the requester sees replies matching the order
	 *	 of its requests.
	 */
	struct xs_stored_msg_list reply_list;

	/** Lock protecting the reply list. */
	struct mtx reply_lock;

	/**
	 * List of registered watches.
	 */
	struct xs_watch_list  registered_watches;

	/** Lock protecting the registered watches list. */
	struct mtx registered_watches_lock;

	/**
	 * List of pending watch callback events.
	 */
	struct xs_stored_msg_list watch_events;

	/** Lock protecting the watch callback list. */
	struct mtx watch_events_lock;

	/**
	 * Sleepable lock used to prevent VM suspension while a
	 * xenstore transaction is outstanding.
	 *
	 * Each active transaction holds a shared lock on the
	 * suspend mutex.  Our suspend method blocks waiting
	 * to acquire an exclusive lock.  This guarantees that
	 * suspend processing will only proceed once all active
	 * transactions have been retired.
	 */
	struct sx suspend_mutex;

	/**
	 * The process ID of the xenwatch thread.
	 */
	pid_t xenwatch_pid;

	/**
	 * Sleepable mutex used to gate the execution of XenStore
	 * watch event callbacks.
	 *
	 * xenwatch_thread holds an exclusive lock on this mutex
	 * while delivering event callbacks, and xenstore_unregister_watch()
	 * uses an exclusive lock of this mutex to guarantee that no
	 * callbacks of the just unregistered watch are pending
	 * before returning to its caller.
	 */
	struct sx xenwatch_mutex;

#ifdef XENHVM
	/**
	 * The HVM guest pseudo-physical frame number.  This is Xen's mapping
	 * of the true machine frame number into our "physical address space".
	 */
	unsigned long gpfn;
#endif

	/**
	 * The event channel for communicating with the
	 * XenStore service.
	 */
	int evtchn;

	/** Interrupt number for our event channel. */
	u_int irq;

	/**
	 * Interrupt driven config hook allowing us to defer
	 * attaching children until interrupts (and thus communication
	 * with the XenStore service) are available.
	 */
	struct intr_config_hook xs_attachcb;
};
256 
257 /*-------------------------------- Global Data ------------------------------*/
/** The single, file-scope instance of the XenStore driver state. */
static struct xs_softc xs;
259 
260 /*------------------------- Private Utility Functions -----------------------*/
261 
262 /**
263  * Count and optionally record pointers to a number of NUL terminated
264  * strings in a buffer.
265  *
266  * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
267  * \param dest	   An array to store pointers to each string found in strings.
268  * \param len	   The length of the buffer pointed to by strings.
269  *
270  * \return  A count of the number of strings found.
271  */
static u_int
extract_strings(const char *strings, const char **dest, u_int len)
{
	const char *cur = strings;
	const char *end = strings + len;
	u_int count = 0;

	/*
	 * Walk the buffer one NUL terminated string at a time.  Each
	 * step skips the string plus its terminator, so empty strings
	 * are counted (and recorded) as well.
	 */
	while (cur < end) {
		if (dest != NULL)
			dest[count] = cur;
		count++;
		cur += strlen(cur) + 1;
	}

	return (count);
}
286 
287 /**
288  * Convert a contiguous buffer containing a series of NUL terminated
289  * strings into an array of pointers to strings.
290  *
291  * The returned pointer references the array of string pointers which
292  * is followed by the storage for the string data.  It is the client's
293  * responsibility to free this storage.
294  *
295  * The storage addressed by strings is free'd prior to split returning.
296  *
297  * \param strings  A pointer to a contiguous buffer of NUL terminated strings.
298  * \param len	   The length of the buffer pointed to by strings.
299  * \param num	   The number of strings found and returned in the strings
300  *                 array.
301  *
302  * \return  An array of pointers to the strings found in the input buffer.
303  */
304 static const char **
split(char * strings,u_int len,u_int * num)305 split(char *strings, u_int len, u_int *num)
306 {
307 	const char **ret;
308 
309 	/* Protect against unterminated buffers. */
310 	if (len > 0)
311 		strings[len - 1] = '\0';
312 
313 	/* Count the strings. */
314 	*num = extract_strings(strings, /*dest*/NULL, len);
315 
316 	/* Transfer to one big alloc for easy freeing by the caller. */
317 	ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK);
318 	memcpy(&ret[*num], strings, len);
319 	free(strings, M_XENSTORE);
320 
321 	/* Extract pointers to newly allocated array. */
322 	strings = (char *)&ret[*num];
323 	(void)extract_strings(strings, /*dest*/ret, len);
324 
325 	return (ret);
326 }
327 
328 /*------------------------- Public Utility Functions -------------------------*/
329 /*------- API comments for these methods can be found in xenstorevar.h -------*/
struct sbuf *
xs_join(const char *dir, const char *name)
{
	struct sbuf *path = sbuf_new_auto();

	/* Start with the directory; append "/name" only for non-empty names. */
	sbuf_cat(path, dir);
	if (*name != '\0') {
		sbuf_putc(path, '/');
		sbuf_cat(path, name);
	}

	/* Finish the sbuf before handing it to the caller. */
	sbuf_finish(path);
	return (path);
}
345 
346 /*-------------------- Low Level Communication Management --------------------*/
347 /**
348  * Interrupt handler for the XenStore event channel.
349  *
350  * XenStore reads and writes block on "xen_store" for buffer
351  * space.  Wakeup any blocking operations when the XenStore
352  * service has modified the queues.
353  */
354 static void
xs_intr(void * arg __unused)355 xs_intr(void * arg __unused /*__attribute__((unused))*/)
356 {
357 
358 	/*
359 	 * Hold ring lock across wakeup so that clients
360 	 * cannot miss a wakeup.
361 	 */
362 	mtx_lock(&xs.ring_lock);
363 	wakeup(xen_store);
364 	mtx_unlock(&xs.ring_lock);
365 }
366 
367 /**
368  * Verify that the indexes for a ring are valid.
369  *
370  * The difference between the producer and consumer cannot
371  * exceed the size of the ring.
372  *
373  * \param cons  The consumer index for the ring to test.
374  * \param prod  The producer index for the ring to test.
375  *
376  * \retval 1  If indexes are in range.
377  * \retval 0  If the indexes are out of range.
378  */
379 static int
xs_check_indexes(XENSTORE_RING_IDX cons,XENSTORE_RING_IDX prod)380 xs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
381 {
382 
383 	return ((prod - cons) <= XENSTORE_RING_SIZE);
384 }
385 
386 /**
387  * Return a pointer to, and the length of, the contiguous
388  * free region available for output in a ring buffer.
389  *
390  * \param cons  The consumer index for the ring.
391  * \param prod  The producer index for the ring.
392  * \param buf   The base address of the ring's storage.
393  * \param len   The amount of contiguous storage available.
394  *
395  * \return  A pointer to the start location of the free region.
396  */
397 static void *
xs_get_output_chunk(XENSTORE_RING_IDX cons,XENSTORE_RING_IDX prod,char * buf,uint32_t * len)398 xs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
399     char *buf, uint32_t *len)
400 {
401 
402 	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
403 	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
404 		*len = XENSTORE_RING_SIZE - (prod - cons);
405 	return (buf + MASK_XENSTORE_IDX(prod));
406 }
407 
408 /**
409  * Return a pointer to, and the length of, the contiguous
410  * data available to read from a ring buffer.
411  *
412  * \param cons  The consumer index for the ring.
413  * \param prod  The producer index for the ring.
414  * \param buf   The base address of the ring's storage.
415  * \param len   The amount of contiguous data available to read.
416  *
417  * \return  A pointer to the start location of the available data.
418  */
419 static const void *
xs_get_input_chunk(XENSTORE_RING_IDX cons,XENSTORE_RING_IDX prod,const char * buf,uint32_t * len)420 xs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
421     const char *buf, uint32_t *len)
422 {
423 
424 	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
425 	if ((prod - cons) < *len)
426 		*len = prod - cons;
427 	return (buf + MASK_XENSTORE_IDX(cons));
428 }
429 
430 /**
431  * Transmit data to the XenStore service.
432  *
433  * \param tdata  A pointer to the contiguous data to send.
434  * \param len    The amount of data to send.
435  *
436  * \return  On success 0, otherwise an errno value indicating the
437  *          cause of failure.
438  *
439  * \invariant  Called from thread context.
440  * \invariant  The buffer pointed to by tdata is at least len bytes
441  *             in length.
442  * \invariant  xs.request_mutex exclusively locked.
443  */
static int
xs_write_store(const void *tdata, unsigned len)
{
	XENSTORE_RING_IDX cons, prod;
	const char *data = (const char *)tdata;
	int error;

	sx_assert(&xs.request_mutex, SX_XLOCKED);

	/* Each pass copies one contiguous chunk into the request ring. */
	while (len != 0) {
		void *dst;
		u_int avail;

		/* Hold lock so we can't miss wakeups should we block. */
		mtx_lock(&xs.ring_lock);
		cons = xen_store->req_cons;
		prod = xen_store->req_prod;
		if ((prod - cons) == XENSTORE_RING_SIZE) {
			/*
			 * Output ring is full. Wait for a ring event.
			 *
			 * Note that the events from both queues
			 * are combined, so being woken does not
			 * guarantee that space exists in the write
			 * ring.
			 *
			 * To simplify error recovery and the retry,
			 * we specify PDROP so our lock is *not* held
			 * when msleep returns.
			 */
			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
			     "xbwrite", /*timeout*/0);
			/* Any failure other than a timeout aborts the write. */
			if (error && error != EWOULDBLOCK)
				return (error);

			/* Try again. */
			continue;
		}
		mtx_unlock(&xs.ring_lock);

		/* Verify queue sanity. */
		if (!xs_check_indexes(cons, prod)) {
			/* Indexes are inconsistent: reset the ring and fail. */
			xen_store->req_cons = xen_store->req_prod = 0;
			return (EIO);
		}

		/* Copy no more than the contiguous free space allows. */
		dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail);
		if (avail > len)
			avail = len;

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;

		/*
		 * The store to the producer index, which indicates
		 * to the other side that new data has arrived, must
		 * be visible only after our copy of the data into the
		 * ring has completed.
		 */
		wmb();
		xen_store->req_prod += avail;

		/*
		 * notify_remote_via_evtchn implies mb(). The other side
		 * will see the change to req_prod at the time of the
		 * interrupt.
		 */
		notify_remote_via_evtchn(xs.evtchn);
	}

	return (0);
}
516 
517 /**
518  * Receive data from the XenStore service.
519  *
520  * \param tdata  A pointer to the contiguous buffer to receive the data.
521  * \param len    The amount of data to receive.
522  *
523  * \return  On success 0, otherwise an errno value indicating the
524  *          cause of failure.
525  *
526  * \invariant  Called from thread context.
527  * \invariant  The buffer pointed to by tdata is at least len bytes
528  *             in length.
529  *
 * \note xs_read_store does not perform any internal locking to guarantee
531  *       serial access to the incoming ring buffer.  However, there
532  *	 is only one context processing reads: xs_rcv_thread().
533  */
static int
xs_read_store(void *tdata, unsigned len)
{
	XENSTORE_RING_IDX cons, prod;
	char *data = (char *)tdata;
	int error;

	/* Each pass copies one contiguous chunk out of the response ring. */
	while (len != 0) {
		u_int avail;
		const char *src;

		/* Hold lock so we can't miss wakeups should we block. */
		mtx_lock(&xs.ring_lock);
		cons = xen_store->rsp_cons;
		prod = xen_store->rsp_prod;
		if (cons == prod) {
			/*
			 * Nothing to read. Wait for a ring event.
			 *
			 * Note that the events from both queues
			 * are combined, so being woken does not
			 * guarantee that data exist in the read
			 * ring.
			 *
			 * To simplify error recovery and the retry,
			 * we specify PDROP so our lock is *not* held
			 * when msleep returns.
			 */
			error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP,
			    "xbread", /*timeout*/0);
			/* Any failure other than a timeout aborts the read. */
			if (error && error != EWOULDBLOCK)
				return (error);
			continue;
		}
		mtx_unlock(&xs.ring_lock);

		/* Verify queue sanity. */
		if (!xs_check_indexes(cons, prod)) {
			/* Indexes are inconsistent: reset the ring and fail. */
			xen_store->rsp_cons = xen_store->rsp_prod = 0;
			return (EIO);
		}

		/* Copy no more than the contiguous data available. */
		src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail);
		if (avail > len)
			avail = len;

		/*
		 * Ensure the data we read is related to the indexes
		 * we read above.
		 */
		rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;

		/*
		 * Ensure that the producer of this ring does not see
		 * the ring space as free until after we have copied it
		 * out.
		 */
		mb();
		xen_store->rsp_cons += avail;

		/*
		 * notify_remote_via_evtchn implies mb(). The producer
		 * will see the updated consumer index when the event
		 * is delivered.
		 */
		notify_remote_via_evtchn(xs.evtchn);
	}

	return (0);
}
608 
609 /*----------------------- Received Message Processing ------------------------*/
610 /**
611  * Block reading the next message from the XenStore service and
612  * process the result.
613  *
614  * \param type  The returned type of the XenStore message received.
615  *
616  * \return  0 on success.  Otherwise an errno value indicating the
617  *          type of failure encountered.
618  */
619 static int
xs_process_msg(enum xsd_sockmsg_type * type)620 xs_process_msg(enum xsd_sockmsg_type *type)
621 {
622 	struct xs_stored_msg *msg;
623 	char *body;
624 	int error;
625 
626 	msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK);
627 	error = xs_read_store(&msg->hdr, sizeof(msg->hdr));
628 	if (error) {
629 		free(msg, M_XENSTORE);
630 		return (error);
631 	}
632 
633 	body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK);
634 	error = xs_read_store(body, msg->hdr.len);
635 	if (error) {
636 		free(body, M_XENSTORE);
637 		free(msg, M_XENSTORE);
638 		return (error);
639 	}
640 	body[msg->hdr.len] = '\0';
641 
642 	*type = msg->hdr.type;
643 	if (msg->hdr.type == XS_WATCH_EVENT) {
644 		msg->u.watch.vec = split(body, msg->hdr.len,
645 		    &msg->u.watch.vec_size);
646 
647 		mtx_lock(&xs.registered_watches_lock);
648 		msg->u.watch.handle = find_watch(
649 		    msg->u.watch.vec[XS_WATCH_TOKEN]);
650 		if (msg->u.watch.handle != NULL) {
651 			mtx_lock(&xs.watch_events_lock);
652 			TAILQ_INSERT_TAIL(&xs.watch_events, msg, list);
653 			wakeup(&xs.watch_events);
654 			mtx_unlock(&xs.watch_events_lock);
655 		} else {
656 			free(msg->u.watch.vec, M_XENSTORE);
657 			free(msg, M_XENSTORE);
658 		}
659 		mtx_unlock(&xs.registered_watches_lock);
660 	} else {
661 		msg->u.reply.body = body;
662 		mtx_lock(&xs.reply_lock);
663 		TAILQ_INSERT_TAIL(&xs.reply_list, msg, list);
664 		wakeup(&xs.reply_list);
665 		mtx_unlock(&xs.reply_lock);
666 	}
667 
668 	return (0);
669 }
670 
671 /**
672  * Thread body of the XenStore receive thread.
673  *
674  * This thread blocks waiting for data from the XenStore service
 * and processes any received messages.
676  */
677 static void
xs_rcv_thread(void * arg __unused)678 xs_rcv_thread(void *arg __unused)
679 {
680 	int error;
681 	enum xsd_sockmsg_type type;
682 
683 	for (;;) {
684 		error = xs_process_msg(&type);
685 		if (error)
686 			printf("XENSTORE error %d while reading message\n",
687 			    error);
688 	}
689 }
690 
691 /*---------------- XenStore Message Request/Reply Processing -----------------*/
692 /**
693  * Filter invoked before transmitting any message to the XenStore service.
694  *
695  * The role of the filter may expand, but currently serves to manage
696  * the interactions of messages with transaction state.
697  *
698  * \param request_msg_type  The message type for the request.
699  */
700 static inline void
xs_request_filter(uint32_t request_msg_type)701 xs_request_filter(uint32_t request_msg_type)
702 {
703 	if (request_msg_type == XS_TRANSACTION_START)
704 		sx_slock(&xs.suspend_mutex);
705 }
706 
707 /**
708  * Filter invoked after transmitting any message to the XenStore service.
709  *
710  * The role of the filter may expand, but currently serves to manage
711  * the interactions of messages with transaction state.
712  *
713  * \param request_msg_type     The message type for the original request.
714  * \param reply_msg_type       The message type for any received reply.
715  * \param request_reply_error  The error status from the attempt to send
716  *                             the request or retrieve the reply.
717  */
718 static inline void
xs_reply_filter(uint32_t request_msg_type,uint32_t reply_msg_type,int request_reply_error)719 xs_reply_filter(uint32_t request_msg_type,
720     uint32_t reply_msg_type, int request_reply_error)
721 {
722 	/*
723 	 * The count of transactions drops if we attempted
724 	 * to end a transaction (even if that attempt fails
725 	 * in error), we receive a transaction end acknowledgement,
726 	 * or if our attempt to begin a transaction fails.
727 	 */
728 	if (request_msg_type == XS_TRANSACTION_END
729 	 || (request_reply_error == 0 && reply_msg_type == XS_TRANSACTION_END)
730 	 || (request_msg_type == XS_TRANSACTION_START
731 	  && (request_reply_error != 0 || reply_msg_type == XS_ERROR)))
732 		sx_sunlock(&xs.suspend_mutex);
733 
734 }
735 
736 #define xsd_error_count	(sizeof(xsd_errors) / sizeof(xsd_errors[0]))
737 
738 /**
739  * Convert a XenStore error string into an errno number.
740  *
741  * \param errorstring  The error string to convert.
742  *
743  * \return  The errno best matching the input string.
744  *
745  * \note Unknown error strings are converted to EINVAL.
746  */
747 static int
xs_get_error(const char * errorstring)748 xs_get_error(const char *errorstring)
749 {
750 	u_int i;
751 
752 	for (i = 0; i < xsd_error_count; i++) {
753 		if (!strcmp(errorstring, xsd_errors[i].errstring))
754 			return (xsd_errors[i].errnum);
755 	}
756 	log(LOG_WARNING, "XENSTORE xen store gave: unknown error %s",
757 	    errorstring);
758 	return (EINVAL);
759 }
760 
761 /**
762  * Block waiting for a reply to a message request.
763  *
764  * \param type	  The returned type of the reply.
765  * \param len	  The returned body length of the reply.
766  * \param result  The returned body of the reply.
767  *
768  * \return  0 on success.  Otherwise an errno indicating the
769  *          cause of failure.
770  */
771 static int
xs_read_reply(enum xsd_sockmsg_type * type,u_int * len,void ** result)772 xs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result)
773 {
774 	struct xs_stored_msg *msg;
775 	char *body;
776 	int error;
777 
778 	mtx_lock(&xs.reply_lock);
779 	while (TAILQ_EMPTY(&xs.reply_list)) {
780 		error = mtx_sleep(&xs.reply_list, &xs.reply_lock,
781 		    PCATCH, "xswait", hz/10);
782 		if (error && error != EWOULDBLOCK) {
783 			mtx_unlock(&xs.reply_lock);
784 			return (error);
785 		}
786 	}
787 	msg = TAILQ_FIRST(&xs.reply_list);
788 	TAILQ_REMOVE(&xs.reply_list, msg, list);
789 	mtx_unlock(&xs.reply_lock);
790 
791 	*type = msg->hdr.type;
792 	if (len)
793 		*len = msg->hdr.len;
794 	body = msg->u.reply.body;
795 
796 	free(msg, M_XENSTORE);
797 	*result = body;
798 	return (0);
799 }
800 
801 /**
802  * Pass-thru interface for XenStore access by userland processes
803  * via the XenStore device.
804  *
805  * Reply type and length data are returned by overwriting these
806  * fields in the passed in request message.
807  *
808  * \param msg	  A properly formatted message to transmit to
809  *		  the XenStore service.
810  * \param result  The returned body of the reply.
811  *
812  * \return  0 on success.  Otherwise an errno indicating the cause
813  *          of failure.
814  *
815  * \note The returned result is provided in malloced storage and thus
816  *       must be free'd by the caller with 'free(result, M_XENSTORE);
817  */
818 int
xs_dev_request_and_reply(struct xsd_sockmsg * msg,void ** result)819 xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
820 {
821 	uint32_t request_type;
822 	int error;
823 
824 	request_type = msg->type;
825 	xs_request_filter(request_type);
826 
827 	sx_xlock(&xs.request_mutex);
828 	if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0)
829 		error = xs_read_reply(&msg->type, &msg->len, result);
830 	sx_xunlock(&xs.request_mutex);
831 
832 	xs_reply_filter(request_type, msg->type, error);
833 
834 	return (error);
835 }
836 
837 /**
 * Send a message with an optionally multi-part body to the XenStore service.
839  *
840  * \param t              The transaction to use for this request.
841  * \param request_type   The type of message to send.
842  * \param iovec          Pointers to the body sections of the request.
843  * \param num_vecs       The number of body sections in the request.
844  * \param len            The returned length of the reply.
845  * \param result         The returned body of the reply.
846  *
847  * \return  0 on success.  Otherwise an errno indicating
848  *          the cause of failure.
849  *
850  * \note The returned result is provided in malloced storage and thus
851  *       must be free'd by the caller with 'free(*result, M_XENSTORE);
852  */
853 static int
xs_talkv(struct xs_transaction t,enum xsd_sockmsg_type request_type,const struct iovec * iovec,u_int num_vecs,u_int * len,void ** result)854 xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type,
855     const struct iovec *iovec, u_int num_vecs, u_int *len, void **result)
856 {
857 	struct xsd_sockmsg msg;
858 	void *ret = NULL;
859 	u_int i;
860 	int error;
861 
862 	msg.tx_id = t.id;
863 	msg.req_id = 0;
864 	msg.type = request_type;
865 	msg.len = 0;
866 	for (i = 0; i < num_vecs; i++)
867 		msg.len += iovec[i].iov_len;
868 
869 	xs_request_filter(request_type);
870 
871 	sx_xlock(&xs.request_mutex);
872 	error = xs_write_store(&msg, sizeof(msg));
873 	if (error) {
874 		printf("xs_talkv failed %d\n", error);
875 		goto error_lock_held;
876 	}
877 
878 	for (i = 0; i < num_vecs; i++) {
879 		error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len);
880 		if (error) {
881 			printf("xs_talkv failed %d\n", error);
882 			goto error_lock_held;
883 		}
884 	}
885 
886 	error = xs_read_reply(&msg.type, len, &ret);
887 
888 error_lock_held:
889 	sx_xunlock(&xs.request_mutex);
890 	xs_reply_filter(request_type, msg.type, error);
891 	if (error)
892 		return (error);
893 
894 	if (msg.type == XS_ERROR) {
895 		error = xs_get_error(ret);
896 		free(ret, M_XENSTORE);
897 		return (error);
898 	}
899 
900 	/* Reply is either error or an echo of our request message type. */
901 	KASSERT(msg.type == request_type, ("bad xenstore message type"));
902 
903 	if (result)
904 		*result = ret;
905 	else
906 		free(ret, M_XENSTORE);
907 
908 	return (0);
909 }
910 
911 /**
912  * Wrapper for xs_talkv allowing easy transmission of a message with
913  * a single, contiguous, message body.
914  *
915  * \param t              The transaction to use for this request.
916  * \param request_type   The type of message to send.
917  * \param body           The body of the request.
918  * \param len            The returned length of the reply.
919  * \param result         The returned body of the reply.
920  *
921  * \return  0 on success.  Otherwise an errno indicating
922  *          the cause of failure.
923  *
924  * \note The returned result is provided in malloced storage and thus
925  *       must be free'd by the caller with 'free(*result, M_XENSTORE);
926  */
927 static int
xs_single(struct xs_transaction t,enum xsd_sockmsg_type request_type,const char * body,u_int * len,void ** result)928 xs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type,
929     const char *body, u_int *len, void **result)
930 {
931 	struct iovec iovec;
932 
933 	iovec.iov_base = (void *)(uintptr_t)body;
934 	iovec.iov_len = strlen(body) + 1;
935 
936 	return (xs_talkv(t, request_type, &iovec, 1, len, result));
937 }
938 
939 /*------------------------- XenStore Watch Support ---------------------------*/
940 /**
941  * Transmit a watch request to the XenStore service.
942  *
943  * \param path    The path in the XenStore to watch.
 * \param token   A unique identifier for this watch.
945  *
946  * \return  0 on success.  Otherwise an errno indicating the
947  *          cause of failure.
948  */
949 static int
xs_watch(const char * path,const char * token)950 xs_watch(const char *path, const char *token)
951 {
952 	struct iovec iov[2];
953 
954 	iov[0].iov_base = (void *)(uintptr_t) path;
955 	iov[0].iov_len = strlen(path) + 1;
956 	iov[1].iov_base = (void *)(uintptr_t) token;
957 	iov[1].iov_len = strlen(token) + 1;
958 
959 	return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL));
960 }
961 
962 /**
963  * Transmit an uwatch request to the XenStore service.
964  *
965  * \param path    The path in the XenStore to watch.
966  * \param tocken  A unique identifier for this watch.
967  *
968  * \return  0 on success.  Otherwise an errno indicating the
969  *          cause of failure.
970  */
971 static int
xs_unwatch(const char * path,const char * token)972 xs_unwatch(const char *path, const char *token)
973 {
974 	struct iovec iov[2];
975 
976 	iov[0].iov_base = (void *)(uintptr_t) path;
977 	iov[0].iov_len = strlen(path) + 1;
978 	iov[1].iov_base = (void *)(uintptr_t) token;
979 	iov[1].iov_len = strlen(token) + 1;
980 
981 	return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL));
982 }
983 
984 /**
985  * Convert from watch token (unique identifier) to the associated
986  * internal tracking structure for this watch.
987  *
988  * \param tocken  The unique identifier for the watch to find.
989  *
990  * \return  A pointer to the found watch structure or NULL.
991  */
992 static struct xs_watch *
find_watch(const char * token)993 find_watch(const char *token)
994 {
995 	struct xs_watch *i, *cmp;
996 
997 	cmp = (void *)strtoul(token, NULL, 16);
998 
999 	LIST_FOREACH(i, &xs.registered_watches, list)
1000 		if (i == cmp)
1001 			return (i);
1002 
1003 	return (NULL);
1004 }
1005 
1006 /**
1007  * Thread body of the XenStore watch event dispatch thread.
1008  */
1009 static void
xenwatch_thread(void * unused)1010 xenwatch_thread(void *unused)
1011 {
1012 	struct xs_stored_msg *msg;
1013 
1014 	for (;;) {
1015 
1016 		mtx_lock(&xs.watch_events_lock);
1017 		while (TAILQ_EMPTY(&xs.watch_events))
1018 			mtx_sleep(&xs.watch_events,
1019 			    &xs.watch_events_lock,
1020 			    PWAIT | PCATCH, "waitev", hz/10);
1021 
1022 		mtx_unlock(&xs.watch_events_lock);
1023 		sx_xlock(&xs.xenwatch_mutex);
1024 
1025 		mtx_lock(&xs.watch_events_lock);
1026 		msg = TAILQ_FIRST(&xs.watch_events);
1027 		if (msg)
1028 			TAILQ_REMOVE(&xs.watch_events, msg, list);
1029 		mtx_unlock(&xs.watch_events_lock);
1030 
1031 		if (msg != NULL) {
1032 			/*
1033 			 * XXX There are messages coming in with a NULL
1034 			 * XXX callback.  This deserves further investigation;
1035 			 * XXX the workaround here simply prevents the kernel
1036 			 * XXX from panic'ing on startup.
1037 			 */
1038 			if (msg->u.watch.handle->callback != NULL)
1039 				msg->u.watch.handle->callback(
1040 					msg->u.watch.handle,
1041 					(const char **)msg->u.watch.vec,
1042 					msg->u.watch.vec_size);
1043 			free(msg->u.watch.vec, M_XENSTORE);
1044 			free(msg, M_XENSTORE);
1045 		}
1046 
1047 		sx_xunlock(&xs.xenwatch_mutex);
1048 	}
1049 }
1050 
1051 /*----------- XenStore Configuration, Initialization, and Control ------------*/
1052 /**
1053  * Setup communication channels with the XenStore service.
1054  *
1055  * \return  On success, 0. Otherwise an errno value indicating the
1056  *          type of failure.
1057  */
1058 static int
xs_init_comms(void)1059 xs_init_comms(void)
1060 {
1061 	int error;
1062 
1063 	if (xen_store->rsp_prod != xen_store->rsp_cons) {
1064 		log(LOG_WARNING, "XENSTORE response ring is not quiescent "
1065 		    "(%08x:%08x): fixing up\n",
1066 		    xen_store->rsp_cons, xen_store->rsp_prod);
1067 		xen_store->rsp_cons = xen_store->rsp_prod;
1068 	}
1069 
1070 	if (xs.irq)
1071 		unbind_from_irqhandler(xs.irq);
1072 
1073 	error = bind_caller_port_to_irqhandler(xs.evtchn, "xenstore",
1074 	    xs_intr, NULL, INTR_TYPE_NET, &xs.irq);
1075 	if (error) {
1076 		log(LOG_WARNING, "XENSTORE request irq failed %i\n", error);
1077 		return (error);
1078 	}
1079 
1080 	return (0);
1081 }
1082 
1083 /*------------------ Private Device Attachment Functions  --------------------*/
1084 static void
xs_identify(driver_t * driver,device_t parent)1085 xs_identify(driver_t *driver, device_t parent)
1086 {
1087 
1088 	BUS_ADD_CHILD(parent, 0, "xenstore", 0);
1089 }
1090 
1091 /**
1092  * Probe for the existance of the XenStore.
1093  *
1094  * \param dev
1095  */
1096 static int
xs_probe(device_t dev)1097 xs_probe(device_t dev)
1098 {
1099 	/*
1100 	 * We are either operating within a PV kernel or being probed
1101 	 * as the child of the successfully attached xenpci device.
1102 	 * Thus we are in a Xen environment and there will be a XenStore.
1103 	 * Unconditionally return success.
1104 	 */
1105 	device_set_desc(dev, "XenStore");
1106 	return (0);
1107 }
1108 
1109 static void
xs_attach_deferred(void * arg)1110 xs_attach_deferred(void *arg)
1111 {
1112 	xs_dev_init();
1113 
1114 	bus_generic_probe(xs.xs_dev);
1115 	bus_generic_attach(xs.xs_dev);
1116 
1117 	config_intrhook_disestablish(&xs.xs_attachcb);
1118 }
1119 
1120 /**
1121  * Attach to the XenStore.
1122  *
1123  * This routine also prepares for the probe/attach of drivers that rely
1124  * on the XenStore.
1125  */
1126 static int
xs_attach(device_t dev)1127 xs_attach(device_t dev)
1128 {
1129 	int error;
1130 
1131 	/* Allow us to get device_t from softc and vice-versa. */
1132 	xs.xs_dev = dev;
1133 	device_set_softc(dev, &xs);
1134 
1135 	/*
1136 	 * This seems to be a layering violation.  The XenStore is just
1137 	 * one of many clients of the Grant Table facility.  It happens
1138 	 * to be the first and a gating consumer to all other devices,
1139 	 * so this does work.  A better place would be in the PV support
1140 	 * code for fully PV kernels and the xenpci driver for HVM kernels.
1141 	 */
1142 	error = gnttab_init();
1143 	if (error != 0) {
1144 		log(LOG_WARNING,
1145 		    "XENSTORE: Error initializing grant tables: %d\n", error);
1146 		return (ENXIO);
1147 	}
1148 
1149 	/* Initialize the interface to xenstore. */
1150 	struct proc *p;
1151 
1152 #ifdef XENHVM
1153 	xs.evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
1154 	xs.gpfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
1155 	xen_store = pmap_mapdev(xs.gpfn * PAGE_SIZE, PAGE_SIZE);
1156 #else
1157 	xs.evtchn = xen_start_info->store_evtchn;
1158 #endif
1159 
1160 	TAILQ_INIT(&xs.reply_list);
1161 	TAILQ_INIT(&xs.watch_events);
1162 
1163 	mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF);
1164 	mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF);
1165 	sx_init(&xs.xenwatch_mutex, "xenwatch");
1166 	sx_init(&xs.request_mutex, "xenstore request");
1167 	sx_init(&xs.suspend_mutex, "xenstore suspend");
1168 	mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF);
1169 	mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF);
1170 	xs.irq = 0;
1171 
1172 	/* Initialize the shared memory rings to talk to xenstored */
1173 	error = xs_init_comms();
1174 	if (error)
1175 		return (error);
1176 
1177 	error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID,
1178 	    0, "xenwatch");
1179 	if (error)
1180 		return (error);
1181 	xs.xenwatch_pid = p->p_pid;
1182 
1183 	error = kproc_create(xs_rcv_thread, NULL, NULL,
1184 	    RFHIGHPID, 0, "xenstore_rcv");
1185 
1186 	xs.xs_attachcb.ich_func = xs_attach_deferred;
1187 	xs.xs_attachcb.ich_arg = NULL;
1188 	config_intrhook_establish(&xs.xs_attachcb);
1189 
1190 	return (error);
1191 }
1192 
1193 /**
1194  * Prepare for suspension of this VM by halting XenStore access after
1195  * all transactions and individual requests have completed.
1196  */
1197 static int
xs_suspend(device_t dev)1198 xs_suspend(device_t dev)
1199 {
1200 	int error;
1201 
1202 	/* Suspend child Xen devices. */
1203 	error = bus_generic_suspend(dev);
1204 	if (error != 0)
1205 		return (error);
1206 
1207 	sx_xlock(&xs.suspend_mutex);
1208 	sx_xlock(&xs.request_mutex);
1209 
1210 	return (0);
1211 }
1212 
1213 /**
1214  * Resume XenStore operations after this VM is resumed.
1215  */
1216 static int
xs_resume(device_t dev __unused)1217 xs_resume(device_t dev __unused)
1218 {
1219 	struct xs_watch *watch;
1220 	char token[sizeof(watch) * 2 + 1];
1221 
1222 	xs_init_comms();
1223 
1224 	sx_xunlock(&xs.request_mutex);
1225 
1226 	/*
1227 	 * No need for registered_watches_lock: the suspend_mutex
1228 	 * is sufficient.
1229 	 */
1230 	LIST_FOREACH(watch, &xs.registered_watches, list) {
1231 		sprintf(token, "%lX", (long)watch);
1232 		xs_watch(watch->node, token);
1233 	}
1234 
1235 	sx_xunlock(&xs.suspend_mutex);
1236 
1237 	/* Resume child Xen devices. */
1238 	bus_generic_resume(dev);
1239 
1240 	return (0);
1241 }
1242 
1243 /*-------------------- Private Device Attachment Data  -----------------------*/
/*
 * Method table for the XenStore driver.  Identify, probe, attach,
 * suspend and resume are implemented in this file; every other entry
 * is routed to the generic bus implementation.
 */
static device_method_t xenstore_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	xs_identify),
	DEVMETHOD(device_probe,         xs_probe),
	DEVMETHOD(device_attach,        xs_attach),
	DEVMETHOD(device_detach,        bus_generic_detach),
	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
	DEVMETHOD(device_suspend,       xs_suspend),
	DEVMETHOD(device_resume,        xs_resume),

	/* Bus interface */
	DEVMETHOD(bus_add_child,        bus_generic_add_child),
	DEVMETHOD(bus_alloc_resource,   bus_generic_alloc_resource),
	DEVMETHOD(bus_release_resource, bus_generic_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),

	DEVMETHOD_END
};
1263 
/*
 * Driver class for the XenStore.  The final argument (softc size) is 0
 * because xs_attach() installs the global 'xs' as the softc itself via
 * device_set_softc().
 */
DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0);
static devclass_t xenstore_devclass;

/*
 * Register the driver beneath xenpci when built with XENHVM and
 * beneath nexus otherwise.
 */
#ifdef XENHVM
DRIVER_MODULE(xenstore, xenpci, xenstore_driver, xenstore_devclass, 0, 0);
#else
DRIVER_MODULE(xenstore, nexus, xenstore_driver, xenstore_devclass, 0, 0);
#endif
1272 
1273 /*------------------------------- Sysctl Data --------------------------------*/
/* XXX Shouldn't the node be somewhere else? */
/*
 * Read-only sysctls under dev.xen:
 *   xsd_port - the XenStore event channel (xs.evtchn).
 *   xsd_kva  - the value of the xen_store pointer, exported as an
 *              unsigned long.
 */
SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen");
SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, "");
SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, "");
1278 
1279 /*-------------------------------- Public API --------------------------------*/
1280 /*------- API comments for these methods can be found in xenstorevar.h -------*/
1281 int
xs_directory(struct xs_transaction t,const char * dir,const char * node,u_int * num,const char *** result)1282 xs_directory(struct xs_transaction t, const char *dir, const char *node,
1283     u_int *num, const char ***result)
1284 {
1285 	struct sbuf *path;
1286 	char *strings;
1287 	u_int len = 0;
1288 	int error;
1289 
1290 	path = xs_join(dir, node);
1291 	error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len,
1292 	    (void **)&strings);
1293 	sbuf_delete(path);
1294 	if (error)
1295 		return (error);
1296 
1297 	*result = split(strings, len, num);
1298 
1299 	return (0);
1300 }
1301 
1302 int
xs_exists(struct xs_transaction t,const char * dir,const char * node)1303 xs_exists(struct xs_transaction t, const char *dir, const char *node)
1304 {
1305 	const char **d;
1306 	int error, dir_n;
1307 
1308 	error = xs_directory(t, dir, node, &dir_n, &d);
1309 	if (error)
1310 		return (0);
1311 	free(d, M_XENSTORE);
1312 	return (1);
1313 }
1314 
1315 int
xs_read(struct xs_transaction t,const char * dir,const char * node,u_int * len,void ** result)1316 xs_read(struct xs_transaction t, const char *dir, const char *node,
1317     u_int *len, void **result)
1318 {
1319 	struct sbuf *path;
1320 	void *ret;
1321 	int error;
1322 
1323 	path = xs_join(dir, node);
1324 	error = xs_single(t, XS_READ, sbuf_data(path), len, &ret);
1325 	sbuf_delete(path);
1326 	if (error)
1327 		return (error);
1328 	*result = ret;
1329 	return (0);
1330 }
1331 
1332 int
xs_write(struct xs_transaction t,const char * dir,const char * node,const char * string)1333 xs_write(struct xs_transaction t, const char *dir, const char *node,
1334     const char *string)
1335 {
1336 	struct sbuf *path;
1337 	struct iovec iovec[2];
1338 	int error;
1339 
1340 	path = xs_join(dir, node);
1341 
1342 	iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path);
1343 	iovec[0].iov_len = sbuf_len(path) + 1;
1344 	iovec[1].iov_base = (void *)(uintptr_t) string;
1345 	iovec[1].iov_len = strlen(string);
1346 
1347 	error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL);
1348 	sbuf_delete(path);
1349 
1350 	return (error);
1351 }
1352 
1353 int
xs_mkdir(struct xs_transaction t,const char * dir,const char * node)1354 xs_mkdir(struct xs_transaction t, const char *dir, const char *node)
1355 {
1356 	struct sbuf *path;
1357 	int ret;
1358 
1359 	path = xs_join(dir, node);
1360 	ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL);
1361 	sbuf_delete(path);
1362 
1363 	return (ret);
1364 }
1365 
1366 int
xs_rm(struct xs_transaction t,const char * dir,const char * node)1367 xs_rm(struct xs_transaction t, const char *dir, const char *node)
1368 {
1369 	struct sbuf *path;
1370 	int ret;
1371 
1372 	path = xs_join(dir, node);
1373 	ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL);
1374 	sbuf_delete(path);
1375 
1376 	return (ret);
1377 }
1378 
1379 int
xs_rm_tree(struct xs_transaction xbt,const char * base,const char * node)1380 xs_rm_tree(struct xs_transaction xbt, const char *base, const char *node)
1381 {
1382 	struct xs_transaction local_xbt;
1383 	struct sbuf *root_path_sbuf;
1384 	struct sbuf *cur_path_sbuf;
1385 	char *root_path;
1386 	char *cur_path;
1387 	const char **dir;
1388 	int error;
1389 	int empty;
1390 
1391 retry:
1392 	root_path_sbuf = xs_join(base, node);
1393 	cur_path_sbuf  = xs_join(base, node);
1394 	root_path      = sbuf_data(root_path_sbuf);
1395 	cur_path       = sbuf_data(cur_path_sbuf);
1396 	dir            = NULL;
1397 	local_xbt.id   = 0;
1398 
1399 	if (xbt.id == 0) {
1400 		error = xs_transaction_start(&local_xbt);
1401 		if (error != 0)
1402 			goto out;
1403 		xbt = local_xbt;
1404 	}
1405 
1406 	empty = 0;
1407 	while (1) {
1408 		u_int count;
1409 		u_int i;
1410 
1411 		error = xs_directory(xbt, cur_path, "", &count, &dir);
1412 		if (error)
1413 			goto out;
1414 
1415 		for (i = 0; i < count; i++) {
1416 			error = xs_rm(xbt, cur_path, dir[i]);
1417 			if (error == ENOTEMPTY) {
1418 				struct sbuf *push_dir;
1419 
1420 				/*
1421 				 * Descend to clear out this sub directory.
1422 				 * We'll return to cur_dir once push_dir
1423 				 * is empty.
1424 				 */
1425 				push_dir = xs_join(cur_path, dir[i]);
1426 				sbuf_delete(cur_path_sbuf);
1427 				cur_path_sbuf = push_dir;
1428 				cur_path = sbuf_data(cur_path_sbuf);
1429 				break;
1430 			} else if (error != 0) {
1431 				goto out;
1432 			}
1433 		}
1434 
1435 		free(dir, M_XENSTORE);
1436 		dir = NULL;
1437 
1438 		if (i == count) {
1439 			char *last_slash;
1440 
1441 			/* Directory is empty.  It is now safe to remove. */
1442 			error = xs_rm(xbt, cur_path, "");
1443 			if (error != 0)
1444 				goto out;
1445 
1446 			if (!strcmp(cur_path, root_path))
1447 				break;
1448 
1449 			/* Return to processing the parent directory. */
1450 			last_slash = strrchr(cur_path, '/');
1451 			KASSERT(last_slash != NULL,
1452 				("xs_rm_tree: mangled path %s", cur_path));
1453 			*last_slash = '\0';
1454 		}
1455 	}
1456 
1457 out:
1458 	sbuf_delete(cur_path_sbuf);
1459 	sbuf_delete(root_path_sbuf);
1460 	if (dir != NULL)
1461 		free(dir, M_XENSTORE);
1462 
1463 	if (local_xbt.id != 0) {
1464 		int terror;
1465 
1466 		terror = xs_transaction_end(local_xbt, /*abort*/error != 0);
1467 		xbt.id = 0;
1468 		if (terror == EAGAIN && error == 0)
1469 			goto retry;
1470 	}
1471 	return (error);
1472 }
1473 
1474 int
xs_transaction_start(struct xs_transaction * t)1475 xs_transaction_start(struct xs_transaction *t)
1476 {
1477 	char *id_str;
1478 	int error;
1479 
1480 	error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL,
1481 	    (void **)&id_str);
1482 	if (error == 0) {
1483 		t->id = strtoul(id_str, NULL, 0);
1484 		free(id_str, M_XENSTORE);
1485 	}
1486 	return (error);
1487 }
1488 
1489 int
xs_transaction_end(struct xs_transaction t,int abort)1490 xs_transaction_end(struct xs_transaction t, int abort)
1491 {
1492 	char abortstr[2];
1493 
1494 	if (abort)
1495 		strcpy(abortstr, "F");
1496 	else
1497 		strcpy(abortstr, "T");
1498 
1499 	return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL));
1500 }
1501 
1502 int
xs_scanf(struct xs_transaction t,const char * dir,const char * node,int * scancountp,const char * fmt,...)1503 xs_scanf(struct xs_transaction t, const char *dir, const char *node,
1504      int *scancountp, const char *fmt, ...)
1505 {
1506 	va_list ap;
1507 	int error, ns;
1508 	char *val;
1509 
1510 	error = xs_read(t, dir, node, NULL, (void **) &val);
1511 	if (error)
1512 		return (error);
1513 
1514 	va_start(ap, fmt);
1515 	ns = vsscanf(val, fmt, ap);
1516 	va_end(ap);
1517 	free(val, M_XENSTORE);
1518 	/* Distinctive errno. */
1519 	if (ns == 0)
1520 		return (ERANGE);
1521 	if (scancountp)
1522 		*scancountp = ns;
1523 	return (0);
1524 }
1525 
1526 int
xs_vprintf(struct xs_transaction t,const char * dir,const char * node,const char * fmt,va_list ap)1527 xs_vprintf(struct xs_transaction t,
1528     const char *dir, const char *node, const char *fmt, va_list ap)
1529 {
1530 	struct sbuf *sb;
1531 	int error;
1532 
1533 	sb = sbuf_new_auto();
1534 	sbuf_vprintf(sb, fmt, ap);
1535 	sbuf_finish(sb);
1536 	error = xs_write(t, dir, node, sbuf_data(sb));
1537 	sbuf_delete(sb);
1538 
1539 	return (error);
1540 }
1541 
1542 int
xs_printf(struct xs_transaction t,const char * dir,const char * node,const char * fmt,...)1543 xs_printf(struct xs_transaction t, const char *dir, const char *node,
1544      const char *fmt, ...)
1545 {
1546 	va_list ap;
1547 	int error;
1548 
1549 	va_start(ap, fmt);
1550 	error = xs_vprintf(t, dir, node, fmt, ap);
1551 	va_end(ap);
1552 
1553 	return (error);
1554 }
1555 
1556 int
xs_gather(struct xs_transaction t,const char * dir,...)1557 xs_gather(struct xs_transaction t, const char *dir, ...)
1558 {
1559 	va_list ap;
1560 	const char *name;
1561 	int error;
1562 
1563 	va_start(ap, dir);
1564 	error = 0;
1565 	while (error == 0 && (name = va_arg(ap, char *)) != NULL) {
1566 		const char *fmt = va_arg(ap, char *);
1567 		void *result = va_arg(ap, void *);
1568 		char *p;
1569 
1570 		error = xs_read(t, dir, name, NULL, (void **) &p);
1571 		if (error)
1572 			break;
1573 
1574 		if (fmt) {
1575 			if (sscanf(p, fmt, result) == 0)
1576 				error = EINVAL;
1577 			free(p, M_XENSTORE);
1578 		} else
1579 			*(char **)result = p;
1580 	}
1581 	va_end(ap);
1582 
1583 	return (error);
1584 }
1585 
1586 int
xs_register_watch(struct xs_watch * watch)1587 xs_register_watch(struct xs_watch *watch)
1588 {
1589 	/* Pointer in ascii is the token. */
1590 	char token[sizeof(watch) * 2 + 1];
1591 	int error;
1592 
1593 	sprintf(token, "%lX", (long)watch);
1594 
1595 	sx_slock(&xs.suspend_mutex);
1596 
1597 	mtx_lock(&xs.registered_watches_lock);
1598 	KASSERT(find_watch(token) == NULL, ("watch already registered"));
1599 	LIST_INSERT_HEAD(&xs.registered_watches, watch, list);
1600 	mtx_unlock(&xs.registered_watches_lock);
1601 
1602 	error = xs_watch(watch->node, token);
1603 
1604 	/* Ignore errors due to multiple registration. */
1605 	if (error == EEXIST)
1606 		error = 0;
1607 
1608 	if (error != 0) {
1609 		mtx_lock(&xs.registered_watches_lock);
1610 		LIST_REMOVE(watch, list);
1611 		mtx_unlock(&xs.registered_watches_lock);
1612 	}
1613 
1614 	sx_sunlock(&xs.suspend_mutex);
1615 
1616 	return (error);
1617 }
1618 
1619 void
xs_unregister_watch(struct xs_watch * watch)1620 xs_unregister_watch(struct xs_watch *watch)
1621 {
1622 	struct xs_stored_msg *msg, *tmp;
1623 	char token[sizeof(watch) * 2 + 1];
1624 	int error;
1625 
1626 	sprintf(token, "%lX", (long)watch);
1627 
1628 	sx_slock(&xs.suspend_mutex);
1629 
1630 	mtx_lock(&xs.registered_watches_lock);
1631 	if (find_watch(token) == NULL) {
1632 		mtx_unlock(&xs.registered_watches_lock);
1633 		sx_sunlock(&xs.suspend_mutex);
1634 		return;
1635 	}
1636 	LIST_REMOVE(watch, list);
1637 	mtx_unlock(&xs.registered_watches_lock);
1638 
1639 	error = xs_unwatch(watch->node, token);
1640 	if (error)
1641 		log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n",
1642 		    watch->node, error);
1643 
1644 	sx_sunlock(&xs.suspend_mutex);
1645 
1646 	/* Cancel pending watch events. */
1647 	mtx_lock(&xs.watch_events_lock);
1648 	TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) {
1649 		if (msg->u.watch.handle != watch)
1650 			continue;
1651 		TAILQ_REMOVE(&xs.watch_events, msg, list);
1652 		free(msg->u.watch.vec, M_XENSTORE);
1653 		free(msg, M_XENSTORE);
1654 	}
1655 	mtx_unlock(&xs.watch_events_lock);
1656 
1657 	/* Flush any currently-executing callback, unless we are it. :-) */
1658 	if (curproc->p_pid != xs.xenwatch_pid) {
1659 		sx_xlock(&xs.xenwatch_mutex);
1660 		sx_xunlock(&xs.xenwatch_mutex);
1661 	}
1662 }
1663