1 /* $NetBSD: xenbus_dev.c,v 1.19 2024/02/09 22:08:34 andvar Exp $ */
2 /*
3  * xenbus_dev.c
4  *
5  * Driver giving user-space access to the kernel's xenbus connection
6  * to xenstore.
7  *
8  * Copyright (c) 2005, Christian Limpach
9  * Copyright (c) 2005, Rusty Russell, IBM Corporation
10  *
11  * This file may be distributed separately from the Linux kernel, or
12  * incorporated into other software packages, subject to the following license:
13  *
14  * Permission is hereby granted, free of charge, to any person obtaining a copy
15  * of this source file (the "Software"), to deal in the Software without
16  * restriction, including without limitation the rights to use, copy, modify,
17  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18  * and to permit persons to whom the Software is furnished to do so, subject to
19  * the following conditions:
20  *
21  * The above copyright notice and this permission notice shall be included in
22  * all copies or substantial portions of the Software.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30  * IN THE SOFTWARE.
31  */
32 
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: xenbus_dev.c,v 1.19 2024/02/09 22:08:34 andvar Exp $");
35 
36 #include "opt_xen.h"
37 
38 #include <sys/types.h>
39 #include <sys/null.h>
40 #include <sys/errno.h>
41 #include <sys/param.h>
42 #include <sys/proc.h>
43 #include <sys/systm.h>
44 #include <sys/dirent.h>
45 #include <sys/stat.h>
46 #include <sys/tree.h>
47 #include <sys/vnode.h>
48 #include <miscfs/specfs/specdev.h>
49 #include <miscfs/kernfs/kernfs.h>
50 
51 #include <xen/kernfs_machdep.h>
52 
53 #include <xen/intr.h>
54 #include <xen/hypervisor.h>
55 #include <xen/xenbus.h>
56 #include "xenbus_comms.h"
57 
/* kernfs file-operation handlers implemented in this file. */
static int xenbus_dev_read(void *);
static int xenbus_dev_write(void *);
static int xenbus_dev_open(void *);
static int xenbus_dev_close(void *);
static int xsd_port_read(void *);

/*
 * r-x for user, group and other.
 * NOTE(review): not referenced in the visible part of this file.
 */
#define DIR_MODE	 (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
/* Owner read/write only; mode of the "xenbus" node. */
#define PRIVCMD_MODE    (S_IRUSR | S_IWUSR)
/* Operations of the "xenbus" node: open, close, read and write. */
static const struct kernfs_fileop xenbus_fileops[] = {
  { .kf_fileop = KERNFS_FILEOP_OPEN, .kf_vop = xenbus_dev_open },
  { .kf_fileop = KERNFS_FILEOP_CLOSE, .kf_vop = xenbus_dev_close },
  { .kf_fileop = KERNFS_FILEOP_READ, .kf_vop = xenbus_dev_read },
  { .kf_fileop = KERNFS_FILEOP_WRITE, .kf_vop = xenbus_dev_write },
};

/* Owner read only; mode of the "xsd_port" node. */
#define XSD_MODE    (S_IRUSR)
/* Operations of the "xsd_port" node: read only. */
static const struct kernfs_fileop xsd_port_fileops[] = {
    { .kf_fileop = KERNFS_FILEOP_READ, .kf_vop = xsd_port_read },
};

/*
 * Held by the xenbus handlers while they read or change the per-node
 * data pointer (kfs_v) of the "xenbus" kernfs node.
 */
static kmutex_t xenbus_dev_open_mtx;
79 
80 void
xenbus_kernfs_init(void)81 xenbus_kernfs_init(void)
82 {
83           kernfs_entry_t *dkt;
84           kfstype kfst;
85 
86           kfst = KERNFS_ALLOCTYPE(xenbus_fileops);
87           KERNFS_ALLOCENTRY(dkt, KM_SLEEP);
88           KERNFS_INITENTRY(dkt, DT_REG, "xenbus", NULL, kfst, VREG,
89               PRIVCMD_MODE);
90           kernfs_addentry(kernxen_pkt, dkt);
91 
92           if (xendomain_is_dom0()) {
93                     kfst = KERNFS_ALLOCTYPE(xsd_port_fileops);
94                     KERNFS_ALLOCENTRY(dkt, KM_SLEEP);
95                     KERNFS_INITENTRY(dkt, DT_REG, "xsd_port", NULL,
96                         kfst, VREG, XSD_MODE);
97                     kernfs_addentry(kernxen_pkt, dkt);
98           }
99           mutex_init(&xenbus_dev_open_mtx, MUTEX_DEFAULT, IPL_NONE);
100 }
101 
/*
 * Several processes may open /kern/xen/xenbus in parallel.
 * In a transaction, one or more writes are followed by one or more reads.
 * Unfortunately we don't get a file descriptor identifier down there,
 * which we could use to link a read() to a transaction started in a write().
 * To work around this we keep a list of the lwps that opened the xenbus file.
 * This assumes that a single lwp won't open /kern/xen/xenbus more
 * than once, and that a transaction started by one lwp won't be ended
 * by another.
 * Because of this, we also assume that we always have the data before
 * the read() syscall.
 */
114 
/* One transaction started through the xenbus node and not yet ended. */
struct xenbus_dev_transaction {
	SLIST_ENTRY(xenbus_dev_transaction) trans_next;	/* list linkage */
	/* Transaction id, parsed from the XS_TRANSACTION_START reply. */
	struct xenbus_transaction *handle;
};
119 
/*
 * State kept for each lwp that has the xenbus node open: its pending
 * transactions, the ring of reply bytes waiting to be read, and the
 * request currently being assembled from write() calls.
 */
struct xenbus_dev_lwp {
	SLIST_ENTRY(xenbus_dev_lwp) lwp_next;	/* list linkage */
	SLIST_HEAD(, xenbus_dev_transaction) transactions;
	lwp_t *lwp;		/* owner; matched against curlwp */
	/* Response queue. */
#define BUFFER_SIZE (PAGE_SIZE)
	/* Maps a free-running index to an offset inside read_buffer. */
#define MASK_READ_IDX(idx) ((idx)&(BUFFER_SIZE-1))
	char read_buffer[BUFFER_SIZE];
	/* Free-running consumer/producer indices into read_buffer. */
	unsigned int read_cons, read_prod;
	/* Partial request. */
	unsigned int len;	/* bytes of u.buffer filled so far */
	union {
		struct xsd_sockmsg msg;		/* header view of the request */
		char buffer[BUFFER_SIZE];	/* raw request bytes */
	} u;
	kmutex_t mtx;		/* held while this structure is accessed */
};
137 
/* Per-node data, stored in the kfs_v pointer of the xenbus kernfs node. */
struct xenbus_dev_data {
	/* lwps which opened this device */
	SLIST_HEAD(, xenbus_dev_lwp) lwps;
	kmutex_t mtx;		/* held while the lwps list is accessed */
};
143 
144 
145 static int
xenbus_dev_read(void * v)146 xenbus_dev_read(void *v)
147 {
148           struct vop_read_args /* {
149                     struct vnode *a_vp;
150                     struct uio *a_uio;
151                     int  a_ioflag;
152                     struct ucred *a_cred;
153           } */ *ap = v;
154           struct kernfs_node *kfs = VTOKERN(ap->a_vp);
155           struct uio *uio = ap->a_uio;
156           struct xenbus_dev_data *u;
157           struct xenbus_dev_lwp *xlwp;
158           int err;
159           off_t offset;
160 
161           mutex_enter(&xenbus_dev_open_mtx);
162           u = kfs->kfs_v;
163           if (u == NULL) {
164                     mutex_exit(&xenbus_dev_open_mtx);
165                     return EBADF;
166           }
167           mutex_enter(&u->mtx);
168           mutex_exit(&xenbus_dev_open_mtx);
169           SLIST_FOREACH(xlwp, &u->lwps, lwp_next) {
170                     if (xlwp->lwp == curlwp) {
171                               break;
172                     }
173           }
174           if (xlwp == NULL) {
175                     mutex_exit(&u->mtx);
176                     return EBADF;
177           }
178           mutex_enter(&xlwp->mtx);
179           mutex_exit(&u->mtx);
180 
181           if (xlwp->read_prod == xlwp->read_cons) {
182                     err = EWOULDBLOCK;
183                     goto end;
184           }
185 
186           offset = uio->uio_offset;
187           if (xlwp->read_cons > xlwp->read_prod) {
188                     err = uiomove(
189                         &xlwp->read_buffer[MASK_READ_IDX(xlwp->read_cons)],
190                         0U - xlwp->read_cons, uio);
191                     if (err)
192                               goto end;
193                     xlwp->read_cons += (uio->uio_offset - offset);
194                     offset = uio->uio_offset;
195           }
196           err = uiomove(&xlwp->read_buffer[MASK_READ_IDX(xlwp->read_cons)],
197               xlwp->read_prod - xlwp->read_cons, uio);
198           if (err == 0)
199                     xlwp->read_cons += (uio->uio_offset - offset);
200 
201 end:
202           mutex_exit(&xlwp->mtx);
203           return err;
204 }
205 
206 static void
queue_reply(struct xenbus_dev_lwp * xlwp,char * data,unsigned int len)207 queue_reply(struct xenbus_dev_lwp *xlwp,
208                               char *data, unsigned int len)
209 {
210           int i;
211           KASSERT(mutex_owned(&xlwp->mtx));
212           for (i = 0; i < len; i++, xlwp->read_prod++)
213                     xlwp->read_buffer[MASK_READ_IDX(xlwp->read_prod)] = data[i];
214 
215           KASSERT((xlwp->read_prod - xlwp->read_cons) <= sizeof(xlwp->read_buffer));
216 }
217 
218 static int
xenbus_dev_write(void * v)219 xenbus_dev_write(void *v)
220 {
221           struct vop_write_args /* {
222                     struct vnode *a_vp;
223                     struct uio *a_uio;
224                     int  a_ioflag;
225                     struct ucred *a_cred;
226           } */ *ap = v;
227           struct kernfs_node *kfs = VTOKERN(ap->a_vp);
228           struct uio *uio = ap->a_uio;
229 
230           struct xenbus_dev_data *u;
231           struct xenbus_dev_lwp *xlwp;
232           struct xenbus_dev_transaction *trans;
233           void *reply;
234           int err;
235           size_t size;
236 
237           mutex_enter(&xenbus_dev_open_mtx);
238           u = kfs->kfs_v;
239           if (u == NULL) {
240                     mutex_exit(&xenbus_dev_open_mtx);
241                     return EBADF;
242           }
243           mutex_enter(&u->mtx);
244           mutex_exit(&xenbus_dev_open_mtx);
245           SLIST_FOREACH(xlwp, &u->lwps, lwp_next) {
246                     if (xlwp->lwp == curlwp) {
247                               break;
248                     }
249           }
250           if (xlwp == NULL) {
251                     mutex_exit(&u->mtx);
252                     return EBADF;
253           }
254           mutex_enter(&xlwp->mtx);
255           mutex_exit(&u->mtx);
256 
257           if (uio->uio_offset < 0) {
258                     err = EINVAL;
259                     goto end;
260           }
261           size = uio->uio_resid;
262 
263           if ((size + xlwp->len) > sizeof(xlwp->u.buffer)) {
264                     err = EINVAL;
265                     goto end;
266           }
267 
268           err = uiomove(xlwp->u.buffer + xlwp->len,
269                           sizeof(xlwp->u.buffer) -  xlwp->len, uio);
270           if (err)
271                     goto end;
272 
273           xlwp->len += size;
274           if (xlwp->len < (sizeof(xlwp->u.msg) + xlwp->u.msg.len))
275                     goto end;
276 
277           switch (xlwp->u.msg.type) {
278           case XS_TRANSACTION_START:
279           case XS_TRANSACTION_END:
280           case XS_DIRECTORY:
281           case XS_READ:
282           case XS_GET_PERMS:
283           case XS_RELEASE:
284           case XS_GET_DOMAIN_PATH:
285           case XS_WRITE:
286           case XS_MKDIR:
287           case XS_RM:
288           case XS_SET_PERMS:
289                     err = xenbus_dev_request_and_reply(&xlwp->u.msg, &reply);
290                     if (err == 0) {
291                               if (xlwp->u.msg.type == XS_TRANSACTION_START) {
292                                         trans = kmem_alloc(sizeof(*trans), KM_SLEEP);
293                                         trans->handle = (struct xenbus_transaction *)
294                                                   strtoul(reply, NULL, 0);
295                                         SLIST_INSERT_HEAD(&xlwp->transactions,
296                                             trans, trans_next);
297                               } else if (xlwp->u.msg.type == XS_TRANSACTION_END) {
298                                         SLIST_FOREACH(trans, &xlwp->transactions,
299                                                                 trans_next) {
300                                                   if ((unsigned long)trans->handle ==
301                                                       (unsigned long)xlwp->u.msg.tx_id)
302                                                             break;
303                                         }
304                                         if (trans == NULL) {
305                                                   err = EINVAL;
306                                                   goto end;
307                                         }
308                                         SLIST_REMOVE(&xlwp->transactions, trans,
309                                             xenbus_dev_transaction, trans_next);
310                                         kmem_free(trans, sizeof(*trans));
311                               }
312                               queue_reply(xlwp, (char *)&xlwp->u.msg,
313                                                             sizeof(xlwp->u.msg));
314                               queue_reply(xlwp, (char *)reply, xlwp->u.msg.len);
315 
316                               xenbus_dev_reply_free(&xlwp->u.msg, reply);
317                     }
318                     break;
319 
320           default:
321                     err = EINVAL;
322                     break;
323           }
324 
325           if (err == 0) {
326                     xlwp->len = 0;
327           }
328 end:
329           mutex_exit(&xlwp->mtx);
330           return err;
331 }
332 
333 static int
xenbus_dev_open(void * v)334 xenbus_dev_open(void *v)
335 {
336           struct vop_open_args /* {
337                     struct vnode *a_vp;
338                     int a_mode;
339                     struct ucred *a_cred;
340           } */ *ap = v;
341           struct kernfs_node *kfs = VTOKERN(ap->a_vp);
342           struct xenbus_dev_data *u;
343           struct xenbus_dev_lwp *xlwp;
344 
345           if (xen_start_info.store_evtchn == 0)
346                     return ENOENT;
347 
348           mutex_enter(&xenbus_dev_open_mtx);
349           u = kfs->kfs_v;
350           if (u == NULL) {
351                     mutex_exit(&xenbus_dev_open_mtx);
352 
353                     u = kmem_zalloc(sizeof(*u), KM_SLEEP);
354                     SLIST_INIT(&u->lwps);
355                     mutex_init(&u->mtx, MUTEX_DEFAULT, IPL_NONE);
356 
357                     mutex_enter(&xenbus_dev_open_mtx);
358                     /*
359                      * Must re-check if filled while waiting in alloc
360                      * by some other lwp.
361                      */
362                     if (kfs->kfs_v) {
363                               kmem_free(u, sizeof(*u));
364                               u = kfs->kfs_v;
365                     } else {
366                               kfs->kfs_v = u;
367                     }
368           };
369           mutex_exit(&xenbus_dev_open_mtx);
370 
371           mutex_enter(&u->mtx);
372           SLIST_FOREACH(xlwp, &u->lwps, lwp_next) {
373                     if (xlwp->lwp == curlwp) {
374                               break;
375                     }
376           }
377           if (xlwp == NULL) {
378                     mutex_exit(&u->mtx);
379 
380                     xlwp = kmem_zalloc(sizeof(*xlwp), KM_SLEEP);
381                     xlwp->lwp = curlwp;
382                     SLIST_INIT(&xlwp->transactions);
383                     mutex_init(&xlwp->mtx, MUTEX_DEFAULT, IPL_NONE);
384 
385                     mutex_enter(&u->mtx);
386                     /*
387                      * While alloc can block, this can't be re-entered with
388                      * curlwp, so no need to re-check. Also the node can't
389                      * be closed while we are blocked here.
390                      */
391                     SLIST_INSERT_HEAD(&u->lwps, xlwp, lwp_next);
392           }
393           mutex_exit(&u->mtx);
394 
395           return 0;
396 }
397 
398 static int
xenbus_dev_close(void * v)399 xenbus_dev_close(void *v)
400 {
401           struct vop_close_args /* {
402                     struct vnode *a_vp;
403                     int a_fflag;
404                     struct ucred *a_cred;
405           } */ *ap = v;
406           struct kernfs_node *kfs = VTOKERN(ap->a_vp);
407 
408           struct xenbus_dev_data *u;
409           struct xenbus_dev_lwp *xlwp;
410           struct xenbus_dev_transaction *trans;
411 
412           mutex_enter(&xenbus_dev_open_mtx);
413           u = kfs->kfs_v;
414           KASSERT(u != NULL);
415           mutex_enter(&u->mtx);
416           SLIST_FOREACH(xlwp, &u->lwps, lwp_next) {
417                     if (xlwp->lwp == curlwp) {
418                               break;
419                     }
420           }
421           if (xlwp == NULL) {
422                     mutex_exit(&u->mtx);
423                     mutex_exit(&xenbus_dev_open_mtx);
424                     return EBADF;
425           }
426           mutex_enter(&xlwp->mtx);
427           while (!SLIST_EMPTY(&xlwp->transactions)) {
428                     trans = SLIST_FIRST(&xlwp->transactions);
429                     xenbus_transaction_end(trans->handle, 1);
430                     SLIST_REMOVE_HEAD(&xlwp->transactions, trans_next);
431                     kmem_free(trans, sizeof(*trans));
432           }
433           mutex_exit(&xlwp->mtx);
434           SLIST_REMOVE(&u->lwps, xlwp, xenbus_dev_lwp, lwp_next);
435           mutex_destroy(&xlwp->mtx);
436 
437           if (!SLIST_EMPTY(&u->lwps)) {
438                     mutex_exit(&u->mtx);
439                     mutex_exit(&xenbus_dev_open_mtx);
440                     return 0;
441           }
442           mutex_exit(&u->mtx);
443           mutex_destroy(&u->mtx);
444           kfs->kfs_v = NULL;
445           mutex_exit(&xenbus_dev_open_mtx);
446           kmem_free(xlwp, sizeof(*xlwp));
447           kmem_free(u, sizeof(*u));
448           return 0;
449 }
450 
451 #define LD_STRLEN 21 /* a 64bit integer needs 20 digits in base10 */
452 
453 static int
xsd_port_read(void * v)454 xsd_port_read(void *v)
455 {
456           struct vop_read_args /* {
457                     struct vnode *a_vp;
458                     struct uio *a_uio;
459                     int  a_ioflag;
460                     struct ucred *a_cred;
461           } */ *ap = v;
462           struct uio *uio = ap->a_uio;
463           int off, error;
464           size_t len;
465           char strbuf[LD_STRLEN], *bf;
466 
467           off = (int)uio->uio_offset;
468           if (off < 0)
469                     return EINVAL;
470 
471           len  = snprintf(strbuf, sizeof(strbuf), "%ld\n",
472               (long)xen_start_info.store_evtchn);
473           if (off >= len) {
474                     bf = strbuf;
475                     len = 0;
476           } else {
477                     bf = &strbuf[off];
478                     len -= off;
479           }
480           error = uiomove(bf, len, uio);
481           return error;
482 }
483 
484 /*
485  * Local variables:
486  *  c-file-style: "linux"
487  *  indent-tabs-mode: t
488  *  c-indent-level: 8
489  *  c-basic-offset: 8
490  *  tab-width: 8
491  * End:
492  */
493