1 /**	$MirOS: src/sys/dev/vnd.c,v 1.25 2013/10/31 20:06:56 tg Exp $ */
2 /*	$OpenBSD: vnd.c,v 1.88 2008/07/23 16:24:43 beck Exp $	*/
3 /*	$NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $	*/
4 
5 /*
6  * Copyright © 2008, 2013
7  *	Thorsten “mirabilos” Glaser <tg@mirbsd.org>
8  * Copyright (c) 1988 University of Utah.
9  * Copyright (c) 1990, 1993
10  *	The Regents of the University of California.  All rights reserved.
11  *
12  * This code is derived from software contributed to Berkeley by
13  * the Systems Programming Group of the University of Utah Computer
14  * Science Department.
15  *
16  * Redistribution and use in source and binary forms, with or without
17  * modification, are permitted provided that the following conditions
18  * are met:
19  * 1. Redistributions of source code must retain the above copyright
20  *    notice, this list of conditions and the following disclaimer.
21  * 2. Redistributions in binary form must reproduce the above copyright
22  *    notice, this list of conditions and the following disclaimer in the
23  *    documentation and/or other materials provided with the distribution.
24  * 3. Neither the name of the University nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  *
40  * from: Utah $Hdr: vn.c 1.13 94/04/02$
41  *
42  *	@(#)vn.c	8.6 (Berkeley) 4/1/94
43  */
44 
45 /*
46  * Vnode disk driver.
47  *
48  * Block/character interface to a vnode.  Allows one to treat a file
49  * as a disk (e.g. build a filesystem in it, mount it, etc.).
50  *
51  * NOTE 1: This uses either the VOP_BMAP/VOP_STRATEGY interface to the
52  * vnode or simple VOP_READ/VOP_WRITE.  The former is suitable for swapping
53  * as it doesn't distort the local buffer cache.  The latter is good for
54  * building disk images as it keeps the cache consistent after the block
55  * device is closed.
56  *
 * NOTE 2: There is a security issue involved with this driver.
 * Once mounted, all access to the contents of the "mapped" file via
 * the special file is controlled by the permissions on the special
 * file; the protection of the mapped file is ignored (effectively,
 * by using root credentials in all transactions).
62  *
63  * NOTE 3: Doesn't interact with leases, should it?
64  *
65  * NOTE 4: Trying to mount ffs read-write on a read-only vnd device
66  * makes ffs "very unhappy". Don't try this at work, kids!
67  */
68 
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/namei.h>
72 #include <sys/proc.h>
73 #include <sys/errno.h>
74 #include <sys/buf.h>
75 #include <sys/malloc.h>
76 #include <sys/pool.h>
77 #include <sys/ioctl.h>
78 #include <sys/disklabel.h>
79 #include <sys/device.h>
80 #include <sys/disk.h>
81 #include <sys/stat.h>
82 #include <sys/mount.h>
83 #include <sys/vnode.h>
84 #include <sys/file.h>
85 #include <sys/uio.h>
86 #include <sys/conf.h>
87 
88 #include <crypto/blf.h>
89 #ifdef CRYPTO
90 #include <crypto/rijndael.h>
91 #endif
92 
93 #include <miscfs/specfs/specdev.h>
94 
95 #include <dev/vndioctl.h>
96 
/*
 * Debug support, compiled in only when VNDDEBUG is defined.
 * DNPRINTF(f, ...) prints its arguments when any bit of the category
 * mask f is also set in vnddebug; without VNDDEBUG it expands to nothing.
 */
#ifdef VNDDEBUG
int dovndcluster = 1;	/* if 0, vndstrategy() zeroes the nra value from VOP_BMAP */
int vnddebug = 0x00;	/* bitwise OR of the VDB_* categories to print */
#define	VDB_FOLLOW	0x01	/* used by the entry-point traces */
#define	VDB_INIT	0x02	/* used by the configuration traces */
#define	VDB_IO		0x04	/* used by the I/O path traces */
#define	DNPRINTF(f, p...)	do { if ((f) & vnddebug) printf(p); } while (0)
#else
#define	DNPRINTF(f, p...)	/* nothing */
#endif	/* VNDDEBUG */
107 
/*
 * Minor number helpers.
 *
 * vndunit is a bit weird: it has to reconstitute a dev_t for DISKUNIT(),
 * but with everything above the low 11 bits (0x7ff) of the minor masked
 * off, which removes the 0x800 bit that vndsimple() tests.
 *
 * vndsimple() is non-zero for device nodes that have the 0x800 minor bit
 * set; vndstrategy() serves those through VOP_READ/VOP_WRITE instead of
 * VOP_BMAP/VOP_STRATEGY.
 */
#define	vndunit(x)	DISKUNIT(makedev(major(x), minor(x) & 0x7ff))
#define	vndsimple(x)	(minor(x) & 0x800)

/* same as MAKEDISKDEV, preserving the vndsimple() property */
#define	VNDLABELDEV(dev)	\
	makedev(major(dev), DISKMINOR(vndunit(dev), RAW_PART) | \
	    (vndsimple(dev) ? 0x800 : 0))
119 
/*
 * Buffer header for one fragment issued by the buffer cache bypassing
 * path of vndstrategy().  vndiodone() casts the struct buf pointer it
 * receives back to a struct vndbuf pointer, so vb_buf has to stay the
 * first member.
 */
struct vndbuf {
	struct buf	vb_buf;		/* buffer passed to VOP_STRATEGY() */
	struct buf	*vb_obp;	/* original request this fragment is part of */
};
124 
125 /*
126  * struct vndbuf allocator
127  */
128 struct pool     vndbufpl;
129 
130 #define	getvndbuf()	pool_get(&vndbufpl, PR_WAITOK)
131 #define	putvndbuf(vbp)	pool_put(&vndbufpl, vbp);
132 
/*
 * Encryption state of one vnd unit.  The rest of the driver tests len
 * for zero to decide whether any en-/decryption is to be done; the key
 * union gives typed views of the single allocation made for the cipher
 * context.
 */
struct vnd_ctx {
#ifdef notyet
	u_char iv[VNDIOC_IVSZ];			/* encryption IV (!BLF) */
#endif
	size_t len;				/* key context size */
	union {
		void *ctx_ptr;			/* pointer for malloc/free */
		blf_ctx *blowfish;		/* key for BLF, BF_CBC */
#ifdef CRYPTO
		rijndael_ctx *rijndael;		/* key for AES*_CBC */
#endif
	} key;
	uint8_t alg;				/* algorithm to use */
};
147 
/*
 * Per-unit driver state; vndattach() allocates an array of these.
 *
 * The sc_enc_* macros are shorthands into sc_enc.  sc_enc_iv and
 * sc_enc_aes name members that only exist when "notyet" respectively
 * CRYPTO is defined, so they may only be used under the same conditions.
 */
struct vnd_softc {
	struct device	 sc_dev;
	struct disk	 sc_dk;

	char		 sc_file[VNDNLEN];	/* file we're covering */
	int		 sc_flags;		/* flags */
	size_t		 sc_size;		/* size of vnd in blocks */
	struct vnode	*sc_vp;			/* vnode */
	struct ucred	*sc_cred;		/* credentials */
	struct buf	 sc_tab;		/* transfer queue */
	struct vnd_ctx	 sc_enc;		/* encryption context */
#define sc_enc_iv	sc_enc.iv
#define sc_enc_len	sc_enc.len
#define sc_enc_ptr	sc_enc.key.ctx_ptr
#define sc_enc_blf	sc_enc.key.blowfish
#define sc_enc_aes	sc_enc.key.rijndael
#define sc_enc_alg	sc_enc.alg
};
166 
/* sc_flags */
#define	VNF_ALIVE	0x0001
#define	VNF_INITED	0x0002	/* VNDIOCSET succeeded, a vnode is attached */
#define	VNF_WANTED	0x0040	/* presumably used by vndlock()/vndunlock() - not shown here */
#define	VNF_LOCKED	0x0080	/* presumably used by vndlock()/vndunlock() - not shown here */
#define	VNF_LABELLING	0x0100	/* set while DIOCWDINFO/DIOCSDINFO is handled */
#define	VNF_WLABEL	0x0200
#define	VNF_HAVELABEL	0x0400	/* set by vndopen() when it loads the disklabel */
#define	VNF_SIMPLE	0x1000	/* first opener used a vndsimple() device node */
#define	VNF_READONLY	0x2000	/* backing file was opened with FREAD only */

/* open mode flags matching the way the backing vnode was opened */
#define	VNDRW(v)	((v)->sc_flags & VNF_READONLY ? FREAD : FREAD|FWRITE)
179 
struct vnd_softc *vnd_softc;	/* array of numvnd units, set up by vndattach() */
int numvnd = 0;			/* number of elements in vnd_softc */

struct dkdriver vnddkdriver = { vndstrategy };

/* called by main() at boot time */
void	vndattach(int);

void	vndclear(struct vnd_softc *);
void	vndstart(struct vnd_softc *);
int	vndsetcred(struct vnd_softc *, struct ucred *);
void	vndiodone(struct buf *);
void	vndshutdown(void);
void	vndgetdisklabel(dev_t, struct vnd_softc *, struct disklabel *, int);
void	vndencrypt(struct vnd_softc *, caddr_t, size_t, daddr_t, int);
#ifdef notyet
void	vndmkiv(u_char *, u_char *, size_t, daddr_t)
    __attribute__((__bounded__(__string__, 1, 3)))
    __attribute__((__bounded__(__minbytes__, 2, VNDIOC_IVSZ)));
#endif
size_t	vndbdevsize(struct vnode *, struct proc *);

/* serialise configuration changes, open and close on one unit */
int	vndlock(struct vnd_softc *);
void	vndunlock(struct vnd_softc *);
204 
205 void
vndencrypt(struct vnd_softc * vnd,caddr_t addr,size_t size,daddr_t off,int encrypt)206 vndencrypt(struct vnd_softc *vnd, caddr_t addr, size_t size, daddr_t off,
207     int encrypt)
208 {
209 	size_t i, n;
210 	u_char iv[VNDIOC_MAXBSZ];
211 #ifdef notyet
212 #ifdef CRYPTO
213 	rijndael_do_cbc_t aes_op;
214 
215 	if (encrypt)
216 		aes_op = rijndael_cbc_encrypt_fast;
217 	else
218 		aes_op = rijndael_cbc_decrypt_fast;
219 #endif
220 #endif
221 
222 	n = dbtob(1);
223 	for (i = 0; i < size/n; i++) {
224 		switch (vnd->sc_enc_alg) {
225 		case VNDIOC_ALG_BLF:
226 			bzero(iv, sizeof (iv));
227 			bcopy((u_char *)&off, iv, sizeof (off));
228 #ifdef notyet
229 			goto vndencrypt_blowfish;
230 		case VNDIOC_ALG_BF_CBC:
231 			vndmkiv(iv, vnd->sc_enc_iv, VNDIOC_BSZ_BLF, off);
232  vndencrypt_blowfish:
233 #endif
234 			blf_ecb_encrypt(vnd->sc_enc_blf, iv, VNDIOC_BSZ_BLF);
235 			if (encrypt)
236 				blf_cbc_encrypt(vnd->sc_enc_blf, iv, addr, n);
237 			else
238 				blf_cbc_decrypt(vnd->sc_enc_blf, iv, addr, n);
239 			break;
240 #ifdef notyet
241 #ifdef CRYPTO
242 		case VNDIOC_ALG_AES128_CBC:
243 		case VNDIOC_ALG_AES192_CBC:
244 		case VNDIOC_ALG_AES256_CBC:
245 			vndmkiv(iv, vnd->sc_enc_iv, VNDIOC_BSZ_AES, off);
246 			(*rijndael_cbc_encrypt_fast)(vnd->sc_enc_aes,
247 			    NULL, iv, iv, 1);
248 			(*aes_op)(vnd->sc_enc_aes, iv, addr, addr,
249 			    n / VNDIOC_BSZ_AES);
250 			break;
251 #endif
252 #endif
253 		}
254 
255 		addr += n;
256 		off++;
257 	}
258 }
259 
#ifdef notyet
/*
 * Build the plain IV for one block.
 *
 *	dst		receives numbytes bytes of IV
 *	src		VNDIOC_IVSZ bytes of per-unit IV material
 *	numbytes	cipher block size; a size of zero is a no-op
 *	off		block number the IV is made for
 *
 * dst is seeded with the block number followed by its complement (both
 * in host representation), then src is XORed in.  The XOR loop runs over
 * the longer of the two lengths and wraps each index, so every byte of
 * src is mixed in even when dst is shorter.
 *
 * NOTE(review): when numbytes exceeds VNDIOC_IVSZ only the first
 * VNDIOC_IVSZ bytes of dst are seeded before the XOR; the visible
 * callers pass VNDIOC_BSZ_BLF and VNDIOC_BSZ_AES - confirm neither is
 * larger than VNDIOC_IVSZ.
 */
void
vndmkiv(u_char *dst, u_char *src, size_t numbytes, daddr_t off)
{
	size_t n;
	uint64_t xoff[VNDIOC_IVSZ / sizeof (uint64_t)];

	/* nothing to build, and "n % numbytes" below must not divide by 0 */
	if (numbytes == 0)
		return;

	/* xoff has two elements, since VNDIOC_IVSZ == 16 */
	xoff[0] = (uint64_t)off;
	xoff[1] = ~((uint64_t)off);

	n = MIN(numbytes, VNDIOC_IVSZ);
	bcopy(xoff, dst, n);

	n = MAX(numbytes, VNDIOC_IVSZ);
	while (n--)
		dst[n % numbytes] ^= src[n % VNDIOC_IVSZ];
}
#endif
279 
280 void
vndattach(int num)281 vndattach(int num)
282 {
283 	char *mem;
284 	u_long size;
285 
286 	if (num <= 0)
287 		return;
288 	size = num * sizeof(struct vnd_softc);
289 	mem = malloc(size, M_DEVBUF, M_NOWAIT);
290 	if (mem == NULL) {
291 		printf("WARNING: no memory for vnode disks\n");
292 		return;
293 	}
294 	bzero(mem, size);
295 	vnd_softc = (struct vnd_softc *)mem;
296 	numvnd = num;
297 
298 	pool_init(&vndbufpl, sizeof(struct vndbuf), 0, 0, 0, "vndbufpl", NULL);
299 	pool_setlowat(&vndbufpl, 16);
300 	pool_sethiwat(&vndbufpl, 1024);
301 }
302 
303 int
vndopen(dev_t dev,int flags,int mode,struct proc * p)304 vndopen(dev_t dev, int flags, int mode, struct proc *p)
305 {
306 	int unit = vndunit(dev);
307 	struct vnd_softc *sc;
308 	int error = 0, part, pmask;
309 
310 	DNPRINTF(VDB_FOLLOW, "vndopen(%x, %x, %x, %p)\n", dev, flags, mode, p);
311 
312 	if (unit >= numvnd)
313 		return (ENXIO);
314 	sc = &vnd_softc[unit];
315 
316 	if ((error = vndlock(sc)) != 0)
317 		return (error);
318 
319 	if (!vndsimple(dev) && sc->sc_vp != NULL &&
320 	    (sc->sc_vp->v_type != VREG || sc->sc_enc_len != 0)) {
321 		error = EINVAL;
322 		goto bad;
323 	}
324 
325 	if ((flags & FWRITE) && (sc->sc_flags & VNF_READONLY)) {
326 		error = EROFS;
327 		goto bad;
328 	}
329 
330 	if ((sc->sc_flags & VNF_INITED) &&
331 	    (sc->sc_flags & VNF_HAVELABEL) == 0) {
332 		sc->sc_flags |= VNF_HAVELABEL;
333 		vndgetdisklabel(dev, sc, sc->sc_dk.dk_label, 0);
334 	}
335 
336 	part = DISKPART(dev);
337 	pmask = 1 << part;
338 
339 	/*
340 	 * If any partition is open, all succeeding openings must be of the
341 	 * same type or read-only.
342 	 */
343 	if (sc->sc_dk.dk_openmask) {
344 		if (((sc->sc_flags & VNF_SIMPLE) != 0) !=
345 		    (vndsimple(dev) != 0) && (flags & FWRITE)) {
346 			error = EBUSY;
347 			goto bad;
348 		}
349 	} else if (vndsimple(dev))
350 		sc->sc_flags |= VNF_SIMPLE;
351 	else
352 		sc->sc_flags &= ~VNF_SIMPLE;
353 
354 	/* Check that the partition exists. */
355 	if (part != RAW_PART &&
356 	    ((sc->sc_flags & VNF_HAVELABEL) == 0 ||
357 	    part >= sc->sc_dk.dk_label->d_npartitions ||
358 	    sc->sc_dk.dk_label->d_partitions[part].p_fstype == FS_UNUSED)) {
359 		error = ENXIO;
360 		goto bad;
361 	}
362 
363 	/* Prevent our unit from being unconfigured while open. */
364 	switch (mode) {
365 	case S_IFCHR:
366 		sc->sc_dk.dk_copenmask |= pmask;
367 		break;
368 
369 	case S_IFBLK:
370 		sc->sc_dk.dk_bopenmask |= pmask;
371 		break;
372 	}
373 	sc->sc_dk.dk_openmask =
374 	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
375 
376 	error = 0;
377 bad:
378 	vndunlock(sc);
379 	return (error);
380 }
381 
382 /*
383  * Load the label information on the named device
384  */
385 void
vndgetdisklabel(dev_t dev,struct vnd_softc * sc,struct disklabel * lp,int spoofonly)386 vndgetdisklabel(dev_t dev, struct vnd_softc *sc, struct disklabel *lp,
387     int spoofonly)
388 {
389 	char *errstring = NULL;
390 
391 	bzero(lp, sizeof(struct disklabel));
392 	bzero(sc->sc_dk.dk_cpulabel, sizeof(struct cpu_disklabel));
393 
394 	lp->d_secsize = 512;
395 	lp->d_ntracks = 1;
396 	lp->d_nsectors = 100;
397 	lp->d_ncylinders = sc->sc_size / 100;
398 	lp->d_secpercyl = 100;		/* lp->d_ntracks * lp->d_nsectors */
399 
400 	strncpy(lp->d_typename, "vnd device", sizeof(lp->d_typename));
401 	lp->d_type = DTYPE_VND;
402 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
403 	lp->d_secperunit = sc->sc_size;
404 	lp->d_rpm = 3600;
405 	lp->d_interleave = 1;
406 	lp->d_flags = 0;
407 
408 	lp->d_partitions[RAW_PART].p_offset = 0;
409 	lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
410 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
411 	lp->d_npartitions = RAW_PART + 1;
412 
413 	lp->d_magic = DISKMAGIC;
414 	lp->d_magic2 = DISKMAGIC;
415 	lp->d_checksum = dkcksum(lp);
416 
417 	/* Call the generic disklabel extraction routine */
418 	errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp,
419 	    sc->sc_dk.dk_cpulabel, spoofonly);
420 	if (errstring) {
421 		DNPRINTF(VDB_IO, "%s: %s\n", sc->sc_dev.dv_xname,
422 		    errstring);
423 		return;
424 	}
425 }
426 
427 int
vndclose(dev_t dev,int flags,int mode,struct proc * p)428 vndclose(dev_t dev, int flags, int mode, struct proc *p)
429 {
430 	int unit = vndunit(dev);
431 	struct vnd_softc *sc;
432 	int error = 0, part;
433 
434 	DNPRINTF(VDB_FOLLOW, "vndclose(%x, %x, %x, %p)\n", dev, flags, mode, p);
435 
436 	if (unit >= numvnd)
437 		return (ENXIO);
438 	sc = &vnd_softc[unit];
439 
440 	if ((error = vndlock(sc)) != 0)
441 		return (error);
442 
443 	part = DISKPART(dev);
444 
445 	/* ...that much closer to allowing unconfiguration... */
446 	switch (mode) {
447 	case S_IFCHR:
448 		sc->sc_dk.dk_copenmask &= ~(1 << part);
449 		break;
450 
451 	case S_IFBLK:
452 		sc->sc_dk.dk_bopenmask &= ~(1 << part);
453 		break;
454 	}
455 	sc->sc_dk.dk_openmask =
456 	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
457 
458 	vndunlock(sc);
459 	return (0);
460 }
461 
462 /*
463  * Two methods are used, the traditional buffercache bypassing and the
464  * newer, cache-coherent on unmount, one.
465  *
466  * Former method:
467  * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
468  * Note that this driver can only be used for swapping over NFS on the hp
469  * since nfs_strategy on the vax cannot handle u-areas and page tables.
470  *
471  * Latter method:
472  * Repack the buffer into an uio structure and use VOP_READ/VOP_WRITE to
473  * access the underlying file.
474  */
475 void
vndstrategy(struct buf * bp)476 vndstrategy(struct buf *bp)
477 {
478 	int unit = vndunit(bp->b_dev);
479 	struct vnd_softc *vnd = &vnd_softc[unit];
480 	struct vndbuf *nbp;
481 	int bsize;
482 	off_t bn;
483 	caddr_t addr;
484 	size_t resid;
485 	int sz, flags, error, s;
486 	struct iovec aiov;
487 	struct uio auio;
488 	struct proc *p = curproc;
489 
490 	DNPRINTF(VDB_FOLLOW, "vndstrategy(%p): unit %d\n", bp, unit);
491 
492 	if ((vnd->sc_flags & VNF_INITED) == 0) {
493 		bp->b_error = ENXIO;
494 		bp->b_flags |= B_ERROR;
495 		s = splbio();
496 		biodone(bp);
497 		splx(s);
498 		return;
499 	}
500 
501 	bn = bp->b_blkno;
502 	bp->b_resid = bp->b_bcount;
503 
504 	if (bn < 0) {
505 		bp->b_error = EINVAL;
506 		bp->b_flags |= B_ERROR;
507 		s = splbio();
508 		biodone(bp);
509 		splx(s);
510 		return;
511 	}
512 
513 	/* If we have a label, do a boundary check. */
514 	if (vnd->sc_flags & VNF_HAVELABEL) {
515 		if (bounds_check_with_label(bp, vnd->sc_dk.dk_label,
516 		    vnd->sc_dk.dk_cpulabel, 1) <= 0) {
517 			s = splbio();
518 			biodone(bp);
519 			splx(s);
520 			return;
521 		}
522 
523 		/*
524 		 * bounds_check_with_label() changes bp->b_resid, reset it
525 		 */
526 		bp->b_resid = bp->b_bcount;
527 	}
528 
529 	/* Configured as read-only?  */
530 	if ((vnd->sc_flags & VNF_READONLY) &&
531 	   ((bp->b_flags & B_READ) == 0)) {
532 		bp->b_error = EROFS;
533 		bp->b_flags |= B_ERROR;
534 		s = splbio();
535 		biodone(bp);
536 		splx(s);
537 		return;
538 	}
539 
540 	sz = howmany(bp->b_bcount, DEV_BSIZE);
541 
542 	/* No bypassing of buffer cache?  */
543 	if (vndsimple(bp->b_dev)) {
544 		/* Loop until all queued requests are handled.  */
545 		for (;;) {
546 			int part = DISKPART(bp->b_dev);
547 			int off = vnd->sc_dk.dk_label->d_partitions[part].p_offset;
548 
549 			aiov.iov_base = bp->b_data;
550 			auio.uio_resid = aiov.iov_len = bp->b_bcount;
551 			auio.uio_iov = &aiov;
552 			auio.uio_iovcnt = 1;
553 			auio.uio_offset = dbtob((off_t)(bp->b_blkno + off));
554 			auio.uio_segflg = UIO_SYSSPACE;
555 			auio.uio_procp = p;
556 
557 			vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY, p);
558 			if (bp->b_flags & B_READ) {
559 				auio.uio_rw = UIO_READ;
560 				bp->b_error = VOP_READ(vnd->sc_vp, &auio, 0,
561 				    vnd->sc_cred);
562 				if (vnd->sc_enc_len)
563 					vndencrypt(vnd,	bp->b_data,
564 					   bp->b_bcount, bp->b_blkno, 0);
565 			} else {
566 				if (vnd->sc_enc_len)
567 					vndencrypt(vnd, bp->b_data,
568 					   bp->b_bcount, bp->b_blkno, 1);
569 				auio.uio_rw = UIO_WRITE;
570 				/*
571 				 * Upper layer has already checked I/O for
572 				 * limits, so there is no need to do it again.
573 				 */
574 				bp->b_error = VOP_WRITE(vnd->sc_vp, &auio,
575 				    IO_NOLIMIT, vnd->sc_cred);
576 				/* Data in buffer cache needs to be in clear */
577 				if (vnd->sc_enc_len)
578 					vndencrypt(vnd, bp->b_data,
579 					   bp->b_bcount, bp->b_blkno, 0);
580 			}
581 			VOP_UNLOCK(vnd->sc_vp, 0, p);
582 			if (bp->b_error)
583 				bp->b_flags |= B_ERROR;
584 			bp->b_resid = auio.uio_resid;
585 			s = splbio();
586 			biodone(bp);
587 			splx(s);
588 
589 			/* If nothing more is queued, we are done.  */
590 			if (!vnd->sc_tab.b_active)
591 				return;
592 
593 			/*
594 			 * Dequeue now since lower level strategy
595 			 * routine might queue using same links.
596 			 */
597 			s = splbio();
598 			bp = vnd->sc_tab.b_actf;
599 			vnd->sc_tab.b_actf = bp->b_actf;
600 			vnd->sc_tab.b_active--;
601 			splx(s);
602 		}
603 	}
604 
605 	if (vnd->sc_vp->v_type != VREG || vnd->sc_enc_len != 0) {
606 		bp->b_error = EINVAL;
607 		bp->b_flags |= B_ERROR;
608 		s = splbio();
609 		biodone(bp);
610 		splx(s);
611 		return;
612 	}
613 
614 	/* The old-style buffercache bypassing method.  */
615 	bn += vnd->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)].p_offset;
616 	bn = dbtob(bn);
617 	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
618 	addr = bp->b_data;
619 	flags = bp->b_flags | B_CALL;
620 	for (resid = bp->b_resid; resid; resid -= sz) {
621 		struct vnode *vp;
622 		daddr_t nbn;
623 		int off, nra;
624 
625 		nra = 0;
626 		vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p);
627 		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
628 		VOP_UNLOCK(vnd->sc_vp, 0, p);
629 		if (error == 0 && (long)nbn == -1)
630 			error = EIO;
631 #ifdef VNDDEBUG
632 		if (!dovndcluster)
633 			nra = 0;
634 #endif
635 
636 		if ((off = bn % bsize) != 0)
637 			sz = bsize - off;
638 		else
639 			sz = (1 + nra) * bsize;
640 		if (resid < sz)
641 			sz = resid;
642 
643 		DNPRINTF(VDB_IO, "vndstrategy: vp %p/%p bn %llx/%x sz %x\n",
644 		    vnd->sc_vp, vp, bn, nbn, sz);
645 
646 		s = splbio();
647 		nbp = getvndbuf();
648 		splx(s);
649 		nbp->vb_buf.b_flags = flags;
650 		nbp->vb_buf.b_bcount = sz;
651 		nbp->vb_buf.b_bufsize = bp->b_bufsize;
652 		nbp->vb_buf.b_error = 0;
653 		if (vp->v_type == VBLK || vp->v_type == VCHR)
654 			nbp->vb_buf.b_dev = vp->v_rdev;
655 		else
656 			nbp->vb_buf.b_dev = NODEV;
657 		nbp->vb_buf.b_data = addr;
658 		nbp->vb_buf.b_blkno = nbn + btodb(off);
659 		nbp->vb_buf.b_proc = bp->b_proc;
660 		nbp->vb_buf.b_iodone = vndiodone;
661 		nbp->vb_buf.b_vp = vp;
662 		nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff;
663 		nbp->vb_buf.b_dirtyend = bp->b_dirtyend;
664 		nbp->vb_buf.b_validoff = bp->b_validoff;
665 		nbp->vb_buf.b_validend = bp->b_validend;
666 		LIST_INIT(&nbp->vb_buf.b_dep);
667 
668 		/* save a reference to the old buffer */
669 		nbp->vb_obp = bp;
670 
671 		/*
672 		 * If there was an error or a hole in the file...punt.
673 		 * Note that we deal with this after the nbp allocation.
674 		 * This ensures that we properly clean up any operations
675 		 * that we have already fired off.
676 		 *
677 		 * XXX we could deal with holes here but it would be
678 		 * a hassle (in the write case).
679 		 * We must still however charge for the write even if there
680 		 * was an error.
681 		 */
682 		if (error) {
683 			nbp->vb_buf.b_error = error;
684 			nbp->vb_buf.b_flags |= B_ERROR;
685 			bp->b_resid -= (resid - sz);
686 			s = splbio();
687 			/* charge for the write */
688 			if ((nbp->vb_buf.b_flags & B_READ) == 0)
689 				nbp->vb_buf.b_vp->v_numoutput++;
690 			biodone(&nbp->vb_buf);
691 			splx(s);
692 			return;
693 		}
694 		/*
695 		 * Just sort by block number
696 		 */
697 		nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno;
698 		s = splbio();
699 		disksort(&vnd->sc_tab, &nbp->vb_buf);
700 		vnd->sc_tab.b_active++;
701 		vndstart(vnd);
702 		splx(s);
703 		bn += sz;
704 		addr += sz;
705 	}
706 }
707 
708 /*
709  * Feed requests sequentially.
710  * We do it this way to keep from flooding NFS servers if we are connected
711  * to an NFS file.  This places the burden on the client rather than the
712  * server.
713  */
714 void
vndstart(struct vnd_softc * vnd)715 vndstart(struct vnd_softc *vnd)
716 {
717 	struct buf *bp;
718 
719 	/*
720 	 * Dequeue now since lower level strategy routine might
721 	 * queue using same links
722 	 */
723 	bp = vnd->sc_tab.b_actf;
724 	vnd->sc_tab.b_actf = bp->b_actf;
725 
726 	DNPRINTF(VDB_IO,
727 	    "vndstart(%ld): bp %p vp %p blkno %x addr %p cnt %lx\n",
728 	    vnd-vnd_softc, bp, bp->b_vp, bp->b_blkno, bp->b_data,
729 	    bp->b_bcount);
730 
731 	/* Instrumentation. */
732 	disk_busy(&vnd->sc_dk);
733 
734 	if ((bp->b_flags & B_READ) == 0)
735 		bp->b_vp->v_numoutput++;
736 	VOP_STRATEGY(bp);
737 }
738 
739 void
vndiodone(struct buf * bp)740 vndiodone(struct buf *bp)
741 {
742 	struct vndbuf *vbp = (struct vndbuf *) bp;
743 	struct buf *pbp = vbp->vb_obp;
744 	struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
745 
746 	splassert(IPL_BIO);
747 
748 	DNPRINTF(VDB_IO,
749 	    "vndiodone(%ld): vbp %p vp %p blkno %x addr %p cnt %lx\n",
750 	    vnd-vnd_softc, vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno,
751 	    vbp->vb_buf.b_data, vbp->vb_buf.b_bcount);
752 
753 	if (vbp->vb_buf.b_error) {
754 		DNPRINTF(VDB_IO, "vndiodone: vbp %p error %d\n", vbp,
755 		    vbp->vb_buf.b_error);
756 
757 		pbp->b_flags |= B_ERROR;
758 		/* XXX does this matter here? */
759 		(&vbp->vb_buf)->b_flags |= B_RAW;
760 		pbp->b_error = biowait(&vbp->vb_buf);
761 	}
762 	pbp->b_resid -= vbp->vb_buf.b_bcount;
763 	putvndbuf(vbp);
764 	if (vnd->sc_tab.b_active) {
765 		disk_unbusy(&vnd->sc_dk, (pbp->b_bcount - pbp->b_resid),
766 		    (pbp->b_flags & B_READ));
767 		if (!vnd->sc_tab.b_actf)
768 			vnd->sc_tab.b_active--;
769 	}
770 	if (pbp->b_resid == 0) {
771 		DNPRINTF(VDB_IO, "vndiodone: pbp %p iodone\n", pbp);
772 		biodone(pbp);
773 	}
774 
775 }
776 
777 /* ARGSUSED */
778 int
vndread(dev_t dev,struct uio * uio,int flags)779 vndread(dev_t dev, struct uio *uio, int flags)
780 {
781 	int unit = vndunit(dev);
782 	struct vnd_softc *sc;
783 
784 	DNPRINTF(VDB_FOLLOW, "vndread(%x, %p)\n", dev, uio);
785 
786 	if (unit >= numvnd)
787 		return (ENXIO);
788 	sc = &vnd_softc[unit];
789 
790 	if ((sc->sc_flags & VNF_INITED) == 0)
791 		return (ENXIO);
792 
793 	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
794 }
795 
796 /* ARGSUSED */
797 int
vndwrite(dev_t dev,struct uio * uio,int flags)798 vndwrite(dev_t dev, struct uio *uio, int flags)
799 {
800 	int unit = vndunit(dev);
801 	struct vnd_softc *sc;
802 
803 	DNPRINTF(VDB_FOLLOW, "vndwrite(%x, %p)\n", dev, uio);
804 
805 	if (unit >= numvnd)
806 		return (ENXIO);
807 	sc = &vnd_softc[unit];
808 
809 	if ((sc->sc_flags & VNF_INITED) == 0)
810 		return (ENXIO);
811 	if (sc->sc_flags & VNF_READONLY)
812 		return (EROFS);
813 
814 	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
815 }
816 
817 size_t
vndbdevsize(struct vnode * vp,struct proc * p)818 vndbdevsize(struct vnode *vp, struct proc *p)
819 {
820 	struct partinfo pi;
821 	struct bdevsw *bsw;
822 	long sscale;
823 	dev_t dev;
824 
825 	dev = vp->v_rdev;
826 	bsw = bdevsw_lookup(dev);
827 	if (bsw->d_ioctl == NULL)
828 		return (0);
829 	if (bsw->d_ioctl(dev, DIOCGPART, (caddr_t)&pi, FREAD, p))
830 		return (0);
831 	sscale = pi.disklab->d_secsize / DEV_BSIZE;
832 	DNPRINTF(VDB_INIT, "vndbdevsize: size %li secsize %li sscale %li\n",
833 	    (long)pi.part->p_size,(long)pi.disklab->d_secsize,sscale);
834 	return (pi.part->p_size * sscale);
835 }
836 
837 /* ARGSUSED */
838 int
vndioctl(dev_t dev,u_long cmd,caddr_t addr,int flag,struct proc * p)839 vndioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
840 {
841 	int unit = vndunit(dev);
842 	struct vnd_softc *vnd;
843 	struct vnd_ioctl *vio;
844 	struct vnd_user *vnu;
845 	struct vattr vattr;
846 	struct nameidata nd;
847 	int error, part, pmask, s, ksz;
848 
849 	DNPRINTF(VDB_FOLLOW, "vndioctl(%x, %lx, %p, %x, %p): unit %d\n",
850 	    dev, cmd, addr, flag, p, unit);
851 
852 	error = suser(p, 0);
853 	if (error)
854 		return (error);
855 	if (unit >= numvnd)
856 		return (ENXIO);
857 
858 	vnd = &vnd_softc[unit];
859 	vio = (struct vnd_ioctl *)addr;
860 	switch (cmd) {
861 
862 	case VNDIOCSET:
863 		if (vnd->sc_flags & VNF_INITED)
864 			return (EBUSY);
865 		if (!(vnd->sc_flags & VNF_SIMPLE) && vio->vnd_keylen)
866 			return (EINVAL);
867 
868 		if ((error = vndlock(vnd)) != 0)
869 			return (error);
870 
871 		if ((error = copyinstr(vio->vnd_file, vnd->sc_file,
872 		    sizeof(vnd->sc_file), NULL))) {
873 			vndunlock(vnd);
874 			return (error);
875 		}
876 
877 		bzero(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname));
878 		if (snprintf(vnd->sc_dev.dv_xname, sizeof(vnd->sc_dev.dv_xname),
879 		    "vnd%d", unit) >= sizeof(vnd->sc_dev.dv_xname)) {
880 			printf("VNDIOCSET: device name too long\n");
881 			vndunlock(vnd);
882 			return(ENXIO);
883 		}
884 
885 		/*
886 		 * Open for read and write first. This lets vn_open() weed out
887 		 * directories, sockets, etc. so we don't have to worry about
888 		 * them.
889 		 */
890 		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
891 		if (vio->vnd_options & VNDIOC_OPT_RDONLY)
892 			goto VNDIOCSET_readonly;
893 		vnd->sc_flags &= ~VNF_READONLY;
894 		error = vn_open(&nd, FREAD|FWRITE, 0);
895 		if (error == EROFS) {
896  VNDIOCSET_readonly:
897 			vnd->sc_flags |= VNF_READONLY;
898 			error = vn_open(&nd, FREAD, 0);
899 		}
900 		if (error) {
901 			vndunlock(vnd);
902 			return (error);
903 		}
904 
905 		if (nd.ni_vp->v_type != VREG && !vndsimple(dev)) {
906 			VOP_UNLOCK(nd.ni_vp, 0, p);
907 			vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p);
908 			vndunlock(vnd);
909 			return (EINVAL);
910 		}
911 
912 		if (nd.ni_vp->v_type == VBLK)
913 			vnd->sc_size = vndbdevsize(nd.ni_vp, p);
914 		else {
915 			error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
916 			if (error) {
917 				VOP_UNLOCK(nd.ni_vp, 0, p);
918 				vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p);
919 				vndunlock(vnd);
920 				return (error);
921 			}
922 			vnd->sc_size = btodb(vattr.va_size); /* note truncation */
923 		}
924 		VOP_UNLOCK(nd.ni_vp, 0, p);
925 		vnd->sc_vp = nd.ni_vp;
926 		if ((error = vndsetcred(vnd, p->p_ucred)) != 0) {
927 			(void) vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p);
928 			vndunlock(vnd);
929 			return (error);
930 		}
931 
932 		vnd->sc_enc_alg = vio->vnd_options >> VNDIOC_ALGSHIFT;
933 		if (vio->vnd_keylen <= 0 || !vio->vnd_key)
934 			ksz = 0;
935 		else
936 			ksz = MIN(vio->vnd_keylen, VNDIOC_MAXKSZ);
937 		vnd->sc_enc_len = 0;
938 
939 		if (vnd->sc_enc_alg && !ksz) {
940  VNDIOCSET_encinval:
941 			error = EINVAL;
942  VNDIOCSET_encerror:
943 			vn_close(nd.ni_vp, VNDRW(vnd), p->p_ucred, p);
944 			vndunlock(vnd);
945 			return (error);
946 		}
947 
948 		switch (vnd->sc_enc_alg) {
949 		case VNDIOC_ALG_BLF:
950 			ksz = MIN(ksz, VNDIOC_KSZ_BLF);
951 			break;
952 #ifdef notyet
953 		case VNDIOC_ALG_BF_CBC:
954 			ksz = MIN(ksz, VNDIOC_KSZ_BF_CBC);
955 			if (ksz <= VNDIOC_IVSZ)
956 				goto VNDIOCSET_encinval;
957 			break;
958 #ifdef CRYPTO
959 		case VNDIOC_ALG_AES128_CBC:
960 			if (ksz != VNDIOC_KSZ_AES128_CBC)
961 				goto VNDIOCSET_encinval;
962 			break;
963 		case VNDIOC_ALG_AES192_CBC:
964 			if (ksz != VNDIOC_KSZ_AES192_CBC)
965 				goto VNDIOCSET_encinval;
966 			break;
967 		case VNDIOC_ALG_AES256_CBC:
968 			if (ksz != VNDIOC_KSZ_AES256_CBC)
969 				goto VNDIOCSET_encinval;
970 			break;
971 #endif
972 #endif
973 		default:
974 			goto VNDIOCSET_encinval;
975 		}
976 
977 		if (ksz) {
978 			char key[VNDIOC_MAXKSZ];
979 
980 			if ((error = copyin(vio->vnd_key, key, ksz)) != 0)
981 				goto VNDIOCSET_encerror;
982 
983 			switch (vnd->sc_enc_alg) {
984 			case VNDIOC_ALG_BLF:
985 #ifdef notyet
986 			case VNDIOC_ALG_BF_CBC:
987 #endif
988 				vnd->sc_enc_len = sizeof (*vnd->sc_enc_blf);
989 				break;
990 #ifdef notyet
991 #ifdef CRYPTO
992 			case VNDIOC_ALG_AES128_CBC:
993 			case VNDIOC_ALG_AES192_CBC:
994 			case VNDIOC_ALG_AES256_CBC:
995 				vnd->sc_enc_len = sizeof (*vnd->sc_enc_aes);
996 				/* not implemented yet, abort */
997 				error = ENOCOFFEE;
998 				goto VNDIOCSET_encerror;
999 				/* remove these two lines once implemented */
1000 				break;
1001 #endif
1002 #endif
1003 			}
1004 			vnd->sc_enc_ptr = malloc(vnd->sc_enc_len, M_DEVBUF,
1005 			    M_WAITOK);
1006 
1007 			switch (vnd->sc_enc_alg) {
1008 			case VNDIOC_ALG_BLF:
1009 #ifdef notyet
1010 				bzero(vnd->sc_enc_iv, VNDIOC_IVSZ);
1011 #endif
1012 				blf_key(vnd->sc_enc_blf, key, ksz);
1013 				break;
1014 #ifdef notyet
1015 			case VNDIOC_ALG_BF_CBC:
1016 				bcopy(key, vnd->sc_enc_iv, VNDIOC_IVSZ);
1017 				blf_key(vnd->sc_enc_blf, key + VNDIOC_IVSZ,
1018 				    ksz - VNDIOC_IVSZ);
1019 				break;
1020 #ifdef CRYPTO
1021 			case VNDIOC_ALG_AES128_CBC:
1022 			case VNDIOC_ALG_AES192_CBC:
1023 			case VNDIOC_ALG_AES256_CBC:
1024 				/* not implemented */
1025 				break;
1026 #endif
1027 #endif
1028 			}
1029 
1030 			bzero(key, sizeof (key));
1031 		}
1032 
1033 		vio->vnd_size = dbtob((off_t)vnd->sc_size);
1034 		vnd->sc_flags |= VNF_INITED;
1035 
1036 		DNPRINTF(VDB_INIT, "vndioctl: SET vp %p size %llx\n",
1037 		    vnd->sc_vp, (unsigned long long)vnd->sc_size);
1038 
1039 		/* Attach the disk. */
1040 		vnd->sc_dk.dk_driver = &vnddkdriver;
1041 		vnd->sc_dk.dk_name = vnd->sc_dev.dv_xname;
1042 		disk_attach(&vnd->sc_dk);
1043 
1044 		vndunlock(vnd);
1045 
1046 		break;
1047 
1048 	case VNDIOCCLR:
1049 		if ((vnd->sc_flags & VNF_INITED) == 0)
1050 			return (ENXIO);
1051 
1052 		if ((error = vndlock(vnd)) != 0)
1053 			return (error);
1054 
1055 		/*
1056 		 * Don't unconfigure if any other partitions are open
1057 		 * or if both the character and block flavors of this
1058 		 * partition are open.
1059 		 */
1060 		part = DISKPART(dev);
1061 		pmask = (1 << part);
1062 		if ((vnd->sc_dk.dk_openmask & ~pmask) ||
1063 		    ((vnd->sc_dk.dk_bopenmask & pmask) &&
1064 		    (vnd->sc_dk.dk_copenmask & pmask))) {
1065 			vndunlock(vnd);
1066 			return (EBUSY);
1067 		}
1068 
1069 		vndclear(vnd);
1070 		DNPRINTF(VDB_INIT, "vndioctl: CLRed\n");
1071 
1072 		/* Free crypto key */
1073 		if (vnd->sc_enc_len) {
1074 			bzero(vnd->sc_enc_ptr, vnd->sc_enc_len);
1075 			free(vnd->sc_enc_ptr, M_DEVBUF);
1076 		}
1077 
1078 		/* Detach the disk. */
1079 		disk_detach(&vnd->sc_dk);
1080 
1081 		/* This must be atomic. */
1082 		s = splhigh();
1083 		vndunlock(vnd);
1084 		bzero(vnd, sizeof(struct vnd_softc));
1085 		splx(s);
1086 		break;
1087 
1088 	case VNDIOCGET:
1089 		vnu = (struct vnd_user *)addr;
1090 
1091 		if (vnu->vnu_unit == -1)
1092 			vnu->vnu_unit = unit;
1093 		if (vnu->vnu_unit >= numvnd)
1094 			return (ENXIO);
1095 		if (vnu->vnu_unit < 0)
1096 			return (EINVAL);
1097 
1098 		vnd = &vnd_softc[vnu->vnu_unit];
1099 
1100 		if (vnd->sc_flags & VNF_INITED) {
1101 			error = VOP_GETATTR(vnd->sc_vp, &vattr, p->p_ucred, p);
1102 			if (error)
1103 				return (error);
1104 
1105 			strlcpy(vnu->vnu_file, vnd->sc_file,
1106 			    sizeof(vnu->vnu_file));
1107 			vnu->vnu_dev = vattr.va_fsid;
1108 			vnu->vnu_ino = vattr.va_fileid;
1109 		} else {
1110 			vnu->vnu_dev = 0;
1111 			vnu->vnu_ino = 0;
1112 		}
1113 
1114 		break;
1115 
1116 	case DIOCGDINFO:
1117 		if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
1118 			return (ENOTTY);
1119 		*(struct disklabel *)addr = *(vnd->sc_dk.dk_label);
1120 		return (0);
1121 
1122 	case DIOCGPART:
1123 		if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
1124 			return (ENOTTY);
1125 		((struct partinfo *)addr)->disklab = vnd->sc_dk.dk_label;
1126 		((struct partinfo *)addr)->part =
1127 		    &vnd->sc_dk.dk_label->d_partitions[DISKPART(dev)];
1128 		return (0);
1129 
1130 	case DIOCWDINFO:
1131 	case DIOCSDINFO:
1132 		if ((vnd->sc_flags & VNF_HAVELABEL) == 0)
1133 			return (ENOTTY);
1134 		if (vnd->sc_flags & VNF_READONLY)
1135 			return (EROFS);
1136 		if ((flag & FWRITE) == 0)
1137 			return (EBADF);
1138 
1139 		if ((error = vndlock(vnd)) != 0)
1140 			return (error);
1141 		vnd->sc_flags |= VNF_LABELLING;
1142 
1143 		error = setdisklabel(vnd->sc_dk.dk_label,
1144 		    (struct disklabel *)addr, /*vnd->sc_dk.dk_openmask : */0,
1145 		    vnd->sc_dk.dk_cpulabel);
1146 		if (error == 0) {
1147 			if (cmd == DIOCWDINFO)
1148 				error = writedisklabel(VNDLABELDEV(dev),
1149 				    vndstrategy, vnd->sc_dk.dk_label,
1150 				    vnd->sc_dk.dk_cpulabel);
1151 		}
1152 
1153 		vnd->sc_flags &= ~VNF_LABELLING;
1154 		vndunlock(vnd);
1155 		return (error);
1156 
1157 	case DIOCWLABEL:
1158 		if ((flag & FWRITE) == 0)
1159 			return (EBADF);
1160 		if (vnd->sc_flags & VNF_READONLY)
1161 			return (EROFS);
1162 		if (*(int *)addr)
1163 			vnd->sc_flags |= VNF_WLABEL;
1164 		else
1165 			vnd->sc_flags &= ~VNF_WLABEL;
1166 		return (0);
1167 
1168 	default:
1169 		return (ENOTTY);
1170 	}
1171 
1172 	return (0);
1173 }
1174 
1175 /*
1176  * Duplicate the current processes' credentials.  Since we are called only
1177  * as the result of a SET ioctl and only root can do that, any future access
1178  * to this "disk" is essentially as root.  Note that credentials may change
1179  * if some other uid can write directly to the mapped file (NFS).
1180  */
1181 int
vndsetcred(struct vnd_softc * vnd,struct ucred * cred)1182 vndsetcred(struct vnd_softc *vnd, struct ucred *cred)
1183 {
1184 	struct uio auio;
1185 	struct iovec aiov;
1186 	char *tmpbuf;
1187 	int error;
1188 	struct proc *p = curproc;
1189 
1190 	vnd->sc_cred = crdup(cred);
1191 	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
1192 
1193 	/* XXX: Horrible kludge to establish credentials for NFS */
1194 	aiov.iov_base = tmpbuf;
1195 	aiov.iov_len = MIN(DEV_BSIZE, dbtob((off_t)vnd->sc_size));
1196 	auio.uio_iov = &aiov;
1197 	auio.uio_iovcnt = 1;
1198 	auio.uio_offset = 0;
1199 	auio.uio_rw = UIO_READ;
1200 	auio.uio_segflg = UIO_SYSSPACE;
1201 	auio.uio_resid = aiov.iov_len;
1202 	vn_lock(vnd->sc_vp, LK_RETRY | LK_EXCLUSIVE, p);
1203 	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
1204 	VOP_UNLOCK(vnd->sc_vp, 0, p);
1205 
1206 	free(tmpbuf, M_TEMP);
1207 	return (error);
1208 }
1209 
1210 void
vndshutdown(void)1211 vndshutdown(void)
1212 {
1213 	struct vnd_softc *vnd;
1214 
1215 	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
1216 		if (vnd->sc_flags & VNF_INITED)
1217 			vndclear(vnd);
1218 }
1219 
1220 void
vndclear(struct vnd_softc * vnd)1221 vndclear(struct vnd_softc *vnd)
1222 {
1223 	struct vnode *vp = vnd->sc_vp;
1224 	struct proc *p = curproc;		/* XXX */
1225 
1226 	DNPRINTF(VDB_FOLLOW, "vndclear(%p): vp %p\n", vnd, vp);
1227 
1228 	vnd->sc_flags &= ~VNF_INITED;
1229 	if (vp == NULL)
1230 		panic("vndioctl: null vp");
1231 	(void) vn_close(vp, VNDRW(vnd), vnd->sc_cred, p);
1232 	crfree(vnd->sc_cred);
1233 	vnd->sc_vp = NULL;
1234 	vnd->sc_cred = NULL;
1235 	vnd->sc_size = 0;
1236 }
1237 
1238 int
vndsize(dev_t dev)1239 vndsize(dev_t dev)
1240 {
1241 	int unit = vndunit(dev);
1242 	struct vnd_softc *vnd = &vnd_softc[unit];
1243 
1244 	if (unit >= numvnd || (vnd->sc_flags & VNF_INITED) == 0)
1245 		return (-1);
1246 	return (vnd->sc_size);
1247 }
1248 
1249 int
vnddump(dev_t dev,daddr_t blkno,caddr_t va,size_t size)1250 vnddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
1251 {
1252 
1253 	/* Not implemented. */
1254 	return (ENXIO);
1255 }
1256 
1257 /*
1258  * Wait interruptibly for an exclusive lock.
1259  *
1260  * XXX
1261  * Several drivers do this; it should be abstracted and made MP-safe.
1262  */
1263 int
vndlock(struct vnd_softc * sc)1264 vndlock(struct vnd_softc *sc)
1265 {
1266 	int error;
1267 
1268 	while ((sc->sc_flags & VNF_LOCKED) != 0) {
1269 		sc->sc_flags |= VNF_WANTED;
1270 		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
1271 			return (error);
1272 	}
1273 	sc->sc_flags |= VNF_LOCKED;
1274 	return (0);
1275 }
1276 
1277 /*
1278  * Unlock and wake up any waiters.
1279  */
1280 void
vndunlock(struct vnd_softc * sc)1281 vndunlock(struct vnd_softc *sc)
1282 {
1283 
1284 	sc->sc_flags &= ~VNF_LOCKED;
1285 	if ((sc->sc_flags & VNF_WANTED) != 0) {
1286 		sc->sc_flags &= ~VNF_WANTED;
1287 		wakeup(sc);
1288 	}
1289 }
1290