1 /*	$OpenBSD: ccd.c,v 1.62 2005/05/22 19:40:51 art Exp $	*/
2 /*	$NetBSD: ccd.c,v 1.33 1996/05/05 04:21:14 thorpej Exp $	*/
3 
4 /*-
5  * Copyright (c) 1996 The NetBSD Foundation, Inc.
6  * Copyright (c) 1997 Niklas Hallqvist.
7  * Copyright (c) 2005 Michael Shalayeff.
8  * All rights reserved.
9  *
10  * This code is derived from software contributed to The NetBSD Foundation
11  * by Jason R. Thorpe.
12  * Niklas Hallqvist redid the buffer policy for better performance.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *        This product includes software developed by the NetBSD
25  *        Foundation, Inc. and its contributors.
26  * 4. Neither the name of The NetBSD Foundation nor the names of its
27  *    contributors may be used to endorse or promote products derived
28  *    from this software without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
31  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
32  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
33  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
34  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
37  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
38  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40  * POSSIBILITY OF SUCH DAMAGE.
41  */
42 
43 /*
44  * Copyright (c) 1988 University of Utah.
45  * Copyright (c) 1990, 1993
46  *	The Regents of the University of California.  All rights reserved.
47  *
48  * This code is derived from software contributed to Berkeley by
49  * the Systems Programming Group of the University of Utah Computer
50  * Science Department.
51  *
52  * Redistribution and use in source and binary forms, with or without
53  * modification, are permitted provided that the following conditions
54  * are met:
55  * 1. Redistributions of source code must retain the above copyright
56  *    notice, this list of conditions and the following disclaimer.
57  * 2. Redistributions in binary form must reproduce the above copyright
58  *    notice, this list of conditions and the following disclaimer in the
59  *    documentation and/or other materials provided with the distribution.
60  * 3. Neither the name of the University nor the names of its contributors
61  *    may be used to endorse or promote products derived from this software
62  *    without specific prior written permission.
63  *
64  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
65  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
66  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
67  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
68  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
69  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
70  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
71  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
72  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
73  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
74  * SUCH DAMAGE.
75  *
76  * from: Utah $Hdr: cd.c 1.6 90/11/28$
77  *
78  *	@(#)cd.c	8.2 (Berkeley) 11/16/93
79  */
80 
81 /*
82  * "Concatenated" disk driver.
83  *
84  * Dynamic configuration and disklabel support by:
85  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
86  *	Numerical Aerodynamic Simulation Facility
87  *	Mail Stop 258-6
88  *	NASA Ames Research Center
89  *	Moffett Field, CA 94035
90  *
91  * Mirroring support based on code written by Satoshi Asami
92  * and Nisha Talagala.
93  *
94  * Buffer scatter/gather policy by Niklas Hallqvist.
95  */
96 /* #define	CCDDEBUG */
97 
98 #include <sys/param.h>
99 #include <sys/systm.h>
100 #include <sys/proc.h>
101 #include <sys/errno.h>
102 #include <sys/buf.h>
103 #include <sys/malloc.h>
104 #include <sys/pool.h>
105 #include <sys/namei.h>
106 #include <sys/stat.h>
107 #include <sys/ioctl.h>
108 #include <sys/disklabel.h>
109 #include <sys/device.h>
110 #include <sys/disk.h>
111 #include <sys/syslog.h>
112 #include <sys/fcntl.h>
113 #include <sys/vnode.h>
114 #include <sys/conf.h>
115 
116 #include <dev/ccdvar.h>
117 
/*
 * Helper functions are made static inline when the compiler is GCC;
 * otherwise INLINE expands to nothing.
 */
#if defined(__GNUC__)
#define INLINE static __inline
#else
#define INLINE
#endif

/*
 * Number of MAXBSIZE-sized kvm spaces wanted for component I/O.
 * May be overridden at build time.
 */
#if !defined(CCD_CLUSTERS)
#define CCD_CLUSTERS 16
#endif

/* CCDDEBUG implies DEBUG. */
#ifdef CCDDEBUG
#ifndef DEBUG
#define DEBUG
#endif
#endif

/* Bits selecting which debug messages are printed (see ccddebug). */
#if defined(DEBUG)
#define CCDB_FOLLOW	0x01
#define CCDB_INIT	0x02
#define CCDB_IO		0x04
#define CCDB_LABEL	0x08
#define CCDB_VNODE	0x10
int ccddebug = 0;
#endif

/* Unit number of a ccd dev_t, and the dev_t of that unit's raw partition. */
#define	ccdunit(x)	DISKUNIT(x)
#define CCDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART))
148 
149 struct ccdbuf {
150 	struct buf	cb_buf;		/* new I/O buf */
151 	struct buf	*cb_obp;	/* ptr. to original I/O buf */
152 	struct ccd_softc*cb_sc;		/* point back to the device */
153 	struct ccdbuf	*cb_dep;	/* mutual ptrs for mirror part */
154 	int		cb_comp;	/* target component */
155 	int		cb_flags;	/* misc. flags */
156 #define CBF_MIRROR	0x01		/* we're for a mirror component */
157 #define CBF_OLD		0x02		/* use old I/O protocol */
158 #define CBF_DONE	0x04		/* this buffer is done */
159 
160 	int		cb_sgcnt;	/* scatter/gather segment count */
161 #define	CCD_SGMAX	(MAXBSIZE >> PAGE_SHIFT)
162 	struct ccdseg {
163 		caddr_t	cs_sgaddr;	/* scatter/gather segment addresses */
164 		long	cs_sglen;	/* scatter/gather segment lengths */
165 	} cb_sg[1];
166 };
167 
168 /* called by main() at boot time */
169 void	ccdattach(int);
170 
171 /* called by biodone() at interrupt time */
172 void	ccdiodone(struct buf *);
173 int	ccdsize(dev_t);
174 
175 void	ccdstart(struct ccd_softc *, struct buf *);
176 void	ccdinterleave(struct ccd_softc *);
177 void	ccdintr(struct ccd_softc *, struct buf *);
178 int	ccdinit(struct ccddevice *, char **, struct proc *);
179 int	ccdlookup(char *, struct proc *p, struct vnode **);
180 long	ccdbuffer(struct ccd_softc *, struct buf *, daddr_t, caddr_t,
181     long, struct ccdbuf **, int);
182 void	ccdgetdisklabel(dev_t, struct ccd_softc *, struct disklabel *,
183     struct cpu_disklabel *, int);
184 void	ccdmakedisklabel(struct ccd_softc *);
185 int	ccdlock(struct ccd_softc *);
186 void	ccdunlock(struct ccd_softc *);
187 INLINE struct ccdbuf *getccdbuf(void);
188 INLINE void putccdbuf(struct ccdbuf *);
189 
190 #ifdef DEBUG
191 void	printiinfo(struct ccdiinfo *);
192 #endif
193 
194 /* Non-private for the benefit of libkvm. */
195 struct	ccd_softc *ccd_softc;
196 struct	ccddevice *ccddevs;
197 int	numccd = 0;
198 int	ccdbufsizeof;
199 
200 /*
201  * A separate map so that locking on kernel_map won't happen in interrupts
202  * (XXX due to fragmentation this might fail easy and panic the kernel)
203  */
204 struct vm_map *ccdmap;
205 
206 /*
207  * Set when a process need some kvm.
208  * XXX should we fallback to old I/O policy instead when out of ccd kvm?
209  */
210 int ccd_need_kvm = 0;
211 
212 /*
213  * struct ccdbuf allocator
214  */
215 struct pool	ccdbufpl;
216 
217 /*
218  * Manage the ccd buffer structures.
219  */
220 INLINE struct ccdbuf *
getccdbuf(void)221 getccdbuf(void)
222 {
223 	struct ccdbuf *cbp;
224 
225 	if ((cbp = pool_get(&ccdbufpl, PR_WAITOK)))
226 		bzero(cbp, ccdbufsizeof);
227 	return (cbp);
228 }
229 
230 INLINE void
putccdbuf(struct ccdbuf * cbp)231 putccdbuf(struct ccdbuf *cbp)
232 {
233 	pool_put(&ccdbufpl, cbp);
234 }
235 
236 /*
237  * Called by main() during pseudo-device attachment.  All we need
238  * to do is allocate enough space for devices to be configured later.
239  */
240 void
ccdattach(int num)241 ccdattach(int num)
242 {
243 	if (num <= 0) {
244 #ifdef DIAGNOSTIC
245 		panic("ccdattach: count <= 0");
246 #endif
247 		return;
248 	}
249 
250 	ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc),
251 	    M_DEVBUF, M_NOWAIT);
252 	ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice),
253 	    M_DEVBUF, M_NOWAIT);
254 	if ((ccd_softc == NULL) || (ccddevs == NULL)) {
255 		printf("WARNING: no memory for concatenated disks\n");
256 		if (ccd_softc != NULL)
257 			free(ccd_softc, M_DEVBUF);
258 		if (ccddevs != NULL)
259 			free(ccddevs, M_DEVBUF);
260 		return;
261 	}
262 	numccd = num;
263 	bzero(ccd_softc, num * sizeof(struct ccd_softc));
264 	bzero(ccddevs, num * sizeof(struct ccddevice));
265 
266 	ccdbufsizeof = sizeof(struct ccdbuf) +
267 	    (CCD_SGMAX - 1) * sizeof(struct ccdseg);
268 	pool_init(&ccdbufpl, ccdbufsizeof, 0, 0, 0, "ccdbufpl", NULL);
269 	pool_setlowat(&ccdbufpl, 16);
270 	pool_sethiwat(&ccdbufpl, 1024);
271 }
272 
273 int
ccdinit(struct ccddevice * ccd,char ** cpaths,struct proc * p)274 ccdinit(struct ccddevice *ccd, char **cpaths, struct proc *p)
275 {
276 	struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
277 	struct ccdcinfo *ci = NULL;
278 	size_t size;
279 	int ix, rpm;
280 	struct vnode *vp;
281 	struct vattr va;
282 	size_t minsize;
283 	int maxsecsize;
284 	struct partinfo dpart;
285 	struct ccdgeom *ccg = &cs->sc_geom;
286 	char tmppath[MAXPATHLEN];
287 	int error;
288 
289 #ifdef DEBUG
290 	if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
291 		printf("ccdinit: unit %d cflags %b\n",
292 		    ccd->ccd_unit, ccd->ccd_flags, CCDF_BITS);
293 #endif
294 
295 	cs->sc_size = 0;
296 	cs->sc_ileave = ccd->ccd_interleave;
297 	cs->sc_nccdisks = ccd->ccd_ndev;
298 	if (snprintf(cs->sc_xname, sizeof(cs->sc_xname), "ccd%d",
299 	    ccd->ccd_unit) >= sizeof(cs->sc_xname)) {
300 		printf("ccdinit: device name too long.\n");
301 		return(ENXIO);
302 	}
303 
304 	/* Allocate space for the component info. */
305 	cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
306 	    M_DEVBUF, M_WAITOK);
307 	bzero(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo));
308 
309 	/*
310 	 * Verify that each component piece exists and record
311 	 * relevant information about it.
312 	 */
313 	maxsecsize = 0;
314 	minsize = 0;
315 	rpm = 0;
316 	for (ix = 0; ix < cs->sc_nccdisks; ix++) {
317 		vp = ccd->ccd_vpp[ix];
318 		ci = &cs->sc_cinfo[ix];
319 		ci->ci_vp = vp;
320 
321 		/*
322 		 * Copy in the pathname of the component.
323 		 */
324 		bzero(tmppath, sizeof(tmppath));	/* sanity */
325 		error = copyinstr(cpaths[ix], tmppath,
326 		    MAXPATHLEN, &ci->ci_pathlen);
327 		if (error) {
328 #ifdef DEBUG
329 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
330 				printf("%s: can't copy path, error = %d\n",
331 				    cs->sc_xname, error);
332 #endif
333 			free(cs->sc_cinfo, M_DEVBUF);
334 			return (error);
335 		}
336 		ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
337 		bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
338 
339 		/*
340 		 * XXX: Cache the component's dev_t.
341 		 */
342 		if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
343 #ifdef DEBUG
344 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
345 				printf("%s: %s: getattr failed %s = %d\n",
346 				    cs->sc_xname, ci->ci_path,
347 				    "error", error);
348 #endif
349 			free(ci->ci_path, M_DEVBUF);
350 			free(cs->sc_cinfo, M_DEVBUF);
351 			return (error);
352 		}
353 		ci->ci_dev = va.va_rdev;
354 
355 		/*
356 		 * Get partition information for the component.
357 		 */
358 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
359 		    FREAD, p->p_ucred, p);
360 		if (error) {
361 #ifdef DEBUG
362 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
363 				printf("%s: %s: ioctl failed, error = %d\n",
364 				    cs->sc_xname, ci->ci_path, error);
365 #endif
366 			free(ci->ci_path, M_DEVBUF);
367 			free(cs->sc_cinfo, M_DEVBUF);
368 			return (error);
369 		}
370 		if (dpart.part->p_fstype == FS_CCD ||
371 		    dpart.part->p_fstype == FS_BSDFFS) {
372 			maxsecsize =
373 			    ((dpart.disklab->d_secsize > maxsecsize) ?
374 			    dpart.disklab->d_secsize : maxsecsize);
375 			size = dpart.part->p_size;
376 		} else {
377 #ifdef DEBUG
378 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
379 				printf("%s: %s: incorrect partition type\n",
380 				    cs->sc_xname, ci->ci_path);
381 #endif
382 			free(ci->ci_path, M_DEVBUF);
383 			free(cs->sc_cinfo, M_DEVBUF);
384 			return (EFTYPE);
385 		}
386 
387 		/*
388 		 * Calculate the size, truncating to an interleave
389 		 * boundary if necessary.
390 		 */
391 		if (cs->sc_ileave > 1)
392 			size -= size % cs->sc_ileave;
393 
394 		if (size == 0) {
395 #ifdef DEBUG
396 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
397 				printf("%s: %s: size == 0\n",
398 				    cs->sc_xname, ci->ci_path);
399 #endif
400 			free(ci->ci_path, M_DEVBUF);
401 			free(cs->sc_cinfo, M_DEVBUF);
402 			return (ENODEV);
403 		}
404 
405 		if (minsize == 0 || size < minsize)
406 			minsize = size;
407 		ci->ci_size = size;
408 		cs->sc_size += size;
409 		rpm += dpart.disklab->d_rpm;
410 	}
411 	ccg->ccg_rpm = rpm / cs->sc_nccdisks;
412 
413 	/*
414 	 * Don't allow the interleave to be smaller than
415 	 * the biggest component sector.
416 	 */
417 	if ((cs->sc_ileave > 0) &&
418 	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
419 #ifdef DEBUG
420 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
421 			printf("%s: interleave must be at least %d\n",
422 			    cs->sc_xname, (maxsecsize / DEV_BSIZE));
423 #endif
424 		free(ci->ci_path, M_DEVBUF);
425 		free(cs->sc_cinfo, M_DEVBUF);
426 		return (EINVAL);
427 	}
428 
429 	/*
430 	 * Mirroring support requires uniform interleave and
431 	 * and even number of components.
432 	 */
433 	if (ccd->ccd_flags & CCDF_MIRROR) {
434 		ccd->ccd_flags |= CCDF_UNIFORM;
435 		if (cs->sc_ileave == 0) {
436 #ifdef DEBUG
437 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
438 			printf("%s: mirroring requires interleave\n",
439 			    cs->sc_xname);
440 #endif
441 			free(ci->ci_path, M_DEVBUF);
442 			free(cs->sc_cinfo, M_DEVBUF);
443 			return (EINVAL);
444 		}
445 		if (cs->sc_nccdisks % 2) {
446 #ifdef DEBUG
447 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
448 			printf("%s: mirroring requires even # of components\n",
449 			    cs->sc_xname);
450 #endif
451 			free(ci->ci_path, M_DEVBUF);
452 			free(cs->sc_cinfo, M_DEVBUF);
453 			return (EINVAL);
454 		}
455 	}
456 
457 	/*
458 	 * If uniform interleave is desired set all sizes to that of
459 	 * the smallest component.
460 	 */
461 	ccg->ccg_ntracks = cs->sc_nccunits = cs->sc_nccdisks;
462 	if (ccd->ccd_flags & CCDF_UNIFORM) {
463 		for (ci = cs->sc_cinfo;
464 		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++)
465 			ci->ci_size = minsize;
466 
467 		if (ccd->ccd_flags & CCDF_MIRROR)
468 			cs->sc_nccunits = ccg->ccg_ntracks /= 2;
469 		cs->sc_size = ccg->ccg_ntracks * minsize;
470 	}
471 
472 	cs->sc_cflags = ccd->ccd_flags;	/* So we can find out later... */
473 
474 	/*
475 	 * Construct the interleave table.
476 	 */
477 	ccdinterleave(cs);
478 
479 	/*
480 	 * Create pseudo-geometry based on 1MB cylinders.  It's
481 	 * pretty close.
482 	 */
483 	ccg->ccg_secsize = DEV_BSIZE;
484 	ccg->ccg_nsectors = cs->sc_ileave? cs->sc_ileave :
485 	    1024 * (1024 / ccg->ccg_secsize);
486 	ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_ntracks /
487 	    ccg->ccg_nsectors;
488 
489 	cs->sc_flags |= CCDF_INITED;
490 
491 	return (0);
492 }
493 
494 void
ccdinterleave(struct ccd_softc * cs)495 ccdinterleave(struct ccd_softc *cs)
496 {
497 	struct ccdcinfo *ci, *smallci;
498 	struct ccdiinfo *ii;
499 	daddr_t bn, lbn;
500 	int ix;
501 	u_long size;
502 
503 #ifdef DEBUG
504 	if (ccddebug & CCDB_INIT)
505 		printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave);
506 #endif
507 	/*
508 	 * Allocate an interleave table.
509 	 * Chances are this is too big, but we don't care.
510 	 */
511 	size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
512 	cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK);
513 	bzero((caddr_t)cs->sc_itable, size);
514 
515 	/*
516 	 * Trivial case: no interleave (actually interleave of disk size).
517 	 * Each table entry represents a single component in its entirety.
518 	 */
519 	if (cs->sc_ileave == 0) {
520 		bn = 0;
521 		ii = cs->sc_itable;
522 
523 		for (ix = 0; ix < cs->sc_nccdisks; ix++) {
524 			/* Allocate space for ii_index. */
525 			ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
526 			ii->ii_ndisk = 1;
527 			ii->ii_startblk = bn;
528 			ii->ii_startoff = 0;
529 			ii->ii_index[0] = ix;
530 			bn += cs->sc_cinfo[ix].ci_size;
531 			ii++;
532 		}
533 		ii->ii_ndisk = 0;
534 #ifdef DEBUG
535 		if (ccddebug & CCDB_INIT)
536 			printiinfo(cs->sc_itable);
537 #endif
538 		return;
539 	}
540 
541 	/*
542 	 * The following isn't fast or pretty; it doesn't have to be.
543 	 */
544 	size = 0;
545 	bn = lbn = 0;
546 	for (ii = cs->sc_itable; ; ii++) {
547 		/* Allocate space for ii_index. */
548 		ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
549 		    M_DEVBUF, M_WAITOK);
550 
551 		/*
552 		 * Locate the smallest of the remaining components
553 		 */
554 		smallci = NULL;
555 		for (ci = cs->sc_cinfo;
556 		    ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++)
557 			if (ci->ci_size > size &&
558 			    (smallci == NULL ||
559 			    ci->ci_size < smallci->ci_size))
560 				smallci = ci;
561 
562 		/*
563 		 * Nobody left, all done
564 		 */
565 		if (smallci == NULL) {
566 			ii->ii_ndisk = 0;
567 			break;
568 		}
569 
570 		/*
571 		 * Record starting logical block and component offset
572 		 */
573 		ii->ii_startblk = bn / cs->sc_ileave;
574 		ii->ii_startoff = lbn;
575 
576 		/*
577 		 * Determine how many disks take part in this interleave
578 		 * and record their indices.
579 		 */
580 		ix = 0;
581 		for (ci = cs->sc_cinfo;
582 		    ci < &cs->sc_cinfo[cs->sc_nccunits]; ci++)
583 			if (ci->ci_size >= smallci->ci_size)
584 				ii->ii_index[ix++] = ci - cs->sc_cinfo;
585 		ii->ii_ndisk = ix;
586 		bn += ix * (smallci->ci_size - size);
587 		lbn = smallci->ci_size / cs->sc_ileave;
588 		size = smallci->ci_size;
589 	}
590 #ifdef DEBUG
591 	if (ccddebug & CCDB_INIT)
592 		printiinfo(cs->sc_itable);
593 #endif
594 }
595 
596 /* ARGSUSED */
597 int
ccdopen(dev_t dev,int flags,int fmt,struct proc * p)598 ccdopen(dev_t dev, int flags, int fmt, struct proc *p)
599 {
600 	int unit = ccdunit(dev);
601 	struct ccd_softc *cs;
602 	struct disklabel *lp;
603 	int error = 0, part, pmask;
604 
605 #ifdef DEBUG
606 	if (ccddebug & CCDB_FOLLOW)
607 		printf("ccdopen(%x, %x)\n", dev, flags);
608 #endif
609 	if (unit >= numccd)
610 		return (ENXIO);
611 	cs = &ccd_softc[unit];
612 
613 	if ((error = ccdlock(cs)) != 0)
614 		return (error);
615 
616 	lp = cs->sc_dkdev.dk_label;
617 
618 	part = DISKPART(dev);
619 	pmask = (1 << part);
620 
621 	/*
622 	 * If we're initialized, check to see if there are any other
623 	 * open partitions.  If not, then it's safe to update
624 	 * the in-core disklabel.
625 	 */
626 	if ((cs->sc_flags & CCDF_INITED) && (cs->sc_dkdev.dk_openmask == 0))
627 		ccdgetdisklabel(dev, cs, lp, cs->sc_dkdev.dk_cpulabel, 0);
628 
629 	/* Check that the partition exists. */
630 	if (part != RAW_PART) {
631 		if (((cs->sc_flags & CCDF_INITED) == 0) ||
632 		    ((part >= lp->d_npartitions) ||
633 		    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
634 			error = ENXIO;
635 			goto done;
636 		}
637 	}
638 
639 	/* Prevent our unit from being unconfigured while open. */
640 	switch (fmt) {
641 	case S_IFCHR:
642 		cs->sc_dkdev.dk_copenmask |= pmask;
643 		break;
644 
645 	case S_IFBLK:
646 		cs->sc_dkdev.dk_bopenmask |= pmask;
647 		break;
648 	}
649 	cs->sc_dkdev.dk_openmask =
650 	    cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask;
651 
652  done:
653 	ccdunlock(cs);
654 	return (error);
655 }
656 
657 /* ARGSUSED */
658 int
ccdclose(dev_t dev,int flags,int fmt,struct proc * p)659 ccdclose(dev_t dev, int flags, int fmt, struct proc *p)
660 {
661 	int unit = ccdunit(dev);
662 	struct ccd_softc *cs;
663 	int error = 0, part;
664 
665 #ifdef DEBUG
666 	if (ccddebug & CCDB_FOLLOW)
667 		printf("ccdclose(%x, %x)\n", dev, flags);
668 #endif
669 
670 	if (unit >= numccd)
671 		return (ENXIO);
672 	cs = &ccd_softc[unit];
673 
674 	if ((error = ccdlock(cs)) != 0)
675 		return (error);
676 
677 	part = DISKPART(dev);
678 
679 	/* ...that much closer to allowing unconfiguration... */
680 	switch (fmt) {
681 	case S_IFCHR:
682 		cs->sc_dkdev.dk_copenmask &= ~(1 << part);
683 		break;
684 
685 	case S_IFBLK:
686 		cs->sc_dkdev.dk_bopenmask &= ~(1 << part);
687 		break;
688 	}
689 	cs->sc_dkdev.dk_openmask =
690 	    cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask;
691 
692 	ccdunlock(cs);
693 	return (0);
694 }
695 
696 void
ccdstrategy(struct buf * bp)697 ccdstrategy(struct buf *bp)
698 {
699 	int unit = ccdunit(bp->b_dev);
700 	struct ccd_softc *cs = &ccd_softc[unit];
701 	int s;
702 	int wlabel;
703 	struct disklabel *lp;
704 
705 #ifdef DEBUG
706 	if (ccddebug & CCDB_FOLLOW)
707 		printf("ccdstrategy(%p): unit %d\n", bp, unit);
708 #endif
709 	if ((cs->sc_flags & CCDF_INITED) == 0) {
710 		bp->b_error = ENXIO;
711 		bp->b_resid = bp->b_bcount;
712 		bp->b_flags |= B_ERROR;
713 		goto done;
714 	}
715 
716 	/* If it's a nil transfer, wake up the top half now. */
717 	if (bp->b_bcount == 0)
718 		goto done;
719 
720 	lp = cs->sc_dkdev.dk_label;
721 
722 	/*
723 	 * Do bounds checking and adjust transfer.  If there's an
724 	 * error, the bounds check will flag that for us.
725 	 */
726 	wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
727 	if (DISKPART(bp->b_dev) != RAW_PART &&
728 	    bounds_check_with_label(bp, lp, cs->sc_dkdev.dk_cpulabel,
729 	    wlabel) <= 0)
730 		goto done;
731 
732 	bp->b_resid = bp->b_bcount;
733 
734 	/*
735 	 * "Start" the unit.
736 	 */
737 	s = splbio();
738 	ccdstart(cs, bp);
739 	splx(s);
740 	return;
741 done:
742 	s = splbio();
743 	biodone(bp);
744 	splx(s);
745 }
746 
747 void
ccdstart(struct ccd_softc * cs,struct buf * bp)748 ccdstart(struct ccd_softc *cs, struct buf *bp)
749 {
750 	long bcount, rcount;
751 	struct ccdbuf **cbpp, *cbp;
752 	caddr_t addr;
753 	daddr_t bn;
754 	struct partition *pp;
755 	int i, old_io = cs->sc_cflags & CCDF_OLD;
756 
757 #ifdef DEBUG
758 	if (ccddebug & CCDB_FOLLOW)
759 		printf("ccdstart(%p, %p, %s)\n", cs, bp,
760 		    bp->b_flags & B_READ? "read" : "write");
761 #endif
762 
763 	/* Instrumentation. */
764 	disk_busy(&cs->sc_dkdev);
765 
766 	/*
767 	 * Translate the partition-relative block number to an absolute.
768 	 */
769 	bn = bp->b_blkno;
770 	if (DISKPART(bp->b_dev) != RAW_PART) {
771 		pp = &cs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
772 		bn += pp->p_offset;
773 	}
774 
775 	/*
776 	 * Allocate component buffers
777 	 */
778 	cbpp = malloc(2 * cs->sc_nccdisks * sizeof(struct ccdbuf *), M_DEVBUF,
779 	    M_WAITOK);
780 	bzero(cbpp, 2 * cs->sc_nccdisks * sizeof(struct ccdbuf *));
781 	addr = bp->b_data;
782 	old_io = old_io || ((vaddr_t)addr & PAGE_MASK);
783 	for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
784 		rcount = ccdbuffer(cs, bp, bn, addr, bcount, cbpp, old_io);
785 
786 		/*
787 		 * This is the old, slower, but less restrictive, mode of
788 		 * operation.  It allows interleaves which are not multiples
789 		 * of PAGE_SIZE and mirroring.
790 		 */
791 		if (old_io) {
792 			if ((cbpp[0]->cb_buf.b_flags & B_READ) == 0)
793 				cbpp[0]->cb_buf.b_vp->v_numoutput++;
794 			VOP_STRATEGY(&cbpp[0]->cb_buf);
795 
796 			if ((cs->sc_cflags & CCDF_MIRROR) &&
797 			    ((cbpp[0]->cb_buf.b_flags & B_READ) == 0)) {
798 				cbpp[1]->cb_buf.b_vp->v_numoutput++;
799 				VOP_STRATEGY(&cbpp[1]->cb_buf);
800 			}
801 		}
802 
803 		bn += btodb(rcount);
804 		addr += rcount;
805 	}
806 
807 	/* The new leaner mode of operation */
808 	if (!old_io)
809 		/*
810 		 * Fire off the requests
811 		 */
812 		for (i = 0; i < 2*cs->sc_nccdisks; i++) {
813 			cbp = cbpp[i];
814 			if (cbp) {
815 				if ((cbp->cb_buf.b_flags & B_READ) == 0)
816 					cbp->cb_buf.b_vp->v_numoutput++;
817 				VOP_STRATEGY(&cbp->cb_buf);
818 			}
819 		}
820 	free(cbpp, M_DEVBUF);
821 }
822 
823 /*
824  * Build a component buffer header.
825  */
826 long
ccdbuffer(struct ccd_softc * cs,struct buf * bp,daddr_t bn,caddr_t addr,long bcount,struct ccdbuf ** cbpp,int old_io)827 ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, caddr_t addr,
828     long bcount, struct ccdbuf **cbpp, int old_io)
829 {
830 	struct ccdcinfo *ci, *ci2 = NULL;
831 	struct ccdbuf *cbp;
832 	daddr_t cbn, cboff, sblk;
833 	int ccdisk, ccdisk2, off;
834 	long old_bcount, cnt;
835 	struct ccdiinfo *ii;
836 	struct buf *nbp;
837 
838 #ifdef DEBUG
839 	if (ccddebug & CCDB_IO)
840 		printf("ccdbuffer(%p, %p, %d, %p, %ld, %p)\n",
841 		    cs, bp, bn, addr, bcount, cbpp);
842 #endif
843 
844 	/*
845 	 * Determine which component bn falls in.
846 	 */
847 	cbn = bn;
848 	cboff = 0;
849 
850 	if (cs->sc_ileave == 0) {
851 		/*
852 		 * Serially concatenated
853 		 */
854 		sblk = 0;
855 		for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk];
856 		    cbn >= sblk + ci->ci_size;
857 		    ccdisk++, ci = &cs->sc_cinfo[ccdisk])
858 			sblk += ci->ci_size;
859 		cbn -= sblk;
860 	} else {
861 		/*
862 		 * Interleaved
863 		 */
864 		cboff = cbn % cs->sc_ileave;
865 		cbn /= cs->sc_ileave;
866 		for (ii = cs->sc_itable; ii->ii_ndisk; ii++)
867 			if (ii->ii_startblk > cbn)
868 				break;
869 		ii--;
870 		off = cbn - ii->ii_startblk;
871 		if (ii->ii_ndisk == 1) {
872 			ccdisk = ii->ii_index[0];
873 			cbn = ii->ii_startoff + off;
874 		} else {
875 			ccdisk = ii->ii_index[off % ii->ii_ndisk];
876 			cbn = ii->ii_startoff + off / ii->ii_ndisk;
877 		}
878 		if (cs->sc_cflags & CCDF_MIRROR) {
879 			/* Mirrored data */
880 			ccdisk2 = ccdisk + ii->ii_ndisk;
881 			ci2 = &cs->sc_cinfo[ccdisk2];
882 			/* spread the read over both parts */
883 			if (bp->b_flags & B_READ &&
884 			    bcount > bp->b_bcount / 2 &&
885 			    (!(ci2->ci_flags & CCIF_FAILED) ||
886 			      ci->ci_flags & CCIF_FAILED))
887 				ccdisk = ccdisk2;
888 		}
889 		cbn *= cs->sc_ileave;
890 		ci = &cs->sc_cinfo[ccdisk];
891 #ifdef DEBUG
892 		if (ccddebug & CCDB_IO)
893 			printf("ccdisk %d cbn %d ci %p ci2 %p\n",
894 			    ccdisk, cbn, ci, ci2);
895 #endif
896 	}
897 
898 	/* Limit the operation at next component border */
899 	if (cs->sc_ileave == 0)
900 		cnt = dbtob(ci->ci_size - cbn);
901 	else
902 		cnt = dbtob(cs->sc_ileave - cboff);
903 	if (cnt < bcount)
904 		bcount = cnt;
905 
906 	if (old_io || cbpp[ccdisk] == NULL) {
907 		/*
908 		 * Setup new component buffer.
909 		 */
910 		cbp = cbpp[old_io ? 0 : ccdisk] = getccdbuf();
911 		cbp->cb_flags = old_io ? CBF_OLD : 0;
912 		nbp = &cbp->cb_buf;
913 		nbp->b_flags = bp->b_flags | B_CALL;
914 		nbp->b_iodone = ccdiodone;
915 		nbp->b_proc = bp->b_proc;
916 		nbp->b_dev = ci->ci_dev;		/* XXX */
917 		nbp->b_blkno = cbn + cboff;
918 		nbp->b_vp = ci->ci_vp;
919 		nbp->b_bcount = bcount;
920 		LIST_INIT(&nbp->b_dep);
921 
922 		/*
923 		 * context for ccdiodone
924 		 */
925 		cbp->cb_obp = bp;
926 		cbp->cb_sc = cs;
927 		cbp->cb_comp = ccdisk;
928 
929 		/* Deal with the different algorithms */
930 		if (old_io)
931 			nbp->b_data = addr;
932 		else {
933 			do {
934 				nbp->b_data = (caddr_t) uvm_km_valloc(ccdmap,
935 				    bp->b_bcount);
936 
937 				/*
938 				 * XXX Instead of sleeping, we might revert
939 				 * XXX to old I/O policy for this buffer set.
940 				 */
941 				if (nbp->b_data == NULL) {
942 					ccd_need_kvm++;
943 					tsleep(ccdmap, PRIBIO, "ccdbuffer", 0);
944 				}
945 			} while (nbp->b_data == NULL);
946 			cbp->cb_sgcnt = 0;
947 			old_bcount = 0;
948 		}
949 
950 		/*
951 		 * Mirrors have an additional write operation that is nearly
952 		 * identical to the first.
953 		 */
954 		if ((cs->sc_cflags & CCDF_MIRROR) &&
955 		    !(ci2->ci_flags & CCIF_FAILED) &&
956 		    ((cbp->cb_buf.b_flags & B_READ) == 0)) {
957 			struct ccdbuf *cbp2;
958 			cbpp[old_io? 1 : ccdisk2] = cbp2 = getccdbuf();
959 			*cbp2 = *cbp;
960 			cbp2->cb_flags = CBF_MIRROR | (old_io ? CBF_OLD : 0);
961 			cbp2->cb_buf.b_dev = ci2->ci_dev;	/* XXX */
962 			cbp2->cb_buf.b_vp = ci2->ci_vp;
963 			LIST_INIT(&cbp2->cb_buf.b_dep);
964 			cbp2->cb_comp = ccdisk2;
965 			cbp2->cb_dep = cbp;
966 			cbp->cb_dep = cbp2;
967 		}
968 	} else {
969 		/*
970 		 * Continue on an already started component buffer
971 		 */
972 		cbp = cbpp[ccdisk];
973 		nbp = &cbp->cb_buf;
974 
975 		/*
976 		 * Map the new pages at the end of the buffer.
977 		 */
978 		old_bcount = nbp->b_bcount;
979 		nbp->b_bcount += bcount;
980 	}
981 
982 	if (!old_io) {
983 #ifdef DEBUG
984 		if (ccddebug & CCDB_IO)
985 			printf("ccdbuffer: sg %d (%p/%x) off %x\n",
986 			    cbp->cb_sgcnt, addr, bcount, old_bcount);
987 #endif
988 		pagemove(addr, nbp->b_data + old_bcount, round_page(bcount));
989 		nbp->b_bufsize += round_page(bcount);
990 		cbp->cb_sg[cbp->cb_sgcnt].cs_sgaddr = addr;
991 		cbp->cb_sg[cbp->cb_sgcnt].cs_sglen = bcount;
992 		cbp->cb_sgcnt++;
993 	}
994 
995 #ifdef DEBUG
996 	if (ccddebug & CCDB_IO)
997 		printf(" dev %x(u%d): cbp %p bn %d addr %p bcnt %ld\n",
998 		    ci->ci_dev, ci-cs->sc_cinfo, cbp, bp->b_blkno,
999 		    bp->b_data, bp->b_bcount);
1000 #endif
1001 
1002 	return (bcount);
1003 }
1004 
1005 void
ccdintr(struct ccd_softc * cs,struct buf * bp)1006 ccdintr(struct ccd_softc *cs, struct buf *bp)
1007 {
1008 
1009 	splassert(IPL_BIO);
1010 
1011 #ifdef DEBUG
1012 	if (ccddebug & CCDB_FOLLOW)
1013 		printf("ccdintr(%p, %p)\n", cs, bp);
1014 #endif
1015 	/*
1016 	 * Request is done for better or worse, wakeup the top half.
1017 	 */
1018 	if (bp->b_flags & B_ERROR)
1019 		bp->b_resid = bp->b_bcount;
1020 	disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid),
1021 	    (bp->b_flags & B_READ));
1022 	biodone(bp);
1023 }
1024 
1025 /*
1026  * Called at interrupt time.
1027  * Mark the component as done and if all components are done,
1028  * take a ccd interrupt.
1029  */
1030 void
ccdiodone(struct buf * vbp)1031 ccdiodone(struct buf *vbp)
1032 {
1033 	struct ccdbuf *cbp = (struct ccdbuf *)vbp;
1034 	struct buf *bp = cbp->cb_obp;
1035 	struct ccd_softc *cs = cbp->cb_sc;
1036 	int old_io = cbp->cb_flags & CBF_OLD;
1037 	int i;
1038 	long count = bp->b_bcount, off;
1039 	char *comptype;
1040 
1041 	splassert(IPL_BIO);
1042 
1043 #ifdef DEBUG
1044 	if (ccddebug & CCDB_FOLLOW)
1045 		printf("ccdiodone(%p)\n", cbp);
1046 	if (ccddebug & CCDB_IO) {
1047 		if (cbp->cb_flags & CBF_MIRROR)
1048 			printf("ccdiodone: mirror component\n");
1049 		else
1050 			printf("ccdiodone: bp %p bcount %ld resid %ld\n",
1051 			    bp, bp->b_bcount, bp->b_resid);
1052 		printf(" dev %x(u%d), cbp %p bn %d addr %p bcnt %ld\n",
1053 		    vbp->b_dev, cbp->cb_comp, cbp, vbp->b_blkno,
1054 		    vbp->b_data, vbp->b_bcount);
1055 	}
1056 #endif
1057 
1058 	if (vbp->b_flags & B_ERROR) {
1059 		cs->sc_cinfo[cbp->cb_comp].ci_flags |= CCIF_FAILED;
1060 		if (cbp->cb_flags & CBF_MIRROR)
1061 			comptype = " (mirror)";
1062 		else {
1063 			bp->b_flags |= B_ERROR;
1064 			bp->b_error = vbp->b_error ?
1065 			    vbp->b_error : EIO;
1066 			comptype = "";
1067 		}
1068 
1069 		printf("%s: error %d on component %d%s\n",
1070 		    cs->sc_xname, bp->b_error, cbp->cb_comp, comptype);
1071 	}
1072 	cbp->cb_flags |= CBF_DONE;
1073 
1074 	if (cbp->cb_dep &&
1075 	    (cbp->cb_dep->cb_flags & CBF_DONE) != (cbp->cb_flags & CBF_DONE))
1076 		return;
1077 
1078 	if (cbp->cb_flags & CBF_MIRROR &&
1079 	    !(cbp->cb_dep->cb_flags & CBF_MIRROR)) {
1080 		cbp = cbp->cb_dep;
1081 		vbp = (struct buf *)cbp;
1082 	}
1083 
1084 	if (!old_io) {
1085 		/*
1086 		 * Gather all the pieces and put them where they should be.
1087 		 */
1088 		for (i = 0, off = 0; i < cbp->cb_sgcnt; i++) {
1089 #ifdef DEBUG
1090 			if (ccddebug & CCDB_IO)
1091 				printf("ccdiodone: sg %d (%p/%x) off %x\n", i,
1092 				    cbp->cb_sg[i].cs_sgaddr,
1093 				    cbp->cb_sg[i].cs_sglen, off);
1094 #endif
1095 			pagemove(vbp->b_data + off, cbp->cb_sg[i].cs_sgaddr,
1096 			    round_page(cbp->cb_sg[i].cs_sglen));
1097 			off += cbp->cb_sg[i].cs_sglen;
1098 		}
1099 
1100 		uvm_km_free(ccdmap, (vaddr_t)vbp->b_data, count);
1101 		if (ccd_need_kvm) {
1102 			ccd_need_kvm = 0;
1103 			wakeup(ccdmap);
1104 		}
1105 	}
1106 	count = vbp->b_bcount;
1107 
1108 	putccdbuf(cbp);
1109 	if (cbp->cb_dep)
1110 		putccdbuf(cbp->cb_dep);
1111 
1112 	/*
1113 	 * If all done, "interrupt".
1114 	 *
1115 	 * Note that mirror component buffers aren't counted against
1116 	 * the original I/O buffer.
1117 	 */
1118 	if (count > bp->b_resid)
1119 		panic("ccdiodone: count");
1120 	bp->b_resid -= count;
1121 	if (bp->b_resid == 0)
1122 		ccdintr(cs, bp);
1123 }
1124 
1125 /* ARGSUSED */
1126 int
ccdread(dev_t dev,struct uio * uio,int flags)1127 ccdread(dev_t dev, struct uio *uio, int flags)
1128 {
1129 	int unit = ccdunit(dev);
1130 	struct ccd_softc *cs;
1131 
1132 #ifdef DEBUG
1133 	if (ccddebug & CCDB_FOLLOW)
1134 		printf("ccdread(%x, %p)\n", dev, uio);
1135 #endif
1136 	if (unit >= numccd)
1137 		return (ENXIO);
1138 	cs = &ccd_softc[unit];
1139 
1140 	if ((cs->sc_flags & CCDF_INITED) == 0)
1141 		return (ENXIO);
1142 
1143 	/*
1144 	 * XXX: It's not clear that using minphys() is completely safe,
1145 	 * in particular, for raw I/O.  Underlying devices might have some
1146 	 * non-obvious limits, because of the copy to user-space.
1147 	 */
1148 	return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio));
1149 }
1150 
1151 /* ARGSUSED */
1152 int
ccdwrite(dev_t dev,struct uio * uio,int flags)1153 ccdwrite(dev_t dev, struct uio *uio, int flags)
1154 {
1155 	int unit = ccdunit(dev);
1156 	struct ccd_softc *cs;
1157 
1158 #ifdef DEBUG
1159 	if (ccddebug & CCDB_FOLLOW)
1160 		printf("ccdwrite(%x, %p)\n", dev, uio);
1161 #endif
1162 	if (unit >= numccd)
1163 		return (ENXIO);
1164 	cs = &ccd_softc[unit];
1165 
1166 	if ((cs->sc_flags & CCDF_INITED) == 0)
1167 		return (ENXIO);
1168 
1169 	/*
1170 	 * XXX: It's not clear that using minphys() is completely safe,
1171 	 * in particular, for raw I/O.  Underlying devices might have some
1172 	 * non-obvious limits, because of the copy to user-space.
1173 	 */
1174 	return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio));
1175 }
1176 
1177 int
ccdioctl(dev_t dev,u_long cmd,caddr_t data,int flag,struct proc * p)1178 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
1179 {
1180 	int unit = ccdunit(dev);
1181 	int i, j, lookedup = 0, error = 0;
1182 	int part, pmask, s;
1183 	struct ccd_softc *cs;
1184 	struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1185 	struct ccddevice ccd;
1186 	char **cpp;
1187 	struct vnode **vpp;
1188 	vaddr_t min, max;
1189 
1190 	if (unit >= numccd)
1191 		return (ENXIO);
1192 
1193 	cs = &ccd_softc[unit];
1194 	if (cmd != CCDIOCSET && !(cs->sc_flags & CCDF_INITED))
1195 		return (ENXIO);
1196 
1197 	/* access control */
1198 	switch (cmd) {
1199 	case CCDIOCSET:
1200 	case CCDIOCCLR:
1201 	case DIOCWDINFO:
1202 	case DIOCSDINFO:
1203 	case DIOCWLABEL:
1204 		if ((flag & FWRITE) == 0)
1205 			return (EBADF);
1206 	}
1207 
1208 	bzero(&ccd, sizeof(ccd));
1209 	switch (cmd) {
1210 	case CCDIOCSET:
1211 		if (cs->sc_flags & CCDF_INITED)
1212 			return (EBUSY);
1213 
1214 		if ((error = ccdlock(cs)) != 0)
1215 			return (error);
1216 
1217 		/* Fill in some important bits. */
1218 		ccd.ccd_unit = unit;
1219 		ccd.ccd_interleave = ccio->ccio_ileave;
1220 		ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1221 
1222 		/* XXX the new code is unstable still */
1223 		ccd.ccd_flags |= CCDF_OLD;
1224 
1225 		/*
1226 		 * Interleaving which is not a multiple of the click size
1227 		 * must use the old I/O code (by design)
1228 		 */
1229 		if (ccio->ccio_ileave % (PAGE_SIZE / DEV_BSIZE) != 0)
1230 			ccd.ccd_flags |= CCDF_OLD;
1231 
1232 		/*
1233 		 * Allocate space for and copy in the array of
1234 		 * componet pathnames and device numbers.
1235 		 */
1236 		cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1237 		    M_DEVBUF, M_WAITOK);
1238 		vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1239 		    M_DEVBUF, M_WAITOK);
1240 
1241 		error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1242 		    ccio->ccio_ndisks * sizeof(char **));
1243 		if (error) {
1244 			free(vpp, M_DEVBUF);
1245 			free(cpp, M_DEVBUF);
1246 			ccdunlock(cs);
1247 			return (error);
1248 		}
1249 
1250 #ifdef DEBUG
1251 		if (ccddebug & CCDB_INIT)
1252 			for (i = 0; i < ccio->ccio_ndisks; ++i)
1253 				printf("ccdioctl: component %d: %p\n",
1254 				    i, cpp[i]);
1255 #endif
1256 
1257 		for (i = 0; i < ccio->ccio_ndisks; ++i) {
1258 #ifdef DEBUG
1259 			if (ccddebug & CCDB_INIT)
1260 				printf("ccdioctl: lookedup = %d\n", lookedup);
1261 #endif
1262 			if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) {
1263 				for (j = 0; j < lookedup; ++j)
1264 					(void)vn_close(vpp[j], FREAD|FWRITE,
1265 					    p->p_ucred, p);
1266 				free(vpp, M_DEVBUF);
1267 				free(cpp, M_DEVBUF);
1268 				ccdunlock(cs);
1269 				return (error);
1270 			}
1271 			++lookedup;
1272 		}
1273 		ccd.ccd_cpp = cpp;
1274 		ccd.ccd_vpp = vpp;
1275 		ccd.ccd_ndev = ccio->ccio_ndisks;
1276 
1277 		/*
1278 		 * Initialize the ccd.  Fills in the softc for us.
1279 		 */
1280 		if ((error = ccdinit(&ccd, cpp, p)) != 0) {
1281 			for (j = 0; j < lookedup; ++j)
1282 				(void)vn_close(vpp[j], FREAD|FWRITE,
1283 				    p->p_ucred, p);
1284 			bzero(&ccd_softc[unit], sizeof(struct ccd_softc));
1285 			free(vpp, M_DEVBUF);
1286 			free(cpp, M_DEVBUF);
1287 			ccdunlock(cs);
1288 			return (error);
1289 		}
1290 
1291 		/*
1292 		 * The ccd has been successfully initialized, so
1293 		 * we can place it into the array.  Don't try to
1294 		 * read the disklabel until the disk has been attached,
1295 		 * because space for the disklabel is allocated
1296 		 * in disk_attach();
1297 		 */
1298 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1299 		ccio->ccio_unit = unit;
1300 		ccio->ccio_size = cs->sc_size;
1301 
1302 		/*
1303 		 * If we use the optimized protocol we need some kvm space
1304 		 * for the component buffers.  Allocate it here.
1305 		 *
1306 		 * XXX I'd like to have a more dynamic way of acquiring kvm
1307 		 * XXX space, but that is problematic as we are not allowed
1308 		 * XXX to lock the kernel_map in interrupt context.  It is
1309 		 * XXX doable via a freelist implementation though.
1310 		 */
1311 		if (!ccdmap && !(ccd.ccd_flags & CCDF_OLD)) {
1312 			min = vm_map_min(kernel_map);
1313 			ccdmap = uvm_km_suballoc(kernel_map, &min, &max,
1314 			    CCD_CLUSTERS * MAXBSIZE, VM_MAP_INTRSAFE,
1315 			    FALSE, NULL);
1316 		}
1317 
1318 		/* Attach the disk. */
1319 		cs->sc_dkdev.dk_name = cs->sc_xname;
1320 		disk_attach(&cs->sc_dkdev);
1321 
1322 		/* Try and read the disklabel. */
1323 		ccdgetdisklabel(dev, cs, cs->sc_dkdev.dk_label,
1324 		    cs->sc_dkdev.dk_cpulabel, 0);
1325 
1326 		ccdunlock(cs);
1327 		break;
1328 
1329 	case CCDIOCCLR:
1330 		if ((error = ccdlock(cs)) != 0)
1331 			return (error);
1332 
1333 		/*
1334 		 * Don't unconfigure if any other partitions are open
1335 		 * or if both the character and block flavors of this
1336 		 * partition are open.
1337 		 */
1338 		part = DISKPART(dev);
1339 		pmask = (1 << part);
1340 		if ((cs->sc_dkdev.dk_openmask & ~pmask) ||
1341 		    ((cs->sc_dkdev.dk_bopenmask & pmask) &&
1342 		    (cs->sc_dkdev.dk_copenmask & pmask))) {
1343 			ccdunlock(cs);
1344 			return (EBUSY);
1345 		}
1346 
1347 		/*
1348 		 * Free ccd_softc information and clear entry.
1349 		 */
1350 
1351 		/* Close the components and free their pathnames. */
1352 		for (i = 0; i < cs->sc_nccdisks; ++i) {
1353 			/*
1354 			 * XXX: this close could potentially fail and
1355 			 * cause Bad Things.  Maybe we need to force
1356 			 * the close to happen?
1357 			 */
1358 #ifdef DEBUG
1359 			if (ccddebug & CCDB_VNODE)
1360 				vprint("CCDIOCCLR: vnode info",
1361 				    cs->sc_cinfo[i].ci_vp);
1362 #endif
1363 			(void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE,
1364 			    p->p_ucred, p);
1365 			free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1366 		}
1367 
1368 		/* Free interleave index. */
1369 		for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1370 			free(cs->sc_itable[i].ii_index, M_DEVBUF);
1371 
1372 		/* Free component info and interleave table. */
1373 		free(cs->sc_cinfo, M_DEVBUF);
1374 		free(cs->sc_itable, M_DEVBUF);
1375 		cs->sc_flags &= ~CCDF_INITED;
1376 
1377 		/*
1378 		 * Free ccddevice information and clear entry.
1379 		 */
1380 		free(ccddevs[unit].ccd_cpp, M_DEVBUF);
1381 		free(ccddevs[unit].ccd_vpp, M_DEVBUF);
1382 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1383 
1384 		/* Detatch the disk. */
1385 		disk_detach(&cs->sc_dkdev);
1386 
1387 		/* This must be atomic. */
1388 		s = splhigh();
1389 		ccdunlock(cs);
1390 		bzero(cs, sizeof(struct ccd_softc));
1391 		splx(s);
1392 		break;
1393 
1394 	case DIOCGPDINFO: {
1395 		struct cpu_disklabel osdep;
1396 
1397 		if ((error = ccdlock(cs)) != 0)
1398 			return (error);
1399 
1400 		ccdgetdisklabel(dev, cs, (struct disklabel *)data,
1401 		    &osdep, 1);
1402 
1403 		ccdunlock(cs);
1404 		break;
1405 	}
1406 
1407 	case DIOCGDINFO:
1408 		*(struct disklabel *)data = *(cs->sc_dkdev.dk_label);
1409 		break;
1410 
1411 	case DIOCGPART:
1412 		((struct partinfo *)data)->disklab = cs->sc_dkdev.dk_label;
1413 		((struct partinfo *)data)->part =
1414 		    &cs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1415 		break;
1416 
1417 	case DIOCWDINFO:
1418 	case DIOCSDINFO:
1419 		if ((error = ccdlock(cs)) != 0)
1420 			return (error);
1421 
1422 		cs->sc_flags |= CCDF_LABELLING;
1423 
1424 		error = setdisklabel(cs->sc_dkdev.dk_label,
1425 		    (struct disklabel *)data, 0, cs->sc_dkdev.dk_cpulabel);
1426 		if (error == 0) {
1427 			if (cmd == DIOCWDINFO)
1428 				error = writedisklabel(CCDLABELDEV(dev),
1429 				    ccdstrategy, cs->sc_dkdev.dk_label,
1430 				    cs->sc_dkdev.dk_cpulabel);
1431 		}
1432 
1433 		cs->sc_flags &= ~CCDF_LABELLING;
1434 
1435 		ccdunlock(cs);
1436 
1437 		if (error)
1438 			return (error);
1439 		break;
1440 
1441 	case DIOCWLABEL:
1442 		if (*(int *)data != 0)
1443 			cs->sc_flags |= CCDF_WLABEL;
1444 		else
1445 			cs->sc_flags &= ~CCDF_WLABEL;
1446 		break;
1447 
1448 	default:
1449 		return (ENOTTY);
1450 	}
1451 
1452 	return (0);
1453 }
1454 
1455 int
ccdsize(dev_t dev)1456 ccdsize(dev_t dev)
1457 {
1458 	struct ccd_softc *cs;
1459 	int part, size, unit;
1460 
1461 	unit = ccdunit(dev);
1462 	if (unit >= numccd)
1463 		return (-1);
1464 
1465 	cs = &ccd_softc[unit];
1466 	if ((cs->sc_flags & CCDF_INITED) == 0)
1467 		return (-1);
1468 
1469 	if (ccdopen(dev, 0, S_IFBLK, curproc))
1470 		return (-1);
1471 
1472 	part = DISKPART(dev);
1473 	if (cs->sc_dkdev.dk_label->d_partitions[part].p_fstype != FS_SWAP)
1474 		size = -1;
1475 	else
1476 		size = cs->sc_dkdev.dk_label->d_partitions[part].p_size;
1477 
1478 	if (ccdclose(dev, 0, S_IFBLK, curproc))
1479 		return (-1);
1480 
1481 	return (size);
1482 }
1483 
1484 int
ccddump(dev_t dev,daddr_t blkno,caddr_t va,size_t size)1485 ccddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
1486 {
1487 
1488 	/* Not implemented. */
1489 	return ENXIO;
1490 }
1491 
1492 /*
1493  * Lookup the provided name in the filesystem.  If the file exists,
1494  * is a valid block device, and isn't being used by anyone else,
1495  * set *vpp to the file's vnode.
1496  */
1497 int
ccdlookup(char * path,struct proc * p,struct vnode ** vpp)1498 ccdlookup(char *path, struct proc *p, struct vnode **vpp)
1499 {
1500 	struct nameidata nd;
1501 	struct vnode *vp;
1502 	struct vattr va;
1503 	int error;
1504 
1505 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p);
1506 	if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
1507 #ifdef DEBUG
1508 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
1509 			printf("ccdlookup: vn_open error = %d\n", error);
1510 #endif
1511 		return (error);
1512 	}
1513 	vp = nd.ni_vp;
1514 
1515 	if (vp->v_usecount > 1) {
1516 		VOP_UNLOCK(vp, 0, p);
1517 		(void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
1518 		return (EBUSY);
1519 	}
1520 
1521 	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1522 #ifdef DEBUG
1523 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
1524 			printf("ccdlookup: getattr error = %d\n", error);
1525 #endif
1526 		VOP_UNLOCK(vp, 0, p);
1527 		(void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
1528 		return (error);
1529 	}
1530 
1531 	/* XXX: eventually we should handle VREG, too. */
1532 	if (va.va_type != VBLK) {
1533 		VOP_UNLOCK(vp, 0, p);
1534 		(void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
1535 		return (ENOTBLK);
1536 	}
1537 
1538 #ifdef DEBUG
1539 	if (ccddebug & CCDB_VNODE)
1540 		vprint("ccdlookup: vnode info", vp);
1541 #endif
1542 
1543 	VOP_UNLOCK(vp, 0, p);
1544 	*vpp = vp;
1545 	return (0);
1546 }
1547 
1548 /*
1549  * Read the disklabel from the ccd.  If one is not present, fake one
1550  * up.
1551  */
1552 void
ccdgetdisklabel(dev_t dev,struct ccd_softc * cs,struct disklabel * lp,struct cpu_disklabel * clp,int spoofonly)1553 ccdgetdisklabel(dev_t dev, struct ccd_softc *cs, struct disklabel *lp,
1554     struct cpu_disklabel *clp, int spoofonly)
1555 {
1556 	struct ccdgeom *ccg = &cs->sc_geom;
1557 	char *errstring;
1558 
1559 	bzero(lp, sizeof(*lp));
1560 	bzero(clp, sizeof(*clp));
1561 
1562 	lp->d_secperunit = cs->sc_size;
1563 	lp->d_secsize = ccg->ccg_secsize;
1564 	lp->d_nsectors = ccg->ccg_nsectors;
1565 	lp->d_ntracks = ccg->ccg_ntracks;
1566 	lp->d_ncylinders = ccg->ccg_ncylinders;
1567 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1568 	lp->d_rpm = ccg->ccg_rpm;
1569 
1570 	strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1571 	lp->d_type = DTYPE_CCD;
1572 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1573 	lp->d_interleave = 1;
1574 	lp->d_flags = 0;
1575 
1576 	lp->d_partitions[RAW_PART].p_offset = 0;
1577 	lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1578 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1579 	lp->d_npartitions = RAW_PART + 1;
1580 
1581 	lp->d_magic = DISKMAGIC;
1582 	lp->d_magic2 = DISKMAGIC;
1583 	lp->d_checksum = dkcksum(cs->sc_dkdev.dk_label);
1584 
1585 	/*
1586 	 * Call the generic disklabel extraction routine.
1587 	 */
1588 	errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy,
1589 	    cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel, spoofonly);
1590 	if (errstring)
1591 		ccdmakedisklabel(cs);
1592 
1593 #ifdef DEBUG
1594 	/* It's actually extremely common to have unlabeled ccds. */
1595 	if (ccddebug & CCDB_LABEL)
1596 		if (errstring != NULL)
1597 			printf("%s: %s\n", cs->sc_xname, errstring);
1598 #endif
1599 }
1600 
1601 /*
1602  * Take care of things one might want to take care of in the event
1603  * that a disklabel isn't present.
1604  */
1605 void
ccdmakedisklabel(struct ccd_softc * cs)1606 ccdmakedisklabel(struct ccd_softc *cs)
1607 {
1608 	struct disklabel *lp = cs->sc_dkdev.dk_label;
1609 
1610 	/*
1611 	 * For historical reasons, if there's no disklabel present
1612 	 * the raw partition must be marked FS_BSDFFS.
1613 	 */
1614 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1615 
1616 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1617 }
1618 
1619 /*
1620  * Wait interruptibly for an exclusive lock.
1621  *
1622  * XXX
1623  * Several drivers do this; it should be abstracted and made MP-safe.
1624  */
1625 int
ccdlock(struct ccd_softc * cs)1626 ccdlock(struct ccd_softc *cs)
1627 {
1628 	int error;
1629 
1630 	while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1631 		cs->sc_flags |= CCDF_WANTED;
1632 		if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0)
1633 			return (error);
1634 	}
1635 	cs->sc_flags |= CCDF_LOCKED;
1636 	return (0);
1637 }
1638 
1639 /*
1640  * Unlock and wake up any waiters.
1641  */
1642 void
ccdunlock(struct ccd_softc * cs)1643 ccdunlock(struct ccd_softc *cs)
1644 {
1645 
1646 	cs->sc_flags &= ~CCDF_LOCKED;
1647 	if ((cs->sc_flags & CCDF_WANTED) != 0) {
1648 		cs->sc_flags &= ~CCDF_WANTED;
1649 		wakeup(cs);
1650 	}
1651 }
1652 
1653 #ifdef DEBUG
1654 void
printiinfo(struct ccdiinfo * ii)1655 printiinfo(struct ccdiinfo *ii)
1656 {
1657 	int ix, i;
1658 
1659 	for (ix = 0; ii->ii_ndisk; ix++, ii++) {
1660 		printf(" itab[%d]: #dk %d sblk %d soff %d",
1661 		       ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
1662 		for (i = 0; i < ii->ii_ndisk; i++)
1663 			printf(" %d", ii->ii_index[i]);
1664 		printf("\n");
1665 	}
1666 }
1667 #endif
1668