1 /* $OpenBSD: ccd.c,v 1.62 2005/05/22 19:40:51 art Exp $ */
2 /* $NetBSD: ccd.c,v 1.33 1996/05/05 04:21:14 thorpej Exp $ */
3
4 /*-
5 * Copyright (c) 1996 The NetBSD Foundation, Inc.
6 * Copyright (c) 1997 Niklas Hallqvist.
7 * Copyright (c) 2005 Michael Shalayeff.
8 * All rights reserved.
9 *
10 * This code is derived from software contributed to The NetBSD Foundation
11 * by Jason R. Thorpe.
12 * Niklas Hallqvist redid the buffer policy for better performance.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 * must display the following acknowledgement:
24 * This product includes software developed by the NetBSD
25 * Foundation, Inc. and its contributors.
26 * 4. Neither the name of The NetBSD Foundation nor the names of its
27 * contributors may be used to endorse or promote products derived
28 * from this software without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
31 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
32 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
33 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
34 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
37 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
38 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40 * POSSIBILITY OF SUCH DAMAGE.
41 */
42
43 /*
44 * Copyright (c) 1988 University of Utah.
45 * Copyright (c) 1990, 1993
46 * The Regents of the University of California. All rights reserved.
47 *
48 * This code is derived from software contributed to Berkeley by
49 * the Systems Programming Group of the University of Utah Computer
50 * Science Department.
51 *
52 * Redistribution and use in source and binary forms, with or without
53 * modification, are permitted provided that the following conditions
54 * are met:
55 * 1. Redistributions of source code must retain the above copyright
56 * notice, this list of conditions and the following disclaimer.
57 * 2. Redistributions in binary form must reproduce the above copyright
58 * notice, this list of conditions and the following disclaimer in the
59 * documentation and/or other materials provided with the distribution.
60 * 3. Neither the name of the University nor the names of its contributors
61 * may be used to endorse or promote products derived from this software
62 * without specific prior written permission.
63 *
64 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
65 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
66 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
67 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
68 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
69 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
70 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
71 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
72 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
73 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
74 * SUCH DAMAGE.
75 *
76 * from: Utah $Hdr: cd.c 1.6 90/11/28$
77 *
78 * @(#)cd.c 8.2 (Berkeley) 11/16/93
79 */
80
81 /*
82 * "Concatenated" disk driver.
83 *
84 * Dynamic configuration and disklabel support by:
85 * Jason R. Thorpe <thorpej@nas.nasa.gov>
86 * Numerical Aerodynamic Simulation Facility
87 * Mail Stop 258-6
88 * NASA Ames Research Center
89 * Moffett Field, CA 94035
90 *
91 * Mirroring support based on code written by Satoshi Asami
92 * and Nisha Talagala.
93 *
94 * Buffer scatter/gather policy by Niklas Hallqvist.
95 */
96 /* #define CCDDEBUG */
97
98 #include <sys/param.h>
99 #include <sys/systm.h>
100 #include <sys/proc.h>
101 #include <sys/errno.h>
102 #include <sys/buf.h>
103 #include <sys/malloc.h>
104 #include <sys/pool.h>
105 #include <sys/namei.h>
106 #include <sys/stat.h>
107 #include <sys/ioctl.h>
108 #include <sys/disklabel.h>
109 #include <sys/device.h>
110 #include <sys/disk.h>
111 #include <sys/syslog.h>
112 #include <sys/fcntl.h>
113 #include <sys/vnode.h>
114 #include <sys/conf.h>
115
116 #include <dev/ccdvar.h>
117
/*
 * The small buffer helpers are declared INLINE: file-local inline
 * functions when the compiler is GCC-compatible, otherwise INLINE
 * expands to nothing.
 */
#if defined(__GNUC__)
#define INLINE	static __inline
#else
#define INLINE
#endif

/*
 * Number of MAXBSIZE-sized kvm spaces wanted for component I/O
 * operations.  A build may override the default.
 */
#if !defined(CCD_CLUSTERS)
#define CCD_CLUSTERS	16
#endif

/* Defining CCDDEBUG is enough to switch the DEBUG code on. */
#if defined(CCDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#if defined(DEBUG)
/* Bits tested in ccddebug to select which debug messages are printed. */
#define CCDB_FOLLOW	0x01
#define CCDB_INIT	0x02
#define CCDB_IO		0x04
#define CCDB_LABEL	0x08
#define CCDB_VNODE	0x10
int ccddebug = 0x00;
#endif

/* Unit number encoded in a dev_t. */
#define ccdunit(x)	DISKUNIT(x)

/* dev_t naming the raw partition of the unit that dev belongs to. */
#define CCDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART))
148
149 struct ccdbuf {
150 struct buf cb_buf; /* new I/O buf */
151 struct buf *cb_obp; /* ptr. to original I/O buf */
152 struct ccd_softc*cb_sc; /* point back to the device */
153 struct ccdbuf *cb_dep; /* mutual ptrs for mirror part */
154 int cb_comp; /* target component */
155 int cb_flags; /* misc. flags */
156 #define CBF_MIRROR 0x01 /* we're for a mirror component */
157 #define CBF_OLD 0x02 /* use old I/O protocol */
158 #define CBF_DONE 0x04 /* this buffer is done */
159
160 int cb_sgcnt; /* scatter/gather segment count */
161 #define CCD_SGMAX (MAXBSIZE >> PAGE_SHIFT)
162 struct ccdseg {
163 caddr_t cs_sgaddr; /* scatter/gather segment addresses */
164 long cs_sglen; /* scatter/gather segment lengths */
165 } cb_sg[1];
166 };
167
168 /* called by main() at boot time */
169 void ccdattach(int);
170
171 /* called by biodone() at interrupt time */
172 void ccdiodone(struct buf *);
173 int ccdsize(dev_t);
174
175 void ccdstart(struct ccd_softc *, struct buf *);
176 void ccdinterleave(struct ccd_softc *);
177 void ccdintr(struct ccd_softc *, struct buf *);
178 int ccdinit(struct ccddevice *, char **, struct proc *);
179 int ccdlookup(char *, struct proc *p, struct vnode **);
180 long ccdbuffer(struct ccd_softc *, struct buf *, daddr_t, caddr_t,
181 long, struct ccdbuf **, int);
182 void ccdgetdisklabel(dev_t, struct ccd_softc *, struct disklabel *,
183 struct cpu_disklabel *, int);
184 void ccdmakedisklabel(struct ccd_softc *);
185 int ccdlock(struct ccd_softc *);
186 void ccdunlock(struct ccd_softc *);
187 INLINE struct ccdbuf *getccdbuf(void);
188 INLINE void putccdbuf(struct ccdbuf *);
189
190 #ifdef DEBUG
191 void printiinfo(struct ccdiinfo *);
192 #endif
193
194 /* Non-private for the benefit of libkvm. */
195 struct ccd_softc *ccd_softc;
196 struct ccddevice *ccddevs;
197 int numccd = 0;
198 int ccdbufsizeof;
199
200 /*
201 * A separate map so that locking on kernel_map won't happen in interrupts
202 * (XXX due to fragmentation this might fail easy and panic the kernel)
203 */
204 struct vm_map *ccdmap;
205
206 /*
207 * Set when a process need some kvm.
208 * XXX should we fallback to old I/O policy instead when out of ccd kvm?
209 */
210 int ccd_need_kvm = 0;
211
212 /*
213 * struct ccdbuf allocator
214 */
215 struct pool ccdbufpl;
216
217 /*
218 * Manage the ccd buffer structures.
219 */
220 INLINE struct ccdbuf *
getccdbuf(void)221 getccdbuf(void)
222 {
223 struct ccdbuf *cbp;
224
225 if ((cbp = pool_get(&ccdbufpl, PR_WAITOK)))
226 bzero(cbp, ccdbufsizeof);
227 return (cbp);
228 }
229
230 INLINE void
putccdbuf(struct ccdbuf * cbp)231 putccdbuf(struct ccdbuf *cbp)
232 {
233 pool_put(&ccdbufpl, cbp);
234 }
235
236 /*
237 * Called by main() during pseudo-device attachment. All we need
238 * to do is allocate enough space for devices to be configured later.
239 */
240 void
ccdattach(int num)241 ccdattach(int num)
242 {
243 if (num <= 0) {
244 #ifdef DIAGNOSTIC
245 panic("ccdattach: count <= 0");
246 #endif
247 return;
248 }
249
250 ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc),
251 M_DEVBUF, M_NOWAIT);
252 ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice),
253 M_DEVBUF, M_NOWAIT);
254 if ((ccd_softc == NULL) || (ccddevs == NULL)) {
255 printf("WARNING: no memory for concatenated disks\n");
256 if (ccd_softc != NULL)
257 free(ccd_softc, M_DEVBUF);
258 if (ccddevs != NULL)
259 free(ccddevs, M_DEVBUF);
260 return;
261 }
262 numccd = num;
263 bzero(ccd_softc, num * sizeof(struct ccd_softc));
264 bzero(ccddevs, num * sizeof(struct ccddevice));
265
266 ccdbufsizeof = sizeof(struct ccdbuf) +
267 (CCD_SGMAX - 1) * sizeof(struct ccdseg);
268 pool_init(&ccdbufpl, ccdbufsizeof, 0, 0, 0, "ccdbufpl", NULL);
269 pool_setlowat(&ccdbufpl, 16);
270 pool_sethiwat(&ccdbufpl, 1024);
271 }
272
273 int
ccdinit(struct ccddevice * ccd,char ** cpaths,struct proc * p)274 ccdinit(struct ccddevice *ccd, char **cpaths, struct proc *p)
275 {
276 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
277 struct ccdcinfo *ci = NULL;
278 size_t size;
279 int ix, rpm;
280 struct vnode *vp;
281 struct vattr va;
282 size_t minsize;
283 int maxsecsize;
284 struct partinfo dpart;
285 struct ccdgeom *ccg = &cs->sc_geom;
286 char tmppath[MAXPATHLEN];
287 int error;
288
289 #ifdef DEBUG
290 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
291 printf("ccdinit: unit %d cflags %b\n",
292 ccd->ccd_unit, ccd->ccd_flags, CCDF_BITS);
293 #endif
294
295 cs->sc_size = 0;
296 cs->sc_ileave = ccd->ccd_interleave;
297 cs->sc_nccdisks = ccd->ccd_ndev;
298 if (snprintf(cs->sc_xname, sizeof(cs->sc_xname), "ccd%d",
299 ccd->ccd_unit) >= sizeof(cs->sc_xname)) {
300 printf("ccdinit: device name too long.\n");
301 return(ENXIO);
302 }
303
304 /* Allocate space for the component info. */
305 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
306 M_DEVBUF, M_WAITOK);
307 bzero(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo));
308
309 /*
310 * Verify that each component piece exists and record
311 * relevant information about it.
312 */
313 maxsecsize = 0;
314 minsize = 0;
315 rpm = 0;
316 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
317 vp = ccd->ccd_vpp[ix];
318 ci = &cs->sc_cinfo[ix];
319 ci->ci_vp = vp;
320
321 /*
322 * Copy in the pathname of the component.
323 */
324 bzero(tmppath, sizeof(tmppath)); /* sanity */
325 error = copyinstr(cpaths[ix], tmppath,
326 MAXPATHLEN, &ci->ci_pathlen);
327 if (error) {
328 #ifdef DEBUG
329 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
330 printf("%s: can't copy path, error = %d\n",
331 cs->sc_xname, error);
332 #endif
333 free(cs->sc_cinfo, M_DEVBUF);
334 return (error);
335 }
336 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
337 bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
338
339 /*
340 * XXX: Cache the component's dev_t.
341 */
342 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
343 #ifdef DEBUG
344 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
345 printf("%s: %s: getattr failed %s = %d\n",
346 cs->sc_xname, ci->ci_path,
347 "error", error);
348 #endif
349 free(ci->ci_path, M_DEVBUF);
350 free(cs->sc_cinfo, M_DEVBUF);
351 return (error);
352 }
353 ci->ci_dev = va.va_rdev;
354
355 /*
356 * Get partition information for the component.
357 */
358 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
359 FREAD, p->p_ucred, p);
360 if (error) {
361 #ifdef DEBUG
362 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
363 printf("%s: %s: ioctl failed, error = %d\n",
364 cs->sc_xname, ci->ci_path, error);
365 #endif
366 free(ci->ci_path, M_DEVBUF);
367 free(cs->sc_cinfo, M_DEVBUF);
368 return (error);
369 }
370 if (dpart.part->p_fstype == FS_CCD ||
371 dpart.part->p_fstype == FS_BSDFFS) {
372 maxsecsize =
373 ((dpart.disklab->d_secsize > maxsecsize) ?
374 dpart.disklab->d_secsize : maxsecsize);
375 size = dpart.part->p_size;
376 } else {
377 #ifdef DEBUG
378 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
379 printf("%s: %s: incorrect partition type\n",
380 cs->sc_xname, ci->ci_path);
381 #endif
382 free(ci->ci_path, M_DEVBUF);
383 free(cs->sc_cinfo, M_DEVBUF);
384 return (EFTYPE);
385 }
386
387 /*
388 * Calculate the size, truncating to an interleave
389 * boundary if necessary.
390 */
391 if (cs->sc_ileave > 1)
392 size -= size % cs->sc_ileave;
393
394 if (size == 0) {
395 #ifdef DEBUG
396 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
397 printf("%s: %s: size == 0\n",
398 cs->sc_xname, ci->ci_path);
399 #endif
400 free(ci->ci_path, M_DEVBUF);
401 free(cs->sc_cinfo, M_DEVBUF);
402 return (ENODEV);
403 }
404
405 if (minsize == 0 || size < minsize)
406 minsize = size;
407 ci->ci_size = size;
408 cs->sc_size += size;
409 rpm += dpart.disklab->d_rpm;
410 }
411 ccg->ccg_rpm = rpm / cs->sc_nccdisks;
412
413 /*
414 * Don't allow the interleave to be smaller than
415 * the biggest component sector.
416 */
417 if ((cs->sc_ileave > 0) &&
418 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
419 #ifdef DEBUG
420 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
421 printf("%s: interleave must be at least %d\n",
422 cs->sc_xname, (maxsecsize / DEV_BSIZE));
423 #endif
424 free(ci->ci_path, M_DEVBUF);
425 free(cs->sc_cinfo, M_DEVBUF);
426 return (EINVAL);
427 }
428
429 /*
430 * Mirroring support requires uniform interleave and
431 * and even number of components.
432 */
433 if (ccd->ccd_flags & CCDF_MIRROR) {
434 ccd->ccd_flags |= CCDF_UNIFORM;
435 if (cs->sc_ileave == 0) {
436 #ifdef DEBUG
437 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
438 printf("%s: mirroring requires interleave\n",
439 cs->sc_xname);
440 #endif
441 free(ci->ci_path, M_DEVBUF);
442 free(cs->sc_cinfo, M_DEVBUF);
443 return (EINVAL);
444 }
445 if (cs->sc_nccdisks % 2) {
446 #ifdef DEBUG
447 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
448 printf("%s: mirroring requires even # of components\n",
449 cs->sc_xname);
450 #endif
451 free(ci->ci_path, M_DEVBUF);
452 free(cs->sc_cinfo, M_DEVBUF);
453 return (EINVAL);
454 }
455 }
456
457 /*
458 * If uniform interleave is desired set all sizes to that of
459 * the smallest component.
460 */
461 ccg->ccg_ntracks = cs->sc_nccunits = cs->sc_nccdisks;
462 if (ccd->ccd_flags & CCDF_UNIFORM) {
463 for (ci = cs->sc_cinfo;
464 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++)
465 ci->ci_size = minsize;
466
467 if (ccd->ccd_flags & CCDF_MIRROR)
468 cs->sc_nccunits = ccg->ccg_ntracks /= 2;
469 cs->sc_size = ccg->ccg_ntracks * minsize;
470 }
471
472 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */
473
474 /*
475 * Construct the interleave table.
476 */
477 ccdinterleave(cs);
478
479 /*
480 * Create pseudo-geometry based on 1MB cylinders. It's
481 * pretty close.
482 */
483 ccg->ccg_secsize = DEV_BSIZE;
484 ccg->ccg_nsectors = cs->sc_ileave? cs->sc_ileave :
485 1024 * (1024 / ccg->ccg_secsize);
486 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_ntracks /
487 ccg->ccg_nsectors;
488
489 cs->sc_flags |= CCDF_INITED;
490
491 return (0);
492 }
493
494 void
ccdinterleave(struct ccd_softc * cs)495 ccdinterleave(struct ccd_softc *cs)
496 {
497 struct ccdcinfo *ci, *smallci;
498 struct ccdiinfo *ii;
499 daddr_t bn, lbn;
500 int ix;
501 u_long size;
502
503 #ifdef DEBUG
504 if (ccddebug & CCDB_INIT)
505 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave);
506 #endif
507 /*
508 * Allocate an interleave table.
509 * Chances are this is too big, but we don't care.
510 */
511 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
512 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK);
513 bzero((caddr_t)cs->sc_itable, size);
514
515 /*
516 * Trivial case: no interleave (actually interleave of disk size).
517 * Each table entry represents a single component in its entirety.
518 */
519 if (cs->sc_ileave == 0) {
520 bn = 0;
521 ii = cs->sc_itable;
522
523 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
524 /* Allocate space for ii_index. */
525 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
526 ii->ii_ndisk = 1;
527 ii->ii_startblk = bn;
528 ii->ii_startoff = 0;
529 ii->ii_index[0] = ix;
530 bn += cs->sc_cinfo[ix].ci_size;
531 ii++;
532 }
533 ii->ii_ndisk = 0;
534 #ifdef DEBUG
535 if (ccddebug & CCDB_INIT)
536 printiinfo(cs->sc_itable);
537 #endif
538 return;
539 }
540
541 /*
542 * The following isn't fast or pretty; it doesn't have to be.
543 */
544 size = 0;
545 bn = lbn = 0;
546 for (ii = cs->sc_itable; ; ii++) {
547 /* Allocate space for ii_index. */
548 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
549 M_DEVBUF, M_WAITOK);
550
551 /*
552 * Locate the smallest of the remaining components
553 */
554 smallci = NULL;
555 for (ci = cs->sc_cinfo;
556 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++)
557 if (ci->ci_size > size &&
558 (smallci == NULL ||
559 ci->ci_size < smallci->ci_size))
560 smallci = ci;
561
562 /*
563 * Nobody left, all done
564 */
565 if (smallci == NULL) {
566 ii->ii_ndisk = 0;
567 break;
568 }
569
570 /*
571 * Record starting logical block and component offset
572 */
573 ii->ii_startblk = bn / cs->sc_ileave;
574 ii->ii_startoff = lbn;
575
576 /*
577 * Determine how many disks take part in this interleave
578 * and record their indices.
579 */
580 ix = 0;
581 for (ci = cs->sc_cinfo;
582 ci < &cs->sc_cinfo[cs->sc_nccunits]; ci++)
583 if (ci->ci_size >= smallci->ci_size)
584 ii->ii_index[ix++] = ci - cs->sc_cinfo;
585 ii->ii_ndisk = ix;
586 bn += ix * (smallci->ci_size - size);
587 lbn = smallci->ci_size / cs->sc_ileave;
588 size = smallci->ci_size;
589 }
590 #ifdef DEBUG
591 if (ccddebug & CCDB_INIT)
592 printiinfo(cs->sc_itable);
593 #endif
594 }
595
596 /* ARGSUSED */
597 int
ccdopen(dev_t dev,int flags,int fmt,struct proc * p)598 ccdopen(dev_t dev, int flags, int fmt, struct proc *p)
599 {
600 int unit = ccdunit(dev);
601 struct ccd_softc *cs;
602 struct disklabel *lp;
603 int error = 0, part, pmask;
604
605 #ifdef DEBUG
606 if (ccddebug & CCDB_FOLLOW)
607 printf("ccdopen(%x, %x)\n", dev, flags);
608 #endif
609 if (unit >= numccd)
610 return (ENXIO);
611 cs = &ccd_softc[unit];
612
613 if ((error = ccdlock(cs)) != 0)
614 return (error);
615
616 lp = cs->sc_dkdev.dk_label;
617
618 part = DISKPART(dev);
619 pmask = (1 << part);
620
621 /*
622 * If we're initialized, check to see if there are any other
623 * open partitions. If not, then it's safe to update
624 * the in-core disklabel.
625 */
626 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_dkdev.dk_openmask == 0))
627 ccdgetdisklabel(dev, cs, lp, cs->sc_dkdev.dk_cpulabel, 0);
628
629 /* Check that the partition exists. */
630 if (part != RAW_PART) {
631 if (((cs->sc_flags & CCDF_INITED) == 0) ||
632 ((part >= lp->d_npartitions) ||
633 (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
634 error = ENXIO;
635 goto done;
636 }
637 }
638
639 /* Prevent our unit from being unconfigured while open. */
640 switch (fmt) {
641 case S_IFCHR:
642 cs->sc_dkdev.dk_copenmask |= pmask;
643 break;
644
645 case S_IFBLK:
646 cs->sc_dkdev.dk_bopenmask |= pmask;
647 break;
648 }
649 cs->sc_dkdev.dk_openmask =
650 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask;
651
652 done:
653 ccdunlock(cs);
654 return (error);
655 }
656
657 /* ARGSUSED */
658 int
ccdclose(dev_t dev,int flags,int fmt,struct proc * p)659 ccdclose(dev_t dev, int flags, int fmt, struct proc *p)
660 {
661 int unit = ccdunit(dev);
662 struct ccd_softc *cs;
663 int error = 0, part;
664
665 #ifdef DEBUG
666 if (ccddebug & CCDB_FOLLOW)
667 printf("ccdclose(%x, %x)\n", dev, flags);
668 #endif
669
670 if (unit >= numccd)
671 return (ENXIO);
672 cs = &ccd_softc[unit];
673
674 if ((error = ccdlock(cs)) != 0)
675 return (error);
676
677 part = DISKPART(dev);
678
679 /* ...that much closer to allowing unconfiguration... */
680 switch (fmt) {
681 case S_IFCHR:
682 cs->sc_dkdev.dk_copenmask &= ~(1 << part);
683 break;
684
685 case S_IFBLK:
686 cs->sc_dkdev.dk_bopenmask &= ~(1 << part);
687 break;
688 }
689 cs->sc_dkdev.dk_openmask =
690 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask;
691
692 ccdunlock(cs);
693 return (0);
694 }
695
696 void
ccdstrategy(struct buf * bp)697 ccdstrategy(struct buf *bp)
698 {
699 int unit = ccdunit(bp->b_dev);
700 struct ccd_softc *cs = &ccd_softc[unit];
701 int s;
702 int wlabel;
703 struct disklabel *lp;
704
705 #ifdef DEBUG
706 if (ccddebug & CCDB_FOLLOW)
707 printf("ccdstrategy(%p): unit %d\n", bp, unit);
708 #endif
709 if ((cs->sc_flags & CCDF_INITED) == 0) {
710 bp->b_error = ENXIO;
711 bp->b_resid = bp->b_bcount;
712 bp->b_flags |= B_ERROR;
713 goto done;
714 }
715
716 /* If it's a nil transfer, wake up the top half now. */
717 if (bp->b_bcount == 0)
718 goto done;
719
720 lp = cs->sc_dkdev.dk_label;
721
722 /*
723 * Do bounds checking and adjust transfer. If there's an
724 * error, the bounds check will flag that for us.
725 */
726 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
727 if (DISKPART(bp->b_dev) != RAW_PART &&
728 bounds_check_with_label(bp, lp, cs->sc_dkdev.dk_cpulabel,
729 wlabel) <= 0)
730 goto done;
731
732 bp->b_resid = bp->b_bcount;
733
734 /*
735 * "Start" the unit.
736 */
737 s = splbio();
738 ccdstart(cs, bp);
739 splx(s);
740 return;
741 done:
742 s = splbio();
743 biodone(bp);
744 splx(s);
745 }
746
747 void
ccdstart(struct ccd_softc * cs,struct buf * bp)748 ccdstart(struct ccd_softc *cs, struct buf *bp)
749 {
750 long bcount, rcount;
751 struct ccdbuf **cbpp, *cbp;
752 caddr_t addr;
753 daddr_t bn;
754 struct partition *pp;
755 int i, old_io = cs->sc_cflags & CCDF_OLD;
756
757 #ifdef DEBUG
758 if (ccddebug & CCDB_FOLLOW)
759 printf("ccdstart(%p, %p, %s)\n", cs, bp,
760 bp->b_flags & B_READ? "read" : "write");
761 #endif
762
763 /* Instrumentation. */
764 disk_busy(&cs->sc_dkdev);
765
766 /*
767 * Translate the partition-relative block number to an absolute.
768 */
769 bn = bp->b_blkno;
770 if (DISKPART(bp->b_dev) != RAW_PART) {
771 pp = &cs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
772 bn += pp->p_offset;
773 }
774
775 /*
776 * Allocate component buffers
777 */
778 cbpp = malloc(2 * cs->sc_nccdisks * sizeof(struct ccdbuf *), M_DEVBUF,
779 M_WAITOK);
780 bzero(cbpp, 2 * cs->sc_nccdisks * sizeof(struct ccdbuf *));
781 addr = bp->b_data;
782 old_io = old_io || ((vaddr_t)addr & PAGE_MASK);
783 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
784 rcount = ccdbuffer(cs, bp, bn, addr, bcount, cbpp, old_io);
785
786 /*
787 * This is the old, slower, but less restrictive, mode of
788 * operation. It allows interleaves which are not multiples
789 * of PAGE_SIZE and mirroring.
790 */
791 if (old_io) {
792 if ((cbpp[0]->cb_buf.b_flags & B_READ) == 0)
793 cbpp[0]->cb_buf.b_vp->v_numoutput++;
794 VOP_STRATEGY(&cbpp[0]->cb_buf);
795
796 if ((cs->sc_cflags & CCDF_MIRROR) &&
797 ((cbpp[0]->cb_buf.b_flags & B_READ) == 0)) {
798 cbpp[1]->cb_buf.b_vp->v_numoutput++;
799 VOP_STRATEGY(&cbpp[1]->cb_buf);
800 }
801 }
802
803 bn += btodb(rcount);
804 addr += rcount;
805 }
806
807 /* The new leaner mode of operation */
808 if (!old_io)
809 /*
810 * Fire off the requests
811 */
812 for (i = 0; i < 2*cs->sc_nccdisks; i++) {
813 cbp = cbpp[i];
814 if (cbp) {
815 if ((cbp->cb_buf.b_flags & B_READ) == 0)
816 cbp->cb_buf.b_vp->v_numoutput++;
817 VOP_STRATEGY(&cbp->cb_buf);
818 }
819 }
820 free(cbpp, M_DEVBUF);
821 }
822
823 /*
824 * Build a component buffer header.
825 */
826 long
ccdbuffer(struct ccd_softc * cs,struct buf * bp,daddr_t bn,caddr_t addr,long bcount,struct ccdbuf ** cbpp,int old_io)827 ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, caddr_t addr,
828 long bcount, struct ccdbuf **cbpp, int old_io)
829 {
830 struct ccdcinfo *ci, *ci2 = NULL;
831 struct ccdbuf *cbp;
832 daddr_t cbn, cboff, sblk;
833 int ccdisk, ccdisk2, off;
834 long old_bcount, cnt;
835 struct ccdiinfo *ii;
836 struct buf *nbp;
837
838 #ifdef DEBUG
839 if (ccddebug & CCDB_IO)
840 printf("ccdbuffer(%p, %p, %d, %p, %ld, %p)\n",
841 cs, bp, bn, addr, bcount, cbpp);
842 #endif
843
844 /*
845 * Determine which component bn falls in.
846 */
847 cbn = bn;
848 cboff = 0;
849
850 if (cs->sc_ileave == 0) {
851 /*
852 * Serially concatenated
853 */
854 sblk = 0;
855 for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk];
856 cbn >= sblk + ci->ci_size;
857 ccdisk++, ci = &cs->sc_cinfo[ccdisk])
858 sblk += ci->ci_size;
859 cbn -= sblk;
860 } else {
861 /*
862 * Interleaved
863 */
864 cboff = cbn % cs->sc_ileave;
865 cbn /= cs->sc_ileave;
866 for (ii = cs->sc_itable; ii->ii_ndisk; ii++)
867 if (ii->ii_startblk > cbn)
868 break;
869 ii--;
870 off = cbn - ii->ii_startblk;
871 if (ii->ii_ndisk == 1) {
872 ccdisk = ii->ii_index[0];
873 cbn = ii->ii_startoff + off;
874 } else {
875 ccdisk = ii->ii_index[off % ii->ii_ndisk];
876 cbn = ii->ii_startoff + off / ii->ii_ndisk;
877 }
878 if (cs->sc_cflags & CCDF_MIRROR) {
879 /* Mirrored data */
880 ccdisk2 = ccdisk + ii->ii_ndisk;
881 ci2 = &cs->sc_cinfo[ccdisk2];
882 /* spread the read over both parts */
883 if (bp->b_flags & B_READ &&
884 bcount > bp->b_bcount / 2 &&
885 (!(ci2->ci_flags & CCIF_FAILED) ||
886 ci->ci_flags & CCIF_FAILED))
887 ccdisk = ccdisk2;
888 }
889 cbn *= cs->sc_ileave;
890 ci = &cs->sc_cinfo[ccdisk];
891 #ifdef DEBUG
892 if (ccddebug & CCDB_IO)
893 printf("ccdisk %d cbn %d ci %p ci2 %p\n",
894 ccdisk, cbn, ci, ci2);
895 #endif
896 }
897
898 /* Limit the operation at next component border */
899 if (cs->sc_ileave == 0)
900 cnt = dbtob(ci->ci_size - cbn);
901 else
902 cnt = dbtob(cs->sc_ileave - cboff);
903 if (cnt < bcount)
904 bcount = cnt;
905
906 if (old_io || cbpp[ccdisk] == NULL) {
907 /*
908 * Setup new component buffer.
909 */
910 cbp = cbpp[old_io ? 0 : ccdisk] = getccdbuf();
911 cbp->cb_flags = old_io ? CBF_OLD : 0;
912 nbp = &cbp->cb_buf;
913 nbp->b_flags = bp->b_flags | B_CALL;
914 nbp->b_iodone = ccdiodone;
915 nbp->b_proc = bp->b_proc;
916 nbp->b_dev = ci->ci_dev; /* XXX */
917 nbp->b_blkno = cbn + cboff;
918 nbp->b_vp = ci->ci_vp;
919 nbp->b_bcount = bcount;
920 LIST_INIT(&nbp->b_dep);
921
922 /*
923 * context for ccdiodone
924 */
925 cbp->cb_obp = bp;
926 cbp->cb_sc = cs;
927 cbp->cb_comp = ccdisk;
928
929 /* Deal with the different algorithms */
930 if (old_io)
931 nbp->b_data = addr;
932 else {
933 do {
934 nbp->b_data = (caddr_t) uvm_km_valloc(ccdmap,
935 bp->b_bcount);
936
937 /*
938 * XXX Instead of sleeping, we might revert
939 * XXX to old I/O policy for this buffer set.
940 */
941 if (nbp->b_data == NULL) {
942 ccd_need_kvm++;
943 tsleep(ccdmap, PRIBIO, "ccdbuffer", 0);
944 }
945 } while (nbp->b_data == NULL);
946 cbp->cb_sgcnt = 0;
947 old_bcount = 0;
948 }
949
950 /*
951 * Mirrors have an additional write operation that is nearly
952 * identical to the first.
953 */
954 if ((cs->sc_cflags & CCDF_MIRROR) &&
955 !(ci2->ci_flags & CCIF_FAILED) &&
956 ((cbp->cb_buf.b_flags & B_READ) == 0)) {
957 struct ccdbuf *cbp2;
958 cbpp[old_io? 1 : ccdisk2] = cbp2 = getccdbuf();
959 *cbp2 = *cbp;
960 cbp2->cb_flags = CBF_MIRROR | (old_io ? CBF_OLD : 0);
961 cbp2->cb_buf.b_dev = ci2->ci_dev; /* XXX */
962 cbp2->cb_buf.b_vp = ci2->ci_vp;
963 LIST_INIT(&cbp2->cb_buf.b_dep);
964 cbp2->cb_comp = ccdisk2;
965 cbp2->cb_dep = cbp;
966 cbp->cb_dep = cbp2;
967 }
968 } else {
969 /*
970 * Continue on an already started component buffer
971 */
972 cbp = cbpp[ccdisk];
973 nbp = &cbp->cb_buf;
974
975 /*
976 * Map the new pages at the end of the buffer.
977 */
978 old_bcount = nbp->b_bcount;
979 nbp->b_bcount += bcount;
980 }
981
982 if (!old_io) {
983 #ifdef DEBUG
984 if (ccddebug & CCDB_IO)
985 printf("ccdbuffer: sg %d (%p/%x) off %x\n",
986 cbp->cb_sgcnt, addr, bcount, old_bcount);
987 #endif
988 pagemove(addr, nbp->b_data + old_bcount, round_page(bcount));
989 nbp->b_bufsize += round_page(bcount);
990 cbp->cb_sg[cbp->cb_sgcnt].cs_sgaddr = addr;
991 cbp->cb_sg[cbp->cb_sgcnt].cs_sglen = bcount;
992 cbp->cb_sgcnt++;
993 }
994
995 #ifdef DEBUG
996 if (ccddebug & CCDB_IO)
997 printf(" dev %x(u%d): cbp %p bn %d addr %p bcnt %ld\n",
998 ci->ci_dev, ci-cs->sc_cinfo, cbp, bp->b_blkno,
999 bp->b_data, bp->b_bcount);
1000 #endif
1001
1002 return (bcount);
1003 }
1004
1005 void
ccdintr(struct ccd_softc * cs,struct buf * bp)1006 ccdintr(struct ccd_softc *cs, struct buf *bp)
1007 {
1008
1009 splassert(IPL_BIO);
1010
1011 #ifdef DEBUG
1012 if (ccddebug & CCDB_FOLLOW)
1013 printf("ccdintr(%p, %p)\n", cs, bp);
1014 #endif
1015 /*
1016 * Request is done for better or worse, wakeup the top half.
1017 */
1018 if (bp->b_flags & B_ERROR)
1019 bp->b_resid = bp->b_bcount;
1020 disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid),
1021 (bp->b_flags & B_READ));
1022 biodone(bp);
1023 }
1024
1025 /*
1026 * Called at interrupt time.
1027 * Mark the component as done and if all components are done,
1028 * take a ccd interrupt.
1029 */
1030 void
ccdiodone(struct buf * vbp)1031 ccdiodone(struct buf *vbp)
1032 {
1033 struct ccdbuf *cbp = (struct ccdbuf *)vbp;
1034 struct buf *bp = cbp->cb_obp;
1035 struct ccd_softc *cs = cbp->cb_sc;
1036 int old_io = cbp->cb_flags & CBF_OLD;
1037 int i;
1038 long count = bp->b_bcount, off;
1039 char *comptype;
1040
1041 splassert(IPL_BIO);
1042
1043 #ifdef DEBUG
1044 if (ccddebug & CCDB_FOLLOW)
1045 printf("ccdiodone(%p)\n", cbp);
1046 if (ccddebug & CCDB_IO) {
1047 if (cbp->cb_flags & CBF_MIRROR)
1048 printf("ccdiodone: mirror component\n");
1049 else
1050 printf("ccdiodone: bp %p bcount %ld resid %ld\n",
1051 bp, bp->b_bcount, bp->b_resid);
1052 printf(" dev %x(u%d), cbp %p bn %d addr %p bcnt %ld\n",
1053 vbp->b_dev, cbp->cb_comp, cbp, vbp->b_blkno,
1054 vbp->b_data, vbp->b_bcount);
1055 }
1056 #endif
1057
1058 if (vbp->b_flags & B_ERROR) {
1059 cs->sc_cinfo[cbp->cb_comp].ci_flags |= CCIF_FAILED;
1060 if (cbp->cb_flags & CBF_MIRROR)
1061 comptype = " (mirror)";
1062 else {
1063 bp->b_flags |= B_ERROR;
1064 bp->b_error = vbp->b_error ?
1065 vbp->b_error : EIO;
1066 comptype = "";
1067 }
1068
1069 printf("%s: error %d on component %d%s\n",
1070 cs->sc_xname, bp->b_error, cbp->cb_comp, comptype);
1071 }
1072 cbp->cb_flags |= CBF_DONE;
1073
1074 if (cbp->cb_dep &&
1075 (cbp->cb_dep->cb_flags & CBF_DONE) != (cbp->cb_flags & CBF_DONE))
1076 return;
1077
1078 if (cbp->cb_flags & CBF_MIRROR &&
1079 !(cbp->cb_dep->cb_flags & CBF_MIRROR)) {
1080 cbp = cbp->cb_dep;
1081 vbp = (struct buf *)cbp;
1082 }
1083
1084 if (!old_io) {
1085 /*
1086 * Gather all the pieces and put them where they should be.
1087 */
1088 for (i = 0, off = 0; i < cbp->cb_sgcnt; i++) {
1089 #ifdef DEBUG
1090 if (ccddebug & CCDB_IO)
1091 printf("ccdiodone: sg %d (%p/%x) off %x\n", i,
1092 cbp->cb_sg[i].cs_sgaddr,
1093 cbp->cb_sg[i].cs_sglen, off);
1094 #endif
1095 pagemove(vbp->b_data + off, cbp->cb_sg[i].cs_sgaddr,
1096 round_page(cbp->cb_sg[i].cs_sglen));
1097 off += cbp->cb_sg[i].cs_sglen;
1098 }
1099
1100 uvm_km_free(ccdmap, (vaddr_t)vbp->b_data, count);
1101 if (ccd_need_kvm) {
1102 ccd_need_kvm = 0;
1103 wakeup(ccdmap);
1104 }
1105 }
1106 count = vbp->b_bcount;
1107
1108 putccdbuf(cbp);
1109 if (cbp->cb_dep)
1110 putccdbuf(cbp->cb_dep);
1111
1112 /*
1113 * If all done, "interrupt".
1114 *
1115 * Note that mirror component buffers aren't counted against
1116 * the original I/O buffer.
1117 */
1118 if (count > bp->b_resid)
1119 panic("ccdiodone: count");
1120 bp->b_resid -= count;
1121 if (bp->b_resid == 0)
1122 ccdintr(cs, bp);
1123 }
1124
1125 /* ARGSUSED */
1126 int
ccdread(dev_t dev,struct uio * uio,int flags)1127 ccdread(dev_t dev, struct uio *uio, int flags)
1128 {
1129 int unit = ccdunit(dev);
1130 struct ccd_softc *cs;
1131
1132 #ifdef DEBUG
1133 if (ccddebug & CCDB_FOLLOW)
1134 printf("ccdread(%x, %p)\n", dev, uio);
1135 #endif
1136 if (unit >= numccd)
1137 return (ENXIO);
1138 cs = &ccd_softc[unit];
1139
1140 if ((cs->sc_flags & CCDF_INITED) == 0)
1141 return (ENXIO);
1142
1143 /*
1144 * XXX: It's not clear that using minphys() is completely safe,
1145 * in particular, for raw I/O. Underlying devices might have some
1146 * non-obvious limits, because of the copy to user-space.
1147 */
1148 return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio));
1149 }
1150
1151 /* ARGSUSED */
1152 int
ccdwrite(dev_t dev,struct uio * uio,int flags)1153 ccdwrite(dev_t dev, struct uio *uio, int flags)
1154 {
1155 int unit = ccdunit(dev);
1156 struct ccd_softc *cs;
1157
1158 #ifdef DEBUG
1159 if (ccddebug & CCDB_FOLLOW)
1160 printf("ccdwrite(%x, %p)\n", dev, uio);
1161 #endif
1162 if (unit >= numccd)
1163 return (ENXIO);
1164 cs = &ccd_softc[unit];
1165
1166 if ((cs->sc_flags & CCDF_INITED) == 0)
1167 return (ENXIO);
1168
1169 /*
1170 * XXX: It's not clear that using minphys() is completely safe,
1171 * in particular, for raw I/O. Underlying devices might have some
1172 * non-obvious limits, because of the copy to user-space.
1173 */
1174 return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio));
1175 }
1176
1177 int
ccdioctl(dev_t dev,u_long cmd,caddr_t data,int flag,struct proc * p)1178 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
1179 {
1180 int unit = ccdunit(dev);
1181 int i, j, lookedup = 0, error = 0;
1182 int part, pmask, s;
1183 struct ccd_softc *cs;
1184 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1185 struct ccddevice ccd;
1186 char **cpp;
1187 struct vnode **vpp;
1188 vaddr_t min, max;
1189
1190 if (unit >= numccd)
1191 return (ENXIO);
1192
1193 cs = &ccd_softc[unit];
1194 if (cmd != CCDIOCSET && !(cs->sc_flags & CCDF_INITED))
1195 return (ENXIO);
1196
1197 /* access control */
1198 switch (cmd) {
1199 case CCDIOCSET:
1200 case CCDIOCCLR:
1201 case DIOCWDINFO:
1202 case DIOCSDINFO:
1203 case DIOCWLABEL:
1204 if ((flag & FWRITE) == 0)
1205 return (EBADF);
1206 }
1207
1208 bzero(&ccd, sizeof(ccd));
1209 switch (cmd) {
1210 case CCDIOCSET:
1211 if (cs->sc_flags & CCDF_INITED)
1212 return (EBUSY);
1213
1214 if ((error = ccdlock(cs)) != 0)
1215 return (error);
1216
1217 /* Fill in some important bits. */
1218 ccd.ccd_unit = unit;
1219 ccd.ccd_interleave = ccio->ccio_ileave;
1220 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1221
1222 /* XXX the new code is unstable still */
1223 ccd.ccd_flags |= CCDF_OLD;
1224
1225 /*
1226 * Interleaving which is not a multiple of the click size
1227 * must use the old I/O code (by design)
1228 */
1229 if (ccio->ccio_ileave % (PAGE_SIZE / DEV_BSIZE) != 0)
1230 ccd.ccd_flags |= CCDF_OLD;
1231
1232 /*
1233 * Allocate space for and copy in the array of
1234 * componet pathnames and device numbers.
1235 */
1236 cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1237 M_DEVBUF, M_WAITOK);
1238 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1239 M_DEVBUF, M_WAITOK);
1240
1241 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1242 ccio->ccio_ndisks * sizeof(char **));
1243 if (error) {
1244 free(vpp, M_DEVBUF);
1245 free(cpp, M_DEVBUF);
1246 ccdunlock(cs);
1247 return (error);
1248 }
1249
1250 #ifdef DEBUG
1251 if (ccddebug & CCDB_INIT)
1252 for (i = 0; i < ccio->ccio_ndisks; ++i)
1253 printf("ccdioctl: component %d: %p\n",
1254 i, cpp[i]);
1255 #endif
1256
1257 for (i = 0; i < ccio->ccio_ndisks; ++i) {
1258 #ifdef DEBUG
1259 if (ccddebug & CCDB_INIT)
1260 printf("ccdioctl: lookedup = %d\n", lookedup);
1261 #endif
1262 if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) {
1263 for (j = 0; j < lookedup; ++j)
1264 (void)vn_close(vpp[j], FREAD|FWRITE,
1265 p->p_ucred, p);
1266 free(vpp, M_DEVBUF);
1267 free(cpp, M_DEVBUF);
1268 ccdunlock(cs);
1269 return (error);
1270 }
1271 ++lookedup;
1272 }
1273 ccd.ccd_cpp = cpp;
1274 ccd.ccd_vpp = vpp;
1275 ccd.ccd_ndev = ccio->ccio_ndisks;
1276
1277 /*
1278 * Initialize the ccd. Fills in the softc for us.
1279 */
1280 if ((error = ccdinit(&ccd, cpp, p)) != 0) {
1281 for (j = 0; j < lookedup; ++j)
1282 (void)vn_close(vpp[j], FREAD|FWRITE,
1283 p->p_ucred, p);
1284 bzero(&ccd_softc[unit], sizeof(struct ccd_softc));
1285 free(vpp, M_DEVBUF);
1286 free(cpp, M_DEVBUF);
1287 ccdunlock(cs);
1288 return (error);
1289 }
1290
1291 /*
1292 * The ccd has been successfully initialized, so
1293 * we can place it into the array. Don't try to
1294 * read the disklabel until the disk has been attached,
1295 * because space for the disklabel is allocated
1296 * in disk_attach();
1297 */
1298 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1299 ccio->ccio_unit = unit;
1300 ccio->ccio_size = cs->sc_size;
1301
1302 /*
1303 * If we use the optimized protocol we need some kvm space
1304 * for the component buffers. Allocate it here.
1305 *
1306 * XXX I'd like to have a more dynamic way of acquiring kvm
1307 * XXX space, but that is problematic as we are not allowed
1308 * XXX to lock the kernel_map in interrupt context. It is
1309 * XXX doable via a freelist implementation though.
1310 */
1311 if (!ccdmap && !(ccd.ccd_flags & CCDF_OLD)) {
1312 min = vm_map_min(kernel_map);
1313 ccdmap = uvm_km_suballoc(kernel_map, &min, &max,
1314 CCD_CLUSTERS * MAXBSIZE, VM_MAP_INTRSAFE,
1315 FALSE, NULL);
1316 }
1317
1318 /* Attach the disk. */
1319 cs->sc_dkdev.dk_name = cs->sc_xname;
1320 disk_attach(&cs->sc_dkdev);
1321
1322 /* Try and read the disklabel. */
1323 ccdgetdisklabel(dev, cs, cs->sc_dkdev.dk_label,
1324 cs->sc_dkdev.dk_cpulabel, 0);
1325
1326 ccdunlock(cs);
1327 break;
1328
1329 case CCDIOCCLR:
1330 if ((error = ccdlock(cs)) != 0)
1331 return (error);
1332
1333 /*
1334 * Don't unconfigure if any other partitions are open
1335 * or if both the character and block flavors of this
1336 * partition are open.
1337 */
1338 part = DISKPART(dev);
1339 pmask = (1 << part);
1340 if ((cs->sc_dkdev.dk_openmask & ~pmask) ||
1341 ((cs->sc_dkdev.dk_bopenmask & pmask) &&
1342 (cs->sc_dkdev.dk_copenmask & pmask))) {
1343 ccdunlock(cs);
1344 return (EBUSY);
1345 }
1346
1347 /*
1348 * Free ccd_softc information and clear entry.
1349 */
1350
1351 /* Close the components and free their pathnames. */
1352 for (i = 0; i < cs->sc_nccdisks; ++i) {
1353 /*
1354 * XXX: this close could potentially fail and
1355 * cause Bad Things. Maybe we need to force
1356 * the close to happen?
1357 */
1358 #ifdef DEBUG
1359 if (ccddebug & CCDB_VNODE)
1360 vprint("CCDIOCCLR: vnode info",
1361 cs->sc_cinfo[i].ci_vp);
1362 #endif
1363 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE,
1364 p->p_ucred, p);
1365 free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1366 }
1367
1368 /* Free interleave index. */
1369 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1370 free(cs->sc_itable[i].ii_index, M_DEVBUF);
1371
1372 /* Free component info and interleave table. */
1373 free(cs->sc_cinfo, M_DEVBUF);
1374 free(cs->sc_itable, M_DEVBUF);
1375 cs->sc_flags &= ~CCDF_INITED;
1376
1377 /*
1378 * Free ccddevice information and clear entry.
1379 */
1380 free(ccddevs[unit].ccd_cpp, M_DEVBUF);
1381 free(ccddevs[unit].ccd_vpp, M_DEVBUF);
1382 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1383
1384 /* Detatch the disk. */
1385 disk_detach(&cs->sc_dkdev);
1386
1387 /* This must be atomic. */
1388 s = splhigh();
1389 ccdunlock(cs);
1390 bzero(cs, sizeof(struct ccd_softc));
1391 splx(s);
1392 break;
1393
1394 case DIOCGPDINFO: {
1395 struct cpu_disklabel osdep;
1396
1397 if ((error = ccdlock(cs)) != 0)
1398 return (error);
1399
1400 ccdgetdisklabel(dev, cs, (struct disklabel *)data,
1401 &osdep, 1);
1402
1403 ccdunlock(cs);
1404 break;
1405 }
1406
1407 case DIOCGDINFO:
1408 *(struct disklabel *)data = *(cs->sc_dkdev.dk_label);
1409 break;
1410
1411 case DIOCGPART:
1412 ((struct partinfo *)data)->disklab = cs->sc_dkdev.dk_label;
1413 ((struct partinfo *)data)->part =
1414 &cs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1415 break;
1416
1417 case DIOCWDINFO:
1418 case DIOCSDINFO:
1419 if ((error = ccdlock(cs)) != 0)
1420 return (error);
1421
1422 cs->sc_flags |= CCDF_LABELLING;
1423
1424 error = setdisklabel(cs->sc_dkdev.dk_label,
1425 (struct disklabel *)data, 0, cs->sc_dkdev.dk_cpulabel);
1426 if (error == 0) {
1427 if (cmd == DIOCWDINFO)
1428 error = writedisklabel(CCDLABELDEV(dev),
1429 ccdstrategy, cs->sc_dkdev.dk_label,
1430 cs->sc_dkdev.dk_cpulabel);
1431 }
1432
1433 cs->sc_flags &= ~CCDF_LABELLING;
1434
1435 ccdunlock(cs);
1436
1437 if (error)
1438 return (error);
1439 break;
1440
1441 case DIOCWLABEL:
1442 if (*(int *)data != 0)
1443 cs->sc_flags |= CCDF_WLABEL;
1444 else
1445 cs->sc_flags &= ~CCDF_WLABEL;
1446 break;
1447
1448 default:
1449 return (ENOTTY);
1450 }
1451
1452 return (0);
1453 }
1454
1455 int
ccdsize(dev_t dev)1456 ccdsize(dev_t dev)
1457 {
1458 struct ccd_softc *cs;
1459 int part, size, unit;
1460
1461 unit = ccdunit(dev);
1462 if (unit >= numccd)
1463 return (-1);
1464
1465 cs = &ccd_softc[unit];
1466 if ((cs->sc_flags & CCDF_INITED) == 0)
1467 return (-1);
1468
1469 if (ccdopen(dev, 0, S_IFBLK, curproc))
1470 return (-1);
1471
1472 part = DISKPART(dev);
1473 if (cs->sc_dkdev.dk_label->d_partitions[part].p_fstype != FS_SWAP)
1474 size = -1;
1475 else
1476 size = cs->sc_dkdev.dk_label->d_partitions[part].p_size;
1477
1478 if (ccdclose(dev, 0, S_IFBLK, curproc))
1479 return (-1);
1480
1481 return (size);
1482 }
1483
1484 int
ccddump(dev_t dev,daddr_t blkno,caddr_t va,size_t size)1485 ccddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
1486 {
1487
1488 /* Not implemented. */
1489 return ENXIO;
1490 }
1491
1492 /*
1493 * Lookup the provided name in the filesystem. If the file exists,
1494 * is a valid block device, and isn't being used by anyone else,
1495 * set *vpp to the file's vnode.
1496 */
1497 int
ccdlookup(char * path,struct proc * p,struct vnode ** vpp)1498 ccdlookup(char *path, struct proc *p, struct vnode **vpp)
1499 {
1500 struct nameidata nd;
1501 struct vnode *vp;
1502 struct vattr va;
1503 int error;
1504
1505 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p);
1506 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
1507 #ifdef DEBUG
1508 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
1509 printf("ccdlookup: vn_open error = %d\n", error);
1510 #endif
1511 return (error);
1512 }
1513 vp = nd.ni_vp;
1514
1515 if (vp->v_usecount > 1) {
1516 VOP_UNLOCK(vp, 0, p);
1517 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
1518 return (EBUSY);
1519 }
1520
1521 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1522 #ifdef DEBUG
1523 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
1524 printf("ccdlookup: getattr error = %d\n", error);
1525 #endif
1526 VOP_UNLOCK(vp, 0, p);
1527 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
1528 return (error);
1529 }
1530
1531 /* XXX: eventually we should handle VREG, too. */
1532 if (va.va_type != VBLK) {
1533 VOP_UNLOCK(vp, 0, p);
1534 (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
1535 return (ENOTBLK);
1536 }
1537
1538 #ifdef DEBUG
1539 if (ccddebug & CCDB_VNODE)
1540 vprint("ccdlookup: vnode info", vp);
1541 #endif
1542
1543 VOP_UNLOCK(vp, 0, p);
1544 *vpp = vp;
1545 return (0);
1546 }
1547
1548 /*
1549 * Read the disklabel from the ccd. If one is not present, fake one
1550 * up.
1551 */
1552 void
ccdgetdisklabel(dev_t dev,struct ccd_softc * cs,struct disklabel * lp,struct cpu_disklabel * clp,int spoofonly)1553 ccdgetdisklabel(dev_t dev, struct ccd_softc *cs, struct disklabel *lp,
1554 struct cpu_disklabel *clp, int spoofonly)
1555 {
1556 struct ccdgeom *ccg = &cs->sc_geom;
1557 char *errstring;
1558
1559 bzero(lp, sizeof(*lp));
1560 bzero(clp, sizeof(*clp));
1561
1562 lp->d_secperunit = cs->sc_size;
1563 lp->d_secsize = ccg->ccg_secsize;
1564 lp->d_nsectors = ccg->ccg_nsectors;
1565 lp->d_ntracks = ccg->ccg_ntracks;
1566 lp->d_ncylinders = ccg->ccg_ncylinders;
1567 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1568 lp->d_rpm = ccg->ccg_rpm;
1569
1570 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1571 lp->d_type = DTYPE_CCD;
1572 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1573 lp->d_interleave = 1;
1574 lp->d_flags = 0;
1575
1576 lp->d_partitions[RAW_PART].p_offset = 0;
1577 lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1578 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1579 lp->d_npartitions = RAW_PART + 1;
1580
1581 lp->d_magic = DISKMAGIC;
1582 lp->d_magic2 = DISKMAGIC;
1583 lp->d_checksum = dkcksum(cs->sc_dkdev.dk_label);
1584
1585 /*
1586 * Call the generic disklabel extraction routine.
1587 */
1588 errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy,
1589 cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel, spoofonly);
1590 if (errstring)
1591 ccdmakedisklabel(cs);
1592
1593 #ifdef DEBUG
1594 /* It's actually extremely common to have unlabeled ccds. */
1595 if (ccddebug & CCDB_LABEL)
1596 if (errstring != NULL)
1597 printf("%s: %s\n", cs->sc_xname, errstring);
1598 #endif
1599 }
1600
1601 /*
1602 * Take care of things one might want to take care of in the event
1603 * that a disklabel isn't present.
1604 */
1605 void
ccdmakedisklabel(struct ccd_softc * cs)1606 ccdmakedisklabel(struct ccd_softc *cs)
1607 {
1608 struct disklabel *lp = cs->sc_dkdev.dk_label;
1609
1610 /*
1611 * For historical reasons, if there's no disklabel present
1612 * the raw partition must be marked FS_BSDFFS.
1613 */
1614 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1615
1616 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1617 }
1618
1619 /*
1620 * Wait interruptibly for an exclusive lock.
1621 *
1622 * XXX
1623 * Several drivers do this; it should be abstracted and made MP-safe.
1624 */
1625 int
ccdlock(struct ccd_softc * cs)1626 ccdlock(struct ccd_softc *cs)
1627 {
1628 int error;
1629
1630 while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1631 cs->sc_flags |= CCDF_WANTED;
1632 if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0)
1633 return (error);
1634 }
1635 cs->sc_flags |= CCDF_LOCKED;
1636 return (0);
1637 }
1638
1639 /*
1640 * Unlock and wake up any waiters.
1641 */
1642 void
ccdunlock(struct ccd_softc * cs)1643 ccdunlock(struct ccd_softc *cs)
1644 {
1645
1646 cs->sc_flags &= ~CCDF_LOCKED;
1647 if ((cs->sc_flags & CCDF_WANTED) != 0) {
1648 cs->sc_flags &= ~CCDF_WANTED;
1649 wakeup(cs);
1650 }
1651 }
1652
#ifdef DEBUG
/*
 * Debug helper: print an interleave table, one line per entry,
 * followed by that entry's disk indices.  The walk stops at the
 * first entry whose ii_ndisk is zero.
 */
void
printiinfo(struct ccdiinfo *ii)
{
	int entry, d;

	entry = 0;
	while (ii->ii_ndisk) {
		printf(" itab[%d]: #dk %d sblk %d soff %d",
		    entry, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
		for (d = 0; d < ii->ii_ndisk; d++)
			printf(" %d", ii->ii_index[d]);
		printf("\n");

		entry++;
		ii++;
	}
}
#endif
1668