1 /*-
2  * Copyright (c) 2002 Poul-Henning Kamp
3  * Copyright (c) 2002 Networks Associates Technology, Inc.
4  * All rights reserved.
5  *
6  * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7  * and NAI Labs, the Security Research Division of Network Associates, Inc.
8  * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9  * DARPA CHATS research program.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $FreeBSD: stable/9/sys/geom/bde/g_bde_work.c 172836 2007-10-20 23:23:23Z julian $
33  */
34 /*
35  * This source file contains the state-engine which makes things happen in the
36  * right order.
37  *
38  * Outline:
39  *   1) g_bde_start1()
40  *	Break the struct bio into multiple work packets one per zone.
41  *   2) g_bde_start2()
42  *	Setup the necessary sector buffers and start those read operations
43  *	which we can start at this time and put the item on the work-list.
44  *   3) g_bde_worker()
45  *	Scan the work-list for items which are ready for crypto processing
46  *	and call the matching crypto function in g_bde_crypt.c and schedule
47  *	any writes needed.  Read operations finish here by releasing the
48  *	sector buffers and delivering the original bio request.
49  *   4) g_bde_write_done()
50  *	Release sector buffers and deliver the original bio request.
51  *
52  * Because of the C-scope rules, the functions are almost perfectly in the
53  * opposite order in this source file.
54  *
55  * XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
56  * XXX: additional states to this state-engine.  Since no hardware available
57  * XXX: at this time has AES support, implementing this has been postponed
58  * XXX: until such time as it would result in a benefit.
59  */
60 
61 #include <sys/param.h>
62 #include <sys/bio.h>
63 #include <sys/lock.h>
64 #include <sys/mutex.h>
65 #include <sys/queue.h>
66 #include <sys/malloc.h>
67 #include <sys/systm.h>
68 #include <sys/kernel.h>
69 #include <sys/sysctl.h>
70 #include <sys/proc.h>
71 #include <sys/kthread.h>
72 
73 #include <crypto/rijndael/rijndael-api-fst.h>
74 #include <crypto/sha2/sha2.h>
75 #include <geom/geom.h>
76 #include <geom/bde/g_bde.h>
77 
78 static void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp);
79 static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len);
80 static void g_bde_release_keysector(struct g_bde_work *wp);
81 static struct g_bde_sector *g_bde_get_keysector(struct g_bde_work *wp);
82 static int g_bde_start_read(struct g_bde_sector *sp);
83 static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);
84 
85 /*
86  * Work item allocation.
87  *
88  * C++ would call these constructors and destructors.
89  */
90 static u_int g_bde_nwork;
91 SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");
92 
93 static MALLOC_DEFINE(M_GBDE, "gbde", "GBDE data structures");
94 
95 static struct g_bde_work *
g_bde_new_work(struct g_bde_softc * sc)96 g_bde_new_work(struct g_bde_softc *sc)
97 {
98 	struct g_bde_work *wp;
99 
100 	wp = malloc(sizeof *wp, M_GBDE, M_NOWAIT | M_ZERO);
101 	if (wp == NULL)
102 		return (wp);
103 	wp->state = SETUP;
104 	wp->softc = sc;
105 	g_bde_nwork++;
106 	sc->nwork++;
107 	TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
108 	return (wp);
109 }
110 
111 static void
g_bde_delete_work(struct g_bde_work * wp)112 g_bde_delete_work(struct g_bde_work *wp)
113 {
114 	struct g_bde_softc *sc;
115 
116 	sc = wp->softc;
117 	g_bde_nwork--;
118 	sc->nwork--;
119 	TAILQ_REMOVE(&sc->worklist, wp, list);
120 	free(wp, M_GBDE);
121 }
122 
/*
 * Sector buffer allocation.
 *
 * These two functions allocate and free variable-sized sector buffers.
 */
128 
129 static u_int g_bde_nsect;
130 SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");
131 
132 static void
g_bde_delete_sector(struct g_bde_softc * sc,struct g_bde_sector * sp)133 g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
134 {
135 
136 	g_bde_nsect--;
137 	sc->nsect--;
138 	if (sp->malloc)
139 		free(sp->data, M_GBDE);
140 	free(sp, M_GBDE);
141 }
142 
143 static struct g_bde_sector *
g_bde_new_sector(struct g_bde_work * wp,u_int len)144 g_bde_new_sector(struct g_bde_work *wp, u_int len)
145 {
146 	struct g_bde_sector *sp;
147 
148 	sp = malloc(sizeof *sp, M_GBDE, M_NOWAIT | M_ZERO);
149 	if (sp == NULL)
150 		return (sp);
151 	if (len > 0) {
152 		sp->data = malloc(len, M_GBDE, M_NOWAIT | M_ZERO);
153 		if (sp->data == NULL) {
154 			free(sp, M_GBDE);
155 			return (NULL);
156 		}
157 		sp->malloc = 1;
158 	}
159 	g_bde_nsect++;
160 	wp->softc->nsect++;
161 	sp->size = len;
162 	sp->softc = wp->softc;
163 	sp->ref = 1;
164 	sp->owner = wp;
165 	sp->offset = wp->so;
166 	sp->state = JUNK;
167 	return (sp);
168 }
169 
170 /*
171  * Skey sector cache.
172  *
173  * Nothing prevents two separate I/O requests from addressing the same zone
174  * and thereby needing the same skey sector.  We therefore need to sequence
175  * I/O operations to the skey sectors.  A certain amount of caching is also
176  * desirable, although the extent of benefit from this is not at this point
177  * determined.
178  *
179  * XXX: GEOM may be able to grow a generic caching facility at some point
180  * XXX: to support such needs.
181  */
182 
183 static u_int g_bde_ncache;
184 SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");
185 
186 static void
g_bde_purge_one_sector(struct g_bde_softc * sc,struct g_bde_sector * sp)187 g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
188 {
189 
190 	g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
191 	if (sp->ref != 0)
192 		return;
193 	TAILQ_REMOVE(&sc->freelist, sp, list);
194 	g_bde_ncache--;
195 	sc->ncache--;
196 	bzero(sp->data, sp->size);
197 	g_bde_delete_sector(sc, sp);
198 }
199 
200 static struct g_bde_sector *
g_bde_get_keysector(struct g_bde_work * wp)201 g_bde_get_keysector(struct g_bde_work *wp)
202 {
203 	struct g_bde_sector *sp;
204 	struct g_bde_softc *sc;
205 	off_t offset;
206 
207 	offset = wp->kso;
208 	g_trace(G_T_TOPOLOGY, "g_bde_get_keysector(%p, %jd)", wp, (intmax_t)offset);
209 	sc = wp->softc;
210 
211 	if (malloc_last_fail() < g_bde_ncache)
212 		g_bde_purge_sector(sc, -1);
213 
214 	sp = TAILQ_FIRST(&sc->freelist);
215 	if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
216 		g_bde_purge_one_sector(sc, sp);
217 
218 	TAILQ_FOREACH(sp, &sc->freelist, list) {
219 		if (sp->offset == offset)
220 			break;
221 	}
222 	if (sp != NULL) {
223 		sp->ref++;
224 		KASSERT(sp->offset == offset, ("wrong offset"));
225 		KASSERT(sp->softc == wp->softc, ("wrong softc"));
226 		if (sp->ref == 1)
227 			sp->owner = wp;
228 	} else {
229 		if (malloc_last_fail() < g_bde_ncache) {
230 			TAILQ_FOREACH(sp, &sc->freelist, list)
231 				if (sp->ref == 0)
232 					break;
233 		}
234 		if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
235 			sp = TAILQ_FIRST(&sc->freelist);
236 		if (sp != NULL && sp->ref > 0)
237 			sp = NULL;
238 		if (sp == NULL) {
239 			sp = g_bde_new_sector(wp, sc->sectorsize);
240 			if (sp != NULL) {
241 				g_bde_ncache++;
242 				sc->ncache++;
243 				TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
244 				sp->malloc = 2;
245 			}
246 		}
247 		if (sp != NULL) {
248 			sp->offset = offset;
249 			sp->softc = wp->softc;
250 			sp->ref = 1;
251 			sp->owner = wp;
252 			sp->state = JUNK;
253 			sp->error = 0;
254 		}
255 	}
256 	if (sp != NULL) {
257 		TAILQ_REMOVE(&sc->freelist, sp, list);
258 		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
259 		sp->used = time_uptime;
260 	}
261 	wp->ksp = sp;
262 	return(sp);
263 }
264 
265 static void
g_bde_release_keysector(struct g_bde_work * wp)266 g_bde_release_keysector(struct g_bde_work *wp)
267 {
268 	struct g_bde_softc *sc;
269 	struct g_bde_work *wp2;
270 	struct g_bde_sector *sp;
271 
272 	sp = wp->ksp;
273 	g_trace(G_T_TOPOLOGY, "g_bde_release_keysector(%p)", sp);
274 	KASSERT(sp->malloc == 2, ("Wrong sector released"));
275 	sc = sp->softc;
276 	KASSERT(sc != NULL, ("NULL sp->softc"));
277 	KASSERT(wp == sp->owner, ("Releasing, not owner"));
278 	sp->owner = NULL;
279 	wp->ksp = NULL;
280 	sp->ref--;
281 	if (sp->ref > 0) {
282 		TAILQ_REMOVE(&sc->freelist, sp, list);
283 		TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
284 		TAILQ_FOREACH(wp2, &sc->worklist, list) {
285 			if (wp2->ksp == sp) {
286 				KASSERT(wp2 != wp, ("Self-reowning"));
287 				sp->owner = wp2;
288 				wakeup(sp->softc);
289 				break;
290 			}
291 		}
292 		KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
293 	} else if (sp->error != 0) {
294 		sp->offset = ~0;
295 		sp->error = 0;
296 		sp->state = JUNK;
297 	}
298 	TAILQ_REMOVE(&sc->freelist, sp, list);
299 	TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
300 }
301 
302 static void
g_bde_purge_sector(struct g_bde_softc * sc,int fraction)303 g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
304 {
305 	struct g_bde_sector *sp;
306 	int n;
307 
308 	g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
309 	if (fraction > 0)
310 		n = sc->ncache / fraction + 1;
311 	else
312 		n = g_bde_ncache - malloc_last_fail();
313 	if (n < 0)
314 		return;
315 	if (n > sc->ncache)
316 		n = sc->ncache;
317 	while(n--) {
318 		TAILQ_FOREACH(sp, &sc->freelist, list) {
319 			if (sp->ref != 0)
320 				continue;
321 			TAILQ_REMOVE(&sc->freelist, sp, list);
322 			g_bde_ncache--;
323 			sc->ncache--;
324 			bzero(sp->data, sp->size);
325 			g_bde_delete_sector(sc, sp);
326 			break;
327 		}
328 	}
329 }
330 
331 static struct g_bde_sector *
g_bde_read_keysector(struct g_bde_softc * sc,struct g_bde_work * wp)332 g_bde_read_keysector(struct g_bde_softc *sc, struct g_bde_work *wp)
333 {
334 	struct g_bde_sector *sp;
335 
336 	g_trace(G_T_TOPOLOGY, "g_bde_read_keysector(%p)", wp);
337 	sp = g_bde_get_keysector(wp);
338 	if (sp == NULL) {
339 		g_bde_purge_sector(sc, -1);
340 		sp = g_bde_get_keysector(wp);
341 	}
342 	if (sp == NULL)
343 		return (sp);
344 	if (sp->owner != wp)
345 		return (sp);
346 	if (sp->state == VALID)
347 		return (sp);
348 	if (g_bde_start_read(sp) == 0)
349 		return (sp);
350 	g_bde_release_keysector(wp);
351 	return (NULL);
352 }
353 
354 /*
355  * Contribute to the completion of the original bio request.
356  *
357  * We have no simple way to tell how many bits the original bio request has
358  * been segmented into, so the easiest way to determine when we can deliver
359  * it is to keep track of the number of bytes we have completed.  We keep
360  * track of any errors underway and latch onto the first one.
361  *
362  * We always report "nothing done" in case of error, because random bits here
363  * and there may be completed and returning a number of completed bytes does
364  * not convey any useful information about which bytes they were.  If some
365  * piece of broken code somewhere interprets this to mean that nothing has
366  * changed on the underlying media they deserve the lossage headed for them.
367  *
368  * A single mutex per g_bde instance is used to prevent contention.
369  */
370 
371 static void
g_bde_contribute(struct bio * bp,off_t bytes,int error)372 g_bde_contribute(struct bio *bp, off_t bytes, int error)
373 {
374 
375 	g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
376 	     bp, (intmax_t)bytes, error);
377 	if (bp->bio_error == 0)
378 		bp->bio_error = error;
379 	bp->bio_completed += bytes;
380 	KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
381 	if (bp->bio_completed == bp->bio_length) {
382 		if (bp->bio_error != 0)
383 			bp->bio_completed = 0;
384 		g_io_deliver(bp, bp->bio_error);
385 	}
386 }
387 
388 /*
389  * This is the common case "we're done with this work package" function
390  */
391 
392 static void
g_bde_work_done(struct g_bde_work * wp,int error)393 g_bde_work_done(struct g_bde_work *wp, int error)
394 {
395 
396 	g_bde_contribute(wp->bp, wp->length, error);
397 	if (wp->sp != NULL)
398 		g_bde_delete_sector(wp->softc, wp->sp);
399 	if (wp->ksp != NULL)
400 		g_bde_release_keysector(wp);
401 	g_bde_delete_work(wp);
402 }
403 
404 /*
405  * A write operation has finished.  When we have all expected cows in the
406  * barn close the door and call it a day.
407  */
408 
409 static void
g_bde_write_done(struct bio * bp)410 g_bde_write_done(struct bio *bp)
411 {
412 	struct g_bde_sector *sp;
413 	struct g_bde_work *wp;
414 	struct g_bde_softc *sc;
415 
416 	sp = bp->bio_caller1;
417 	sc = bp->bio_caller2;
418 	mtx_lock(&sc->worklist_mutex);
419 	KASSERT(sp != NULL, ("NULL sp"));
420 	KASSERT(sc != NULL, ("NULL sc"));
421 	KASSERT(sp->owner != NULL, ("NULL sp->owner"));
422 	g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
423 	if (bp->bio_error == 0 && bp->bio_completed != sp->size)
424 		bp->bio_error = EIO;
425 	sp->error = bp->bio_error;
426 	g_destroy_bio(bp);
427 	wp = sp->owner;
428 	if (wp->error == 0)
429 		wp->error = sp->error;
430 
431 	if (wp->bp->bio_cmd == BIO_DELETE) {
432 		KASSERT(sp == wp->sp, ("trashed delete op"));
433 		g_bde_work_done(wp, wp->error);
434 		mtx_unlock(&sc->worklist_mutex);
435 		return;
436 	}
437 
438 	KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
439 	KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
440 	if (wp->sp == sp) {
441 		g_bde_delete_sector(sc, wp->sp);
442 		wp->sp = NULL;
443 	} else {
444 		sp->state = VALID;
445 	}
446 	if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID)
447 		g_bde_work_done(wp, wp->error);
448 	mtx_unlock(&sc->worklist_mutex);
449 	return;
450 }
451 
452 /*
453  * Send a write request for the given sector down the pipeline.
454  */
455 
456 static int
g_bde_start_write(struct g_bde_sector * sp)457 g_bde_start_write(struct g_bde_sector *sp)
458 {
459 	struct bio *bp;
460 	struct g_bde_softc *sc;
461 
462 	g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
463 	sc = sp->softc;
464 	KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
465 	KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
466 	bp = g_new_bio();
467 	if (bp == NULL)
468 		return (ENOMEM);
469 	bp->bio_cmd = BIO_WRITE;
470 	bp->bio_offset = sp->offset;
471 	bp->bio_data = sp->data;
472 	bp->bio_length = sp->size;
473 	bp->bio_done = g_bde_write_done;
474 	bp->bio_caller1 = sp;
475 	bp->bio_caller2 = sc;
476 	sp->state = IO;
477 	g_io_request(bp, sc->consumer);
478 	return(0);
479 }
480 
481 /*
482  * A read operation has finished.  Mark the sector no longer iobusy and
483  * wake up the worker thread and let it do its thing.
484  */
485 
486 static void
g_bde_read_done(struct bio * bp)487 g_bde_read_done(struct bio *bp)
488 {
489 	struct g_bde_sector *sp;
490 	struct g_bde_softc *sc;
491 
492 	sp = bp->bio_caller1;
493 	g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
494 	sc = bp->bio_caller2;
495 	mtx_lock(&sc->worklist_mutex);
496 	if (bp->bio_error == 0 && bp->bio_completed != sp->size)
497 		bp->bio_error = EIO;
498 	sp->error = bp->bio_error;
499 	if (sp->error == 0)
500 		sp->state = VALID;
501 	else
502 		sp->state = JUNK;
503 	wakeup(sc);
504 	g_destroy_bio(bp);
505 	mtx_unlock(&sc->worklist_mutex);
506 }
507 
508 /*
509  * Send a read request for the given sector down the pipeline.
510  */
511 
512 static int
g_bde_start_read(struct g_bde_sector * sp)513 g_bde_start_read(struct g_bde_sector *sp)
514 {
515 	struct bio *bp;
516 	struct g_bde_softc *sc;
517 
518 	g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
519 	sc = sp->softc;
520 	KASSERT(sc != NULL, ("Null softc in sp %p", sp));
521 	bp = g_new_bio();
522 	if (bp == NULL)
523 		return (ENOMEM);
524 	bp->bio_cmd = BIO_READ;
525 	bp->bio_offset = sp->offset;
526 	bp->bio_data = sp->data;
527 	bp->bio_length = sp->size;
528 	bp->bio_done = g_bde_read_done;
529 	bp->bio_caller1 = sp;
530 	bp->bio_caller2 = sc;
531 	sp->state = IO;
532 	g_io_request(bp, sc->consumer);
533 	return(0);
534 }
535 
536 /*
537  * The worker thread.
538  *
539  * The up/down path of GEOM is not allowed to sleep or do any major work
540  * so we use this thread to do the actual crypto operations and to push
541  * the state engine onwards.
542  *
543  * XXX: if we switch to the src/sys/opencrypt hardware assisted encryption
544  * XXX: using a thread here is probably not needed.
545  */
546 
547 void
g_bde_worker(void * arg)548 g_bde_worker(void *arg)
549 {
550 	struct g_bde_softc *sc;
551 	struct g_bde_work *wp, *twp;
552 	struct g_geom *gp;
553 	int restart, error;
554 
555 	gp = arg;
556 	sc = gp->softc;
557 
558 	mtx_lock(&sc->worklist_mutex);
559 	for (;;) {
560 		restart = 0;
561 		g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
562 		TAILQ_FOREACH_SAFE(wp, &sc->worklist, list, twp) {
563 			KASSERT(wp != NULL, ("NULL wp"));
564 			KASSERT(wp->softc != NULL, ("NULL wp->softc"));
565 			if (wp->state != WAIT)
566 				continue;	/* Not interesting here */
567 
568 			KASSERT(wp->bp != NULL, ("NULL wp->bp"));
569 			KASSERT(wp->sp != NULL, ("NULL wp->sp"));
570 
571 			if (wp->ksp != NULL) {
572 				if (wp->ksp->owner != wp)
573 					continue;
574 				if (wp->ksp->state == IO)
575 					continue;
576 				KASSERT(wp->ksp->state == VALID,
577 				    ("Illegal sector state (%d)",
578 				    wp->ksp->state));
579 			}
580 
581 			if (wp->bp->bio_cmd == BIO_READ && wp->sp->state == IO)
582 				continue;
583 
584 			if (wp->ksp != NULL && wp->ksp->error != 0) {
585 				g_bde_work_done(wp, wp->ksp->error);
586 				continue;
587 			}
588 			switch(wp->bp->bio_cmd) {
589 			case BIO_READ:
590 				if (wp->ksp == NULL) {
591 					KASSERT(wp->error != 0,
592 					    ("BIO_READ, no ksp and no error"));
593 					g_bde_work_done(wp, wp->error);
594 					break;
595 				}
596 				if (wp->sp->error != 0) {
597 					g_bde_work_done(wp, wp->sp->error);
598 					break;
599 				}
600 				mtx_unlock(&sc->worklist_mutex);
601 				g_bde_crypt_read(wp);
602 				mtx_lock(&sc->worklist_mutex);
603 				restart++;
604 				g_bde_work_done(wp, wp->sp->error);
605 				break;
606 			case BIO_WRITE:
607 				wp->state = FINISH;
608 				KASSERT(wp->sp->owner == wp,
609 				    ("Write not owner sp"));
610 				KASSERT(wp->ksp->owner == wp,
611 				    ("Write not owner ksp"));
612 				mtx_unlock(&sc->worklist_mutex);
613 				g_bde_crypt_write(wp);
614 				mtx_lock(&sc->worklist_mutex);
615 				restart++;
616 				error = g_bde_start_write(wp->sp);
617 				if (error) {
618 					g_bde_work_done(wp, error);
619 					break;
620 				}
621 				error = g_bde_start_write(wp->ksp);
622 				if (wp->error != 0)
623 					wp->error = error;
624 				break;
625 			case BIO_DELETE:
626 				wp->state = FINISH;
627 				mtx_unlock(&sc->worklist_mutex);
628 				g_bde_crypt_delete(wp);
629 				mtx_lock(&sc->worklist_mutex);
630 				restart++;
631 				g_bde_start_write(wp->sp);
632 				break;
633 			}
634 			if (restart)
635 				break;
636 		}
637 		if (!restart) {
638 			/*
639 			 * We don't look for our death-warrant until we are
640 			 * idle.  Shouldn't make a difference in practice.
641 			 */
642 			if (sc->dead)
643 				break;
644 			g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
645 			error = msleep(sc, &sc->worklist_mutex,
646 			    PRIBIO, "-", hz);
647 			if (error == EWOULDBLOCK) {
648 				/*
649 				 * Lose our skey cache in an orderly fashion.
650 				 * The exact rate can be tuned to be less
651 				 * aggressive if this is desirable.  10% per
652 				 * second means that the cache is gone in a
653 				 * few minutes.
654 				 */
655 				g_bde_purge_sector(sc, 10);
656 			}
657 		}
658 	}
659 	g_trace(G_T_TOPOLOGY, "g_bde_worker die");
660 	g_bde_purge_sector(sc, 1);
661 	KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
662 	KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
663 	KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
664 	mtx_unlock(&sc->worklist_mutex);
665 	sc->dead = 2;
666 	wakeup(sc);
667 	kproc_exit(0);
668 }
669 
670 /*
671  * g_bde_start1 has chopped the incoming request up so all the requests
672  * we see here are inside a single zone.  Map the data and key locations
673  * grab the buffers we need and fire off the first volley of read requests.
674  */
675 
676 static void
g_bde_start2(struct g_bde_work * wp)677 g_bde_start2(struct g_bde_work *wp)
678 {
679 	struct g_bde_softc *sc;
680 
681 	KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
682 	KASSERT(wp->softc != NULL, ("NULL wp->softc"));
683 	g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
684 	sc = wp->softc;
685 	switch (wp->bp->bio_cmd) {
686 	case BIO_READ:
687 		wp->sp = g_bde_new_sector(wp, 0);
688 		if (wp->sp == NULL) {
689 			g_bde_work_done(wp, ENOMEM);
690 			return;
691 		}
692 		wp->sp->size = wp->length;
693 		wp->sp->data = wp->data;
694 		if (g_bde_start_read(wp->sp) != 0) {
695 			g_bde_work_done(wp, ENOMEM);
696 			return;
697 		}
698 		g_bde_read_keysector(sc, wp);
699 		if (wp->ksp == NULL)
700 			wp->error = ENOMEM;
701 		break;
702 	case BIO_DELETE:
703 		wp->sp = g_bde_new_sector(wp, wp->length);
704 		if (wp->sp == NULL) {
705 			g_bde_work_done(wp, ENOMEM);
706 			return;
707 		}
708 		break;
709 	case BIO_WRITE:
710 		wp->sp = g_bde_new_sector(wp, wp->length);
711 		if (wp->sp == NULL) {
712 			g_bde_work_done(wp, ENOMEM);
713 			return;
714 		}
715 		g_bde_read_keysector(sc, wp);
716 		if (wp->ksp == NULL) {
717 			g_bde_work_done(wp, ENOMEM);
718 			return;
719 		}
720 		break;
721 	default:
722 		KASSERT(0 == 1,
723 		    ("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
724 	}
725 
726 	wp->state = WAIT;
727 	wakeup(sc);
728 }
729 
730 /*
731  * Create a sequence of work structures, and have g_bde_map_sector() determine
732  * how long they each can be.  Feed them to g_bde_start2().
733  */
734 
735 void
g_bde_start1(struct bio * bp)736 g_bde_start1(struct bio *bp)
737 {
738 	struct g_bde_softc *sc;
739 	struct g_bde_work *wp;
740 	off_t done;
741 
742 	sc = bp->bio_to->geom->softc;
743 	bp->bio_driver1 = sc;
744 
745 	mtx_lock(&sc->worklist_mutex);
746 	for(done = 0; done < bp->bio_length; ) {
747 		wp = g_bde_new_work(sc);
748 		if (wp != NULL) {
749 			wp->bp = bp;
750 			wp->offset = bp->bio_offset + done;
751 			wp->data = bp->bio_data + done;
752 			wp->length = bp->bio_length - done;
753 			g_bde_map_sector(wp);
754 			done += wp->length;
755 			g_bde_start2(wp);
756 		}
757 		if (wp == NULL || bp->bio_error != 0) {
758 			g_bde_contribute(bp, bp->bio_length - done, ENOMEM);
759 			break;
760 		}
761 	}
762 	mtx_unlock(&sc->worklist_mutex);
763 	return;
764 }
765