1 /* $NetBSD: udf_strat_sequential.c,v 1.20 2023/06/27 09:58:50 reinoud Exp $ */
2 
3 /*
4  * Copyright (c) 2006, 2008 Reinoud Zandijk
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  */
28 
29 #include <sys/cdefs.h>
30 #ifndef lint
31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_sequential.c,v 1.20 2023/06/27 09:58:50 reinoud Exp $");
32 #endif /* not lint */
33 
34 
35 #if defined(_KERNEL_OPT)
36 #include "opt_compat_netbsd.h"
37 #endif
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/sysctl.h>
42 #include <sys/namei.h>
43 #include <sys/proc.h>
44 #include <sys/kernel.h>
45 #include <sys/vnode.h>
46 #include <miscfs/genfs/genfs_node.h>
47 #include <sys/mount.h>
48 #include <sys/buf.h>
49 #include <sys/file.h>
50 #include <sys/device.h>
51 #include <sys/disklabel.h>
52 #include <sys/ioctl.h>
53 #include <sys/malloc.h>
54 #include <sys/dirent.h>
55 #include <sys/stat.h>
56 #include <sys/conf.h>
57 #include <sys/kauth.h>
58 #include <sys/kthread.h>
59 #include <dev/clock_subr.h>
60 
61 #include <fs/udf/ecma167-udf.h>
62 #include <fs/udf/udf_mount.h>
63 
64 #include "udf.h"
65 #include "udf_subr.h"
66 #include "udf_bswap.h"
67 
68 
/*
 * Convenience casts from the generic containers to our private data:
 *   VTOI(vnode) yields the udf_node stored in the vnode's v_data,
 *   PRIV(ump)   yields the strategy private data stored in the mount.
 * The arguments are parenthesised so that any pointer expression, not just a
 * plain identifier, can be passed safely.
 */
#define VTOI(vnode) ((struct udf_node *) (vnode)->v_data)
#define PRIV(ump) ((struct strat_private *) (ump)->strategy_private)
71 
72 /* --------------------------------------------------------------------- */
73 
/*
 * Identifiers of the scheduler's buffer queues; they are used as index into
 * the queues[] and last_queued[] arrays of struct strat_private.
 */
#define UDF_SHED_READING      0
#define UDF_SHED_WRITING      1
#define UDF_SHED_SEQWRITING   2

/* number of buffer queues */
#define UDF_SHED_MAX 3
80 
81 struct strat_private {
82           struct pool                    desc_pool;                   /* node descriptors */
83 
84           lwp_t                         *queue_lwp;
85           kcondvar_t                     discstrat_cv;                /* to wait on       */
86           kmutex_t             discstrat_mutex;   /* disc strategy    */
87 
88           int                            thread_running;    /* thread control */
89           int                            run_thread;                  /* thread control */
90           int                            thread_finished;   /* thread control */
91 
92           int                            sync_req;                    /* thread control */
93           int                            cur_queue;
94 
95           struct disk_strategy           old_strategy_setting;
96           struct bufq_state   *queues[UDF_SHED_MAX];
97           struct timespec                last_queued[UDF_SHED_MAX];
98 };
99 
100 
101 /* --------------------------------------------------------------------- */
102 
103 static void
udf_wr_nodedscr_callback(struct buf * buf)104 udf_wr_nodedscr_callback(struct buf *buf)
105 {
106           struct udf_node *udf_node;
107 
108           KASSERT(buf);
109           KASSERT(buf->b_data);
110 
111           /* called when write action is done */
112           DPRINTF(WRITE, ("udf_wr_nodedscr_callback(): node written out\n"));
113 
114           udf_node = VTOI(buf->b_vp);
115           if (udf_node == NULL) {
116                     putiobuf(buf);
117                     printf("udf_wr_node_callback: NULL node?\n");
118                     return;
119           }
120 
121           /* XXX right flags to mark dirty again on error? */
122           if (buf->b_error) {
123                     udf_node->i_flags |= IN_MODIFIED | IN_ACCESSED;
124                     /* XXX TODO reschedule on error */
125           }
126 
127           /* decrement outstanding_nodedscr */
128           KASSERT(udf_node->outstanding_nodedscr >= 1);
129           udf_node->outstanding_nodedscr--;
130           if (udf_node->outstanding_nodedscr == 0) {
131                     /* first unlock the node */
132                     UDF_UNLOCK_NODE(udf_node, 0);
133                     cv_broadcast(&udf_node->node_lock);
134           }
135 
136           putiobuf(buf);
137 }
138 
139 /* --------------------------------------------------------------------- */
140 
141 static int
udf_create_logvol_dscr_seq(struct udf_strat_args * args)142 udf_create_logvol_dscr_seq(struct udf_strat_args *args)
143 {
144           union dscrptr   **dscrptr = &args->dscr;
145           struct udf_mount *ump = args->ump;
146           struct strat_private *priv = PRIV(ump);
147           uint32_t lb_size;
148 
149           lb_size = udf_rw32(ump->logical_vol->lb_size);
150           *dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
151           memset(*dscrptr, 0, lb_size);
152 
153           return 0;
154 }
155 
156 
157 static void
udf_free_logvol_dscr_seq(struct udf_strat_args * args)158 udf_free_logvol_dscr_seq(struct udf_strat_args *args)
159 {
160           union dscrptr    *dscr = args->dscr;
161           struct udf_mount *ump  = args->ump;
162           struct strat_private *priv = PRIV(ump);
163 
164           pool_put(&priv->desc_pool, dscr);
165 }
166 
167 
168 static int
udf_read_logvol_dscr_seq(struct udf_strat_args * args)169 udf_read_logvol_dscr_seq(struct udf_strat_args *args)
170 {
171           union dscrptr   **dscrptr = &args->dscr;
172           union dscrptr    *tmpdscr;
173           struct udf_mount *ump = args->ump;
174           struct long_ad   *icb = args->icb;
175           struct strat_private *priv = PRIV(ump);
176           uint32_t lb_size;
177           uint32_t sector, dummy;
178           int error;
179 
180           lb_size = udf_rw32(ump->logical_vol->lb_size);
181 
182           error = udf_translate_vtop(ump, icb, &sector, &dummy);
183           if (error)
184                     return error;
185 
186           /* try to read in fe/efe */
187           error = udf_read_phys_dscr(ump, sector, M_UDFTEMP, &tmpdscr);
188           if (error)
189                     return error;
190 
191           *dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
192           memcpy(*dscrptr, tmpdscr, lb_size);
193           free(tmpdscr, M_UDFTEMP);
194 
195           return 0;
196 }
197 
198 
199 static int
udf_write_logvol_dscr_seq(struct udf_strat_args * args)200 udf_write_logvol_dscr_seq(struct udf_strat_args *args)
201 {
202           union dscrptr    *dscr     = args->dscr;
203           struct udf_mount *ump      = args->ump;
204           struct udf_node  *udf_node = args->udf_node;
205           struct long_ad   *icb      = args->icb;
206           int               waitfor  = args->waitfor;
207           uint32_t logsectornr, sectornr, dummy;
208           int error, vpart;
209 
210           /*
211            * we have to decide if we write it out sequential or at its fixed
212            * position by examining the partition its (to be) written on.
213            */
214           vpart       = udf_rw16(udf_node->loc.loc.part_num);
215           logsectornr = udf_rw32(icb->loc.lb_num);
216           sectornr    = 0;
217           if (ump->vtop_tp[vpart] != UDF_VTOP_TYPE_VIRT) {
218                     error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
219                     if (error)
220                               goto out;
221           }
222 
223           if (waitfor) {
224                     DPRINTF(WRITE, ("udf_write_logvol_dscr: sync write\n"));
225 
226                     error = udf_write_phys_dscr_sync(ump, udf_node, UDF_C_NODE,
227                               dscr, sectornr, logsectornr);
228           } else {
229                     DPRINTF(WRITE, ("udf_write_logvol_dscr: no wait, async write\n"));
230 
231                     error = udf_write_phys_dscr_async(ump, udf_node, UDF_C_NODE,
232                               dscr, sectornr, logsectornr, udf_wr_nodedscr_callback);
233                     /* will be UNLOCKED in call back */
234                     return error;
235           }
236 out:
237           udf_node->outstanding_nodedscr--;
238           if (udf_node->outstanding_nodedscr == 0) {
239                     UDF_UNLOCK_NODE(udf_node, 0);
240                     cv_broadcast(&udf_node->node_lock);
241           }
242 
243           return error;
244 }
245 
246 /* --------------------------------------------------------------------- */
247 
/*
 * Main file-system specific scheduler. Due to the nature of optical media,
 * scheduling can't be performed in the traditional way. Most OS
 * implementations I've seen thus read or write a file atomically, giving all
 * kinds of side effects.
 *
 * This implementation uses a kernel thread to schedule the queued requests in
 * such a way that is semi-optimal for optical media; this means approximately
 * (R*|(Wr*|Ws*))* since switching between reading and writing is expensive in
 * time.
 */
259 
260 static void
udf_queuebuf_seq(struct udf_strat_args * args)261 udf_queuebuf_seq(struct udf_strat_args *args)
262 {
263           struct udf_mount *ump = args->ump;
264           struct buf *nestbuf = args->nestbuf;
265           struct strat_private *priv = PRIV(ump);
266           int queue;
267           int what;
268 
269           KASSERT(ump);
270           KASSERT(nestbuf);
271           KASSERT(nestbuf->b_iodone == nestiobuf_iodone);
272 
273           what = nestbuf->b_udf_c_type;
274           queue = UDF_SHED_READING;
275           if ((nestbuf->b_flags & B_READ) == 0) {
276                     /* writing */
277                     queue = UDF_SHED_SEQWRITING;
278                     if (what == UDF_C_ABSOLUTE)
279                               queue = UDF_SHED_WRITING;
280           }
281 
282           /* use our own scheduler lists for more complex scheduling */
283           mutex_enter(&priv->discstrat_mutex);
284                     bufq_put(priv->queues[queue], nestbuf);
285                     vfs_timestamp(&priv->last_queued[queue]);
286           mutex_exit(&priv->discstrat_mutex);
287 
288           /* signal our thread that there might be something to do */
289           cv_signal(&priv->discstrat_cv);
290 }
291 
292 /* --------------------------------------------------------------------- */
293 
294 static void
udf_sync_caches_seq(struct udf_strat_args * args)295 udf_sync_caches_seq(struct udf_strat_args *args)
296 {
297           struct udf_mount *ump = args->ump;
298           struct strat_private *priv = PRIV(ump);
299 
300           /* we might be called during unmount inadvertedly, be on safe side */
301           if (!priv)
302                     return;
303 
304           /* signal our thread that there might be something to do */
305           priv->sync_req = 1;
306           cv_signal(&priv->discstrat_cv);
307 
308           mutex_enter(&priv->discstrat_mutex);
309                     while (priv->sync_req) {
310                               cv_timedwait(&priv->discstrat_cv,
311                                         &priv->discstrat_mutex, hz/8);
312                     }
313           mutex_exit(&priv->discstrat_mutex);
314 }
315 
316 /* --------------------------------------------------------------------- */
317 
318 /* TODO convert to lb_size */
319 static void
udf_VAT_mapping_update(struct udf_mount * ump,struct buf * buf,uint32_t lb_map)320 udf_VAT_mapping_update(struct udf_mount *ump, struct buf *buf, uint32_t lb_map)
321 {
322           union dscrptr    *fdscr = (union dscrptr *) buf->b_data;
323           struct vnode     *vp = buf->b_vp;
324           struct udf_node  *udf_node = VTOI(vp);
325           uint32_t lb_num;
326           uint32_t udf_rw32_lbmap;
327           int c_type = buf->b_udf_c_type;
328           int error;
329 
330           /* only interested when we're using a VAT */
331           KASSERT(ump->vat_node);
332           KASSERT(ump->vtop_alloc[ump->node_part] == UDF_ALLOC_VAT);
333 
334           /* only nodes are recorded in the VAT */
335           /* NOTE: and the fileset descriptor (FIXME ?) */
336           if (c_type != UDF_C_NODE)
337                     return;
338 
339           udf_rw32_lbmap = udf_rw32(lb_map);
340 
341           /* if we're the VAT itself, only update our assigned sector number */
342           if (udf_node == ump->vat_node) {
343                     fdscr->tag.tag_loc = udf_rw32_lbmap;
344                     udf_validate_tag_sum(fdscr);
345                     DPRINTF(TRANSLATE, ("VAT assigned to sector %u\n",
346                               udf_rw32(udf_rw32_lbmap)));
347                     /* no use mapping the VAT node in the VAT */
348                     return;
349           }
350 
351           /* record new position in VAT file */
352           lb_num = udf_rw32(fdscr->tag.tag_loc);
353 
354           /* lb_num = udf_rw32(udf_node->write_loc.loc.lb_num); */
355 
356           DPRINTF(TRANSLATE, ("VAT entry change (log %u -> phys %u)\n",
357                               lb_num, lb_map));
358 
359           /* VAT should be the longer than this write, can't go wrong */
360           KASSERT(lb_num <= ump->vat_entries);
361 
362           mutex_enter(&ump->allocate_mutex);
363           error = udf_vat_write(ump->vat_node,
364                               (uint8_t *) &udf_rw32_lbmap, 4,
365                               ump->vat_offset + lb_num * 4);
366           mutex_exit(&ump->allocate_mutex);
367 
368           if (error)
369                     panic( "udf_VAT_mapping_update: HELP! i couldn't "
370                               "write in the VAT file ?\n");
371 }
372 
373 
374 static void
udf_issue_buf(struct udf_mount * ump,int queue,struct buf * buf)375 udf_issue_buf(struct udf_mount *ump, int queue, struct buf *buf)
376 {
377           union dscrptr *dscr;
378           struct long_ad *node_ad_cpy;
379           struct part_desc *pdesc;
380           uint64_t *lmapping, *lmappos;
381           uint32_t sectornr, bpos;
382           uint32_t ptov;
383           uint16_t vpart_num;
384           uint8_t *fidblk;
385           int sector_size = ump->discinfo.sector_size;
386           int blks = sector_size / DEV_BSIZE;
387           int len, buf_len;
388 
389           /* if reading, just pass to the device's STRATEGY */
390           if (queue == UDF_SHED_READING) {
391                     DPRINTF(SHEDULE, ("\nudf_issue_buf READ %p : sector %d type %d,"
392                               "b_resid %d, b_bcount %d, b_bufsize %d\n",
393                               buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
394                               buf->b_resid, buf->b_bcount, buf->b_bufsize));
395                     VOP_STRATEGY(ump->devvp, buf);
396                     return;
397           }
398 
399           if (queue == UDF_SHED_WRITING) {
400                     DPRINTF(SHEDULE, ("\nudf_issue_buf WRITE %p : sector %d "
401                               "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
402                               buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
403                               buf->b_resid, buf->b_bcount, buf->b_bufsize));
404                     KASSERT(buf->b_udf_c_type == UDF_C_ABSOLUTE);
405 
406                     // udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
407                     VOP_STRATEGY(ump->devvp, buf);
408                     return;
409           }
410 
411           KASSERT(queue == UDF_SHED_SEQWRITING);
412           DPRINTF(SHEDULE, ("\nudf_issue_buf SEQWRITE %p : sector XXXX "
413                     "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
414                     buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
415                     buf->b_bufsize));
416 
417           /*
418            * Buffers should not have been allocated to disc addresses yet on
419            * this queue. Note that a buffer can get multiple extents allocated.
420            *
421            * lmapping contains lb_num relative to base partition.
422            */
423           lmapping    = ump->la_lmapping;
424           node_ad_cpy = ump->la_node_ad_cpy;
425 
426           /* logically allocate buf and map it in the file */
427           udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num);
428 
429           /*
430            * NOTE We are using the knowledge here that sequential media will
431            * always be mapped linearly. Thus no use to explicitly translate the
432            * lmapping list.
433            */
434 
435           /* calculate offset from physical base partition */
436           pdesc = ump->partitions[ump->vtop[vpart_num]];
437           ptov  = udf_rw32(pdesc->start_loc);
438 
439           /* set buffers blkno to the physical block number */
440           buf->b_blkno = (*lmapping + ptov) * blks;
441 
442           /* fixate floating descriptors */
443           if (buf->b_udf_c_type == UDF_C_FLOAT_DSCR) {
444                     /* set our tag location to the absolute position */
445                     dscr = (union dscrptr *) buf->b_data;
446                     dscr->tag.tag_loc = udf_rw32(*lmapping + ptov);
447                     udf_validate_tag_and_crc_sums(dscr);
448           }
449 
450           /* update mapping in the VAT */
451           if (buf->b_udf_c_type == UDF_C_NODE) {
452                     udf_VAT_mapping_update(ump, buf, *lmapping);
453                     udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
454           }
455 
456           /* if we have FIDs, fixup using the new allocation table */
457           if (buf->b_udf_c_type == UDF_C_FIDS) {
458                     buf_len = buf->b_bcount;
459                     bpos = 0;
460                     lmappos = lmapping;
461                     while (buf_len) {
462                               sectornr = *lmappos++;
463                               len = MIN(buf_len, sector_size);
464                               fidblk = (uint8_t *) buf->b_data + bpos;
465                               udf_fixup_fid_block(fidblk, sector_size,
466                                         0, len, sectornr);
467                               bpos += len;
468                               buf_len -= len;
469                     }
470           }
471 
472           VOP_STRATEGY(ump->devvp, buf);
473 }
474 
475 
476 static void
udf_doshedule(struct udf_mount * ump)477 udf_doshedule(struct udf_mount *ump)
478 {
479           struct buf *buf;
480           struct timespec now, *last;
481           struct strat_private *priv = PRIV(ump);
482           void (*b_callback)(struct buf *);
483           int new_queue;
484           int error;
485 
486           buf = bufq_get(priv->queues[priv->cur_queue]);
487           if (buf) {
488                     /* transfer from the current queue to the device queue */
489                     mutex_exit(&priv->discstrat_mutex);
490 
491                     /* transform buffer to synchronous; XXX needed? */
492                     b_callback = buf->b_iodone;
493                     buf->b_iodone = NULL;
494                     CLR(buf->b_flags, B_ASYNC);
495 
496                     /* issue and wait on completion */
497                     udf_issue_buf(ump, priv->cur_queue, buf);
498                     biowait(buf);
499 
500                     mutex_enter(&priv->discstrat_mutex);
501 
502                     /* if there is an error, repair this error, otherwise propagate */
503                     if (buf->b_error && ((buf->b_flags & B_READ) == 0)) {
504                               /* check what we need to do */
505                               panic("UDF write error, can't handle yet!\n");
506                     }
507 
508                     /* propagate result to higher layers */
509                     if (b_callback) {
510                               buf->b_iodone = b_callback;
511                               (*buf->b_iodone)(buf);
512                     }
513 
514                     return;
515           }
516 
517           /* Check if we're idling in this state */
518           vfs_timestamp(&now);
519           last = &priv->last_queued[priv->cur_queue];
520           if (ump->discinfo.mmc_class == MMC_CLASS_CD) {
521                     /* dont switch too fast for CD media; its expensive in time */
522                     if (now.tv_sec - last->tv_sec < 3)
523                               return;
524           }
525 
526           /* check if we can/should switch */
527           new_queue = priv->cur_queue;
528 
529           if (bufq_peek(priv->queues[UDF_SHED_READING]))
530                     new_queue = UDF_SHED_READING;
531           if (bufq_peek(priv->queues[UDF_SHED_WRITING]))              /* only for unmount */
532                     new_queue = UDF_SHED_WRITING;
533           if (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]))
534                     new_queue = UDF_SHED_SEQWRITING;
535           if (priv->cur_queue == UDF_SHED_READING) {
536                     if (new_queue == UDF_SHED_SEQWRITING) {
537                               /* TODO use flag to signal if this is needed */
538                               mutex_exit(&priv->discstrat_mutex);
539 
540                               /* update trackinfo for data and metadata */
541                               error = udf_update_trackinfo(ump,
542                                                   &ump->data_track);
543                               assert(error == 0);
544                               error = udf_update_trackinfo(ump,
545                                                   &ump->metadata_track);
546                               assert(error == 0);
547                               mutex_enter(&priv->discstrat_mutex);
548                               __USE(error);
549                     }
550           }
551 
552           if (new_queue != priv->cur_queue) {
553                     DPRINTF(SHEDULE, ("switching from %d to %d\n",
554                               priv->cur_queue, new_queue));
555                     if (new_queue == UDF_SHED_READING)
556                               udf_mmc_synchronise_caches(ump);
557           }
558 
559           priv->cur_queue = new_queue;
560 }
561 
562 
563 static void
udf_discstrat_thread(void * arg)564 udf_discstrat_thread(void *arg)
565 {
566           struct udf_mount *ump = (struct udf_mount *) arg;
567           struct strat_private *priv = PRIV(ump);
568           int empty;
569 
570           empty = 1;
571 
572           priv->thread_running = 1;
573           cv_broadcast(&priv->discstrat_cv);
574 
575           mutex_enter(&priv->discstrat_mutex);
576           while (priv->run_thread || !empty || priv->sync_req) {
577                     /* process the current selected queue */
578                     udf_doshedule(ump);
579                     empty  = (bufq_peek(priv->queues[UDF_SHED_READING]) == NULL);
580                     empty &= (bufq_peek(priv->queues[UDF_SHED_WRITING]) == NULL);
581                     empty &= (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
582 
583                     /* wait for more if needed */
584                     if (empty) {
585                               if (priv->sync_req) {
586                                         /* on sync, we need to simulate a read->write transition */
587                                         udf_mmc_synchronise_caches(ump);
588                                         priv->cur_queue = UDF_SHED_READING;
589                                         priv->sync_req = 0;
590                               }
591                               cv_timedwait(&priv->discstrat_cv,
592                                         &priv->discstrat_mutex, hz/8);
593                     }
594           }
595           mutex_exit(&priv->discstrat_mutex);
596 
597           priv->thread_running  = 0;
598           priv->thread_finished = 1;
599           cv_broadcast(&priv->discstrat_cv);
600 
601           kthread_exit(0);
602           /* not reached */
603 }
604 
605 /* --------------------------------------------------------------------- */
606 
607 static void
udf_discstrat_init_seq(struct udf_strat_args * args)608 udf_discstrat_init_seq(struct udf_strat_args *args)
609 {
610           struct udf_mount *ump = args->ump;
611           struct strat_private *priv = PRIV(ump);
612           struct disk_strategy dkstrat;
613           uint32_t lb_size;
614 
615           KASSERT(ump);
616           KASSERT(ump->logical_vol);
617           KASSERT(priv == NULL);
618 
619           lb_size = udf_rw32(ump->logical_vol->lb_size);
620           KASSERT(lb_size > 0);
621 
622           /* initialise our memory space */
623           ump->strategy_private = malloc(sizeof(struct strat_private),
624                     M_UDFTEMP, M_WAITOK);
625           priv = ump->strategy_private;
626           memset(priv, 0 , sizeof(struct strat_private));
627 
628           /* initialise locks */
629           cv_init(&priv->discstrat_cv, "udfstrat");
630           mutex_init(&priv->discstrat_mutex, MUTEX_DEFAULT, IPL_NONE);
631 
632           /*
633            * Initialise pool for descriptors associated with nodes. This is done
634            * in lb_size units though currently lb_size is dictated to be
635            * sector_size.
636            */
637           pool_init(&priv->desc_pool, lb_size, 0, 0, 0, "udf_desc_pool", NULL,
638               IPL_NONE);
639 
640           /*
641            * remember old device strategy method and explicit set method
642            * `discsort' since we have our own more complex strategy that is not
643            * implementable on the CD device and other strategies will get in the
644            * way.
645            */
646           memset(&priv->old_strategy_setting, 0,
647                     sizeof(struct disk_strategy));
648           VOP_IOCTL(ump->devvp, DIOCGSTRATEGY, &priv->old_strategy_setting,
649                     FREAD | FKIOCTL, NOCRED);
650           memset(&dkstrat, 0, sizeof(struct disk_strategy));
651           strcpy(dkstrat.dks_name, "discsort");
652           VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &dkstrat, FWRITE | FKIOCTL,
653                     NOCRED);
654 
655           /* initialise our internal scheduler */
656           priv->cur_queue = UDF_SHED_READING;
657           bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
658                     BUFQ_SORT_RAWBLOCK);
659           bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
660                     BUFQ_SORT_RAWBLOCK);
661           bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "fcfs", 0);
662           vfs_timestamp(&priv->last_queued[UDF_SHED_READING]);
663           vfs_timestamp(&priv->last_queued[UDF_SHED_WRITING]);
664           vfs_timestamp(&priv->last_queued[UDF_SHED_SEQWRITING]);
665 
666           /* create our disk strategy thread */
667           priv->thread_finished = 0;
668           priv->thread_running  = 0;
669           priv->run_thread      = 1;
670           priv->sync_req        = 0;
671           if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
672                     udf_discstrat_thread, ump, &priv->queue_lwp,
673                     "%s", "udf_rw")) {
674                     panic("fork udf_rw");
675           }
676 
677           /* wait for thread to spin up */
678           mutex_enter(&priv->discstrat_mutex);
679           while (!priv->thread_running) {
680                     cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex, hz);
681           }
682           mutex_exit(&priv->discstrat_mutex);
683 }
684 
685 
686 static void
udf_discstrat_finish_seq(struct udf_strat_args * args)687 udf_discstrat_finish_seq(struct udf_strat_args *args)
688 {
689           struct udf_mount *ump = args->ump;
690           struct strat_private *priv = PRIV(ump);
691 
692           if (ump == NULL)
693                     return;
694 
695           /* stop our scheduling thread */
696           KASSERT(priv->run_thread == 1);
697           priv->run_thread = 0;
698 
699           mutex_enter(&priv->discstrat_mutex);
700           while (!priv->thread_finished) {
701                     cv_broadcast(&priv->discstrat_cv);
702                     cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex, hz);
703           }
704           mutex_exit(&priv->discstrat_mutex);
705 
706           /* kthread should be finished now */
707 
708           /* set back old device strategy method */
709           VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &priv->old_strategy_setting,
710                               FWRITE, NOCRED);
711 
712           /* destroy our pool */
713           pool_destroy(&priv->desc_pool);
714 
715           mutex_destroy(&priv->discstrat_mutex);
716           cv_destroy(&priv->discstrat_cv);
717 
718           /* free our private space */
719           free(ump->strategy_private, M_UDFTEMP);
720           ump->strategy_private = NULL;
721 }
722 
723 /* --------------------------------------------------------------------- */
724 
725 struct udf_strategy udf_strat_sequential =
726 {
727           udf_create_logvol_dscr_seq,
728           udf_free_logvol_dscr_seq,
729           udf_read_logvol_dscr_seq,
730           udf_write_logvol_dscr_seq,
731           udf_queuebuf_seq,
732           udf_sync_caches_seq,
733           udf_discstrat_init_seq,
734           udf_discstrat_finish_seq
735 };
736 
737 
738