1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2003 Silicon Graphics International Corp.
5 * Copyright (c) 2009-2011 Spectra Logic Corporation
6 * Copyright (c) 2012 The FreeBSD Foundation
7 * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
8 * All rights reserved.
9 *
10 * Portions of this software were developed by Edward Tomasz Napierala
11 * under sponsorship from the FreeBSD Foundation.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions, and the following disclaimer,
18 * without modification.
19 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
20 * substantially similar to the "NO WARRANTY" disclaimer below
21 * ("Disclaimer") and any redistribution must be conditioned upon
22 * including a substantially similar Disclaimer requirement for further
23 * binary redistribution.
24 *
25 * NO WARRANTY
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
29 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
35 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGES.
37 *
38 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
39 */
40 /*
41 * CAM Target Layer driver backend for block devices.
42 *
43 * Author: Ken Merry <ken@FreeBSD.org>
44 */
45 #include <sys/cdefs.h>
46 __FBSDID("$FreeBSD: stable/12/sys/cam/ctl/ctl_backend_block.c 369455 2021-03-15 03:04:28Z mav $");
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/types.h>
52 #include <sys/kthread.h>
53 #include <sys/bio.h>
54 #include <sys/fcntl.h>
55 #include <sys/limits.h>
56 #include <sys/lock.h>
57 #include <sys/mutex.h>
58 #include <sys/condvar.h>
59 #include <sys/malloc.h>
60 #include <sys/conf.h>
61 #include <sys/ioccom.h>
62 #include <sys/queue.h>
63 #include <sys/sbuf.h>
64 #include <sys/endian.h>
65 #include <sys/uio.h>
66 #include <sys/buf.h>
67 #include <sys/taskqueue.h>
68 #include <sys/vnode.h>
69 #include <sys/namei.h>
70 #include <sys/mount.h>
71 #include <sys/disk.h>
72 #include <sys/fcntl.h>
73 #include <sys/filedesc.h>
74 #include <sys/filio.h>
75 #include <sys/proc.h>
76 #include <sys/pcpu.h>
77 #include <sys/module.h>
78 #include <sys/sdt.h>
79 #include <sys/devicestat.h>
80 #include <sys/sysctl.h>
81 #include <sys/nv.h>
82 #include <sys/dnv.h>
83 #include <sys/sx.h>
84
85 #include <geom/geom.h>
86
87 #include <cam/cam.h>
88 #include <cam/scsi/scsi_all.h>
89 #include <cam/scsi/scsi_da.h>
90 #include <cam/ctl/ctl_io.h>
91 #include <cam/ctl/ctl.h>
92 #include <cam/ctl/ctl_backend.h>
93 #include <cam/ctl/ctl_ioctl.h>
94 #include <cam/ctl/ctl_ha.h>
95 #include <cam/ctl/ctl_scsi_all.h>
96 #include <cam/ctl/ctl_private.h>
97 #include <cam/ctl/ctl_error.h>
98
/*
 * The idea here is that we'll allocate enough S/G space to hold a 1MB
 * I/O.  If we get an I/O larger than that, we'll split it.
 *
 * CTLBLK_HALF_IO_SIZE	half of that 1MB limit (512KB).
 * CTLBLK_MAX_IO_SIZE	the full limit: two halves.
 * CTLBLK_MAX_SEG	the smaller of CTLBLK_HALF_IO_SIZE and MAXPHYS.
 * CTLBLK_HALF_SEGS	CTLBLK_MAX_SEG-sized pieces in one half, at least 1.
 * CTLBLK_MAX_SEGS	S/G entries per I/O: two halves.  When an I/O carries
 *			two S/G lists, the second one starts at index
 *			CTLBLK_HALF_SEGS of the segment array.
 */
#define	CTLBLK_HALF_IO_SIZE	(512 * 1024)
#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_HALF_IO_SIZE * 2)
#define	CTLBLK_MAX_SEG		MIN(CTLBLK_HALF_IO_SIZE, MAXPHYS)
#define	CTLBLK_HALF_SEGS	MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)
#define	CTLBLK_MAX_SEGS		(CTLBLK_HALF_SEGS * 2)
108
/*
 * Debug tracing.  With CTLBLK_DEBUG defined, DPRINTF() prints the message
 * prefixed with "cbb(<function>:<line>): "; otherwise it expands to an
 * empty statement and its arguments are not evaluated.
 */
#ifdef CTLBLK_DEBUG
#define DPRINTF(fmt, args...) \
    printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) do {} while(0)
#endif
115
/*
 * Accessors for the per-I/O private areas in the CTL I/O header.
 *
 * PRIV(io) views the CTL_PRIV_BACKEND slot as a struct ctl_ptr_len_flags;
 * this file reads its ->ptr member to get back to the struct
 * ctl_be_block_io attached to the I/O.
 *
 * ARGS(io) views the CTL_PRIV_LBA_LEN slot as a struct ctl_lba_len_flags,
 * i.e. the LBA and CTL_LLF_* flags of the command.
 */
#define PRIV(io)	\
    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
#define ARGS(io)	\
    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
120
/* Statically-defined tracing provider named "cbb" for this backend. */
SDT_PROVIDER_DEFINE(cbb);
122
/*
 * Per-LUN state bits kept in struct ctl_be_block_lun's flags member.
 * NOTE(review): the code that sets and tests these bits is not part of
 * this excerpt; confirm their exact meaning against the LUN
 * create/remove paths.
 */
typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
} ctl_be_block_lun_flags;
127
/*
 * Kind of backing store attached to a LUN (struct ctl_be_block_lun's
 * dev_type member): none yet, a device node, or a regular file.
 */
typedef enum {
	CTL_BE_BLOCK_NONE,
	CTL_BE_BLOCK_DEV,
	CTL_BE_BLOCK_FILE
} ctl_be_block_type;
133
/* Backend state specific to file-backed LUNs. */
struct ctl_be_block_filedata {
	struct ucred *cred;	/* credentials handed to VOP_READ/VOP_WRITE */
};
137
/*
 * Backing-store specific data embedded in each LUN.  Only the file
 * variant currently has any members.
 */
union ctl_be_block_bedata {
	struct ctl_be_block_filedata file;
};
141
struct ctl_be_block_io;
struct ctl_be_block_lun;

/*
 * Handler for one backend operation on a LUN.  It receives the LUN and
 * the per-I/O state and returns nothing.
 */
typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio);
/*
 * Attribute query handler: returns the value of the attribute named by
 * attrname for the given LUN.
 */
typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
    const char *attrname);
149
/*
 * Backend LUN structure.  There is a 1:1 mapping between a block device
 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
 */
struct ctl_be_block_lun {
	struct ctl_be_lun cbe_lun;		/* Must be first element. */
	struct ctl_lun_create_params params;
	char *dev_path;
	ctl_be_block_type dev_type;		/* none / device / file */
	struct vnode *vn;			/* vnode all backend I/O goes to */
	union ctl_be_block_bedata backend;	/* backing-store specific data */
	cbb_dispatch_t dispatch;		/* data I/O handler */
	cbb_dispatch_t lun_flush;
	cbb_dispatch_t unmap;
	cbb_dispatch_t get_lba_status;
	cbb_getattr_t getattr;
	uint64_t size_blocks;
	uint64_t size_bytes;			/* used as "end of LUN" offset */
	struct ctl_be_block_softc *softc;
	struct devstat *disk_stats;		/* devstat(9) accounting */
	ctl_be_block_lun_flags flags;
	SLIST_ENTRY(ctl_be_block_lun) links;
	struct taskqueue *io_taskqueue;		/* runs io_task */
	struct task io_task;
	int num_threads;
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue; /* writes awaiting dispatch */
	struct mtx_padalign io_lock;		/* devstat and bio counters */
	struct mtx_padalign queue_lock;		/* held for queue insertion */
};
182
/*
 * Overall softc structure for the block backend module.
 */
struct ctl_be_block_softc {
	struct sx			 modify_lock;
	struct mtx			 lock;
	int				 num_luns;
	SLIST_HEAD(, ctl_be_block_lun)	 lun_list;
	uma_zone_t			 beio_zone;	/* struct ctl_be_block_io */
	uma_zone_t			 buf_zone;	/* S/G data buffers */
#if (CTLBLK_MAX_SEG > 131072)
	/* Second buffer zone, used for segments of at most 131072 bytes. */
	uma_zone_t			 buf128_zone;
#endif
};
197
/* File-scope softc instance for the block backend. */
static struct ctl_be_block_softc backend_block_softc;
199
/*
 * Per-I/O information.
 */
struct ctl_be_block_io {
	union ctl_io			*io;	/* CTL I/O being serviced */
	/*
	 * Data segments.  When two_sglists is set the second list lives at
	 * index CTLBLK_HALF_SEGS and up, parallel to the first.
	 */
	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];
	struct iovec			xiovecs[CTLBLK_MAX_SEGS]; /* uio view of sg_segs */
	int				bio_cmd;	/* BIO_READ, BIO_WRITE, ... */
	int				two_sglists;
	int				num_segs;	/* valid entries per S/G list */
	/*
	 * bio bookkeeping, updated under the LUN's io_lock: the I/O is
	 * finished once send_complete is set and num_bios_done has caught
	 * up with num_bios_sent.
	 */
	int				num_bios_sent;
	int				num_bios_done;
	int				send_complete;
	int				first_error;	/* errno of lowest-offset failed bio */
	uint64_t			first_error_offset;
	struct bintime			ds_t0;		/* devstat start timestamp */
	devstat_tag_type		ds_tag_type;
	devstat_trans_flags		ds_trans_type;
	uint64_t			io_len;		/* bytes */
	uint64_t			io_offset;	/* byte offset into backing store */
	int				io_arg;		/* command-specific argument */
	struct ctl_be_block_softc	*softc;
	struct ctl_be_block_lun		*lun;
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};
225
extern struct ctl_softc *control_softc;

/*
 * kern.cam.ctl.block.num_threads: read/write tunable (CTLFLAG_RWTUN),
 * default 32, described as the number of threads per backing file.
 */
static int cbb_num_threads = 32;
SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
    "CAM Target Layer Block Backend");
SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
    &cbb_num_threads, 0, "Number of threads per backing file");
233
/*
 * Forward declarations for the file-local functions of this backend:
 * beio allocation/completion, the file- and device-backed I/O handlers,
 * the worker and queueing entry points, and the LUN management and
 * module callbacks.
 */
static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
static void ctl_free_beio(struct ctl_be_block_io *beio);
static void ctl_complete_beio(struct ctl_be_block_io *beio);
static int ctl_be_block_move_done(union ctl_io *io, bool samethr);
static void ctl_be_block_biodone(struct bio *bio);
static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio);
static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
    const char *attrname);
static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio);
static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
    struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
    const char *attrname);
static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
    union ctl_io *io);
static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
    union ctl_io *io);
static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
    union ctl_io *io);
static void ctl_be_block_worker(void *context, int pending);
static int ctl_be_block_submit(union ctl_io *io);
static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
    int flag, struct thread *td);
static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
    struct ctl_lun_req *req);
static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
    struct ctl_lun_req *req);
static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
    struct ctl_lun_req *req);
static int ctl_be_block_create(struct ctl_be_block_softc *softc,
    struct ctl_lun_req *req);
static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
    struct ctl_lun_req *req);
static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
    struct ctl_lun_req *req);
static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun);
static int ctl_be_block_config_write(union ctl_io *io);
static int ctl_be_block_config_read(union ctl_io *io);
static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb);
static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname);
static int ctl_be_block_init(void);
static int ctl_be_block_shutdown(void);
285
/*
 * Backend driver descriptor for the backend named "block": it lists the
 * entry points this file provides for module init/shutdown, data and
 * config I/O, ioctls and LUN info/attribute queries.
 */
static struct ctl_backend_driver ctl_be_block_driver =
{
	.name = "block",
	.flags = CTL_BE_FLAG_HAS_CONFIG,
	.init = ctl_be_block_init,
	.shutdown = ctl_be_block_shutdown,
	.data_submit = ctl_be_block_submit,
	.config_read = ctl_be_block_config_read,
	.config_write = ctl_be_block_config_write,
	.ioctl = ctl_be_block_ioctl,
	.lun_info = ctl_be_block_lun_info,
	.lun_attr = ctl_be_block_lun_attr
};
299
/* malloc(9) type for this backend's allocations. */
MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend");
/* Declare the backend to CTL under the module name "cbb". */
CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
302
303 static void
ctl_alloc_seg(struct ctl_be_block_softc * softc,struct ctl_sg_entry * sg,size_t len)304 ctl_alloc_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg,
305 size_t len)
306 {
307
308 #if (CTLBLK_MAX_SEG > 131072)
309 if (len <= 131072)
310 sg->addr = uma_zalloc(softc->buf128_zone, M_WAITOK);
311 else
312 #endif
313 sg->addr = uma_zalloc(softc->buf_zone, M_WAITOK);
314 sg->len = len;
315 }
316
317 static void
ctl_free_seg(struct ctl_be_block_softc * softc,struct ctl_sg_entry * sg)318 ctl_free_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg)
319 {
320
321 #if (CTLBLK_MAX_SEG > 131072)
322 if (sg->len <= 131072)
323 uma_zfree(softc->buf128_zone, sg->addr);
324 else
325 #endif
326 uma_zfree(softc->buf_zone, sg->addr);
327 }
328
329 static struct ctl_be_block_io *
ctl_alloc_beio(struct ctl_be_block_softc * softc)330 ctl_alloc_beio(struct ctl_be_block_softc *softc)
331 {
332 struct ctl_be_block_io *beio;
333
334 beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO);
335 beio->softc = softc;
336 return (beio);
337 }
338
339 static void
ctl_free_beio(struct ctl_be_block_io * beio)340 ctl_free_beio(struct ctl_be_block_io *beio)
341 {
342 struct ctl_be_block_softc *softc = beio->softc;
343 int i;
344
345 for (i = 0; i < beio->num_segs; i++) {
346 ctl_free_seg(softc, &beio->sg_segs[i]);
347
348 /* For compare we had two equal S/G lists. */
349 if (beio->two_sglists) {
350 ctl_free_seg(softc,
351 &beio->sg_segs[i + CTLBLK_HALF_SEGS]);
352 }
353 }
354
355 uma_zfree(softc->beio_zone, beio);
356 }
357
358 static void
ctl_complete_beio(struct ctl_be_block_io * beio)359 ctl_complete_beio(struct ctl_be_block_io *beio)
360 {
361 union ctl_io *io = beio->io;
362
363 if (beio->beio_cont != NULL) {
364 beio->beio_cont(beio);
365 } else {
366 ctl_free_beio(beio);
367 ctl_data_submit_done(io);
368 }
369 }
370
/*
 * Compare two byte buffers of the given size.
 *
 * Returns the number of leading bytes that are equal, i.e. the index of
 * the first differing byte, or size when the buffers match completely.
 */
static size_t
cmp(uint8_t *a, uint8_t *b, size_t size)
{
	size_t matched = 0;

	while (matched < size && a[matched] == b[matched])
		matched++;

	return (matched);
}
382
383 static void
ctl_be_block_compare(union ctl_io * io)384 ctl_be_block_compare(union ctl_io *io)
385 {
386 struct ctl_be_block_io *beio;
387 uint64_t off, res;
388 int i;
389 uint8_t info[8];
390
391 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
392 off = 0;
393 for (i = 0; i < beio->num_segs; i++) {
394 res = cmp(beio->sg_segs[i].addr,
395 beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
396 beio->sg_segs[i].len);
397 off += res;
398 if (res < beio->sg_segs[i].len)
399 break;
400 }
401 if (i < beio->num_segs) {
402 scsi_u64to8b(off, info);
403 ctl_set_sense(&io->scsiio, /*current_error*/ 1,
404 /*sense_key*/ SSD_KEY_MISCOMPARE,
405 /*asc*/ 0x1D, /*ascq*/ 0x00,
406 /*type*/ SSD_ELEM_INFO,
407 /*size*/ sizeof(info), /*data*/ &info,
408 /*type*/ SSD_ELEM_NONE);
409 } else
410 ctl_set_success(&io->scsiio);
411 }
412
413 static int
ctl_be_block_move_done(union ctl_io * io,bool samethr)414 ctl_be_block_move_done(union ctl_io *io, bool samethr)
415 {
416 struct ctl_be_block_io *beio;
417 struct ctl_be_block_lun *be_lun;
418 struct ctl_lba_len_flags *lbalen;
419
420 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
421
422 DPRINTF("entered\n");
423 io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;
424
425 /*
426 * We set status at this point for read and compare commands.
427 */
428 if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 &&
429 (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) {
430 lbalen = ARGS(io);
431 if (lbalen->flags & CTL_LLF_READ) {
432 ctl_set_success(&io->scsiio);
433 } else if (lbalen->flags & CTL_LLF_COMPARE) {
434 /* We have two data blocks ready for comparison. */
435 ctl_be_block_compare(io);
436 }
437 }
438
439 /*
440 * If this is a read, or a write with errors, it is done.
441 */
442 if ((beio->bio_cmd == BIO_READ)
443 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
444 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
445 ctl_complete_beio(beio);
446 return (0);
447 }
448
449 /*
450 * At this point, we have a write and the DMA completed successfully.
451 * If we were called synchronously in the original thread then just
452 * dispatch, otherwise we now have to queue it to the task queue to
453 * execute the backend I/O. That is because we do blocking
454 * memory allocations, and in the file backing case, blocking I/O.
455 * This move done routine is generally called in the SIM's
456 * interrupt context, and therefore we cannot block.
457 */
458 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
459 if (samethr) {
460 be_lun->dispatch(be_lun, beio);
461 } else {
462 mtx_lock(&be_lun->queue_lock);
463 STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
464 mtx_unlock(&be_lun->queue_lock);
465 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
466 }
467 return (0);
468 }
469
470 static void
ctl_be_block_biodone(struct bio * bio)471 ctl_be_block_biodone(struct bio *bio)
472 {
473 struct ctl_be_block_io *beio;
474 struct ctl_be_block_lun *be_lun;
475 union ctl_io *io;
476 int error;
477
478 beio = bio->bio_caller1;
479 be_lun = beio->lun;
480 io = beio->io;
481
482 DPRINTF("entered\n");
483
484 error = bio->bio_error;
485 mtx_lock(&be_lun->io_lock);
486 if (error != 0 &&
487 (beio->first_error == 0 ||
488 bio->bio_offset < beio->first_error_offset)) {
489 beio->first_error = error;
490 beio->first_error_offset = bio->bio_offset;
491 }
492
493 beio->num_bios_done++;
494
495 /*
496 * XXX KDM will this cause WITNESS to complain? Holding a lock
497 * during the free might cause it to complain.
498 */
499 g_destroy_bio(bio);
500
501 /*
502 * If the send complete bit isn't set, or we aren't the last I/O to
503 * complete, then we're done.
504 */
505 if ((beio->send_complete == 0)
506 || (beio->num_bios_done < beio->num_bios_sent)) {
507 mtx_unlock(&be_lun->io_lock);
508 return;
509 }
510
511 /*
512 * At this point, we've verified that we are the last I/O to
513 * complete, so it's safe to drop the lock.
514 */
515 devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
516 beio->ds_tag_type, beio->ds_trans_type,
517 /*now*/ NULL, /*then*/&beio->ds_t0);
518 mtx_unlock(&be_lun->io_lock);
519
520 /*
521 * If there are any errors from the backing device, we fail the
522 * entire I/O with a medium error.
523 */
524 error = beio->first_error;
525 if (error != 0) {
526 if (error == EOPNOTSUPP) {
527 ctl_set_invalid_opcode(&io->scsiio);
528 } else if (error == ENOSPC || error == EDQUOT) {
529 ctl_set_space_alloc_fail(&io->scsiio);
530 } else if (error == EROFS || error == EACCES) {
531 ctl_set_hw_write_protected(&io->scsiio);
532 } else if (beio->bio_cmd == BIO_FLUSH) {
533 /* XXX KDM is there is a better error here? */
534 ctl_set_internal_failure(&io->scsiio,
535 /*sks_valid*/ 1,
536 /*retry_count*/ 0xbad2);
537 } else {
538 ctl_set_medium_error(&io->scsiio,
539 beio->bio_cmd == BIO_READ);
540 }
541 ctl_complete_beio(beio);
542 return;
543 }
544
545 /*
546 * If this is a write, a flush, a delete or verify, we're all done.
547 * If this is a read, we can now send the data to the user.
548 */
549 if ((beio->bio_cmd == BIO_WRITE)
550 || (beio->bio_cmd == BIO_FLUSH)
551 || (beio->bio_cmd == BIO_DELETE)
552 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
553 ctl_set_success(&io->scsiio);
554 ctl_complete_beio(beio);
555 } else {
556 if ((ARGS(io)->flags & CTL_LLF_READ) &&
557 beio->beio_cont == NULL) {
558 ctl_set_success(&io->scsiio);
559 ctl_serseq_done(io);
560 }
561 ctl_datamove(io);
562 }
563 }
564
565 static void
ctl_be_block_flush_file(struct ctl_be_block_lun * be_lun,struct ctl_be_block_io * beio)566 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
567 struct ctl_be_block_io *beio)
568 {
569 union ctl_io *io = beio->io;
570 struct mount *mountpoint;
571 int error, lock_flags;
572
573 DPRINTF("entered\n");
574
575 binuptime(&beio->ds_t0);
576 mtx_lock(&be_lun->io_lock);
577 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
578 mtx_unlock(&be_lun->io_lock);
579
580 (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
581
582 if (MNT_SHARED_WRITES(mountpoint) ||
583 ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
584 lock_flags = LK_SHARED;
585 else
586 lock_flags = LK_EXCLUSIVE;
587 vn_lock(be_lun->vn, lock_flags | LK_RETRY);
588 error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
589 curthread);
590 VOP_UNLOCK(be_lun->vn, 0);
591
592 vn_finished_write(mountpoint);
593
594 mtx_lock(&be_lun->io_lock);
595 devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
596 beio->ds_tag_type, beio->ds_trans_type,
597 /*now*/ NULL, /*then*/&beio->ds_t0);
598 mtx_unlock(&be_lun->io_lock);
599
600 if (error == 0)
601 ctl_set_success(&io->scsiio);
602 else {
603 /* XXX KDM is there is a better error here? */
604 ctl_set_internal_failure(&io->scsiio,
605 /*sks_valid*/ 1,
606 /*retry_count*/ 0xbad1);
607 }
608
609 ctl_complete_beio(beio);
610 }
611
/*
 * SDT probes marking the start and end of file-style reads and writes.
 * NOTE(review): these are defined with one "uint64_t" argument, but the
 * visible call sites fire them with SDT_PROBE0() (no arguments) --
 * confirm which form is intended.
 */
SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");
616
617 static void
ctl_be_block_dispatch_file(struct ctl_be_block_lun * be_lun,struct ctl_be_block_io * beio)618 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
619 struct ctl_be_block_io *beio)
620 {
621 struct ctl_be_block_filedata *file_data;
622 union ctl_io *io;
623 struct uio xuio;
624 struct iovec *xiovec;
625 size_t s;
626 int error, flags, i;
627
628 DPRINTF("entered\n");
629
630 file_data = &be_lun->backend.file;
631 io = beio->io;
632 flags = 0;
633 if (ARGS(io)->flags & CTL_LLF_DPO)
634 flags |= IO_DIRECT;
635 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
636 flags |= IO_SYNC;
637
638 bzero(&xuio, sizeof(xuio));
639 if (beio->bio_cmd == BIO_READ) {
640 SDT_PROBE0(cbb, , read, file_start);
641 xuio.uio_rw = UIO_READ;
642 } else {
643 SDT_PROBE0(cbb, , write, file_start);
644 xuio.uio_rw = UIO_WRITE;
645 }
646 xuio.uio_offset = beio->io_offset;
647 xuio.uio_resid = beio->io_len;
648 xuio.uio_segflg = UIO_SYSSPACE;
649 xuio.uio_iov = beio->xiovecs;
650 xuio.uio_iovcnt = beio->num_segs;
651 xuio.uio_td = curthread;
652
653 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
654 xiovec->iov_base = beio->sg_segs[i].addr;
655 xiovec->iov_len = beio->sg_segs[i].len;
656 }
657
658 binuptime(&beio->ds_t0);
659 mtx_lock(&be_lun->io_lock);
660 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
661 mtx_unlock(&be_lun->io_lock);
662
663 if (beio->bio_cmd == BIO_READ) {
664 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
665
666 /*
667 * UFS pays attention to IO_DIRECT for reads. If the
668 * DIRECTIO option is configured into the kernel, it calls
669 * ffs_rawread(). But that only works for single-segment
670 * uios with user space addresses. In our case, with a
671 * kernel uio, it still reads into the buffer cache, but it
672 * will just try to release the buffer from the cache later
673 * on in ffs_read().
674 *
675 * ZFS does not pay attention to IO_DIRECT for reads.
676 *
677 * UFS does not pay attention to IO_SYNC for reads.
678 *
679 * ZFS pays attention to IO_SYNC (which translates into the
680 * Solaris define FRSYNC for zfs_read()) for reads. It
681 * attempts to sync the file before reading.
682 */
683 error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);
684
685 VOP_UNLOCK(be_lun->vn, 0);
686 SDT_PROBE0(cbb, , read, file_done);
687 if (error == 0 && xuio.uio_resid > 0) {
688 /*
689 * If we red less then requested (EOF), then
690 * we should clean the rest of the buffer.
691 */
692 s = beio->io_len - xuio.uio_resid;
693 for (i = 0; i < beio->num_segs; i++) {
694 if (s >= beio->sg_segs[i].len) {
695 s -= beio->sg_segs[i].len;
696 continue;
697 }
698 bzero((uint8_t *)beio->sg_segs[i].addr + s,
699 beio->sg_segs[i].len - s);
700 s = 0;
701 }
702 }
703 } else {
704 struct mount *mountpoint;
705 int lock_flags;
706
707 (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
708
709 if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL)
710 && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
711 lock_flags = LK_SHARED;
712 else
713 lock_flags = LK_EXCLUSIVE;
714 vn_lock(be_lun->vn, lock_flags | LK_RETRY);
715
716 /*
717 * UFS pays attention to IO_DIRECT for writes. The write
718 * is done asynchronously. (Normally the write would just
719 * get put into cache.
720 *
721 * UFS pays attention to IO_SYNC for writes. It will
722 * attempt to write the buffer out synchronously if that
723 * flag is set.
724 *
725 * ZFS does not pay attention to IO_DIRECT for writes.
726 *
727 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
728 * for writes. It will flush the transaction from the
729 * cache before returning.
730 */
731 error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
732 VOP_UNLOCK(be_lun->vn, 0);
733
734 vn_finished_write(mountpoint);
735 SDT_PROBE0(cbb, , write, file_done);
736 }
737
738 mtx_lock(&be_lun->io_lock);
739 devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
740 beio->ds_tag_type, beio->ds_trans_type,
741 /*now*/ NULL, /*then*/&beio->ds_t0);
742 mtx_unlock(&be_lun->io_lock);
743
744 /*
745 * If we got an error, set the sense data to "MEDIUM ERROR" and
746 * return the I/O to the user.
747 */
748 if (error != 0) {
749 if (error == ENOSPC || error == EDQUOT) {
750 ctl_set_space_alloc_fail(&io->scsiio);
751 } else if (error == EROFS || error == EACCES) {
752 ctl_set_hw_write_protected(&io->scsiio);
753 } else {
754 ctl_set_medium_error(&io->scsiio,
755 beio->bio_cmd == BIO_READ);
756 }
757 ctl_complete_beio(beio);
758 return;
759 }
760
761 /*
762 * If this is a write or a verify, we're all done.
763 * If this is a read, we can now send the data to the user.
764 */
765 if ((beio->bio_cmd == BIO_WRITE) ||
766 (ARGS(io)->flags & CTL_LLF_VERIFY)) {
767 ctl_set_success(&io->scsiio);
768 ctl_complete_beio(beio);
769 } else {
770 if ((ARGS(io)->flags & CTL_LLF_READ) &&
771 beio->beio_cont == NULL) {
772 ctl_set_success(&io->scsiio);
773 ctl_serseq_done(io);
774 }
775 ctl_datamove(io);
776 }
777 }
778
779 static void
ctl_be_block_gls_file(struct ctl_be_block_lun * be_lun,struct ctl_be_block_io * beio)780 ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
781 struct ctl_be_block_io *beio)
782 {
783 union ctl_io *io = beio->io;
784 struct ctl_lba_len_flags *lbalen = ARGS(io);
785 struct scsi_get_lba_status_data *data;
786 off_t roff, off;
787 int error, status;
788
789 DPRINTF("entered\n");
790
791 off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
792 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
793 error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
794 0, curthread->td_ucred, curthread);
795 if (error == 0 && off > roff)
796 status = 0; /* mapped up to off */
797 else {
798 error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
799 0, curthread->td_ucred, curthread);
800 if (error == 0 && off > roff)
801 status = 1; /* deallocated up to off */
802 else {
803 status = 0; /* unknown up to the end */
804 off = be_lun->size_bytes;
805 }
806 }
807 VOP_UNLOCK(be_lun->vn, 0);
808
809 data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
810 scsi_u64to8b(lbalen->lba, data->descr[0].addr);
811 scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
812 lbalen->lba), data->descr[0].length);
813 data->descr[0].status = status;
814
815 ctl_complete_beio(beio);
816 }
817
818 static uint64_t
ctl_be_block_getattr_file(struct ctl_be_block_lun * be_lun,const char * attrname)819 ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
820 {
821 struct vattr vattr;
822 struct statfs statfs;
823 uint64_t val;
824 int error;
825
826 val = UINT64_MAX;
827 if (be_lun->vn == NULL)
828 return (val);
829 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
830 if (strcmp(attrname, "blocksused") == 0) {
831 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
832 if (error == 0)
833 val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
834 }
835 if (strcmp(attrname, "blocksavail") == 0 &&
836 (be_lun->vn->v_iflag & VI_DOOMED) == 0) {
837 error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
838 if (error == 0)
839 val = statfs.f_bavail * statfs.f_bsize /
840 be_lun->cbe_lun.blocksize;
841 }
842 VOP_UNLOCK(be_lun->vn, 0);
843 return (val);
844 }
845
846 static void
ctl_be_block_dispatch_zvol(struct ctl_be_block_lun * be_lun,struct ctl_be_block_io * beio)847 ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
848 struct ctl_be_block_io *beio)
849 {
850 union ctl_io *io;
851 struct cdevsw *csw;
852 struct cdev *dev;
853 struct uio xuio;
854 struct iovec *xiovec;
855 int error, flags, i, ref;
856
857 DPRINTF("entered\n");
858
859 io = beio->io;
860 flags = 0;
861 if (ARGS(io)->flags & CTL_LLF_DPO)
862 flags |= IO_DIRECT;
863 if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
864 flags |= IO_SYNC;
865
866 bzero(&xuio, sizeof(xuio));
867 if (beio->bio_cmd == BIO_READ) {
868 SDT_PROBE0(cbb, , read, file_start);
869 xuio.uio_rw = UIO_READ;
870 } else {
871 SDT_PROBE0(cbb, , write, file_start);
872 xuio.uio_rw = UIO_WRITE;
873 }
874 xuio.uio_offset = beio->io_offset;
875 xuio.uio_resid = beio->io_len;
876 xuio.uio_segflg = UIO_SYSSPACE;
877 xuio.uio_iov = beio->xiovecs;
878 xuio.uio_iovcnt = beio->num_segs;
879 xuio.uio_td = curthread;
880
881 for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
882 xiovec->iov_base = beio->sg_segs[i].addr;
883 xiovec->iov_len = beio->sg_segs[i].len;
884 }
885
886 binuptime(&beio->ds_t0);
887 mtx_lock(&be_lun->io_lock);
888 devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
889 mtx_unlock(&be_lun->io_lock);
890
891 csw = devvn_refthread(be_lun->vn, &dev, &ref);
892 if (csw) {
893 if (beio->bio_cmd == BIO_READ)
894 error = csw->d_read(dev, &xuio, flags);
895 else
896 error = csw->d_write(dev, &xuio, flags);
897 dev_relthread(dev, ref);
898 } else
899 error = ENXIO;
900
901 if (beio->bio_cmd == BIO_READ)
902 SDT_PROBE0(cbb, , read, file_done);
903 else
904 SDT_PROBE0(cbb, , write, file_done);
905
906 mtx_lock(&be_lun->io_lock);
907 devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
908 beio->ds_tag_type, beio->ds_trans_type,
909 /*now*/ NULL, /*then*/&beio->ds_t0);
910 mtx_unlock(&be_lun->io_lock);
911
912 /*
913 * If we got an error, set the sense data to "MEDIUM ERROR" and
914 * return the I/O to the user.
915 */
916 if (error != 0) {
917 if (error == ENOSPC || error == EDQUOT) {
918 ctl_set_space_alloc_fail(&io->scsiio);
919 } else if (error == EROFS || error == EACCES) {
920 ctl_set_hw_write_protected(&io->scsiio);
921 } else {
922 ctl_set_medium_error(&io->scsiio,
923 beio->bio_cmd == BIO_READ);
924 }
925 ctl_complete_beio(beio);
926 return;
927 }
928
929 /*
930 * If this is a write or a verify, we're all done.
931 * If this is a read, we can now send the data to the user.
932 */
933 if ((beio->bio_cmd == BIO_WRITE) ||
934 (ARGS(io)->flags & CTL_LLF_VERIFY)) {
935 ctl_set_success(&io->scsiio);
936 ctl_complete_beio(beio);
937 } else {
938 if ((ARGS(io)->flags & CTL_LLF_READ) &&
939 beio->beio_cont == NULL) {
940 ctl_set_success(&io->scsiio);
941 ctl_serseq_done(io);
942 }
943 ctl_datamove(io);
944 }
945 }
946
947 static void
ctl_be_block_gls_zvol(struct ctl_be_block_lun * be_lun,struct ctl_be_block_io * beio)948 ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
949 struct ctl_be_block_io *beio)
950 {
951 union ctl_io *io = beio->io;
952 struct cdevsw *csw;
953 struct cdev *dev;
954 struct ctl_lba_len_flags *lbalen = ARGS(io);
955 struct scsi_get_lba_status_data *data;
956 off_t roff, off;
957 int error, ref, status;
958
959 DPRINTF("entered\n");
960
961 csw = devvn_refthread(be_lun->vn, &dev, &ref);
962 if (csw == NULL) {
963 status = 0; /* unknown up to the end */
964 off = be_lun->size_bytes;
965 goto done;
966 }
967 off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
968 error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
969 curthread);
970 if (error == 0 && off > roff)
971 status = 0; /* mapped up to off */
972 else {
973 error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
974 curthread);
975 if (error == 0 && off > roff)
976 status = 1; /* deallocated up to off */
977 else {
978 status = 0; /* unknown up to the end */
979 off = be_lun->size_bytes;
980 }
981 }
982 dev_relthread(dev, ref);
983
984 done:
985 data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
986 scsi_u64to8b(lbalen->lba, data->descr[0].addr);
987 scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
988 lbalen->lba), data->descr[0].length);
989 data->descr[0].status = status;
990
991 ctl_complete_beio(beio);
992 }
993
994 static void
ctl_be_block_flush_dev(struct ctl_be_block_lun * be_lun,struct ctl_be_block_io * beio)995 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
996 struct ctl_be_block_io *beio)
997 {
998 struct bio *bio;
999 struct cdevsw *csw;
1000 struct cdev *dev;
1001 int ref;
1002
1003 DPRINTF("entered\n");
1004
1005 /* This can't fail, it's a blocking allocation. */
1006 bio = g_alloc_bio();
1007
1008 bio->bio_cmd = BIO_FLUSH;
1009 bio->bio_offset = 0;
1010 bio->bio_data = 0;
1011 bio->bio_done = ctl_be_block_biodone;
1012 bio->bio_caller1 = beio;
1013 bio->bio_pblkno = 0;
1014
1015 /*
1016 * We don't need to acquire the LUN lock here, because we are only
1017 * sending one bio, and so there is no other context to synchronize
1018 * with.
1019 */
1020 beio->num_bios_sent = 1;
1021 beio->send_complete = 1;
1022
1023 binuptime(&beio->ds_t0);
1024 mtx_lock(&be_lun->io_lock);
1025 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1026 mtx_unlock(&be_lun->io_lock);
1027
1028 csw = devvn_refthread(be_lun->vn, &dev, &ref);
1029 if (csw) {
1030 bio->bio_dev = dev;
1031 csw->d_strategy(bio);
1032 dev_relthread(dev, ref);
1033 } else {
1034 bio->bio_error = ENXIO;
1035 ctl_be_block_biodone(bio);
1036 }
1037 }
1038
1039 static void
ctl_be_block_unmap_dev_range(struct ctl_be_block_lun * be_lun,struct ctl_be_block_io * beio,uint64_t off,uint64_t len,int last)1040 ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
1041 struct ctl_be_block_io *beio,
1042 uint64_t off, uint64_t len, int last)
1043 {
1044 struct bio *bio;
1045 uint64_t maxlen;
1046 struct cdevsw *csw;
1047 struct cdev *dev;
1048 int ref;
1049
1050 csw = devvn_refthread(be_lun->vn, &dev, &ref);
1051 maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
1052 while (len > 0) {
1053 bio = g_alloc_bio();
1054 bio->bio_cmd = BIO_DELETE;
1055 bio->bio_dev = dev;
1056 bio->bio_offset = off;
1057 bio->bio_length = MIN(len, maxlen);
1058 bio->bio_data = 0;
1059 bio->bio_done = ctl_be_block_biodone;
1060 bio->bio_caller1 = beio;
1061 bio->bio_pblkno = off / be_lun->cbe_lun.blocksize;
1062
1063 off += bio->bio_length;
1064 len -= bio->bio_length;
1065
1066 mtx_lock(&be_lun->io_lock);
1067 beio->num_bios_sent++;
1068 if (last && len == 0)
1069 beio->send_complete = 1;
1070 mtx_unlock(&be_lun->io_lock);
1071
1072 if (csw) {
1073 csw->d_strategy(bio);
1074 } else {
1075 bio->bio_error = ENXIO;
1076 ctl_be_block_biodone(bio);
1077 }
1078 }
1079 if (csw)
1080 dev_relthread(dev, ref);
1081 }
1082
1083 static void
ctl_be_block_unmap_dev(struct ctl_be_block_lun * be_lun,struct ctl_be_block_io * beio)1084 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
1085 struct ctl_be_block_io *beio)
1086 {
1087 union ctl_io *io;
1088 struct ctl_ptr_len_flags *ptrlen;
1089 struct scsi_unmap_desc *buf, *end;
1090 uint64_t len;
1091
1092 io = beio->io;
1093
1094 DPRINTF("entered\n");
1095
1096 binuptime(&beio->ds_t0);
1097 mtx_lock(&be_lun->io_lock);
1098 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1099 mtx_unlock(&be_lun->io_lock);
1100
1101 if (beio->io_offset == -1) {
1102 beio->io_len = 0;
1103 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1104 buf = (struct scsi_unmap_desc *)ptrlen->ptr;
1105 end = buf + ptrlen->len / sizeof(*buf);
1106 for (; buf < end; buf++) {
1107 len = (uint64_t)scsi_4btoul(buf->length) *
1108 be_lun->cbe_lun.blocksize;
1109 beio->io_len += len;
1110 ctl_be_block_unmap_dev_range(be_lun, beio,
1111 scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
1112 len, (end - buf < 2) ? TRUE : FALSE);
1113 }
1114 } else
1115 ctl_be_block_unmap_dev_range(be_lun, beio,
1116 beio->io_offset, beio->io_len, TRUE);
1117 }
1118
1119 static void
ctl_be_block_dispatch_dev(struct ctl_be_block_lun * be_lun,struct ctl_be_block_io * beio)1120 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
1121 struct ctl_be_block_io *beio)
1122 {
1123 TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
1124 struct bio *bio;
1125 struct cdevsw *csw;
1126 struct cdev *dev;
1127 off_t cur_offset;
1128 int i, max_iosize, ref;
1129
1130 DPRINTF("entered\n");
1131 csw = devvn_refthread(be_lun->vn, &dev, &ref);
1132
1133 /*
1134 * We have to limit our I/O size to the maximum supported by the
1135 * backend device.
1136 */
1137 if (csw) {
1138 max_iosize = dev->si_iosize_max;
1139 if (max_iosize < PAGE_SIZE)
1140 max_iosize = DFLTPHYS;
1141 } else
1142 max_iosize = DFLTPHYS;
1143
1144 cur_offset = beio->io_offset;
1145 for (i = 0; i < beio->num_segs; i++) {
1146 size_t cur_size;
1147 uint8_t *cur_ptr;
1148
1149 cur_size = beio->sg_segs[i].len;
1150 cur_ptr = beio->sg_segs[i].addr;
1151
1152 while (cur_size > 0) {
1153 /* This can't fail, it's a blocking allocation. */
1154 bio = g_alloc_bio();
1155
1156 KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));
1157
1158 bio->bio_cmd = beio->bio_cmd;
1159 bio->bio_dev = dev;
1160 bio->bio_caller1 = beio;
1161 bio->bio_length = min(cur_size, max_iosize);
1162 bio->bio_offset = cur_offset;
1163 bio->bio_data = cur_ptr;
1164 bio->bio_done = ctl_be_block_biodone;
1165 bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;
1166
1167 cur_offset += bio->bio_length;
1168 cur_ptr += bio->bio_length;
1169 cur_size -= bio->bio_length;
1170
1171 TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
1172 beio->num_bios_sent++;
1173 }
1174 }
1175 binuptime(&beio->ds_t0);
1176 mtx_lock(&be_lun->io_lock);
1177 devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1178 beio->send_complete = 1;
1179 mtx_unlock(&be_lun->io_lock);
1180
1181 /*
1182 * Fire off all allocated requests!
1183 */
1184 while ((bio = TAILQ_FIRST(&queue)) != NULL) {
1185 TAILQ_REMOVE(&queue, bio, bio_queue);
1186 if (csw)
1187 csw->d_strategy(bio);
1188 else {
1189 bio->bio_error = ENXIO;
1190 ctl_be_block_biodone(bio);
1191 }
1192 }
1193 if (csw)
1194 dev_relthread(dev, ref);
1195 }
1196
1197 static uint64_t
ctl_be_block_getattr_dev(struct ctl_be_block_lun * be_lun,const char * attrname)1198 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
1199 {
1200 struct diocgattr_arg arg;
1201 struct cdevsw *csw;
1202 struct cdev *dev;
1203 int error, ref;
1204
1205 csw = devvn_refthread(be_lun->vn, &dev, &ref);
1206 if (csw == NULL)
1207 return (UINT64_MAX);
1208 strlcpy(arg.name, attrname, sizeof(arg.name));
1209 arg.len = sizeof(arg.value.off);
1210 if (csw->d_ioctl) {
1211 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
1212 curthread);
1213 } else
1214 error = ENODEV;
1215 dev_relthread(dev, ref);
1216 if (error != 0)
1217 return (UINT64_MAX);
1218 return (arg.value.off);
1219 }
1220
1221 static void
ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun * be_lun,union ctl_io * io)1222 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
1223 union ctl_io *io)
1224 {
1225 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1226 struct ctl_be_block_io *beio;
1227 struct ctl_lba_len_flags *lbalen;
1228
1229 DPRINTF("entered\n");
1230 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1231 lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1232
1233 beio->io_len = lbalen->len * cbe_lun->blocksize;
1234 beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1235 beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
1236 beio->bio_cmd = BIO_FLUSH;
1237 beio->ds_trans_type = DEVSTAT_NO_DATA;
1238 DPRINTF("SYNC\n");
1239 be_lun->lun_flush(be_lun, beio);
1240 }
1241
1242 static void
ctl_be_block_cw_done_ws(struct ctl_be_block_io * beio)1243 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
1244 {
1245 union ctl_io *io;
1246
1247 io = beio->io;
1248 ctl_free_beio(beio);
1249 if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1250 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1251 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1252 ctl_config_write_done(io);
1253 return;
1254 }
1255
1256 ctl_be_block_config_write(io);
1257 }
1258
1259 static void
ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun * be_lun,union ctl_io * io)1260 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
1261 union ctl_io *io)
1262 {
1263 struct ctl_be_block_softc *softc = be_lun->softc;
1264 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1265 struct ctl_be_block_io *beio;
1266 struct ctl_lba_len_flags *lbalen;
1267 uint64_t len_left, lba;
1268 uint32_t pb, pbo, adj;
1269 int i, seglen;
1270 uint8_t *buf, *end;
1271
1272 DPRINTF("entered\n");
1273
1274 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1275 lbalen = ARGS(io);
1276
1277 if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
1278 (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
1279 ctl_free_beio(beio);
1280 ctl_set_invalid_field(&io->scsiio,
1281 /*sks_valid*/ 1,
1282 /*command*/ 1,
1283 /*field*/ 1,
1284 /*bit_valid*/ 0,
1285 /*bit*/ 0);
1286 ctl_config_write_done(io);
1287 return;
1288 }
1289
1290 if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
1291 beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1292 beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
1293 beio->bio_cmd = BIO_DELETE;
1294 beio->ds_trans_type = DEVSTAT_FREE;
1295
1296 be_lun->unmap(be_lun, beio);
1297 return;
1298 }
1299
1300 beio->bio_cmd = BIO_WRITE;
1301 beio->ds_trans_type = DEVSTAT_WRITE;
1302
1303 DPRINTF("WRITE SAME at LBA %jx len %u\n",
1304 (uintmax_t)lbalen->lba, lbalen->len);
1305
1306 pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
1307 if (be_lun->cbe_lun.pblockoff > 0)
1308 pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
1309 else
1310 pbo = 0;
1311 len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
1312 for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
1313
1314 /*
1315 * Setup the S/G entry for this chunk.
1316 */
1317 seglen = MIN(CTLBLK_MAX_SEG, len_left);
1318 if (pb > cbe_lun->blocksize) {
1319 adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
1320 seglen - pbo) % pb;
1321 if (seglen > adj)
1322 seglen -= adj;
1323 else
1324 seglen -= seglen % cbe_lun->blocksize;
1325 } else
1326 seglen -= seglen % cbe_lun->blocksize;
1327 ctl_alloc_seg(softc, &beio->sg_segs[i], seglen);
1328
1329 DPRINTF("segment %d addr %p len %zd\n", i,
1330 beio->sg_segs[i].addr, beio->sg_segs[i].len);
1331
1332 beio->num_segs++;
1333 len_left -= seglen;
1334
1335 buf = beio->sg_segs[i].addr;
1336 end = buf + seglen;
1337 for (; buf < end; buf += cbe_lun->blocksize) {
1338 if (lbalen->flags & SWS_NDOB) {
1339 memset(buf, 0, cbe_lun->blocksize);
1340 } else {
1341 memcpy(buf, io->scsiio.kern_data_ptr,
1342 cbe_lun->blocksize);
1343 }
1344 if (lbalen->flags & SWS_LBDATA)
1345 scsi_ulto4b(lbalen->lba + lba, buf);
1346 lba++;
1347 }
1348 }
1349
1350 beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1351 beio->io_len = lba * cbe_lun->blocksize;
1352
1353 /* We can not do all in one run. Correct and schedule rerun. */
1354 if (len_left > 0) {
1355 lbalen->lba += lba;
1356 lbalen->len -= lba;
1357 beio->beio_cont = ctl_be_block_cw_done_ws;
1358 }
1359
1360 be_lun->dispatch(be_lun, beio);
1361 }
1362
1363 static void
ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun * be_lun,union ctl_io * io)1364 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
1365 union ctl_io *io)
1366 {
1367 struct ctl_be_block_io *beio;
1368 struct ctl_ptr_len_flags *ptrlen;
1369
1370 DPRINTF("entered\n");
1371
1372 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1373 ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1374
1375 if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
1376 ctl_free_beio(beio);
1377 ctl_set_invalid_field(&io->scsiio,
1378 /*sks_valid*/ 0,
1379 /*command*/ 1,
1380 /*field*/ 0,
1381 /*bit_valid*/ 0,
1382 /*bit*/ 0);
1383 ctl_config_write_done(io);
1384 return;
1385 }
1386
1387 beio->io_len = 0;
1388 beio->io_offset = -1;
1389 beio->bio_cmd = BIO_DELETE;
1390 beio->ds_trans_type = DEVSTAT_FREE;
1391 DPRINTF("UNMAP\n");
1392 be_lun->unmap(be_lun, beio);
1393 }
1394
1395 static void
ctl_be_block_cr_done(struct ctl_be_block_io * beio)1396 ctl_be_block_cr_done(struct ctl_be_block_io *beio)
1397 {
1398 union ctl_io *io;
1399
1400 io = beio->io;
1401 ctl_free_beio(beio);
1402 ctl_config_read_done(io);
1403 }
1404
1405 static void
ctl_be_block_cr_dispatch(struct ctl_be_block_lun * be_lun,union ctl_io * io)1406 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
1407 union ctl_io *io)
1408 {
1409 struct ctl_be_block_io *beio;
1410 struct ctl_be_block_softc *softc;
1411
1412 DPRINTF("entered\n");
1413
1414 softc = be_lun->softc;
1415 beio = ctl_alloc_beio(softc);
1416 beio->io = io;
1417 beio->lun = be_lun;
1418 beio->beio_cont = ctl_be_block_cr_done;
1419 PRIV(io)->ptr = (void *)beio;
1420
1421 switch (io->scsiio.cdb[0]) {
1422 case SERVICE_ACTION_IN: /* GET LBA STATUS */
1423 beio->bio_cmd = -1;
1424 beio->ds_trans_type = DEVSTAT_NO_DATA;
1425 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1426 beio->io_len = 0;
1427 if (be_lun->get_lba_status)
1428 be_lun->get_lba_status(be_lun, beio);
1429 else
1430 ctl_be_block_cr_done(beio);
1431 break;
1432 default:
1433 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1434 break;
1435 }
1436 }
1437
1438 static void
ctl_be_block_cw_done(struct ctl_be_block_io * beio)1439 ctl_be_block_cw_done(struct ctl_be_block_io *beio)
1440 {
1441 union ctl_io *io;
1442
1443 io = beio->io;
1444 ctl_free_beio(beio);
1445 ctl_config_write_done(io);
1446 }
1447
1448 static void
ctl_be_block_cw_dispatch(struct ctl_be_block_lun * be_lun,union ctl_io * io)1449 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
1450 union ctl_io *io)
1451 {
1452 struct ctl_be_block_io *beio;
1453 struct ctl_be_block_softc *softc;
1454
1455 DPRINTF("entered\n");
1456
1457 softc = be_lun->softc;
1458 beio = ctl_alloc_beio(softc);
1459 beio->io = io;
1460 beio->lun = be_lun;
1461 beio->beio_cont = ctl_be_block_cw_done;
1462 switch (io->scsiio.tag_type) {
1463 case CTL_TAG_ORDERED:
1464 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1465 break;
1466 case CTL_TAG_HEAD_OF_QUEUE:
1467 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1468 break;
1469 case CTL_TAG_UNTAGGED:
1470 case CTL_TAG_SIMPLE:
1471 case CTL_TAG_ACA:
1472 default:
1473 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1474 break;
1475 }
1476 PRIV(io)->ptr = (void *)beio;
1477
1478 switch (io->scsiio.cdb[0]) {
1479 case SYNCHRONIZE_CACHE:
1480 case SYNCHRONIZE_CACHE_16:
1481 ctl_be_block_cw_dispatch_sync(be_lun, io);
1482 break;
1483 case WRITE_SAME_10:
1484 case WRITE_SAME_16:
1485 ctl_be_block_cw_dispatch_ws(be_lun, io);
1486 break;
1487 case UNMAP:
1488 ctl_be_block_cw_dispatch_unmap(be_lun, io);
1489 break;
1490 default:
1491 panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1492 break;
1493 }
1494 }
1495
/*
 * DTrace SDT probes of the "cbb" provider for the data path: "start" and
 * "alloc_done", each in a read and a write flavour.  Every probe is declared
 * here with a single "uint64_t" argument.
 *
 * NOTE(review): ctl_be_block_dispatch() fires these probes with
 * SDT_PROBE0(), i.e. without passing an argument -- confirm whether the
 * declared argument is intentional.
 */
SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t");
1500
1501 static void
ctl_be_block_next(struct ctl_be_block_io * beio)1502 ctl_be_block_next(struct ctl_be_block_io *beio)
1503 {
1504 struct ctl_be_block_lun *be_lun;
1505 union ctl_io *io;
1506
1507 io = beio->io;
1508 be_lun = beio->lun;
1509 ctl_free_beio(beio);
1510 if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1511 ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1512 (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1513 ctl_data_submit_done(io);
1514 return;
1515 }
1516
1517 io->io_hdr.status &= ~CTL_STATUS_MASK;
1518 io->io_hdr.status |= CTL_STATUS_NONE;
1519
1520 mtx_lock(&be_lun->queue_lock);
1521 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1522 mtx_unlock(&be_lun->queue_lock);
1523 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1524 }
1525
1526 static void
ctl_be_block_dispatch(struct ctl_be_block_lun * be_lun,union ctl_io * io)1527 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
1528 union ctl_io *io)
1529 {
1530 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1531 struct ctl_be_block_io *beio;
1532 struct ctl_be_block_softc *softc;
1533 struct ctl_lba_len_flags *lbalen;
1534 struct ctl_ptr_len_flags *bptrlen;
1535 uint64_t len_left, lbas;
1536 int i;
1537
1538 softc = be_lun->softc;
1539
1540 DPRINTF("entered\n");
1541
1542 lbalen = ARGS(io);
1543 if (lbalen->flags & CTL_LLF_WRITE) {
1544 SDT_PROBE0(cbb, , write, start);
1545 } else {
1546 SDT_PROBE0(cbb, , read, start);
1547 }
1548
1549 beio = ctl_alloc_beio(softc);
1550 beio->io = io;
1551 beio->lun = be_lun;
1552 bptrlen = PRIV(io);
1553 bptrlen->ptr = (void *)beio;
1554
1555 switch (io->scsiio.tag_type) {
1556 case CTL_TAG_ORDERED:
1557 beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1558 break;
1559 case CTL_TAG_HEAD_OF_QUEUE:
1560 beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1561 break;
1562 case CTL_TAG_UNTAGGED:
1563 case CTL_TAG_SIMPLE:
1564 case CTL_TAG_ACA:
1565 default:
1566 beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1567 break;
1568 }
1569
1570 if (lbalen->flags & CTL_LLF_WRITE) {
1571 beio->bio_cmd = BIO_WRITE;
1572 beio->ds_trans_type = DEVSTAT_WRITE;
1573 } else {
1574 beio->bio_cmd = BIO_READ;
1575 beio->ds_trans_type = DEVSTAT_READ;
1576 }
1577
1578 DPRINTF("%s at LBA %jx len %u @%ju\n",
1579 (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
1580 (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
1581 if (lbalen->flags & CTL_LLF_COMPARE) {
1582 beio->two_sglists = 1;
1583 lbas = CTLBLK_HALF_IO_SIZE;
1584 } else {
1585 lbas = CTLBLK_MAX_IO_SIZE;
1586 }
1587 lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
1588 beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
1589 beio->io_len = lbas * cbe_lun->blocksize;
1590 bptrlen->len += lbas;
1591
1592 for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
1593 KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
1594 i, CTLBLK_MAX_SEGS));
1595
1596 /*
1597 * Setup the S/G entry for this chunk.
1598 */
1599 ctl_alloc_seg(softc, &beio->sg_segs[i],
1600 min(CTLBLK_MAX_SEG, len_left));
1601
1602 DPRINTF("segment %d addr %p len %zd\n", i,
1603 beio->sg_segs[i].addr, beio->sg_segs[i].len);
1604
1605 /* Set up second segment for compare operation. */
1606 if (beio->two_sglists) {
1607 ctl_alloc_seg(softc,
1608 &beio->sg_segs[i + CTLBLK_HALF_SEGS],
1609 beio->sg_segs[i].len);
1610 }
1611
1612 beio->num_segs++;
1613 len_left -= beio->sg_segs[i].len;
1614 }
1615 if (bptrlen->len < lbalen->len)
1616 beio->beio_cont = ctl_be_block_next;
1617 io->scsiio.be_move_done = ctl_be_block_move_done;
1618 /* For compare we have separate S/G lists for read and datamove. */
1619 if (beio->two_sglists)
1620 io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
1621 else
1622 io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
1623 io->scsiio.kern_data_len = beio->io_len;
1624 io->scsiio.kern_sg_entries = beio->num_segs;
1625 io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
1626
1627 /*
1628 * For the read case, we need to read the data into our buffers and
1629 * then we can send it back to the user. For the write case, we
1630 * need to get the data from the user first.
1631 */
1632 if (beio->bio_cmd == BIO_READ) {
1633 SDT_PROBE0(cbb, , read, alloc_done);
1634 be_lun->dispatch(be_lun, beio);
1635 } else {
1636 SDT_PROBE0(cbb, , write, alloc_done);
1637 ctl_datamove(io);
1638 }
1639 }
1640
1641 static void
ctl_be_block_worker(void * context,int pending)1642 ctl_be_block_worker(void *context, int pending)
1643 {
1644 struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
1645 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1646 union ctl_io *io;
1647 struct ctl_be_block_io *beio;
1648
1649 DPRINTF("entered\n");
1650 /*
1651 * Fetch and process I/Os from all queues. If we detect LUN
1652 * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race,
1653 * so make response maximally opaque to not confuse initiator.
1654 */
1655 for (;;) {
1656 mtx_lock(&be_lun->queue_lock);
1657 io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
1658 if (io != NULL) {
1659 DPRINTF("datamove queue\n");
1660 STAILQ_REMOVE_HEAD(&be_lun->datamove_queue, links);
1661 mtx_unlock(&be_lun->queue_lock);
1662 beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1663 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1664 ctl_set_busy(&io->scsiio);
1665 ctl_complete_beio(beio);
1666 continue;
1667 }
1668 be_lun->dispatch(be_lun, beio);
1669 continue;
1670 }
1671 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
1672 if (io != NULL) {
1673 DPRINTF("config write queue\n");
1674 STAILQ_REMOVE_HEAD(&be_lun->config_write_queue, links);
1675 mtx_unlock(&be_lun->queue_lock);
1676 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1677 ctl_set_busy(&io->scsiio);
1678 ctl_config_write_done(io);
1679 continue;
1680 }
1681 ctl_be_block_cw_dispatch(be_lun, io);
1682 continue;
1683 }
1684 io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
1685 if (io != NULL) {
1686 DPRINTF("config read queue\n");
1687 STAILQ_REMOVE_HEAD(&be_lun->config_read_queue, links);
1688 mtx_unlock(&be_lun->queue_lock);
1689 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1690 ctl_set_busy(&io->scsiio);
1691 ctl_config_read_done(io);
1692 continue;
1693 }
1694 ctl_be_block_cr_dispatch(be_lun, io);
1695 continue;
1696 }
1697 io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
1698 if (io != NULL) {
1699 DPRINTF("input queue\n");
1700 STAILQ_REMOVE_HEAD(&be_lun->input_queue, links);
1701 mtx_unlock(&be_lun->queue_lock);
1702 if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1703 ctl_set_busy(&io->scsiio);
1704 ctl_data_submit_done(io);
1705 continue;
1706 }
1707 ctl_be_block_dispatch(be_lun, io);
1708 continue;
1709 }
1710
1711 /*
1712 * If we get here, there is no work left in the queues, so
1713 * just break out and let the task queue go to sleep.
1714 */
1715 mtx_unlock(&be_lun->queue_lock);
1716 break;
1717 }
1718 }
1719
1720 /*
1721 * Entry point from CTL to the backend for I/O. We queue everything to a
1722 * work thread, so this just puts the I/O on a queue and wakes up the
1723 * thread.
1724 */
1725 static int
ctl_be_block_submit(union ctl_io * io)1726 ctl_be_block_submit(union ctl_io *io)
1727 {
1728 struct ctl_be_block_lun *be_lun;
1729
1730 DPRINTF("entered\n");
1731
1732 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
1733
1734 KASSERT(io->io_hdr.io_type == CTL_IO_SCSI,
1735 ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type));
1736
1737 PRIV(io)->len = 0;
1738
1739 mtx_lock(&be_lun->queue_lock);
1740 STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1741 mtx_unlock(&be_lun->queue_lock);
1742 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1743
1744 return (CTL_RETVAL_COMPLETE);
1745 }
1746
1747 static int
ctl_be_block_ioctl(struct cdev * dev,u_long cmd,caddr_t addr,int flag,struct thread * td)1748 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1749 int flag, struct thread *td)
1750 {
1751 struct ctl_be_block_softc *softc = &backend_block_softc;
1752 int error;
1753
1754 error = 0;
1755 switch (cmd) {
1756 case CTL_LUN_REQ: {
1757 struct ctl_lun_req *lun_req;
1758
1759 lun_req = (struct ctl_lun_req *)addr;
1760
1761 switch (lun_req->reqtype) {
1762 case CTL_LUNREQ_CREATE:
1763 error = ctl_be_block_create(softc, lun_req);
1764 break;
1765 case CTL_LUNREQ_RM:
1766 error = ctl_be_block_rm(softc, lun_req);
1767 break;
1768 case CTL_LUNREQ_MODIFY:
1769 error = ctl_be_block_modify(softc, lun_req);
1770 break;
1771 default:
1772 lun_req->status = CTL_LUN_ERROR;
1773 snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1774 "invalid LUN request type %d",
1775 lun_req->reqtype);
1776 break;
1777 }
1778 break;
1779 }
1780 default:
1781 error = ENOTTY;
1782 break;
1783 }
1784
1785 return (error);
1786 }
1787
1788 static int
ctl_be_block_open_file(struct ctl_be_block_lun * be_lun,struct ctl_lun_req * req)1789 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1790 {
1791 struct ctl_be_lun *cbe_lun;
1792 struct ctl_be_block_filedata *file_data;
1793 struct ctl_lun_create_params *params;
1794 const char *value;
1795 struct vattr vattr;
1796 off_t ps, pss, po, pos, us, uss, uo, uos;
1797 int error;
1798
1799 cbe_lun = &be_lun->cbe_lun;
1800 file_data = &be_lun->backend.file;
1801 params = &be_lun->params;
1802
1803 be_lun->dev_type = CTL_BE_BLOCK_FILE;
1804 be_lun->dispatch = ctl_be_block_dispatch_file;
1805 be_lun->lun_flush = ctl_be_block_flush_file;
1806 be_lun->get_lba_status = ctl_be_block_gls_file;
1807 be_lun->getattr = ctl_be_block_getattr_file;
1808 be_lun->unmap = NULL;
1809 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
1810
1811 error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
1812 if (error != 0) {
1813 snprintf(req->error_str, sizeof(req->error_str),
1814 "error calling VOP_GETATTR() for file %s",
1815 be_lun->dev_path);
1816 return (error);
1817 }
1818
1819 file_data->cred = crhold(curthread->td_ucred);
1820 if (params->lun_size_bytes != 0)
1821 be_lun->size_bytes = params->lun_size_bytes;
1822 else
1823 be_lun->size_bytes = vattr.va_size;
1824
1825 /*
1826 * For files we can use any logical block size. Prefer 512 bytes
1827 * for compatibility reasons. If file's vattr.va_blocksize
1828 * (preferred I/O block size) is bigger and multiple to chosen
1829 * logical block size -- report it as physical block size.
1830 */
1831 if (params->blocksize_bytes != 0)
1832 cbe_lun->blocksize = params->blocksize_bytes;
1833 else if (cbe_lun->lun_type == T_CDROM)
1834 cbe_lun->blocksize = 2048;
1835 else
1836 cbe_lun->blocksize = 512;
1837 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
1838 cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
1839 0 : (be_lun->size_blocks - 1);
1840
1841 us = ps = vattr.va_blocksize;
1842 uo = po = 0;
1843
1844 value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
1845 if (value != NULL)
1846 ctl_expand_number(value, &ps);
1847 value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
1848 if (value != NULL)
1849 ctl_expand_number(value, &po);
1850 pss = ps / cbe_lun->blocksize;
1851 pos = po / cbe_lun->blocksize;
1852 if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
1853 ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
1854 cbe_lun->pblockexp = fls(pss) - 1;
1855 cbe_lun->pblockoff = (pss - pos) % pss;
1856 }
1857
1858 value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
1859 if (value != NULL)
1860 ctl_expand_number(value, &us);
1861 value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
1862 if (value != NULL)
1863 ctl_expand_number(value, &uo);
1864 uss = us / cbe_lun->blocksize;
1865 uos = uo / cbe_lun->blocksize;
1866 if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
1867 ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
1868 cbe_lun->ublockexp = fls(uss) - 1;
1869 cbe_lun->ublockoff = (uss - uos) % uss;
1870 }
1871
1872 /*
1873 * Sanity check. The media size has to be at least one
1874 * sector long.
1875 */
1876 if (be_lun->size_bytes < cbe_lun->blocksize) {
1877 error = EINVAL;
1878 snprintf(req->error_str, sizeof(req->error_str),
1879 "file %s size %ju < block size %u", be_lun->dev_path,
1880 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
1881 }
1882
1883 cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
1884 return (error);
1885 }
1886
1887 static int
ctl_be_block_open_dev(struct ctl_be_block_lun * be_lun,struct ctl_lun_req * req)1888 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1889 {
1890 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1891 struct ctl_lun_create_params *params;
1892 struct cdevsw *csw;
1893 struct cdev *dev;
1894 const char *value;
1895 int error, atomic, maxio, ref, unmap, tmp;
1896 off_t ps, pss, po, pos, us, uss, uo, uos, otmp;
1897
1898 params = &be_lun->params;
1899
1900 be_lun->dev_type = CTL_BE_BLOCK_DEV;
1901 csw = devvn_refthread(be_lun->vn, &dev, &ref);
1902 if (csw == NULL)
1903 return (ENXIO);
1904 if (strcmp(csw->d_name, "zvol") == 0) {
1905 be_lun->dispatch = ctl_be_block_dispatch_zvol;
1906 be_lun->get_lba_status = ctl_be_block_gls_zvol;
1907 atomic = maxio = CTLBLK_MAX_IO_SIZE;
1908 } else {
1909 be_lun->dispatch = ctl_be_block_dispatch_dev;
1910 be_lun->get_lba_status = NULL;
1911 atomic = 0;
1912 maxio = dev->si_iosize_max;
1913 if (maxio <= 0)
1914 maxio = DFLTPHYS;
1915 if (maxio > CTLBLK_MAX_SEG)
1916 maxio = CTLBLK_MAX_SEG;
1917 }
1918 be_lun->lun_flush = ctl_be_block_flush_dev;
1919 be_lun->getattr = ctl_be_block_getattr_dev;
1920 be_lun->unmap = ctl_be_block_unmap_dev;
1921
1922 if (!csw->d_ioctl) {
1923 dev_relthread(dev, ref);
1924 snprintf(req->error_str, sizeof(req->error_str),
1925 "no d_ioctl for device %s!", be_lun->dev_path);
1926 return (ENODEV);
1927 }
1928
1929 error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
1930 curthread);
1931 if (error) {
1932 dev_relthread(dev, ref);
1933 snprintf(req->error_str, sizeof(req->error_str),
1934 "error %d returned for DIOCGSECTORSIZE ioctl "
1935 "on %s!", error, be_lun->dev_path);
1936 return (error);
1937 }
1938
1939 /*
1940 * If the user has asked for a blocksize that is greater than the
1941 * backing device's blocksize, we can do it only if the blocksize
1942 * the user is asking for is an even multiple of the underlying
1943 * device's blocksize.
1944 */
1945 if ((params->blocksize_bytes != 0) &&
1946 (params->blocksize_bytes >= tmp)) {
1947 if (params->blocksize_bytes % tmp == 0) {
1948 cbe_lun->blocksize = params->blocksize_bytes;
1949 } else {
1950 dev_relthread(dev, ref);
1951 snprintf(req->error_str, sizeof(req->error_str),
1952 "requested blocksize %u is not an even "
1953 "multiple of backing device blocksize %u",
1954 params->blocksize_bytes, tmp);
1955 return (EINVAL);
1956 }
1957 } else if (params->blocksize_bytes != 0) {
1958 dev_relthread(dev, ref);
1959 snprintf(req->error_str, sizeof(req->error_str),
1960 "requested blocksize %u < backing device "
1961 "blocksize %u", params->blocksize_bytes, tmp);
1962 return (EINVAL);
1963 } else if (cbe_lun->lun_type == T_CDROM)
1964 cbe_lun->blocksize = MAX(tmp, 2048);
1965 else
1966 cbe_lun->blocksize = tmp;
1967
1968 error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
1969 curthread);
1970 if (error) {
1971 dev_relthread(dev, ref);
1972 snprintf(req->error_str, sizeof(req->error_str),
1973 "error %d returned for DIOCGMEDIASIZE "
1974 " ioctl on %s!", error,
1975 be_lun->dev_path);
1976 return (error);
1977 }
1978
1979 if (params->lun_size_bytes != 0) {
1980 if (params->lun_size_bytes > otmp) {
1981 dev_relthread(dev, ref);
1982 snprintf(req->error_str, sizeof(req->error_str),
1983 "requested LUN size %ju > backing device "
1984 "size %ju",
1985 (uintmax_t)params->lun_size_bytes,
1986 (uintmax_t)otmp);
1987 return (EINVAL);
1988 }
1989
1990 be_lun->size_bytes = params->lun_size_bytes;
1991 } else
1992 be_lun->size_bytes = otmp;
1993 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
1994 cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
1995 0 : (be_lun->size_blocks - 1);
1996
1997 error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
1998 curthread);
1999 if (error)
2000 ps = po = 0;
2001 else {
2002 error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
2003 FREAD, curthread);
2004 if (error)
2005 po = 0;
2006 }
2007 us = ps;
2008 uo = po;
2009
2010 value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
2011 if (value != NULL)
2012 ctl_expand_number(value, &ps);
2013 value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
2014 if (value != NULL)
2015 ctl_expand_number(value, &po);
2016 pss = ps / cbe_lun->blocksize;
2017 pos = po / cbe_lun->blocksize;
2018 if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
2019 ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
2020 cbe_lun->pblockexp = fls(pss) - 1;
2021 cbe_lun->pblockoff = (pss - pos) % pss;
2022 }
2023
2024 value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
2025 if (value != NULL)
2026 ctl_expand_number(value, &us);
2027 value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
2028 if (value != NULL)
2029 ctl_expand_number(value, &uo);
2030 uss = us / cbe_lun->blocksize;
2031 uos = uo / cbe_lun->blocksize;
2032 if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
2033 ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
2034 cbe_lun->ublockexp = fls(uss) - 1;
2035 cbe_lun->ublockoff = (uss - uos) % uss;
2036 }
2037
2038 cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
2039 cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;
2040
2041 if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
2042 unmap = 1;
2043 } else {
2044 struct diocgattr_arg arg;
2045
2046 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
2047 arg.len = sizeof(arg.value.i);
2048 error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
2049 curthread);
2050 unmap = (error == 0) ? arg.value.i : 0;
2051 }
2052 value = dnvlist_get_string(cbe_lun->options, "unmap", NULL);
2053 if (value != NULL)
2054 unmap = (strcmp(value, "on") == 0);
2055 if (unmap)
2056 cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
2057 else
2058 cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
2059
2060 dev_relthread(dev, ref);
2061 return (0);
2062 }
2063
2064 static int
ctl_be_block_close(struct ctl_be_block_lun * be_lun)2065 ctl_be_block_close(struct ctl_be_block_lun *be_lun)
2066 {
2067 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2068 int flags;
2069
2070 if (be_lun->vn) {
2071 flags = FREAD;
2072 if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
2073 flags |= FWRITE;
2074 (void)vn_close(be_lun->vn, flags, NOCRED, curthread);
2075 be_lun->vn = NULL;
2076
2077 switch (be_lun->dev_type) {
2078 case CTL_BE_BLOCK_DEV:
2079 break;
2080 case CTL_BE_BLOCK_FILE:
2081 if (be_lun->backend.file.cred != NULL) {
2082 crfree(be_lun->backend.file.cred);
2083 be_lun->backend.file.cred = NULL;
2084 }
2085 break;
2086 case CTL_BE_BLOCK_NONE:
2087 break;
2088 default:
2089 panic("Unexpected backend type %d", be_lun->dev_type);
2090 break;
2091 }
2092 be_lun->dev_type = CTL_BE_BLOCK_NONE;
2093 }
2094 return (0);
2095 }
2096
2097 static int
ctl_be_block_open(struct ctl_be_block_lun * be_lun,struct ctl_lun_req * req)2098 ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
2099 {
2100 struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2101 struct nameidata nd;
2102 const char *value;
2103 int error, flags;
2104
2105 error = 0;
2106 if (rootvnode == NULL) {
2107 snprintf(req->error_str, sizeof(req->error_str),
2108 "Root filesystem is not mounted");
2109 return (1);
2110 }
2111 pwd_ensure_dirs();
2112
2113 value = dnvlist_get_string(cbe_lun->options, "file", NULL);
2114 if (value == NULL) {
2115 snprintf(req->error_str, sizeof(req->error_str),
2116 "no file argument specified");
2117 return (1);
2118 }
2119 free(be_lun->dev_path, M_CTLBLK);
2120 be_lun->dev_path = strdup(value, M_CTLBLK);
2121
2122 flags = FREAD;
2123 value = dnvlist_get_string(cbe_lun->options, "readonly", NULL);
2124 if (value != NULL) {
2125 if (strcmp(value, "on") != 0)
2126 flags |= FWRITE;
2127 } else if (cbe_lun->lun_type == T_DIRECT)
2128 flags |= FWRITE;
2129
2130 again:
2131 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
2132 error = vn_open(&nd, &flags, 0, NULL);
2133 if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
2134 flags &= ~FWRITE;
2135 goto again;
2136 }
2137 if (error) {
2138 /*
2139 * This is the only reasonable guess we can make as far as
2140 * path if the user doesn't give us a fully qualified path.
2141 * If they want to specify a file, they need to specify the
2142 * full path.
2143 */
2144 if (be_lun->dev_path[0] != '/') {
2145 char *dev_name;
2146
2147 asprintf(&dev_name, M_CTLBLK, "/dev/%s",
2148 be_lun->dev_path);
2149 free(be_lun->dev_path, M_CTLBLK);
2150 be_lun->dev_path = dev_name;
2151 goto again;
2152 }
2153 snprintf(req->error_str, sizeof(req->error_str),
2154 "error opening %s: %d", be_lun->dev_path, error);
2155 return (error);
2156 }
2157 if (flags & FWRITE)
2158 cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
2159 else
2160 cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
2161
2162 NDFREE(&nd, NDF_ONLY_PNBUF);
2163 be_lun->vn = nd.ni_vp;
2164
2165 /* We only support disks and files. */
2166 if (vn_isdisk(be_lun->vn, &error)) {
2167 error = ctl_be_block_open_dev(be_lun, req);
2168 } else if (be_lun->vn->v_type == VREG) {
2169 error = ctl_be_block_open_file(be_lun, req);
2170 } else {
2171 error = EINVAL;
2172 snprintf(req->error_str, sizeof(req->error_str),
2173 "%s is not a disk or plain file", be_lun->dev_path);
2174 }
2175 VOP_UNLOCK(be_lun->vn, 0);
2176
2177 if (error != 0)
2178 ctl_be_block_close(be_lun);
2179 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2180 if (be_lun->dispatch != ctl_be_block_dispatch_dev)
2181 cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2182 value = dnvlist_get_string(cbe_lun->options, "serseq", NULL);
2183 if (value != NULL && strcmp(value, "on") == 0)
2184 cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
2185 else if (value != NULL && strcmp(value, "read") == 0)
2186 cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2187 else if (value != NULL && strcmp(value, "off") == 0)
2188 cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2189 return (0);
2190 }
2191
2192 static int
ctl_be_block_create(struct ctl_be_block_softc * softc,struct ctl_lun_req * req)2193 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2194 {
2195 struct ctl_be_lun *cbe_lun;
2196 struct ctl_be_block_lun *be_lun;
2197 struct ctl_lun_create_params *params;
2198 char num_thread_str[16];
2199 char tmpstr[32];
2200 const char *value;
2201 int retval, num_threads;
2202 int tmp_num_threads;
2203
2204 params = &req->reqdata.create;
2205 retval = 0;
2206 req->status = CTL_LUN_OK;
2207
2208 be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
2209 cbe_lun = &be_lun->cbe_lun;
2210 be_lun->params = req->reqdata.create;
2211 be_lun->softc = softc;
2212 STAILQ_INIT(&be_lun->input_queue);
2213 STAILQ_INIT(&be_lun->config_read_queue);
2214 STAILQ_INIT(&be_lun->config_write_queue);
2215 STAILQ_INIT(&be_lun->datamove_queue);
2216 mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF);
2217 mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, MTX_DEF);
2218 cbe_lun->options = nvlist_clone(req->args_nvl);
2219
2220 if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
2221 cbe_lun->lun_type = params->device_type;
2222 else
2223 cbe_lun->lun_type = T_DIRECT;
2224 be_lun->flags = 0;
2225 cbe_lun->flags = 0;
2226 value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
2227 if (value != NULL) {
2228 if (strcmp(value, "primary") == 0)
2229 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2230 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2231 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2232
2233 if (cbe_lun->lun_type == T_DIRECT ||
2234 cbe_lun->lun_type == T_CDROM) {
2235 be_lun->size_bytes = params->lun_size_bytes;
2236 if (params->blocksize_bytes != 0)
2237 cbe_lun->blocksize = params->blocksize_bytes;
2238 else if (cbe_lun->lun_type == T_CDROM)
2239 cbe_lun->blocksize = 2048;
2240 else
2241 cbe_lun->blocksize = 512;
2242 be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2243 cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2244 0 : (be_lun->size_blocks - 1);
2245
2246 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2247 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2248 retval = ctl_be_block_open(be_lun, req);
2249 if (retval != 0) {
2250 retval = 0;
2251 req->status = CTL_LUN_WARNING;
2252 }
2253 }
2254 num_threads = cbb_num_threads;
2255 } else {
2256 num_threads = 1;
2257 }
2258
2259 value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL);
2260 if (value != NULL) {
2261 tmp_num_threads = strtol(value, NULL, 0);
2262
2263 /*
2264 * We don't let the user specify less than one
2265 * thread, but hope he's clueful enough not to
2266 * specify 1000 threads.
2267 */
2268 if (tmp_num_threads < 1) {
2269 snprintf(req->error_str, sizeof(req->error_str),
2270 "invalid number of threads %s",
2271 num_thread_str);
2272 goto bailout_error;
2273 }
2274 num_threads = tmp_num_threads;
2275 }
2276
2277 if (be_lun->vn == NULL)
2278 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2279 /* Tell the user the blocksize we ended up using */
2280 params->lun_size_bytes = be_lun->size_bytes;
2281 params->blocksize_bytes = cbe_lun->blocksize;
2282 if (params->flags & CTL_LUN_FLAG_ID_REQ) {
2283 cbe_lun->req_lun_id = params->req_lun_id;
2284 cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
2285 } else
2286 cbe_lun->req_lun_id = 0;
2287
2288 cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
2289 cbe_lun->be = &ctl_be_block_driver;
2290
2291 if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
2292 snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d",
2293 softc->num_luns);
2294 strncpy((char *)cbe_lun->serial_num, tmpstr,
2295 MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
2296
2297 /* Tell the user what we used for a serial number */
2298 strncpy((char *)params->serial_num, tmpstr,
2299 MIN(sizeof(params->serial_num), sizeof(tmpstr)));
2300 } else {
2301 strncpy((char *)cbe_lun->serial_num, params->serial_num,
2302 MIN(sizeof(cbe_lun->serial_num),
2303 sizeof(params->serial_num)));
2304 }
2305 if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
2306 snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns);
2307 strncpy((char *)cbe_lun->device_id, tmpstr,
2308 MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
2309
2310 /* Tell the user what we used for a device ID */
2311 strncpy((char *)params->device_id, tmpstr,
2312 MIN(sizeof(params->device_id), sizeof(tmpstr)));
2313 } else {
2314 strncpy((char *)cbe_lun->device_id, params->device_id,
2315 MIN(sizeof(cbe_lun->device_id),
2316 sizeof(params->device_id)));
2317 }
2318
2319 TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2320
2321 be_lun->io_taskqueue = taskqueue_create("ctlblocktq", M_WAITOK,
2322 taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2323
2324 if (be_lun->io_taskqueue == NULL) {
2325 snprintf(req->error_str, sizeof(req->error_str),
2326 "unable to create taskqueue");
2327 goto bailout_error;
2328 }
2329
2330 /*
2331 * Note that we start the same number of threads by default for
2332 * both the file case and the block device case. For the file
2333 * case, we need multiple threads to allow concurrency, because the
2334 * vnode interface is designed to be a blocking interface. For the
2335 * block device case, ZFS zvols at least will block the caller's
2336 * context in many instances, and so we need multiple threads to
2337 * overcome that problem. Other block devices don't need as many
2338 * threads, but they shouldn't cause too many problems.
2339 *
2340 * If the user wants to just have a single thread for a block
2341 * device, he can specify that when the LUN is created, or change
2342 * the tunable/sysctl to alter the default number of threads.
2343 */
2344 retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue,
2345 /*num threads*/num_threads,
2346 /*priority*/PUSER,
2347 /*proc*/control_softc->ctl_proc,
2348 /*thread name*/"block");
2349
2350 if (retval != 0)
2351 goto bailout_error;
2352
2353 be_lun->num_threads = num_threads;
2354
2355 retval = ctl_add_lun(&be_lun->cbe_lun);
2356 if (retval != 0) {
2357 snprintf(req->error_str, sizeof(req->error_str),
2358 "ctl_add_lun() returned error %d, see dmesg for "
2359 "details", retval);
2360 retval = 0;
2361 goto bailout_error;
2362 }
2363
2364 be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id,
2365 cbe_lun->blocksize,
2366 DEVSTAT_ALL_SUPPORTED,
2367 cbe_lun->lun_type
2368 | DEVSTAT_TYPE_IF_OTHER,
2369 DEVSTAT_PRIORITY_OTHER);
2370
2371 mtx_lock(&softc->lock);
2372 softc->num_luns++;
2373 SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links);
2374 mtx_unlock(&softc->lock);
2375
2376 params->req_lun_id = cbe_lun->lun_id;
2377
2378 return (retval);
2379
2380 bailout_error:
2381 req->status = CTL_LUN_ERROR;
2382
2383 if (be_lun->io_taskqueue != NULL)
2384 taskqueue_free(be_lun->io_taskqueue);
2385 ctl_be_block_close(be_lun);
2386 if (be_lun->dev_path != NULL)
2387 free(be_lun->dev_path, M_CTLBLK);
2388 nvlist_destroy(cbe_lun->options);
2389 mtx_destroy(&be_lun->queue_lock);
2390 mtx_destroy(&be_lun->io_lock);
2391 free(be_lun, M_CTLBLK);
2392
2393 return (retval);
2394 }
2395
2396 static int
ctl_be_block_rm(struct ctl_be_block_softc * softc,struct ctl_lun_req * req)2397 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2398 {
2399 struct ctl_lun_rm_params *params;
2400 struct ctl_be_block_lun *be_lun;
2401 struct ctl_be_lun *cbe_lun;
2402 int retval;
2403
2404 params = &req->reqdata.rm;
2405
2406 sx_xlock(&softc->modify_lock);
2407 mtx_lock(&softc->lock);
2408 SLIST_FOREACH(be_lun, &softc->lun_list, links) {
2409 if (be_lun->cbe_lun.lun_id == params->lun_id) {
2410 SLIST_REMOVE(&softc->lun_list, be_lun,
2411 ctl_be_block_lun, links);
2412 softc->num_luns--;
2413 break;
2414 }
2415 }
2416 mtx_unlock(&softc->lock);
2417 sx_xunlock(&softc->modify_lock);
2418 if (be_lun == NULL) {
2419 snprintf(req->error_str, sizeof(req->error_str),
2420 "LUN %u is not managed by the block backend",
2421 params->lun_id);
2422 goto bailout_error;
2423 }
2424 cbe_lun = &be_lun->cbe_lun;
2425
2426 if (be_lun->vn != NULL) {
2427 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2428 ctl_lun_no_media(cbe_lun);
2429 taskqueue_drain_all(be_lun->io_taskqueue);
2430 ctl_be_block_close(be_lun);
2431 }
2432
2433 mtx_lock(&softc->lock);
2434 be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2435 mtx_unlock(&softc->lock);
2436
2437 retval = ctl_remove_lun(cbe_lun);
2438 if (retval != 0) {
2439 snprintf(req->error_str, sizeof(req->error_str),
2440 "error %d returned from ctl_remove_lun() for "
2441 "LUN %d", retval, params->lun_id);
2442 mtx_lock(&softc->lock);
2443 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2444 mtx_unlock(&softc->lock);
2445 goto bailout_error;
2446 }
2447
2448 mtx_lock(&softc->lock);
2449 while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2450 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0);
2451 if (retval == EINTR)
2452 break;
2453 }
2454 be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2455 if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2456 mtx_unlock(&softc->lock);
2457 free(be_lun, M_CTLBLK);
2458 } else {
2459 mtx_unlock(&softc->lock);
2460 return (EINTR);
2461 }
2462
2463 req->status = CTL_LUN_OK;
2464 return (0);
2465
2466 bailout_error:
2467 req->status = CTL_LUN_ERROR;
2468 return (0);
2469 }
2470
2471 static int
ctl_be_block_modify(struct ctl_be_block_softc * softc,struct ctl_lun_req * req)2472 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2473 {
2474 struct ctl_lun_modify_params *params;
2475 struct ctl_be_block_lun *be_lun;
2476 struct ctl_be_lun *cbe_lun;
2477 const char *value;
2478 uint64_t oldsize;
2479 int error, wasprim;
2480
2481 params = &req->reqdata.modify;
2482
2483 sx_xlock(&softc->modify_lock);
2484 mtx_lock(&softc->lock);
2485 SLIST_FOREACH(be_lun, &softc->lun_list, links) {
2486 if (be_lun->cbe_lun.lun_id == params->lun_id)
2487 break;
2488 }
2489 mtx_unlock(&softc->lock);
2490 if (be_lun == NULL) {
2491 snprintf(req->error_str, sizeof(req->error_str),
2492 "LUN %u is not managed by the block backend",
2493 params->lun_id);
2494 goto bailout_error;
2495 }
2496 cbe_lun = &be_lun->cbe_lun;
2497
2498 if (params->lun_size_bytes != 0)
2499 be_lun->params.lun_size_bytes = params->lun_size_bytes;
2500
2501 if (req->args_nvl != NULL) {
2502 nvlist_destroy(cbe_lun->options);
2503 cbe_lun->options = nvlist_clone(req->args_nvl);
2504 }
2505
2506 wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
2507 value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
2508 if (value != NULL) {
2509 if (strcmp(value, "primary") == 0)
2510 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2511 else
2512 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2513 } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2514 cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2515 else
2516 cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2517 if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
2518 if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
2519 ctl_lun_primary(cbe_lun);
2520 else
2521 ctl_lun_secondary(cbe_lun);
2522 }
2523
2524 oldsize = be_lun->size_blocks;
2525 if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2526 control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2527 if (be_lun->vn == NULL)
2528 error = ctl_be_block_open(be_lun, req);
2529 else if (vn_isdisk(be_lun->vn, &error))
2530 error = ctl_be_block_open_dev(be_lun, req);
2531 else if (be_lun->vn->v_type == VREG) {
2532 vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2533 error = ctl_be_block_open_file(be_lun, req);
2534 VOP_UNLOCK(be_lun->vn, 0);
2535 } else
2536 error = EINVAL;
2537 if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
2538 be_lun->vn != NULL) {
2539 cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
2540 ctl_lun_has_media(cbe_lun);
2541 } else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
2542 be_lun->vn == NULL) {
2543 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2544 ctl_lun_no_media(cbe_lun);
2545 }
2546 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
2547 } else {
2548 if (be_lun->vn != NULL) {
2549 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2550 ctl_lun_no_media(cbe_lun);
2551 taskqueue_drain_all(be_lun->io_taskqueue);
2552 error = ctl_be_block_close(be_lun);
2553 } else
2554 error = 0;
2555 }
2556 if (be_lun->size_blocks != oldsize)
2557 ctl_lun_capacity_changed(cbe_lun);
2558
2559 /* Tell the user the exact size we ended up using */
2560 params->lun_size_bytes = be_lun->size_bytes;
2561
2562 sx_xunlock(&softc->modify_lock);
2563 req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
2564 return (0);
2565
2566 bailout_error:
2567 sx_xunlock(&softc->modify_lock);
2568 req->status = CTL_LUN_ERROR;
2569 return (0);
2570 }
2571
2572 static void
ctl_be_block_lun_shutdown(struct ctl_be_lun * cbe_lun)2573 ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun)
2574 {
2575 struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun;
2576 struct ctl_be_block_softc *softc = be_lun->softc;
2577
2578 taskqueue_drain_all(be_lun->io_taskqueue);
2579 taskqueue_free(be_lun->io_taskqueue);
2580 if (be_lun->disk_stats != NULL)
2581 devstat_remove_entry(be_lun->disk_stats);
2582 nvlist_destroy(be_lun->cbe_lun.options);
2583 free(be_lun->dev_path, M_CTLBLK);
2584 mtx_destroy(&be_lun->queue_lock);
2585 mtx_destroy(&be_lun->io_lock);
2586
2587 mtx_lock(&softc->lock);
2588 be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2589 if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2590 wakeup(be_lun);
2591 else
2592 free(be_lun, M_CTLBLK);
2593 mtx_unlock(&softc->lock);
2594 }
2595
2596 static int
ctl_be_block_config_write(union ctl_io * io)2597 ctl_be_block_config_write(union ctl_io *io)
2598 {
2599 struct ctl_be_block_lun *be_lun;
2600 struct ctl_be_lun *cbe_lun;
2601 int retval;
2602
2603 DPRINTF("entered\n");
2604
2605 cbe_lun = CTL_BACKEND_LUN(io);
2606 be_lun = (struct ctl_be_block_lun *)cbe_lun;
2607
2608 retval = 0;
2609 switch (io->scsiio.cdb[0]) {
2610 case SYNCHRONIZE_CACHE:
2611 case SYNCHRONIZE_CACHE_16:
2612 case WRITE_SAME_10:
2613 case WRITE_SAME_16:
2614 case UNMAP:
2615 /*
2616 * The upper level CTL code will filter out any CDBs with
2617 * the immediate bit set and return the proper error.
2618 *
2619 * We don't really need to worry about what LBA range the
2620 * user asked to be synced out. When they issue a sync
2621 * cache command, we'll sync out the whole thing.
2622 */
2623 mtx_lock(&be_lun->queue_lock);
2624 STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2625 links);
2626 mtx_unlock(&be_lun->queue_lock);
2627 taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2628 break;
2629 case START_STOP_UNIT: {
2630 struct scsi_start_stop_unit *cdb;
2631 struct ctl_lun_req req;
2632
2633 cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
2634 if ((cdb->how & SSS_PC_MASK) != 0) {
2635 ctl_set_success(&io->scsiio);
2636 ctl_config_write_done(io);
2637 break;
2638 }
2639 if (cdb->how & SSS_START) {
2640 if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
2641 retval = ctl_be_block_open(be_lun, &req);
2642 cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
2643 if (retval == 0) {
2644 cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
2645 ctl_lun_has_media(cbe_lun);
2646 } else {
2647 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2648 ctl_lun_no_media(cbe_lun);
2649 }
2650 }
2651 ctl_start_lun(cbe_lun);
2652 } else {
2653 ctl_stop_lun(cbe_lun);
2654 if (cdb->how & SSS_LOEJ) {
2655 cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2656 cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
2657 ctl_lun_ejected(cbe_lun);
2658 if (be_lun->vn != NULL)
2659 ctl_be_block_close(be_lun);
2660 }
2661 }
2662
2663 ctl_set_success(&io->scsiio);
2664 ctl_config_write_done(io);
2665 break;
2666 }
2667 case PREVENT_ALLOW:
2668 ctl_set_success(&io->scsiio);
2669 ctl_config_write_done(io);
2670 break;
2671 default:
2672 ctl_set_invalid_opcode(&io->scsiio);
2673 ctl_config_write_done(io);
2674 retval = CTL_RETVAL_COMPLETE;
2675 break;
2676 }
2677
2678 return (retval);
2679 }
2680
2681 static int
ctl_be_block_config_read(union ctl_io * io)2682 ctl_be_block_config_read(union ctl_io *io)
2683 {
2684 struct ctl_be_block_lun *be_lun;
2685 int retval = 0;
2686
2687 DPRINTF("entered\n");
2688
2689 be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
2690
2691 switch (io->scsiio.cdb[0]) {
2692 case SERVICE_ACTION_IN:
2693 if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
2694 mtx_lock(&be_lun->queue_lock);
2695 STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
2696 &io->io_hdr, links);
2697 mtx_unlock(&be_lun->queue_lock);
2698 taskqueue_enqueue(be_lun->io_taskqueue,
2699 &be_lun->io_task);
2700 retval = CTL_RETVAL_QUEUED;
2701 break;
2702 }
2703 ctl_set_invalid_field(&io->scsiio,
2704 /*sks_valid*/ 1,
2705 /*command*/ 1,
2706 /*field*/ 1,
2707 /*bit_valid*/ 1,
2708 /*bit*/ 4);
2709 ctl_config_read_done(io);
2710 retval = CTL_RETVAL_COMPLETE;
2711 break;
2712 default:
2713 ctl_set_invalid_opcode(&io->scsiio);
2714 ctl_config_read_done(io);
2715 retval = CTL_RETVAL_COMPLETE;
2716 break;
2717 }
2718
2719 return (retval);
2720 }
2721
2722 static int
ctl_be_block_lun_info(struct ctl_be_lun * cbe_lun,struct sbuf * sb)2723 ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb)
2724 {
2725 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
2726 int retval;
2727
2728 retval = sbuf_printf(sb, "\t<num_threads>");
2729 if (retval != 0)
2730 goto bailout;
2731 retval = sbuf_printf(sb, "%d", lun->num_threads);
2732 if (retval != 0)
2733 goto bailout;
2734 retval = sbuf_printf(sb, "</num_threads>\n");
2735
2736 bailout:
2737 return (retval);
2738 }
2739
2740 static uint64_t
ctl_be_block_lun_attr(struct ctl_be_lun * cbe_lun,const char * attrname)2741 ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname)
2742 {
2743 struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
2744
2745 if (lun->getattr == NULL)
2746 return (UINT64_MAX);
2747 return (lun->getattr(lun, attrname));
2748 }
2749
2750 static int
ctl_be_block_init(void)2751 ctl_be_block_init(void)
2752 {
2753 struct ctl_be_block_softc *softc = &backend_block_softc;
2754
2755 sx_init(&softc->modify_lock, "ctlblock modify");
2756 mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
2757 softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2758 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2759 softc->buf_zone = uma_zcreate("ctlblock", CTLBLK_MAX_SEG,
2760 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2761 #if (CTLBLK_MAX_SEG > 131072)
2762 softc->buf128_zone = uma_zcreate("ctlblock128", 131072,
2763 NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2764 #endif
2765 SLIST_INIT(&softc->lun_list);
2766 return (0);
2767 }
2768
2769
2770 static int
ctl_be_block_shutdown(void)2771 ctl_be_block_shutdown(void)
2772 {
2773 struct ctl_be_block_softc *softc = &backend_block_softc;
2774 struct ctl_be_block_lun *lun;
2775
2776 mtx_lock(&softc->lock);
2777 while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) {
2778 SLIST_REMOVE_HEAD(&softc->lun_list, links);
2779 softc->num_luns--;
2780 /*
2781 * Drop our lock here. Since ctl_remove_lun() can call
2782 * back into us, this could potentially lead to a recursive
2783 * lock of the same mutex, which would cause a hang.
2784 */
2785 mtx_unlock(&softc->lock);
2786 ctl_remove_lun(&lun->cbe_lun);
2787 mtx_lock(&softc->lock);
2788 }
2789 mtx_unlock(&softc->lock);
2790 uma_zdestroy(softc->buf_zone);
2791 #if (CTLBLK_MAX_SEG > 131072)
2792 uma_zdestroy(softc->buf128_zone);
2793 #endif
2794 uma_zdestroy(softc->beio_zone);
2795 mtx_destroy(&softc->lock);
2796 sx_destroy(&softc->modify_lock);
2797 return (0);
2798 }
2799