1 /*-
2 * Copyright (C) 2012-2016 Intel Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD: stable/10/sys/dev/nvd/nvd.c 312406 2017-01-19 11:17:09Z mav $");
29
30 #include <sys/param.h>
31 #include <sys/bio.h>
32 #include <sys/kernel.h>
33 #include <sys/malloc.h>
34 #include <sys/module.h>
35 #include <sys/sysctl.h>
36 #include <sys/systm.h>
37 #include <sys/taskqueue.h>
38
39 #include <geom/geom.h>
40 #include <geom/geom_disk.h>
41
42 #include <dev/nvme/nvme.h>
43
/* Name prefix used for the module, the GEOM disk name and console messages. */
#define NVD_STR "nvd"

struct nvd_disk;

/* Disk method implementations installed on each GEOM disk in nvd_new_disk(). */
static disk_ioctl_t nvd_ioctl;
static disk_strategy_t nvd_strategy;

/* Completion callback handed to nvme_ns_bio_process() for every bio. */
static void nvd_done(void *arg, const struct nvme_completion *cpl);

/* Per-namespace setup and teardown. */
static void *nvd_new_disk(struct nvme_namespace *ns, void *ctrlr);
static void destroy_geom_disk(struct nvd_disk *ndisk);

/* Per-controller setup and failure handling. */
static void *nvd_new_controller(struct nvme_controller *ctrlr);
static void nvd_controller_fail(void *ctrlr);

/* Module load/unload helpers invoked from nvd_modevent(). */
static int nvd_load(void);
static void nvd_unload(void);

/* malloc(9) type used for every allocation made by this driver. */
MALLOC_DEFINE(M_NVD, "nvd", "nvd(4) allocations");

/*
 * Handle returned by nvme_register_consumer() in nvd_load() and passed back
 * to nvme_unregister_consumer() in nvd_unload().
 * NOTE(review): not declared static although only this file is seen using
 * it - confirm no other file references it before narrowing the linkage.
 */
struct nvme_consumer *consumer_handle;
65
/*
 * Per-namespace driver state.  One instance is allocated in nvd_new_disk()
 * and stored in the GEOM disk's d_drv1 field.
 */
struct nvd_disk {

	/* Bios deferred by nvd_strategy(); drained by nvd_bioq_process(). */
	struct bio_queue_head bioq;
	/* Task that runs nvd_bioq_process() on the per-disk taskqueue. */
	struct task bioqtask;
	/* Mutex protecting bioq. */
	struct mtx bioqlock;

	/* GEOM disk obtained from disk_alloc(). */
	struct disk *disk;
	/* Per-disk taskqueue created in nvd_new_disk(). */
	struct taskqueue *tq;
	/* NVMe namespace this disk is backed by. */
	struct nvme_namespace *ns;

	/*
	 * Number of bios handed to nvme_ns_bio_process() and not yet
	 * completed; updated with atomic_add_int().
	 */
	uint32_t cur_depth;
	/*
	 * Number of BIO_ORDERED bios accepted by nvd_strategy() and not yet
	 * completed; updated with atomic_add_int().
	 */
	uint32_t ordered_in_flight;

	/* Linkage on the driver-wide disk_head list. */
	TAILQ_ENTRY(nvd_disk) global_tailq;
	/* Linkage on the owning nvd_controller's disk_head list. */
	TAILQ_ENTRY(nvd_disk) ctrlr_tailq;
};
82
/*
 * Per-controller driver state, allocated in nvd_new_controller() and released
 * in nvd_controller_fail() or nvd_unload().
 */
struct nvd_controller {

	/* Linkage on the driver-wide ctrlr_head list. */
	TAILQ_ENTRY(nvd_controller) tailq;
	/* Disks (namespaces) that belong to this controller. */
	TAILQ_HEAD(, nvd_disk) disk_head;
};
88
/* All controllers and all disks known to the driver; set up in nvd_load(). */
static TAILQ_HEAD(, nvd_controller) ctrlr_head;
static TAILQ_HEAD(disk_list, nvd_disk) disk_head;

/* Root of the hw.nvd sysctl tree. */
static SYSCTL_NODE(_hw, OID_AUTO, nvd, CTLFLAG_RD, 0, "nvd driver parameters");
/*
 * The NVMe specification does not define a maximum or optimal delete size, so
 * technically max delete size is min(full size of the namespace, 2^32 - 1
 * LBAs). A single delete for a multi-TB NVMe namespace though may take much
 * longer to complete than the nvme(4) I/O timeout period. So choose a sensible
 * default here that is still suitably large to minimize the number of overall
 * delete operations.
 *
 * nvd_new_disk() uses this value as an upper bound for each disk's
 * d_delmaxsize.
 */
static uint64_t nvd_delete_max = (1024 * 1024 * 1024); /* 1GB */
SYSCTL_UQUAD(_hw_nvd, OID_AUTO, delete_max, CTLFLAG_RDTUN, &nvd_delete_max, 0,
"nvd maximum BIO_DELETE size in bytes");
104
/*
 * Module event handler referenced from nvd_mod.
 *
 * MOD_LOAD   - returns the result of nvd_load().
 * MOD_UNLOAD - calls nvd_unload() and returns 0.
 * Any other event type is ignored and reported as success (0).
 *
 * The mod and arg parameters are not used.
 */
static int
nvd_modevent(module_t mod, int type, void *arg)
{

	if (type == MOD_LOAD)
		return (nvd_load());

	if (type == MOD_UNLOAD)
		nvd_unload();

	return (0);
}
122
/* Module description: name, event handler, and no extra handler argument. */
moduledata_t nvd_mod = {
	NVD_STR,
	(modeventhand_t)nvd_modevent,
	0
};

/* Register the module, publish its version, and record that it needs nvme. */
DECLARE_MODULE(nvd, nvd_mod, SI_SUB_DRIVERS, SI_ORDER_ANY);
MODULE_VERSION(nvd, 1);
MODULE_DEPEND(nvd, nvme, 1, 1, 1);
132
133 static int
nvd_load()134 nvd_load()
135 {
136
137 TAILQ_INIT(&ctrlr_head);
138 TAILQ_INIT(&disk_head);
139
140 consumer_handle = nvme_register_consumer(nvd_new_disk,
141 nvd_new_controller, NULL, nvd_controller_fail);
142
143 return (consumer_handle != NULL ? 0 : -1);
144 }
145
146 static void
nvd_unload()147 nvd_unload()
148 {
149 struct nvd_controller *ctrlr;
150 struct nvd_disk *disk;
151
152 while (!TAILQ_EMPTY(&ctrlr_head)) {
153 ctrlr = TAILQ_FIRST(&ctrlr_head);
154 TAILQ_REMOVE(&ctrlr_head, ctrlr, tailq);
155 free(ctrlr, M_NVD);
156 }
157
158 while (!TAILQ_EMPTY(&disk_head)) {
159 disk = TAILQ_FIRST(&disk_head);
160 TAILQ_REMOVE(&disk_head, disk, global_tailq);
161 destroy_geom_disk(disk);
162 free(disk, M_NVD);
163 }
164
165 nvme_unregister_consumer(consumer_handle);
166 }
167
168 static int
nvd_bio_submit(struct nvd_disk * ndisk,struct bio * bp)169 nvd_bio_submit(struct nvd_disk *ndisk, struct bio *bp)
170 {
171 int err;
172
173 bp->bio_driver1 = NULL;
174 atomic_add_int(&ndisk->cur_depth, 1);
175 err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done);
176 if (err) {
177 atomic_add_int(&ndisk->cur_depth, -1);
178 if (__predict_false(bp->bio_flags & BIO_ORDERED))
179 atomic_add_int(&ndisk->ordered_in_flight, -1);
180 bp->bio_error = err;
181 bp->bio_flags |= BIO_ERROR;
182 bp->bio_resid = bp->bio_bcount;
183 biodone(bp);
184 return (-1);
185 }
186
187 return (0);
188 }
189
190 static void
nvd_strategy(struct bio * bp)191 nvd_strategy(struct bio *bp)
192 {
193 struct nvd_disk *ndisk;
194
195 ndisk = (struct nvd_disk *)bp->bio_disk->d_drv1;
196
197 if (__predict_false(bp->bio_flags & BIO_ORDERED))
198 atomic_add_int(&ndisk->ordered_in_flight, 1);
199
200 if (__predict_true(ndisk->ordered_in_flight == 0)) {
201 nvd_bio_submit(ndisk, bp);
202 return;
203 }
204
205 /*
206 * There are ordered bios in flight, so we need to submit
207 * bios through the task queue to enforce ordering.
208 */
209 mtx_lock(&ndisk->bioqlock);
210 bioq_insert_tail(&ndisk->bioq, bp);
211 mtx_unlock(&ndisk->bioqlock);
212 taskqueue_enqueue(ndisk->tq, &ndisk->bioqtask);
213 }
214
215 static int
nvd_ioctl(struct disk * ndisk,u_long cmd,void * data,int fflag,struct thread * td)216 nvd_ioctl(struct disk *ndisk, u_long cmd, void *data, int fflag,
217 struct thread *td)
218 {
219 int ret = 0;
220
221 switch (cmd) {
222 default:
223 ret = EIO;
224 }
225
226 return (ret);
227 }
228
229 static void
nvd_done(void * arg,const struct nvme_completion * cpl)230 nvd_done(void *arg, const struct nvme_completion *cpl)
231 {
232 struct bio *bp;
233 struct nvd_disk *ndisk;
234
235 bp = (struct bio *)arg;
236
237 ndisk = bp->bio_disk->d_drv1;
238
239 atomic_add_int(&ndisk->cur_depth, -1);
240 if (__predict_false(bp->bio_flags & BIO_ORDERED))
241 atomic_add_int(&ndisk->ordered_in_flight, -1);
242
243 biodone(bp);
244 }
245
246 static void
nvd_bioq_process(void * arg,int pending)247 nvd_bioq_process(void *arg, int pending)
248 {
249 struct nvd_disk *ndisk = arg;
250 struct bio *bp;
251
252 for (;;) {
253 mtx_lock(&ndisk->bioqlock);
254 bp = bioq_takefirst(&ndisk->bioq);
255 mtx_unlock(&ndisk->bioqlock);
256 if (bp == NULL)
257 break;
258
259 if (nvd_bio_submit(ndisk, bp) != 0) {
260 continue;
261 }
262
263 #ifdef BIO_ORDERED
264 /*
265 * BIO_ORDERED flag dictates that the bio with BIO_ORDERED
266 * flag set must be completed before proceeding with
267 * additional bios.
268 */
269 if (bp->bio_flags & BIO_ORDERED) {
270 while (ndisk->cur_depth > 0) {
271 pause("nvd flush", 1);
272 }
273 }
274 #endif
275 }
276 }
277
278 static void *
nvd_new_controller(struct nvme_controller * ctrlr)279 nvd_new_controller(struct nvme_controller *ctrlr)
280 {
281 struct nvd_controller *nvd_ctrlr;
282
283 nvd_ctrlr = malloc(sizeof(struct nvd_controller), M_NVD,
284 M_ZERO | M_WAITOK);
285
286 TAILQ_INIT(&nvd_ctrlr->disk_head);
287 TAILQ_INSERT_TAIL(&ctrlr_head, nvd_ctrlr, tailq);
288
289 return (nvd_ctrlr);
290 }
291
292 static void *
nvd_new_disk(struct nvme_namespace * ns,void * ctrlr_arg)293 nvd_new_disk(struct nvme_namespace *ns, void *ctrlr_arg)
294 {
295 uint8_t descr[NVME_MODEL_NUMBER_LENGTH+1];
296 struct nvd_disk *ndisk;
297 struct disk *disk;
298 struct nvd_controller *ctrlr = ctrlr_arg;
299
300 ndisk = malloc(sizeof(struct nvd_disk), M_NVD, M_ZERO | M_WAITOK);
301
302 disk = disk_alloc();
303 disk->d_strategy = nvd_strategy;
304 disk->d_ioctl = nvd_ioctl;
305 disk->d_name = NVD_STR;
306 disk->d_drv1 = ndisk;
307
308 disk->d_maxsize = nvme_ns_get_max_io_xfer_size(ns);
309 disk->d_sectorsize = nvme_ns_get_sector_size(ns);
310 disk->d_mediasize = (off_t)nvme_ns_get_size(ns);
311 disk->d_delmaxsize = (off_t)nvme_ns_get_size(ns);
312 if (disk->d_delmaxsize > nvd_delete_max)
313 disk->d_delmaxsize = nvd_delete_max;
314 disk->d_stripesize = nvme_ns_get_stripesize(ns);
315
316 if (TAILQ_EMPTY(&disk_head))
317 disk->d_unit = 0;
318 else
319 disk->d_unit =
320 TAILQ_LAST(&disk_head, disk_list)->disk->d_unit + 1;
321
322 disk->d_flags = DISKFLAG_DIRECT_COMPLETION;
323
324 if (nvme_ns_get_flags(ns) & NVME_NS_DEALLOCATE_SUPPORTED)
325 disk->d_flags |= DISKFLAG_CANDELETE;
326
327 if (nvme_ns_get_flags(ns) & NVME_NS_FLUSH_SUPPORTED)
328 disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
329
330 /* ifdef used here to ease porting to stable branches at a later point. */
331 #ifdef DISKFLAG_UNMAPPED_BIO
332 disk->d_flags |= DISKFLAG_UNMAPPED_BIO;
333 #endif
334
335 /*
336 * d_ident and d_descr are both far bigger than the length of either
337 * the serial or model number strings.
338 */
339 nvme_strvis(disk->d_ident, nvme_ns_get_serial_number(ns),
340 sizeof(disk->d_ident), NVME_SERIAL_NUMBER_LENGTH);
341 nvme_strvis(descr, nvme_ns_get_model_number(ns), sizeof(descr),
342 NVME_MODEL_NUMBER_LENGTH);
343 strlcpy(disk->d_descr, descr, sizeof(descr));
344
345 disk->d_rotation_rate = DISK_RR_NON_ROTATING;
346
347 ndisk->ns = ns;
348 ndisk->disk = disk;
349 ndisk->cur_depth = 0;
350 ndisk->ordered_in_flight = 0;
351
352 mtx_init(&ndisk->bioqlock, "NVD bioq lock", NULL, MTX_DEF);
353 bioq_init(&ndisk->bioq);
354
355 TASK_INIT(&ndisk->bioqtask, 0, nvd_bioq_process, ndisk);
356 ndisk->tq = taskqueue_create("nvd_taskq", M_WAITOK,
357 taskqueue_thread_enqueue, &ndisk->tq);
358 taskqueue_start_threads(&ndisk->tq, 1, PI_DISK, "nvd taskq");
359
360 TAILQ_INSERT_TAIL(&disk_head, ndisk, global_tailq);
361 TAILQ_INSERT_TAIL(&ctrlr->disk_head, ndisk, ctrlr_tailq);
362
363 disk_create(disk, DISK_VERSION);
364
365 printf(NVD_STR"%u: <%s> NVMe namespace\n", disk->d_unit, descr);
366 printf(NVD_STR"%u: %juMB (%ju %u byte sectors)\n", disk->d_unit,
367 (uintmax_t)disk->d_mediasize / (1024*1024),
368 (uintmax_t)disk->d_mediasize / disk->d_sectorsize,
369 disk->d_sectorsize);
370
371 return (NULL);
372 }
373
374 static void
destroy_geom_disk(struct nvd_disk * ndisk)375 destroy_geom_disk(struct nvd_disk *ndisk)
376 {
377 struct bio *bp;
378 struct disk *disk;
379 uint32_t unit;
380 int cnt = 0;
381
382 disk = ndisk->disk;
383 unit = disk->d_unit;
384 taskqueue_free(ndisk->tq);
385
386 disk_destroy(ndisk->disk);
387
388 mtx_lock(&ndisk->bioqlock);
389 for (;;) {
390 bp = bioq_takefirst(&ndisk->bioq);
391 if (bp == NULL)
392 break;
393 bp->bio_error = EIO;
394 bp->bio_flags |= BIO_ERROR;
395 bp->bio_resid = bp->bio_bcount;
396 cnt++;
397 biodone(bp);
398 }
399
400 printf(NVD_STR"%u: lost device - %d outstanding\n", unit, cnt);
401 printf(NVD_STR"%u: removing device entry\n", unit);
402
403 mtx_unlock(&ndisk->bioqlock);
404
405 mtx_destroy(&ndisk->bioqlock);
406 }
407
408 static void
nvd_controller_fail(void * ctrlr_arg)409 nvd_controller_fail(void *ctrlr_arg)
410 {
411 struct nvd_controller *ctrlr = ctrlr_arg;
412 struct nvd_disk *disk;
413
414 while (!TAILQ_EMPTY(&ctrlr->disk_head)) {
415 disk = TAILQ_FIRST(&ctrlr->disk_head);
416 TAILQ_REMOVE(&disk_head, disk, global_tailq);
417 TAILQ_REMOVE(&ctrlr->disk_head, disk, ctrlr_tailq);
418 destroy_geom_disk(disk);
419 free(disk, M_NVD);
420 }
421
422 TAILQ_REMOVE(&ctrlr_head, ctrlr, tailq);
423 free(ctrlr, M_NVD);
424 }
425
426