xref: /dragonfly/sys/dev/disk/dm/dmirror/dm_target_dmirror.c (revision f603807b2c8b9b8ca8a7a99e36eeb70cd39b460d)
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Alex Hornung <ahornung@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
/*
 * This file implements an initial version of a mirror target.
 */
38 #include <sys/bio.h>
39 #include <sys/malloc.h>
40 #include <sys/uuid.h>
41 
42 #include <dev/disk/dm/dm.h>
43 MALLOC_DEFINE(M_DMDMIRROR, "dm_dmirror", "Device Mapper Target DMIRROR");
44 
/*
 * segdesc flags: bits tested and set in segdesc_t.flags.
 */
#define MEDIA_UNSTABLE		0x0001
#define MEDIA_READ_DEGRADED	0x0002
#define MEDIA_WRITE_DEGRADED	0x0004
#define MEDIA_MASTER		0x0008
#define UNINITIALIZED		0x0010
#define OLD_UNSTABLE		0x0020
#define OLD_MASTER		0x0040
/* Original (misspelled) name, kept so existing users keep compiling. */
#define OLD_MSATER		OLD_MASTER

/*
 * dmirror disk flags.
 * NOTE(review): presumably stored in dmirror_disk_t.flags -- nothing in
 * this file sets or tests them yet.
 */
#define DISK_ONLINE		0x0001
56 
57 
/*
 * Per-bio bookkeeping, stashed in the bio's bio_caller_info slots:
 *
 *   bio_caller_info1.ptr     disk the bio was issued to
 *   bio_caller_info2.offset  segment number derived from the bio offset
 *   bio_caller_info3.value   retry counter (used by the read path)
 *   bio_caller_info3.ptr     bio a write was cloned from (write path)
 *
 * The getters accept a NULL bio and yield NULL / 0 for it; the setters
 * dereference the bio unconditionally.
 *
 * NOTE(review): the retry counter and the mbuf pointer share
 * bio_caller_info3.  If that slot is a union they overwrite each other;
 * in this file retries are only touched on reads and mbuf only on
 * writes -- confirm before using both on the same bio.
 */
#define dmirror_set_bio_disk(bio, x)	((bio)->bio_caller_info1.ptr = (x))
#define dmirror_get_bio_disk(bio)	((bio) ? ((bio)->bio_caller_info1.ptr) : NULL)

#define dmirror_set_bio_segno(bio, x)	((bio)->bio_caller_info2.offset = (x))
#define dmirror_get_bio_segno(bio)	((bio) ? ((bio)->bio_caller_info2.offset) : 0)
/* Earlier spelling of the setter, kept as an alias. */
#define dmirror_set_bio_seg(bio, x)	dmirror_set_bio_segno(bio, x)

#define dmirror_set_bio_retries(bio, x)	((bio)->bio_caller_info3.value = (x))
#define dmirror_get_bio_retries(bio)	((bio) ? ((bio)->bio_caller_info3.value) : 0)

#define dmirror_set_bio_mbuf(bio, x)	((bio)->bio_caller_info3.ptr = (x))
#define dmirror_get_bio_mbuf(bio)	((bio) ? ((bio)->bio_caller_info3.ptr) : NULL)
68 
69 
70 
/*
 * Segment descriptor, one per logical segment.
 *
 * The read and write paths in this file only look at `flags' (the
 * MEDIA_* / UNINITIALIZED / OLD_* bits).  The remaining members are
 * not referenced anywhere in this file yet.
 */
typedef struct segdesc {
	uint32_t	flags;		/* Flags, including state */
	uint32_t	zf_bitmap;	/* Zero-fill bitmap */
	uint8_t		disk_no;	/* NOTE(review): presumably index of the owning disk -- unused here */
	uint8_t		spare1;		/* spare, unused */
	uint16_t	spare2;		/* spare, unused */
	uint32_t	spare3;		/* spare, unused */
	/* XXX: some timestamp/serial */
} segdesc_t;
81 
82 typedef struct dmirror_disk {
83           uint32_t  flags;
84           dm_pdev_t           *pdev;
85 } dmirror_disk_t;
86 
87 typedef struct target_dmirror_config {
88           size_t    params_len;
89           dmirror_disk_t      disks[4];
90           uint8_t   ndisks;
91           /* XXX: uuid stuff */
92 
93 } dm_target_dmirror_config_t;
94 
95 static
96 struct bio*
dmirror_clone_bio(struct bio * obio)97 dmirror_clone_bio(struct bio *obio)
98 {
99           struct bio *bio;
100           struct buf *mbp;
101           struct buf *bp;
102 
103           mbp = obio->bio_buf;
104           bp = getpbuf(NULL);
105 
106           BUF_KERNPROC(bp);
107           bp->b_vp = mbp->b_vp;
108           bp->b_cmd = mbp->b_cmd;
109           bp->b_data = (char *)mbp->b_data;
110           bp->b_resid = bp->b_bcount = mbp->b_bcount;
111           bp->b_bufsize = bp->b_bcount;
112 
113           bio = &bp->b_bio1;
114           bio->bio_offset = obio->bio_offset;
115 
116           return (bio);
117 }
118 
119 static void
dmirror_write_done(struct bio * bio)120 dmirror_write_done(struct bio *bio)
121 {
122           dmirror_disk_t disk;
123           off_t segno;
124           struct bio *obio, *mbio;
125           int retries;
126 
127           disk = dmirror_get_bio_disk(bio);
128           segno = dmirror_get_bio_segno(bio);
129           mbio = dmirror_get_bio_mbuf(bio);
130 
131           if (bio->bio_buf->b_flags & B_ERROR) {
132                     /* write failed */
133           }
134 
135           obio = pop_bio(bio);
136           biodone(obio);
137 }
138 
139 void
dmirror_issue_write(dmirror_disk_t disk,struct bio * bio)140 dmirror_issue_write(dmirror_disk_t disk, struct bio *bio)
141 {
142           dmirror_set_bio_disk(bio, disk);
143           dmirror_set_bio_segno(bio, SEGNO_FROM_OFFSET(bio->bio_offset));
144 
145           bio->bio_done = dmirror_write_done;
146           vn_strategy(disk->pdev, bio);
147 }
148 
149 void
dmirror_write(dm_target_crypt_config_t config,struct bio * bio)150 dmirror_write(dm_target_crypt_config_t config, struct bio *bio)
151 {
152           dmirror_disk_t disk, m_disk;
153           struct bio *wbio1, *wbio2;
154           segdesc_t segdesc;
155           int i, masters = 0;
156 
157           for(i = 0; i < XXX config->ndisks; i++) {
158                     disk = &config->disks[i];
159                     segdesc = SEGDESC_FROM_OFFSET(disk, bio->bio_offset);
160                     if (segdesc->flags & MEDIA_MASTER) {
161                               if (++masters == 1)
162                                         m_disk = disk;
163                     }
164           }
165 
166           if (masters == 1) {
167                     dmirror_set_bio_mbuf(bio, NULL);
168                     dmirror_issue_write(m_disk, bio);
169           } else {
170                     wbio1 = dmirror_clone_bio(bio);
171                     wbio2 = dmirror_clone_bio(bio);
172                     dmirror_set_bio_mbuf(wbio1, bio);
173                     dmirror_set_bio_mbuf(wbio2, bio);
174                     dmirror_issue_write(XXX disk1, wbio1);
175                     dmirror_issue_write(XXX disk2, wbio2);
176           }
177 
178 }
179 
180 static void
segdesc_set_flag(dmirror_disk_t disk,off_t segno,int flag)181 segdesc_set_flag(dmirror_disk_t disk, off_t segno, int flag)
182 {
183           /*
184            * XXX: set the flag on the in-memory descriptor and write back to disks.
185            */
186           foo |= flag;
187 }
188 
189 
190 static void
segdesc_clear_flag(dmirror_disk_t disk,off_t segno,int flag)191 segdesc_clear_flag(dmirror_disk_t disk, off_t segno, int flag)
192 {
193           /*
194            * XXX: set the flag on the in-memory descriptor and write back to disks.
195            */
196           foo &= ~flag;
197 }
198 
199 static void
dmirror_read_done(struct bio * bio)200 dmirror_read_done(struct bio *bio)
201 {
202           dmirror_disk_t disk;
203           off_t segno;
204           struct bio *obio;
205           int retries;
206 
207           disk = dmirror_get_bio_disk(bio);
208           segno = dmirror_get_bio_segno(bio);
209           retries = dmirror_get_bio_retries(bio);
210 
211           if (bio->bio_buf->b_flags & B_ERROR) {
212                     /* read failed, so redispatch to a different disk */
213                     segdesc_set_flag(disk, segno, MEDIA_READ_DEGRADED);
214                     /* XXX: set other disk to master, if possible */
215                     if (retries < disk->config->max_retries) {
216                               dmirror_set_bio_retries(bio, retries + 1);
217                               /*
218                                * XXX: how do we restore the bio to health? Like this?
219                                */
220                               bio->bio_buf->b_flags &= ~(B_ERROR | B_INVAL);
221                               /*
222                                * XXX: something tells me that dispatching stuff from a
223                                *        biodone routine is not the greatest idea
224                                */
225                               dmirror_issue_read(next_disk, bio);
226                               return;
227                     }
228           }
229 
230           obio = pop_bio(bio);
231           biodone(obio);
232 }
233 
234 void
dmirror_issue_read(dmirror_disk_t disk,struct bio * bio)235 dmirror_issue_read(dmirror_disk_t disk, struct bio *bio)
236 {
237           dmirror_set_bio_disk(bio, disk);
238           dmirror_set_bio_segno(bio, SEGNO_FROM_OFFSET(bio->bio_offset));
239 
240           bio->bio_done = dmirror_read_done;
241           vn_strategy(disk->pdev, bio);
242 }
243 
244 void
dmirror_read(dm_target_crypt_config_t config,struct bio * bio)245 dmirror_read(dm_target_crypt_config_t config, struct bio *bio)
246 {
247           dmirror_disk_t disk, m_disk;
248           segdesc_t segdesc;
249           int i, masters = 0;
250 
251           for(i = 0; i < XXX config->ndisks; i++) {
252                     disk = &config->disks[i];
253                     segdesc = SEGDESC_FROM_OFFSET(disk, bio->bio_offset);
254                     if (segdesc->flags & MEDIA_MASTER) {
255                               if (++masters == 1)
256                                         m_disk = disk;
257                     }
258           }
259 
260           if (masters > 1) {
261                     /* XXX: fail. */
262                     biodone(foo);
263                     return;
264           }
265 
266           if (masters == 1) {
267                     segdesc = SEGDESC_FROM_OFFSET(m_disk, bio->bio_offset);
268                     if (segdesc->flags & UNINITIALIZED) {
269                               /* XXX: ... */
270                     }
271                     dmirror_issue_read(m_disk, bio);
272           } else {
273                     /* dispatch read to any disk */
274                     /* but try not to send to a READ_DEGRADED drive */
275                     m_disk = NULL;
276                     for (i = 0; i < config->ndisks; i++) {
277                               disk = &config->disks[i];
278                               segdesc = SEGDESC_FROM_OFFSET(disk, bio->bio_offset);
279                               if (!(segdesc->flags & MEDIA_READ_DEGRADED)) {
280                                         m_disk = disk;
281                                         break;
282                               }
283                     }
284                     /* XXX: do the uninitialized magic here, too */
285                     if (m_disk) {
286                               /*
287                                * XXX: we found some non-degraded disk. We might want to
288                                * optimize performance by sending reads to different disks,
289                                * not just the first one.
290                                */
291                               dmirror_set_bio_retries(bio, 0);
292                               dmirror_issue_read(m_disk, bio);
293                     } else {
294                               /* XXX: all disks are read degraded, just sent to any */
295                               m_disk = &config->disks[i];
296                               dmirror_set_bio_retries(bio, 0);
297                               dmirror_issue_read(m_disk, bio);
298                     }
299           }
300 }
301 
302 /* Strategy routine called from dm_strategy. */
303 /*
304  * Do IO operation, called from dmstrategy routine.
305  */
306 int
dm_target_dmirror_strategy(dm_table_entry_t * table_en,struct buf * bp)307 dm_target_dmirror_strategy(dm_table_entry_t *table_en, struct buf *bp)
308 {
309           struct bio *bio, *split_bio1, *split_bio2;
310           struct buf *bp;
311           off_t bseg, eseg, seg_end;
312           size_t fsb;
313           int split_transaction = 0;
314 
315           dm_target_crypt_config_t *priv;
316           priv = table_en->target_config;
317 
318           if ((bp->b_cmd == BUF_CMD_READ) || (bp->b_cmd == BUF_CMD_WRITE)) {
319                     /* Get rid of stuff we can't really handle */
320                     if (((bp->b_bcount % DEV_BSIZE) != 0) || (bp->b_bcount == 0)) {
321                               kprintf("dm_target_dmirror_strategy: can't really handle bp->b_bcount = %d\n", bp->b_bcount);
322                               bp->b_error = EINVAL;
323                               bp->b_flags |= B_ERROR | B_INVAL;
324                               biodone(&bp->b_bio1);
325                               return 0;
326                     }
327 
328                     bseg = SEGNO_FROM_OFFSET(bp->b_bio1.bio_offset);
329                     eseg = SEGNO_FROM_OFFSET(bp->b_bio1.bio_offset + bp->b_resid);
330                     seg_end = OFFSET_FROM_SEGNO(eseg);
331 
332                     if (bseg != eseg) {
333                               split_transaction = 1;
334                               /* fsb = first segment bytes (bytes in the first segment) */
335                               fsb = seg_end - bp->b_bio1.bio_offset;
336 
337                               nestbuf = getpbuf(NULL);
338                               nestiobuf_setup(&bp->b_bio1, nestbuf, 0, fsb);
339                               split_bio1 = push_bio(&nestbuf->b_bio1);
340                               split_bio1->bio_offset = bp->b_bio1.bio_offset +
341                                   priv->block_offset*DEV_BSIZE;
342 
343                               nestbuf = getpbuf(NULL);
344                               nestiobuf_setup(&bp->b_bio1, nestbuf, fsb, bp->b_resid - fsb);
345                               split_bio2 = push_bio(&nestbuf->b_bio1);
346                               split_bio2->bio_offset = bp->b_bio1.bio_offset + fsb +
347                                   priv->block_offset*DEV_BSIZE;
348                     }
349           }
350 
351           switch (bp->b_cmd) {
352           case BUF_CMD_READ:
353                     if (split_transaction) {
354                               dmirror_read(priv, split_bio1);
355                               dmirror_read(priv, split_bio2);
356                     } else {
357                               bio = push_bio(&bp->b_bio1);
358                               bio->bio_offset = bp->b_bio1.bio_offset + priv->block_offset*DEV_BSIZE;
359                               dmirror_read(priv, bio);
360                     }
361                     break;
362 
363           case BUF_CMD_WRITE:
364                     if (split_transaction) {
365                               dmirror_write(priv, split_bio1);
366                               dmirror_write(priv, split_bio2);
367                     } else {
368                               bio = push_bio(&bp->b_bio1);
369                               bio->bio_offset = bp->b_bio1.bio_offset + priv->block_offset*DEV_BSIZE;
370                               dmirror_write(priv, bio);
371                     }
372                     break;
373 
374           default:
375                     /* XXX: clone... */
376                     vn_strategy(priv->pdev[0]->pdev_vnode, &bp->b_bio1);
377                     vn_strategy(priv->pdev[1]->pdev_vnode, &bp->b_bio1);
378           }
379 
380           return 0;
381 
382 }
383 
384 /* XXX: add missing dm functions */
385