1 /*        $NetBSD: ext2fs_alloc.c,v 1.58 2024/05/14 19:00:44 andvar Exp $       */
2 
3 /*
4  * Copyright (c) 1982, 1986, 1989, 1993
5  *        The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *        @(#)ffs_alloc.c     8.11 (Berkeley) 10/27/94
32  *  Modified for ext2fs by Manuel Bouyer.
33  */
34 
35 /*
36  * Copyright (c) 1997 Manuel Bouyer.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  *
47  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
48  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
49  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
50  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
51  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
52  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
53  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
54  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
55  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
56  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
57  *
58  *        @(#)ffs_alloc.c     8.11 (Berkeley) 10/27/94
59  *  Modified for ext2fs by Manuel Bouyer.
60  */
61 
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: ext2fs_alloc.c,v 1.58 2024/05/14 19:00:44 andvar Exp $");
64 
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/buf.h>
68 #include <sys/proc.h>
69 #include <sys/vnode.h>
70 #include <sys/mount.h>
71 #include <sys/kernel.h>
72 #include <sys/syslog.h>
73 #include <sys/kauth.h>
74 
75 #include <lib/libkern/crc16.h>
76 
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/ufsmount.h>
80 
81 #include <ufs/ext2fs/ext2fs.h>
82 #include <ufs/ext2fs/ext2fs_extern.h>
83 
84 u_long ext2gennumber;
85 
86 static daddr_t      ext2fs_alloccg(struct inode *, int, daddr_t, int);
87 static u_long       ext2fs_dirpref(struct m_ext2fs *);
88 static void         ext2fs_fserr(struct m_ext2fs *, u_int, const char *);
89 static u_long       ext2fs_hashalloc(struct inode *, int, long, int,
90                         daddr_t (*)(struct inode *, int, daddr_t, int));
91 static daddr_t      ext2fs_nodealloccg(struct inode *, int, daddr_t, int);
92 static daddr_t      ext2fs_mapsearch(struct m_ext2fs *, char *, daddr_t);
93 static __inline void          ext2fs_cg_update(struct m_ext2fs *, int,
94     struct ext2_gd *, int, int, int, daddr_t);
95 static uint16_t ext2fs_cg_get_csum(struct m_ext2fs *, int, struct ext2_gd *);
96 static void         ext2fs_init_bb(struct m_ext2fs *, int, struct ext2_gd *,
97     char *);
98 
99 /*
100  * Allocate a block in the file system.
101  *
102  * A preference may be optionally specified. If a preference is given
103  * the following hierarchy is used to allocate a block:
104  *   1) allocate the requested block.
105  *   2) allocate a rotationally optimal block in the same cylinder.
106  *   3) allocate a block in the same cylinder group.
107  *   4) quadradically rehash into other cylinder groups, until an
108  *          available block is located.
109  * If no block preference is given the following hierarchy is used
110  * to allocate a block:
111  *   1) allocate a block in the cylinder group that contains the
112  *          inode for the file.
113  *   2) quadradically rehash into other cylinder groups, until an
114  *          available block is located.
115  */
116 int
ext2fs_alloc(struct inode * ip,daddr_t lbn,daddr_t bpref,kauth_cred_t cred,daddr_t * bnp)117 ext2fs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref,
118     kauth_cred_t cred, daddr_t *bnp)
119 {
120           struct m_ext2fs *fs;
121           daddr_t bno;
122           int cg;
123 
124           *bnp = 0;
125           fs = ip->i_e2fs;
126 #ifdef DIAGNOSTIC
127           if (cred == NOCRED)
128                     panic("ext2fs_alloc: missing credential");
129 #endif /* DIAGNOSTIC */
130           if (fs->e2fs.e2fs_fbcount == 0)
131                     goto nospace;
132           if (kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
133               NULL, NULL) != 0 &&
134               freespace(fs) <= 0)
135                     goto nospace;
136           if (bpref >= fs->e2fs.e2fs_bcount)
137                     bpref = 0;
138           if (bpref == 0)
139                     cg = ino_to_cg(fs, ip->i_number);
140           else
141                     cg = dtog(fs, bpref);
142           bno = (daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
143               ext2fs_alloccg);
144           if (bno > 0) {
145                     ext2fs_setnblock(ip, ext2fs_nblock(ip) + btodb(fs->e2fs_bsize));
146                     ip->i_flag |= IN_CHANGE | IN_UPDATE;
147                     *bnp = bno;
148                     return 0;
149           }
150 nospace:
151           ext2fs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
152           uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
153           return ENOSPC;
154 }
155 
156 /*
157  * Allocate an inode in the file system.
158  *
159  * If allocating a directory, use ext2fs_dirpref to select the inode.
160  * If allocating in a directory, the following hierarchy is followed:
161  *   1) allocate the preferred inode.
162  *   2) allocate an inode in the same cylinder group.
163  *   3) quadradically rehash into other cylinder groups, until an
164  *          available inode is located.
165  * If no inode preference is given the following hierarchy is used
166  * to allocate an inode:
167  *   1) allocate an inode in cylinder group 0.
168  *   2) quadradically rehash into other cylinder groups, until an
169  *          available inode is located.
170  */
171 int
ext2fs_valloc(struct vnode * pvp,int mode,kauth_cred_t cred,ino_t * inop)172 ext2fs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred, ino_t *inop)
173 {
174           struct inode *pip;
175           struct m_ext2fs *fs;
176           ino_t ino, ipref;
177           int cg;
178 
179           pip = VTOI(pvp);
180           fs = pip->i_e2fs;
181           if (fs->e2fs.e2fs_ficount == 0)
182                     goto noinodes;
183 
184           if ((mode & IFMT) == IFDIR)
185                     cg = ext2fs_dirpref(fs);
186           else
187                     cg = ino_to_cg(fs, pip->i_number);
188           ipref = cg * fs->e2fs.e2fs_ipg + 1;
189           ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg);
190           if (ino == 0)
191                     goto noinodes;
192 
193           *inop = ino;
194           return 0;
195 
196 noinodes:
197           ext2fs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
198           uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
199           return ENOSPC;
200 }
201 
202 /*
203  * Find a cylinder to place a directory.
204  *
205  * The policy implemented by this algorithm is to select from
206  * among those cylinder groups with above the average number of
207  * free inodes, the one with the smallest number of directories.
208  */
209 static u_long
ext2fs_dirpref(struct m_ext2fs * fs)210 ext2fs_dirpref(struct m_ext2fs *fs)
211 {
212           int cg, maxspace, mincg, avgifree;
213 
214           avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
215           maxspace = 0;
216           mincg = -1;
217           for (cg = 0; cg < fs->e2fs_ncg; cg++) {
218                     uint32_t nifree =
219                         (fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree_hi) << 16)
220                         | fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree);
221                     if (nifree < avgifree) {
222                               continue;
223                     }
224                     uint32_t nbfree =
225                         (fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree_hi) << 16)
226                         | fs2h16(fs->e2fs_gd[cg].ext2bgd_nbfree);
227                     if (mincg == -1 || nbfree > maxspace) {
228                               mincg = cg;
229                               maxspace = nbfree;
230                     }
231           }
232           return mincg;
233 }
234 
235 /*
236  * Select the desired position for the next block in a file.  The file is
237  * logically divided into sections. The first section is composed of the
238  * direct blocks. Each additional section contains fs_maxbpg blocks.
239  *
240  * If no blocks have been allocated in the first section, the policy is to
241  * request a block in the same cylinder group as the inode that describes
242  * the file. Otherwise, the policy is to try to allocate the blocks
243  * contiguously. The two fields of the ext2 inode extension (see
244  * ufs/ufs/inode.h) help this.
245  */
246 daddr_t
ext2fs_blkpref(struct inode * ip,daddr_t lbn,int indx,int32_t * bap)247 ext2fs_blkpref(struct inode *ip, daddr_t lbn, int indx,
248                     int32_t *bap /* XXX ondisk32 */)
249 {
250           struct m_ext2fs *fs;
251           int cg, i;
252 
253           fs = ip->i_e2fs;
254           /*
255            * if we are doing contiguous lbn allocation, try to alloc blocks
256            * contiguously on disk
257            */
258 
259           if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) {
260                     return ip->i_e2fs_last_blk + 1;
261           }
262 
263           /*
264            * bap, if provided, gives us a list of blocks to which we want to
265            * stay close
266            */
267 
268           if (bap) {
269                     for (i = indx; i >= 0 ; i--) {
270                               if (bap[i]) {
271                                         return fs2h32(bap[i]) + 1;
272                               }
273                     }
274           }
275 
276           /* fall back to the first block of the cylinder containing the inode */
277 
278           cg = ino_to_cg(fs, ip->i_number);
279           return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1;
280 }
281 
282 /*
283  * Implement the cylinder overflow algorithm.
284  *
285  * The policy implemented by this algorithm is:
286  *   1) allocate the block in its requested cylinder group.
287  *   2) quadradically rehash on the cylinder group number.
288  *   3) brute force search for a free block.
289  */
290 static u_long
ext2fs_hashalloc(struct inode * ip,int cg,long pref,int size,daddr_t (* allocator)(struct inode *,int,daddr_t,int))291 ext2fs_hashalloc(struct inode *ip, int cg, long pref, int size,
292                     daddr_t (*allocator)(struct inode *, int, daddr_t, int))
293 {
294           struct m_ext2fs *fs;
295           long result;
296           int i, icg = cg;
297 
298           fs = ip->i_e2fs;
299           /*
300            * 1: preferred cylinder group
301            */
302           result = (*allocator)(ip, cg, pref, size);
303           if (result)
304                     return result;
305           /*
306            * 2: quadratic rehash
307            */
308           for (i = 1; i < fs->e2fs_ncg; i *= 2) {
309                     cg += i;
310                     if (cg >= fs->e2fs_ncg)
311                               cg -= fs->e2fs_ncg;
312                     result = (*allocator)(ip, cg, 0, size);
313                     if (result)
314                               return result;
315           }
316           /*
317            * 3: brute force search
318            * Note that we start at i == 2, since 0 was checked initially,
319            * and 1 is always checked in the quadratic rehash.
320            */
321           cg = (icg + 2) % fs->e2fs_ncg;
322           for (i = 2; i < fs->e2fs_ncg; i++) {
323                     result = (*allocator)(ip, cg, 0, size);
324                     if (result)
325                               return result;
326                     cg++;
327                     if (cg == fs->e2fs_ncg)
328                               cg = 0;
329           }
330           return 0;
331 }
332 
333 /*
334  * Determine whether a block can be allocated.
335  *
336  * Check to see if a block of the appropriate size is available,
337  * and if it is, allocate it.
338  */
339 
340 static daddr_t
ext2fs_alloccg(struct inode * ip,int cg,daddr_t bpref,int size)341 ext2fs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
342 {
343           struct m_ext2fs *fs;
344           char *bbp;
345           struct buf *bp;
346           int error, bno, start, end, loc;
347 
348           fs = ip->i_e2fs;
349           if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0 &&
350               fs->e2fs_gd[cg].ext2bgd_nbfree_hi == 0)
351                     return 0;
352           error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
353                     fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap),
354                     fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
355                     (int)fs->e2fs_bsize, B_MODIFY, &bp);
356           if (error) {
357                     return 0;
358           }
359           bbp = (char *)bp->b_data;
360 
361           if (dtog(fs, bpref) != cg)
362                     bpref = 0;
363 
364           /* initialize block bitmap now if uninit */
365           if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
366               (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)))) {
367                     ext2fs_init_bb(fs, cg, &fs->e2fs_gd[cg], bbp);
368                     fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_BLOCK_UNINIT);
369           }
370 
371           if (bpref != 0) {
372                     bpref = dtogd(fs, bpref);
373                     /*
374                      * if the requested block is available, use it
375                      */
376                     if (isclr(bbp, bpref)) {
377                               bno = bpref;
378                               goto gotit;
379                     }
380           }
381           /*
382            * no blocks in the requested cylinder, so take next
383            * available one in this cylinder group.
384            * first try to get 8 contiguous blocks, then fall back to a single
385            * block.
386            */
387           if (bpref)
388                     start = dtogd(fs, bpref) / NBBY;
389           else
390                     start = 0;
391           end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
392           for (loc = start; loc < end; loc++) {
393                     if (bbp[loc] == 0) {
394                               bno = loc * NBBY;
395                               goto gotit;
396                     }
397           }
398           for (loc = 0; loc < start; loc++) {
399                     if (bbp[loc] == 0) {
400                               bno = loc * NBBY;
401                               goto gotit;
402                     }
403           }
404 
405           bno = ext2fs_mapsearch(fs, bbp, bpref);
406 #if 0
407           /*
408            * XXX jdolecek mapsearch actually never fails, it panics instead.
409            * If re-enabling, make sure to brele() before returning.
410            */
411           if (bno < 0)
412                     return 0;
413 #endif
414 gotit:
415 #ifdef DIAGNOSTIC
416           if (isset(bbp, (daddr_t)bno)) {
417                     printf("%s: cg=%d bno=%d fs=%s\n", __func__,
418                         cg, bno, fs->e2fs_fsmnt);
419                     panic("ext2fs_alloccg: dup alloc");
420           }
421 #endif
422           setbit(bbp, (daddr_t)bno);
423           fs->e2fs.e2fs_fbcount--;
424           ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], -1, 0, 0, 0);
425           fs->e2fs_fmod = 1;
426           bdwrite(bp);
427           return cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno;
428 }
429 
430 /*
431  * Determine whether an inode can be allocated.
432  *
433  * Check to see if an inode is available, and if it is,
434  * allocate it using the following policy:
435  *   1) allocate the requested inode.
436  *   2) allocate the next available inode after the requested
437  *          inode in the specified cylinder group.
438  */
439 static daddr_t
ext2fs_nodealloccg(struct inode * ip,int cg,daddr_t ipref,int mode)440 ext2fs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
441 {
442           struct m_ext2fs *fs;
443           char *ibp;
444           struct buf *bp;
445           int error, start, len, loc, map, i;
446 
447           ipref--; /* to avoid a lot of (ipref -1) */
448           if (ipref == -1)
449                     ipref = 0;
450           fs = ip->i_e2fs;
451           if (fs->e2fs_gd[cg].ext2bgd_nifree == 0 &&
452               fs->e2fs_gd[cg].ext2bgd_nifree_hi == 0)
453                     return 0;
454           error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
455                     fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap),
456                     fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
457                     (int)fs->e2fs_bsize, B_MODIFY, &bp);
458           if (error) {
459                     return 0;
460           }
461           ibp = (char *)bp->b_data;
462 
463           KASSERT(!E2FS_HAS_GD_CSUM(fs) || (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0);
464 
465           /* initialize inode bitmap now if uninit */
466           if (__predict_false(E2FS_HAS_GD_CSUM(fs) &&
467               (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)))) {
468                     KASSERT(fs2h16(fs->e2fs_gd[cg].ext2bgd_nifree) == fs->e2fs.e2fs_ipg);
469                     memset(ibp, 0, fs->e2fs_bsize);
470                     fs->e2fs_gd[cg].ext2bgd_flags &= h2fs16(~E2FS_BG_INODE_UNINIT);
471           }
472 
473           if (ipref) {
474                     ipref %= fs->e2fs.e2fs_ipg;
475                     if (isclr(ibp, ipref))
476                               goto gotit;
477           }
478           start = ipref / NBBY;
479           len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY);
480           loc = skpc(0xff, len, &ibp[start]);
481           if (loc == 0) {
482                     len = start + 1;
483                     start = 0;
484                     loc = skpc(0xff, len, &ibp[0]);
485                     if (loc == 0) {
486                               printf("%s: cg = %d, ipref = %lld, fs = %s\n", __func__,
487                                   cg, (long long)ipref, fs->e2fs_fsmnt);
488                               panic("%s: map corrupted", __func__);
489                               /* NOTREACHED */
490                     }
491           }
492           i = start + len - loc;
493           map = ibp[i] ^ 0xff;
494           if (map == 0) {
495                     printf("%s: fs = %s\n", __func__, fs->e2fs_fsmnt);
496                     panic("%s: inode not in map", __func__);
497           }
498           ipref = i * NBBY + ffs(map) - 1;
499 gotit:
500           setbit(ibp, ipref);
501           fs->e2fs.e2fs_ficount--;
502           ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
503                     0, -1, ((mode & IFMT) == IFDIR) ? 1 : 0, ipref);
504           fs->e2fs_fmod = 1;
505           bdwrite(bp);
506           return cg * fs->e2fs.e2fs_ipg + ipref + 1;
507 }
508 
509 /*
510  * Free a block.
511  *
512  * The specified block is placed back in the
513  * free map.
514  */
515 void
ext2fs_blkfree(struct inode * ip,daddr_t bno)516 ext2fs_blkfree(struct inode *ip, daddr_t bno)
517 {
518           struct m_ext2fs *fs;
519           char *bbp;
520           struct buf *bp;
521           int error, cg;
522 
523           fs = ip->i_e2fs;
524           cg = dtog(fs, bno);
525 
526           KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
527               (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_BLOCK_UNINIT)) == 0);
528 
529           if ((u_int)bno >= fs->e2fs.e2fs_bcount) {
530                     printf("%s: bad block %jd, ino %ju\n",
531                         __func__, (intmax_t)bno, (uintmax_t)ip->i_number);
532                     ext2fs_fserr(fs, ip->i_uid, "bad block");
533                     return;
534           }
535           error = bread(ip->i_devvp, EXT2_FSBTODB64(fs,
536               fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap),
537               fs2h32(fs->e2fs_gd[cg].ext2bgd_b_bitmap_hi)),
538               (int)fs->e2fs_bsize, B_MODIFY, &bp);
539           if (error) {
540                     return;
541           }
542           bbp = (char *)bp->b_data;
543           bno = dtogd(fs, bno);
544           if (isclr(bbp, bno)) {
545                     printf("%s: dev = %#jx, block = %jd, fs = %s\n", __func__,
546                         (uintmax_t)ip->i_dev, (intmax_t)bno,
547                         fs->e2fs_fsmnt);
548                     panic("%s: freeing free block", __func__);
549           }
550           clrbit(bbp, bno);
551           fs->e2fs.e2fs_fbcount++;
552           ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg], 1, 0, 0, 0);
553           fs->e2fs_fmod = 1;
554           bdwrite(bp);
555 }
556 
557 /*
558  * Free an inode.
559  *
560  * The specified inode is placed back in the free map.
561  */
562 int
ext2fs_vfree(struct vnode * pvp,ino_t ino,int mode)563 ext2fs_vfree(struct vnode *pvp, ino_t ino, int mode)
564 {
565           struct m_ext2fs *fs;
566           char *ibp;
567           struct inode *pip;
568           struct buf *bp;
569           int error, cg;
570 
571           pip = VTOI(pvp);
572           fs = pip->i_e2fs;
573 
574           if ((u_int)ino > fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO)
575                     panic("%s: range: dev = %#jx, ino = %ju, fs = %s",
576                         __func__, (uintmax_t)pip->i_dev, (uintmax_t)ino,
577                         fs->e2fs_fsmnt);
578 
579           cg = ino_to_cg(fs, ino);
580 
581           KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
582               (fs->e2fs_gd[cg].ext2bgd_flags & h2fs16(E2FS_BG_INODE_UNINIT)) == 0);
583 
584           error = bread(pip->i_devvp, EXT2_FSBTODB64(fs,
585               fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap),
586               fs2h32(fs->e2fs_gd[cg].ext2bgd_i_bitmap_hi)),
587               (int)fs->e2fs_bsize, B_MODIFY, &bp);
588           if (error) {
589                     return 0;
590           }
591           ibp = (char *)bp->b_data;
592           ino = (ino - 1) % fs->e2fs.e2fs_ipg;
593           if (isclr(ibp, ino)) {
594                     printf("%s: dev = %#jx, ino = %ju, fs = %s\n", __func__,
595                         (uintmax_t)pip->i_dev,
596                         (uintmax_t)ino, fs->e2fs_fsmnt);
597                     if (fs->e2fs_ronly == 0)
598                               panic("%s: freeing free inode", __func__);
599           }
600           clrbit(ibp, ino);
601           fs->e2fs.e2fs_ficount++;
602           ext2fs_cg_update(fs, cg, &fs->e2fs_gd[cg],
603                     0, 1, ((mode & IFMT) == IFDIR) ? -1 : 0, 0);
604           fs->e2fs_fmod = 1;
605           bdwrite(bp);
606           return 0;
607 }
608 
609 /*
610  * Find a block in the specified cylinder group.
611  *
612  * It is a panic if a request is made to find a block if none are
613  * available.
614  */
615 
616 static daddr_t
ext2fs_mapsearch(struct m_ext2fs * fs,char * bbp,daddr_t bpref)617 ext2fs_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
618 {
619           int start, len, loc, i, map;
620 
621           /*
622            * find the fragment by searching through the free block
623            * map for an appropriate bit pattern
624            */
625           if (bpref)
626                     start = dtogd(fs, bpref) / NBBY;
627           else
628                     start = 0;
629           len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
630           loc = skpc(0xff, len, &bbp[start]);
631           if (loc == 0) {
632                     len = start + 1;
633                     start = 0;
634                     loc = skpc(0xff, len, &bbp[start]);
635                     if (loc == 0) {
636                               printf("%s: start = %d, len = %d, fs = %s\n",
637                                   __func__, start, len, fs->e2fs_fsmnt);
638                               panic("%s: map corrupted", __func__);
639                               /* NOTREACHED */
640                     }
641           }
642           i = start + len - loc;
643           map = bbp[i] ^ 0xff;
644           if (map == 0) {
645                     printf("%s: fs = %s\n", __func__, fs->e2fs_fsmnt);
646                     panic("%s: block not in map", __func__);
647           }
648           return i * NBBY + ffs(map) - 1;
649 }
650 
651 /*
652  * Fserr prints the name of a file system with an error diagnostic.
653  *
654  * The form of the error message is:
655  *        fs: error message
656  */
657 static void
ext2fs_fserr(struct m_ext2fs * fs,u_int uid,const char * cp)658 ext2fs_fserr(struct m_ext2fs *fs, u_int uid, const char *cp)
659 {
660 
661           log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
662 }
663 
664 static __inline void
ext2fs_cg_update(struct m_ext2fs * fs,int cg,struct ext2_gd * gd,int nbfree,int nifree,int ndirs,daddr_t ioff)665 ext2fs_cg_update(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, int nbfree, int nifree, int ndirs, daddr_t ioff)
666 {
667           if (nifree) {
668                     uint32_t ext2bgd_nifree = fs2h16(gd->ext2bgd_nifree) |
669                         (fs2h16(gd->ext2bgd_nifree_hi) << 16);
670                     ext2bgd_nifree += nifree;
671                     gd->ext2bgd_nifree = h2fs16(ext2bgd_nifree);
672                     gd->ext2bgd_nifree_hi = h2fs16(ext2bgd_nifree >> 16);
673                     /*
674                      * If we allocated inode on bigger offset than what was
675                      * ever used before, bump the itable_unused count. This
676                      * member only ever grows, and is used only for initialization
677                      * !INODE_ZEROED groups with used inodes. Of course, by the
678                      * time we get here the itables are already zeroed, but
679                      * e2fstools fsck.ext4 still checks this.
680                      */
681                     if (E2FS_HAS_GD_CSUM(fs) && nifree < 0 &&
682                         (ioff + 1) >= (fs->e2fs.e2fs_ipg -
683                         fs2h16(gd->ext2bgd_itable_unused_lo))) {
684                               gd->ext2bgd_itable_unused_lo =
685                                   h2fs16(fs->e2fs.e2fs_ipg - (ioff + 1));
686                     }
687 
688                     KASSERT(!E2FS_HAS_GD_CSUM(fs) ||
689                         gd->ext2bgd_itable_unused_lo <= ext2bgd_nifree);
690           }
691 
692 
693           if (nbfree) {
694                     uint32_t ext2bgd_nbfree = fs2h16(gd->ext2bgd_nbfree)
695                         | (fs2h16(gd->ext2bgd_nbfree_hi) << 16);
696                     ext2bgd_nbfree += nbfree;
697                     gd->ext2bgd_nbfree = h2fs16(ext2bgd_nbfree);
698                     gd->ext2bgd_nbfree_hi = h2fs16(ext2bgd_nbfree >> 16);
699           }
700 
701           if (ndirs) {
702                     uint32_t ext2bgd_ndirs = fs2h16(gd->ext2bgd_ndirs)
703                         | (fs2h16(gd->ext2bgd_ndirs_hi) << 16);
704                     ext2bgd_ndirs += ndirs;
705                     gd->ext2bgd_ndirs = h2fs16(ext2bgd_ndirs);
706                     gd->ext2bgd_ndirs_hi = h2fs16(ext2bgd_ndirs >> 16);
707           }
708 
709           if (E2FS_HAS_GD_CSUM(fs))
710                     gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
711 }
712 
713 static const uint32_t ext2fs_crc32c_table[256] = {
714           0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4,
715           0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
716           0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b,
717           0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
718           0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b,
719           0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
720           0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54,
721           0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
722           0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a,
723           0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
724           0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5,
725           0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
726           0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45,
727           0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
728           0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a,
729           0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
730           0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48,
731           0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
732           0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687,
733           0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
734           0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927,
735           0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
736           0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8,
737           0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
738           0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096,
739           0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
740           0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859,
741           0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
742           0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9,
743           0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
744           0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36,
745           0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
746           0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c,
747           0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
748           0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043,
749           0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
750           0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3,
751           0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
752           0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c,
753           0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
754           0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652,
755           0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
756           0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d,
757           0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
758           0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d,
759           0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
760           0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2,
761           0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
762           0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530,
763           0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
764           0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff,
765           0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
766           0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f,
767           0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
768           0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90,
769           0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
770           0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee,
771           0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
772           0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321,
773           0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
774           0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81,
775           0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
776           0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e,
777           0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
778 };
779 
780 static uint32_t
ext2fs_crc32c(uint32_t last,const void * vbuf,size_t len)781 ext2fs_crc32c(uint32_t last, const void *vbuf, size_t len)
782 {
783           uint32_t crc = last;
784           const uint8_t *buf = vbuf;
785 
786           while (len--)
787                     crc = ext2fs_crc32c_table[(crc ^ *buf++) & 0xff] ^ (crc >> 8);
788 
789           return crc;
790 }
791 
792 /*
793  * Compute group description csum. Structure data must be LE (not host).
794  * Returned as LE (disk encoding).
795  */
796 static uint16_t
ext2fs_cg_get_csum(struct m_ext2fs * fs,int cg,struct ext2_gd * gd)797 ext2fs_cg_get_csum(struct m_ext2fs *fs, int cg, struct ext2_gd *gd)
798 {
799           uint16_t crc;
800           size_t cgsize = 1 << fs->e2fs_group_desc_shift;
801           uint32_t cg_bswapped = h2fs32((uint32_t)cg);
802           size_t off;
803 
804           if (EXT2F_HAS_ROCOMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
805                     uint32_t crc32;
806                     uint8_t dummy[2] = {0, 0};
807 
808                     off = offsetof(struct ext2_gd, ext2bgd_checksum);
809 
810                     crc32 = ext2fs_crc32c(~0, fs->e2fs.e2fs_uuid,
811                         sizeof(fs->e2fs.e2fs_uuid));
812                     crc32 = ext2fs_crc32c(crc32, &cg_bswapped, sizeof(cg_bswapped));
813                     crc32 = ext2fs_crc32c(crc32, gd, off);
814                     crc32 = ext2fs_crc32c(crc32, dummy, 2);
815                     crc32 = ext2fs_crc32c(crc32, gd + off + 2, cgsize - (off + 2));
816 
817                     return h2fs16(crc32 & 0xffff);
818           }
819 
820           if (!EXT2F_HAS_ROCOMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM))
821                     return 0;
822 
823           off = offsetof(struct ext2_gd, ext2bgd_checksum);
824 
825           crc = crc16(~0, (uint8_t *)fs->e2fs.e2fs_uuid,
826               sizeof(fs->e2fs.e2fs_uuid));
827           crc = crc16(crc, (uint8_t *)&cg_bswapped, sizeof(cg_bswapped));
828           crc = crc16(crc, (uint8_t *)gd, off);
829           crc = crc16(crc, (uint8_t *)gd + off + 2, cgsize - (off + 2));
830 
831           return h2fs16(crc);
832 }
833 
834 static void
ext2fs_init_bb(struct m_ext2fs * fs,int cg,struct ext2_gd * gd,char * bbp)835 ext2fs_init_bb(struct m_ext2fs *fs, int cg, struct ext2_gd *gd, char *bbp)
836 {
837           int i;
838 
839           memset(bbp, 0, fs->e2fs_bsize);
840 
841           /*
842            * No block was ever allocated on this cg before, so the only used
843            * blocks are metadata blocks on start of the group. We could optimize
844            * this to set by bytes, but since this is done once per the group
845            * in lifetime of filesystem, it really is not worth it.
846            */
847           for(i=0; i < fs->e2fs.e2fs_bpg - fs2h16(gd->ext2bgd_nbfree); i++)
848                     setbit(bbp, i);
849 }
850 
851 /*
852  * Verify csum and initialize itable if not done already
853  */
854 int
ext2fs_cg_verify_and_initialize(struct vnode * devvp,struct m_ext2fs * fs,int ronly)855 ext2fs_cg_verify_and_initialize(struct vnode *devvp, struct m_ext2fs *fs, int ronly)
856 {
857           struct ext2_gd *gd;
858           ino_t ioff;
859           size_t boff;
860           struct buf *bp;
861           int cg, i, error;
862 
863           if (!E2FS_HAS_GD_CSUM(fs))
864                     return 0;
865 
866           for(cg = 0; cg < fs->e2fs_ncg; cg++) {
867                     gd = &fs->e2fs_gd[cg];
868 
869                     /* Verify checksum */
870                     uint16_t csum = ext2fs_cg_get_csum(fs, cg, gd);
871                     if (gd->ext2bgd_checksum != csum) {
872                               printf("%s: group %d invalid csum (%#x != %#x)\n",
873                                   __func__, cg, gd->ext2bgd_checksum, csum);
874                               return EINVAL;
875                     }
876 
877                     /* if mounting read-write, zero itable if not already done */
878                     if (ronly ||
879                         (gd->ext2bgd_flags & h2fs16(E2FS_BG_INODE_ZEROED)) != 0)
880                               continue;
881 
882                     /*
883                      * We are skipping already used inodes, zero rest of itable
884                      * blocks. First block to zero could be only partial wipe, all
885                      * others are wiped completely. This might take a while,
886                      * there could be many inode table blocks. We use
887                      * delayed writes, so this shouldn't block for very
888                      * long.
889                      */
890                     ioff = fs->e2fs.e2fs_ipg - fs2h16(gd->ext2bgd_itable_unused_lo);
891                     boff = (ioff % fs->e2fs_ipb) * EXT2_DINODE_SIZE(fs);
892 
893                     for(i = ioff / fs->e2fs_ipb; i < fs->e2fs_itpg; i++) {
894                               if (boff) {
895                                         /* partial wipe, must read old data */
896                                         error = bread(devvp, EXT2_FSBTODB64OFF(fs,
897                                             fs2h32(gd->ext2bgd_i_tables),
898                                             fs2h32(gd->ext2bgd_i_tables_hi), i),
899                                             (int)fs->e2fs_bsize, B_MODIFY, &bp);
900                                         if (error) {
901                                                   printf("%s: can't read itable block",
902                                                       __func__);
903                                                   return error;
904                                         }
905                                         memset((char *)bp->b_data + boff, 0,
906                                             fs->e2fs_bsize - boff);
907                                         boff = 0;
908                               } else {
909                                         /*
910                                          * Complete wipe, don't need to read data. This
911                                          * assumes nothing else is changing the data.
912                                          */
913                                         bp = getblk(devvp, EXT2_FSBTODB64OFF(fs,
914                                             fs2h32(gd->ext2bgd_i_tables),
915                                             fs2h32(gd->ext2bgd_i_tables_hi), i),
916                                             (int)fs->e2fs_bsize, 0, 0);
917                                         clrbuf(bp);
918                               }
919 
920                               bdwrite(bp);
921                     }
922 
923                     gd->ext2bgd_flags |= h2fs16(E2FS_BG_INODE_ZEROED);
924                     gd->ext2bgd_checksum = ext2fs_cg_get_csum(fs, cg, gd);
925                     fs->e2fs_fmod = 1;
926           }
927 
928           return 0;
929 }
930