1 /* $OpenBSD: ext2fs_alloc.c,v 1.21 2007/03/14 13:56:42 pedro Exp $ */
2 /* $NetBSD: ext2fs_alloc.c,v 1.10 2001/07/05 08:38:27 toshii Exp $ */
3
4 /*
5 * Copyright (c) 1997 Manuel Bouyer.
6 * Copyright (c) 1982, 1986, 1989, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94
34 * Modified for ext2fs by Manuel Bouyer.
35 */
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/buf.h>
40 #include <sys/proc.h>
41 #include <sys/vnode.h>
42 #include <sys/mount.h>
43 #include <sys/kernel.h>
44 #include <sys/syslog.h>
45
46 #include <ufs/ufs/quota.h>
47 #include <ufs/ufs/inode.h>
48 #include <ufs/ufs/ufsmount.h>
49 #include <ufs/ufs/ufs_extern.h>
50
51 #include <ufs/ext2fs/ext2fs.h>
52 #include <ufs/ext2fs/ext2fs_extern.h>
53
54 u_long ext2gennumber;
55
56 static ufs1_daddr_t ext2fs_alloccg(struct inode *, int, ufs1_daddr_t, int);
57 static u_long ext2fs_dirpref(struct m_ext2fs *);
58 static void ext2fs_fserr(struct m_ext2fs *, u_int, char *);
59 static u_long ext2fs_hashalloc(struct inode *, int, long, int,
60 ufs1_daddr_t (*)(struct inode *, int, ufs1_daddr_t, int));
61 static ufs1_daddr_t ext2fs_nodealloccg(struct inode *, int, ufs1_daddr_t, int);
62 static ufs1_daddr_t ext2fs_mapsearch(struct m_ext2fs *, char *, ufs1_daddr_t);
63
64 /*
65 * Allocate a block in the file system.
66 *
67 * A preference may be optionally specified. If a preference is given
68 * the following hierarchy is used to allocate a block:
69 * 1) allocate the requested block.
70 * 2) allocate a rotationally optimal block in the same cylinder.
71 * 3) allocate a block in the same cylinder group.
72 * 4) quadratically rehash into other cylinder groups, until an
73 * available block is located.
74 * If no block preference is given the following hierarchy is used
75 * to allocate a block:
76 * 1) allocate a block in the cylinder group that contains the
77 * inode for the file.
78 * 2) quadratically rehash into other cylinder groups, until an
79 * available block is located.
80 */
81 int
ext2fs_alloc(ip,lbn,bpref,cred,bnp)82 ext2fs_alloc(ip, lbn, bpref, cred, bnp)
83 struct inode *ip;
84 ufs1_daddr_t lbn, bpref;
85 struct ucred *cred;
86 ufs1_daddr_t *bnp;
87 {
88 struct m_ext2fs *fs;
89 ufs1_daddr_t bno;
90 int cg;
91
92 *bnp = 0;
93 fs = ip->i_e2fs;
94 #ifdef DIAGNOSTIC
95 if (cred == NOCRED)
96 panic("ext2fs_alloc: missing credential");
97 #endif /* DIAGNOSTIC */
98 if (fs->e2fs.e2fs_fbcount == 0)
99 goto nospace;
100 if (cred->cr_uid != 0 && freespace(fs) <= 0)
101 goto nospace;
102 if (bpref >= fs->e2fs.e2fs_bcount)
103 bpref = 0;
104 if (bpref == 0)
105 cg = ino_to_cg(fs, ip->i_number);
106 else
107 cg = dtog(fs, bpref);
108 bno = (ufs1_daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
109 ext2fs_alloccg);
110 if (bno > 0) {
111 ip->i_e2fs_nblock += btodb(fs->e2fs_bsize);
112 ip->i_flag |= IN_CHANGE | IN_UPDATE;
113 *bnp = bno;
114 return (0);
115 }
116 nospace:
117 ext2fs_fserr(fs, cred->cr_uid, "file system full");
118 uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
119 return (ENOSPC);
120 }
121
122 /*
123 * Allocate an inode in the file system.
124 *
125 * If allocating a directory, use ext2fs_dirpref to select the inode.
126 * If allocating in a directory, the following hierarchy is followed:
127 * 1) allocate the preferred inode.
128 * 2) allocate an inode in the same cylinder group.
129 * 3) quadratically rehash into other cylinder groups, until an
130 * available inode is located.
131 * If no inode preference is given the following hierarchy is used
132 * to allocate an inode:
133 * 1) allocate an inode in cylinder group 0.
134 * 2) quadratically rehash into other cylinder groups, until an
135 * available inode is located.
136 */
137 int
ext2fs_inode_alloc(struct inode * pip,mode_t mode,struct ucred * cred,struct vnode ** vpp)138 ext2fs_inode_alloc(struct inode *pip, mode_t mode, struct ucred *cred,
139 struct vnode **vpp)
140 {
141 struct vnode *pvp;
142 struct m_ext2fs *fs;
143 struct inode *ip;
144 ino_t ino, ipref;
145 int cg, error;
146
147 *vpp = NULL;
148 pvp = ITOV(pip);
149 fs = pip->i_e2fs;
150 if (fs->e2fs.e2fs_ficount == 0)
151 goto noinodes;
152
153 if ((mode & IFMT) == IFDIR)
154 cg = ext2fs_dirpref(fs);
155 else
156 cg = ino_to_cg(fs, pip->i_number);
157 ipref = cg * fs->e2fs.e2fs_ipg + 1;
158 ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg);
159 if (ino == 0)
160 goto noinodes;
161 error = VFS_VGET(pvp->v_mount, ino, vpp);
162 if (error) {
163 ext2fs_inode_free(pip, ino, mode);
164 return (error);
165 }
166 ip = VTOI(*vpp);
167 if (ip->i_e2fs_mode && ip->i_e2fs_nlink != 0) {
168 printf("mode = 0%o, nlinks %d, inum = %d, fs = %s\n",
169 ip->i_e2fs_mode, ip->i_e2fs_nlink, ip->i_number, fs->e2fs_fsmnt);
170 panic("ext2fs_valloc: dup alloc");
171 }
172
173 bzero(&(ip->i_e2din), sizeof(struct ext2fs_dinode));
174
175 /*
176 * Set up a new generation number for this inode.
177 */
178 if (++ext2gennumber < (u_long)time.tv_sec)
179 ext2gennumber = time.tv_sec;
180 ip->i_e2fs_gen = ext2gennumber;
181 return (0);
182 noinodes:
183 ext2fs_fserr(fs, cred->cr_uid, "out of inodes");
184 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
185 return (ENOSPC);
186 }
187
188 /*
189 * Find a cylinder to place a directory.
190 *
191 * The policy implemented by this algorithm is to select from
192 * among those cylinder groups with above the average number of
193 * free inodes, the one with the smallest number of directories.
194 */
195 static u_long
ext2fs_dirpref(fs)196 ext2fs_dirpref(fs)
197 struct m_ext2fs *fs;
198 {
199 int cg, maxspace, mincg, avgifree;
200
201 avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
202 maxspace = 0;
203 mincg = -1;
204 for (cg = 0; cg < fs->e2fs_ncg; cg++)
205 if ( fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) {
206 if (mincg == -1 || fs->e2fs_gd[cg].ext2bgd_nbfree > maxspace) {
207 mincg = cg;
208 maxspace = fs->e2fs_gd[cg].ext2bgd_nbfree;
209 }
210 }
211 return mincg;
212 }
213
214 /*
215 * Select the desired position for the next block in a file. The file is
216 * logically divided into sections. The first section is composed of the
217 * direct blocks. Each additional section contains fs_maxbpg blocks.
218 *
219 * If no blocks have been allocated in the first section, the policy is to
220 * request a block in the same cylinder group as the inode that describes
221 * the file. Otherwise, the policy is to try to allocate the blocks
222 * contigously. The two fields of the ext2 inode extension (see
223 * ufs/ufs/inode.h) help this.
224 */
225 ufs1_daddr_t
ext2fs_blkpref(ip,lbn,indx,bap)226 ext2fs_blkpref(ip, lbn, indx, bap)
227 struct inode *ip;
228 ufs1_daddr_t lbn;
229 int indx;
230 ufs1_daddr_t *bap;
231 {
232 struct m_ext2fs *fs;
233 int cg, i;
234
235 fs = ip->i_e2fs;
236 /*
237 * if we are doing contigous lbn allocation, try to alloc blocks
238 * contigously on disk
239 */
240
241 if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) {
242 return ip->i_e2fs_last_blk + 1;
243 }
244
245 /*
246 * bap, if provided, gives us a list of blocks to which we want to
247 * stay close
248 */
249
250 if (bap) {
251 for (i = indx; i >= 0 ; i--) {
252 if (bap[i]) {
253 return fs2h32(bap[i]) + 1;
254 }
255 }
256 }
257
258 /* fall back to the first block of the cylinder containing the inode */
259
260 cg = ino_to_cg(fs, ip->i_number);
261 return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1;
262 }
263
264 /*
265 * Implement the cylinder overflow algorithm.
266 *
267 * The policy implemented by this algorithm is:
268 * 1) allocate the block in its requested cylinder group.
269 * 2) quadratically rehash on the cylinder group number.
270 * 3) brute force search for a free block.
271 */
272 static u_long
ext2fs_hashalloc(ip,cg,pref,size,allocator)273 ext2fs_hashalloc(ip, cg, pref, size, allocator)
274 struct inode *ip;
275 int cg;
276 long pref;
277 int size; /* size for data blocks, mode for inodes */
278 ufs1_daddr_t (*allocator)(struct inode *, int, ufs1_daddr_t, int);
279 {
280 struct m_ext2fs *fs;
281 long result;
282 int i, icg = cg;
283
284 fs = ip->i_e2fs;
285 /*
286 * 1: preferred cylinder group
287 */
288 result = (*allocator)(ip, cg, pref, size);
289 if (result)
290 return (result);
291 /*
292 * 2: quadratic rehash
293 */
294 for (i = 1; i < fs->e2fs_ncg; i *= 2) {
295 cg += i;
296 if (cg >= fs->e2fs_ncg)
297 cg -= fs->e2fs_ncg;
298 result = (*allocator)(ip, cg, 0, size);
299 if (result)
300 return (result);
301 }
302 /*
303 * 3: brute force search
304 * Note that we start at i == 2, since 0 was checked initially,
305 * and 1 is always checked in the quadratic rehash.
306 */
307 cg = (icg + 2) % fs->e2fs_ncg;
308 for (i = 2; i < fs->e2fs_ncg; i++) {
309 result = (*allocator)(ip, cg, 0, size);
310 if (result)
311 return (result);
312 cg++;
313 if (cg == fs->e2fs_ncg)
314 cg = 0;
315 }
316 return (0);
317 }
318
319 /*
320 * Determine whether a block can be allocated.
321 *
322 * Check to see if a block of the appropriate size is available,
323 * and if it is, allocate it.
324 */
325
326 static ufs1_daddr_t
ext2fs_alloccg(ip,cg,bpref,size)327 ext2fs_alloccg(ip, cg, bpref, size)
328 struct inode *ip;
329 int cg;
330 ufs1_daddr_t bpref;
331 int size;
332 {
333 struct m_ext2fs *fs;
334 char *bbp;
335 struct buf *bp;
336 int error, bno, start, end, loc;
337
338 fs = ip->i_e2fs;
339 if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
340 return (0);
341 error = bread(ip->i_devvp, fsbtodb(fs,
342 fs->e2fs_gd[cg].ext2bgd_b_bitmap),
343 (int)fs->e2fs_bsize, NOCRED, &bp);
344 if (error || fs->e2fs_gd[cg].ext2bgd_nbfree == 0) {
345 brelse(bp);
346 return (0);
347 }
348 bbp = (char *)bp->b_data;
349
350 if (dtog(fs, bpref) != cg)
351 bpref = 0;
352 if (bpref != 0) {
353 bpref = dtogd(fs, bpref);
354 /*
355 * if the requested block is available, use it
356 */
357 if (isclr(bbp, bpref)) {
358 bno = bpref;
359 goto gotit;
360 }
361 }
362 /*
363 * no blocks in the requested cylinder, so take next
364 * available one in this cylinder group.
365 * first try to get 8 contigous blocks, then fall back to a single
366 * block.
367 */
368 if (bpref)
369 start = dtogd(fs, bpref) / NBBY;
370 else
371 start = 0;
372 end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
373 for (loc = start; loc < end; loc++) {
374 if (bbp[loc] == 0) {
375 bno = loc * NBBY;
376 goto gotit;
377 }
378 }
379 for (loc = 0; loc < start; loc++) {
380 if (bbp[loc] == 0) {
381 bno = loc * NBBY;
382 goto gotit;
383 }
384 }
385
386 bno = ext2fs_mapsearch(fs, bbp, bpref);
387 if (bno < 0)
388 return (0);
389 gotit:
390 #ifdef DIAGNOSTIC
391 if (isset(bbp, (long)bno)) {
392 printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n",
393 cg, bno, fs->e2fs_fsmnt);
394 panic("ext2fs_alloccg: dup alloc");
395 }
396 #endif
397 setbit(bbp, (long)bno);
398 fs->e2fs.e2fs_fbcount--;
399 fs->e2fs_gd[cg].ext2bgd_nbfree--;
400 fs->e2fs_fmod = 1;
401 bdwrite(bp);
402 return (cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno);
403 }
404
405 /*
406 * Determine whether an inode can be allocated.
407 *
408 * Check to see if an inode is available, and if it is,
409 * allocate it using the following policy:
410 * 1) allocate the requested inode.
411 * 2) allocate the next available inode after the requested
412 * inode in the specified cylinder group.
413 */
414 static ufs1_daddr_t
ext2fs_nodealloccg(ip,cg,ipref,mode)415 ext2fs_nodealloccg(ip, cg, ipref, mode)
416 struct inode *ip;
417 int cg;
418 ufs1_daddr_t ipref;
419 int mode;
420 {
421 struct m_ext2fs *fs;
422 char *ibp;
423 struct buf *bp;
424 int error, start, len, loc, map, i;
425
426 ipref--; /* to avoid a lot of (ipref -1) */
427 fs = ip->i_e2fs;
428 if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
429 return (0);
430 error = bread(ip->i_devvp, fsbtodb(fs,
431 fs->e2fs_gd[cg].ext2bgd_i_bitmap),
432 (int)fs->e2fs_bsize, NOCRED, &bp);
433 if (error) {
434 brelse(bp);
435 return (0);
436 }
437 ibp = (char *)bp->b_data;
438 if (ipref) {
439 ipref %= fs->e2fs.e2fs_ipg;
440 if (isclr(ibp, ipref))
441 goto gotit;
442 }
443 start = ipref / NBBY;
444 len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY);
445 loc = skpc(0xff, len, &ibp[start]);
446 if (loc == 0) {
447 len = start + 1;
448 start = 0;
449 loc = skpc(0xff, len, &ibp[0]);
450 if (loc == 0) {
451 printf("cg = %d, ipref = %d, fs = %s\n",
452 cg, ipref, fs->e2fs_fsmnt);
453 panic("ext2fs_nodealloccg: map corrupted");
454 /* NOTREACHED */
455 }
456 }
457 i = start + len - loc;
458 map = ibp[i];
459 ipref = i * NBBY;
460 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
461 if ((map & i) == 0) {
462 goto gotit;
463 }
464 }
465 printf("fs = %s\n", fs->e2fs_fsmnt);
466 panic("ext2fs_nodealloccg: block not in map");
467 /* NOTREACHED */
468 gotit:
469 setbit(ibp, ipref);
470 fs->e2fs.e2fs_ficount--;
471 fs->e2fs_gd[cg].ext2bgd_nifree--;
472 fs->e2fs_fmod = 1;
473 if ((mode & IFMT) == IFDIR) {
474 fs->e2fs_gd[cg].ext2bgd_ndirs++;
475 }
476 bdwrite(bp);
477 return (cg * fs->e2fs.e2fs_ipg + ipref +1);
478 }
479
480 /*
481 * Free a block.
482 *
483 * The specified block is placed back in the
484 * free map.
485 */
486 void
ext2fs_blkfree(ip,bno)487 ext2fs_blkfree(ip, bno)
488 struct inode *ip;
489 ufs1_daddr_t bno;
490 {
491 struct m_ext2fs *fs;
492 char *bbp;
493 struct buf *bp;
494 int error, cg;
495
496 fs = ip->i_e2fs;
497 cg = dtog(fs, bno);
498 if ((u_int)bno >= fs->e2fs.e2fs_bcount) {
499 printf("bad block %d, ino %d\n", bno, ip->i_number);
500 ext2fs_fserr(fs, ip->i_e2fs_uid, "bad block");
501 return;
502 }
503 error = bread(ip->i_devvp,
504 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
505 (int)fs->e2fs_bsize, NOCRED, &bp);
506 if (error) {
507 brelse(bp);
508 return;
509 }
510 bbp = (char *)bp->b_data;
511 bno = dtogd(fs, bno);
512 if (isclr(bbp, bno)) {
513 printf("dev = 0x%x, block = %d, fs = %s\n",
514 ip->i_dev, bno, fs->e2fs_fsmnt);
515 panic("blkfree: freeing free block");
516 }
517 clrbit(bbp, bno);
518 fs->e2fs.e2fs_fbcount++;
519 fs->e2fs_gd[cg].ext2bgd_nbfree++;
520
521 fs->e2fs_fmod = 1;
522 bdwrite(bp);
523 }
524
525 /*
526 * Free an inode.
527 *
528 * The specified inode is placed back in the free map.
529 */
530 int
ext2fs_inode_free(struct inode * pip,ino_t ino,mode_t mode)531 ext2fs_inode_free(struct inode *pip, ino_t ino, mode_t mode)
532 {
533 register struct m_ext2fs *fs;
534 register char *ibp;
535 struct buf *bp;
536 int error, cg;
537
538 fs = pip->i_e2fs;
539 if ((u_int)ino > fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO)
540 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s",
541 pip->i_dev, ino, fs->e2fs_fsmnt);
542 cg = ino_to_cg(fs, ino);
543 error = bread(pip->i_devvp,
544 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
545 (int)fs->e2fs_bsize, NOCRED, &bp);
546 if (error) {
547 brelse(bp);
548 return (0);
549 }
550 ibp = (char *)bp->b_data;
551 ino = (ino - 1) % fs->e2fs.e2fs_ipg;
552 if (isclr(ibp, ino)) {
553 printf("dev = 0x%x, ino = %d, fs = %s\n",
554 pip->i_dev, ino, fs->e2fs_fsmnt);
555 if (fs->e2fs_ronly == 0)
556 panic("ifree: freeing free inode");
557 }
558 clrbit(ibp, ino);
559 fs->e2fs.e2fs_ficount++;
560 fs->e2fs_gd[cg].ext2bgd_nifree++;
561 if ((mode & IFMT) == IFDIR) {
562 fs->e2fs_gd[cg].ext2bgd_ndirs--;
563 }
564 fs->e2fs_fmod = 1;
565 bdwrite(bp);
566 return (0);
567 }
568
569 /*
570 * Find a block in the specified cylinder group.
571 *
572 * It is a panic if a request is made to find a block if none are
573 * available.
574 */
575
576 static ufs1_daddr_t
ext2fs_mapsearch(fs,bbp,bpref)577 ext2fs_mapsearch(fs, bbp, bpref)
578 struct m_ext2fs *fs;
579 char *bbp;
580 ufs1_daddr_t bpref;
581 {
582 ufs1_daddr_t bno;
583 int start, len, loc, i, map;
584
585 /*
586 * find the fragment by searching through the free block
587 * map for an appropriate bit pattern
588 */
589 if (bpref)
590 start = dtogd(fs, bpref) / NBBY;
591 else
592 start = 0;
593 len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
594 loc = skpc(0xff, len, &bbp[start]);
595 if (loc == 0) {
596 len = start + 1;
597 start = 0;
598 loc = skpc(0xff, len, &bbp[start]);
599 if (loc == 0) {
600 printf("start = %d, len = %d, fs = %s\n",
601 start, len, fs->e2fs_fsmnt);
602 panic("ext2fs_alloccg: map corrupted");
603 /* NOTREACHED */
604 }
605 }
606 i = start + len - loc;
607 map = bbp[i];
608 bno = i * NBBY;
609 for (i = 1; i < (1 << NBBY); i <<= 1, bno++) {
610 if ((map & i) == 0)
611 return (bno);
612 }
613 printf("fs = %s\n", fs->e2fs_fsmnt);
614 panic("ext2fs_mapsearch: block not in map");
615 /* NOTREACHED */
616 }
617
618 /*
619 * Fserr prints the name of a file system with an error diagnostic.
620 *
621 * The form of the error message is:
622 * fs: error message
623 */
624 static void
ext2fs_fserr(fs,uid,cp)625 ext2fs_fserr(fs, uid, cp)
626 struct m_ext2fs *fs;
627 u_int uid;
628 char *cp;
629 {
630
631 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
632 }
633