1 /*        $NetBSD: lfs_inode.c,v 1.160 2020/04/23 21:47:09 ad Exp $   */
2 
3 /*-
4  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Konrad E. Schroder <perseant@hhhh.org>.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 /*
32  * Copyright (c) 1986, 1989, 1991, 1993
33  *        The Regents of the University of California.  All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without
36  * modification, are permitted provided that the following conditions
37  * are met:
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. Neither the name of the University nor the names of its contributors
44  *    may be used to endorse or promote products derived from this software
45  *    without specific prior written permission.
46  *
47  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57  * SUCH DAMAGE.
58  *
59  *        @(#)lfs_inode.c     8.9 (Berkeley) 5/8/95
60  */
61 
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.160 2020/04/23 21:47:09 ad Exp $");
64 
65 #if defined(_KERNEL_OPT)
66 #include "opt_quota.h"
67 #endif
68 
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/mount.h>
72 #include <sys/malloc.h>
73 #include <sys/proc.h>
74 #include <sys/file.h>
75 #include <sys/buf.h>
76 #include <sys/vnode.h>
77 #include <sys/kernel.h>
78 #include <sys/trace.h>
79 #include <sys/resourcevar.h>
80 #include <sys/kauth.h>
81 
82 #include <ufs/lfs/ulfs_quotacommon.h>
83 #include <ufs/lfs/ulfs_inode.h>
84 #include <ufs/lfs/ulfsmount.h>
85 #include <ufs/lfs/ulfs_extern.h>
86 
87 #include <ufs/lfs/lfs.h>
88 #include <ufs/lfs/lfs_accessors.h>
89 #include <ufs/lfs/lfs_extern.h>
90 #include <ufs/lfs/lfs_kernel.h>
91 
/*
 * Prototypes for the static (file-local) helpers used by the truncate
 * and segment-accounting code in this file.
 */
92 static int lfs_update_seguse(struct lfs *, struct inode *ip, long, size_t);
93 static int lfs_indirtrunc(struct inode *, daddr_t, daddr_t,
94                                 daddr_t, int, daddr_t *, daddr_t *,
95                                 long *, size_t *);
96 static int lfs_blkfree (struct lfs *, struct inode *, daddr_t, size_t, long *, size_t *);
97 static int lfs_vtruncbuf(struct vnode *, daddr_t, bool, int);
98 
99 /* Search a block for a specific dinode. */
100 union lfs_dinode *
lfs_ifind(struct lfs * fs,ino_t ino,struct buf * bp)101 lfs_ifind(struct lfs *fs, ino_t ino, struct buf *bp)
102 {
103           union lfs_dinode *ldip;
104           unsigned num, i;
105 
106           ASSERT_NO_SEGLOCK(fs);
107           /*
108            * Read the inode block backwards, since later versions of the
109            * inode will supercede earlier ones.  Though it is unlikely, it is
110            * possible that the same inode will appear in the same inode block.
111            */
112           num = LFS_INOPB(fs);
113           for (i = num; i-- > 0; ) {
114                     ldip = DINO_IN_BLOCK(fs, bp->b_data, i);
115                     if (lfs_dino_getinumber(fs, ldip) == ino)
116                               return (ldip);
117           }
118 
119           printf("searched %u entries for %ju\n", num, (uintmax_t)ino);
120           printf("offset is 0x%jx (seg %d)\n", (uintmax_t)lfs_sb_getoffset(fs),
121                  lfs_dtosn(fs, lfs_sb_getoffset(fs)));
122           printf("block is 0x%jx (seg %d)\n",
123                  (uintmax_t)LFS_DBTOFSB(fs, bp->b_blkno),
124                  lfs_dtosn(fs, LFS_DBTOFSB(fs, bp->b_blkno)));
125 
126           return NULL;
127 }
128 
129 int
lfs_update(struct vnode * vp,const struct timespec * acc,const struct timespec * mod,int updflags)130 lfs_update(struct vnode *vp, const struct timespec *acc,
131     const struct timespec *mod, int updflags)
132 {
133           struct inode *ip;
134           struct lfs *fs = VFSTOULFS(vp->v_mount)->um_lfs;
135           int flags;
136           int error;
137 
138           ASSERT_NO_SEGLOCK(fs);
139           if (vp->v_mount->mnt_flag & MNT_RDONLY)
140                     return (0);
141           ip = VTOI(vp);
142 
143           /*
144            * If we are called from vinvalbuf, and the file's blocks have
145            * already been scheduled for writing, but the writes have not
146            * yet completed, lfs_vflush will not be called, and vinvalbuf
147            * will cause a panic.        So, we must wait until any pending write
148            * for our inode completes, if we are called with UPDATE_WAIT set.
149            */
150           mutex_enter(vp->v_interlock);
151           while ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT &&
152               WRITEINPROG(vp)) {
153                     DLOG((DLOG_SEG, "lfs_update: sleeping on ino %llu"
154                           " (in progress)\n", (unsigned long long) ip->i_number));
155                     cv_wait(&vp->v_cv, vp->v_interlock);
156           }
157           mutex_exit(vp->v_interlock);
158           LFS_ITIMES(ip, acc, mod, NULL);
159           if (updflags & UPDATE_CLOSE)
160                     flags = ip->i_state & (IN_MODIFIED | IN_ACCESSED | IN_CLEANING);
161           else
162                     flags = ip->i_state & (IN_MODIFIED | IN_CLEANING);
163           if (flags == 0)
164                     return (0);
165 
166           /* If sync, push back the vnode and any dirty blocks it may have. */
167           if ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT) {
168                     /* Avoid flushing VU_DIROP. */
169                     mutex_enter(&lfs_lock);
170                     ++fs->lfs_diropwait;
171                     while (vp->v_uflag & VU_DIROP) {
172                               DLOG((DLOG_DIROP, "lfs_update: sleeping on inode %llu "
173                                     "(dirops)\n", (unsigned long long) ip->i_number));
174                               DLOG((DLOG_DIROP, "lfs_update: vflags 0x%x, i_state"
175                                     " 0x%x\n",
176                                     vp->v_iflag | vp->v_vflag | vp->v_uflag,
177                                     ip->i_state));
178                               if (fs->lfs_dirops == 0)
179                                         break;
180                               else
181                                         mtsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync",
182                                                   0, &lfs_lock);
183                               /* XXX KS - by falling out here, are we writing the vn
184                               twice? */
185                     }
186                     --fs->lfs_diropwait;
187                     fs->lfs_writer++;
188                     if (vp->v_uflag & VU_DIROP) {
189                               KASSERT(fs->lfs_dirops == 0);
190                               lfs_flush_fs(fs, SEGM_SYNC);
191                     }
192                     mutex_exit(&lfs_lock);
193                     error = lfs_vflush(vp);
194                     mutex_enter(&lfs_lock);
195                     if (--fs->lfs_writer == 0)
196                               cv_broadcast(&fs->lfs_diropscv);
197                     mutex_exit(&lfs_lock);
198                     return error;
199           }
200           return 0;
201 }
202 
203 #define   SINGLE    0         /* index of single indirect block */
204 #define   DOUBLE    1         /* index of double indirect block */
205 #define   TRIPLE    2         /* index of triple indirect block */
206 /*
207  * Truncate the inode oip to at most length size, freeing the
208  * disk blocks.
209  */
210 /* VOP_BWRITE 1 + ULFS_NIADDR + lfs_balloc == 2 + 2*ULFS_NIADDR times */
211 
/*
 * lfs_truncate: set the size of the file behind vnode ovp to length.
 *
 * ovp     vnode whose inode is resized
 * length  new size in bytes; a negative value is rejected with EINVAL
 * ioflag  only IO_SYNC is examined here; it adds B_SYNC to the
 *         allocation flags and PGO_SYNCIO to the page flushes
 * cred    credentials handed to the block allocation routines
 *
 * Character, block, FIFO and socket vnodes return 0 immediately.  A
 * symlink that passes the SHORTLINK test below, and a file already at
 * the requested size, are finished via lfs_update().  Growing fails with
 * EFBIG when length exceeds fs->um_maxfilesize; otherwise the last byte
 * of the new range is allocated.  Shrinking zeroes the tail of a partial
 * last block (not for directories), then releases indirect and direct
 * blocks past the new end and adjusts the inode and superblock counters.
 *
 * Returns 0 or an errno value.  On the shrink path allerror, when
 * non-zero, is returned in preference to the last value of error.
 */
212 int
lfs_truncate(struct vnode * ovp,off_t length,int ioflag,kauth_cred_t cred)213 lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
214 {
215           daddr_t lastblock;
216           struct inode *oip = VTOI(ovp);
217           daddr_t bn, lbn, lastiblock[ULFS_NIADDR], indir_lbn[ULFS_NIADDR];
218           /* note: newblks is set but only actually used if DIAGNOSTIC */
219           daddr_t newblks[ULFS_NDADDR + ULFS_NIADDR] __diagused;
220           struct lfs *fs;
221           struct buf *bp;
222           int offset, size, level;
223           daddr_t count, rcount;
224           daddr_t blocksreleased = 0, real_released = 0;
225           int i, nblocks;
226           int aflags, error, allerror = 0;
227           off_t osize;
228           long lastseg;
229           size_t bc;
230           int obufsize, odb;
231           int usepc;
232 
              /*
               * Device, FIFO and socket vnodes: nothing to do beyond
               * asserting that the recorded size is zero.
               */
233           if (ovp->v_type == VCHR || ovp->v_type == VBLK ||
234               ovp->v_type == VFIFO || ovp->v_type == VSOCK) {
235                     KASSERT(oip->i_size == 0);
236                     return 0;
237           }
238 
239           if (length < 0)
240                     return (EINVAL);
241 
242           fs = oip->i_lfs;
243 
              /*
               * Symlink handled through SHORTLINK(oip): clear it and finish
               * via lfs_update().  Only a truncate to zero is expected here
               * (asserted).
               */
244           if (ovp->v_type == VLNK &&
245               (oip->i_size < fs->um_maxsymlinklen ||
246                (fs->um_maxsymlinklen == 0 &&
247                 lfs_dino_getblocks(fs, oip->i_din) == 0))) {
248                     KASSERTMSG((length == 0),
249                         "partial truncate of symlink: %jd", (intmax_t)length);
250                     memset((char *)SHORTLINK(oip), 0, (u_int)oip->i_size);
251                     oip->i_size = 0;
252                     lfs_dino_setsize(fs, oip->i_din, 0);
253                     oip->i_state |= IN_CHANGE | IN_UPDATE;
254                     return (lfs_update(ovp, NULL, NULL, 0));
255           }
256           if (oip->i_size == length) {
257                     /* still do a uvm_vnp_setsize() as writesize may be larger */
258                     uvm_vnp_setsize(ovp, length);
259                     oip->i_state |= IN_CHANGE | IN_UPDATE;
260                     return (lfs_update(ovp, NULL, NULL, 0));
261           }
262           lfs_imtime(fs);
263           osize = oip->i_size;
              /*
               * usepc is set for a regular file other than fs->lfs_ivnode.
               * Such files go through ulfs_balloc_range() and page flushes
               * below; everything else uses lfs_balloc() and VOP_BWRITE().
               */
264           usepc = (ovp->v_type == VREG && ovp != fs->lfs_ivnode);
265 
266           ASSERT_NO_SEGLOCK(fs);
267           /*
268            * Lengthen the size of the file. We must ensure that the
269            * last byte of the file is allocated. Since the smallest
270            * value of osize is 0, length will be at least 1.
271            */
272           if (osize < length) {
273                     if (length > fs->um_maxfilesize)
274                               return (EFBIG);
275                     aflags = B_CLRBUF;
276                     if (ioflag & IO_SYNC)
277                               aflags |= B_SYNC;
278                     if (usepc) {
                                  /*
                                   * The old end lies part-way through a direct
                                   * block and the new end is in a different
                                   * block: first allocate out to the end of
                                   * the old last block (eob).
                                   */
279                               if (lfs_lblkno(fs, osize) < ULFS_NDADDR &&
280                                   lfs_lblkno(fs, osize) != lfs_lblkno(fs, length) &&
281                                   lfs_blkroundup(fs, osize) != osize) {
282                                         off_t eob;
283 
284                                         eob = lfs_blkroundup(fs, osize);
285                                         uvm_vnp_setwritesize(ovp, eob);
286                                         error = ulfs_balloc_range(ovp, osize,
287                                             eob - osize, cred, aflags);
288                                         if (error) {
                                                      /*
                                                       * Allocation failed: truncate
                                                       * back to the original size
                                                       * before reporting the error.
                                                       */
289                                                   (void) lfs_truncate(ovp, osize,
290                                                                 ioflag & IO_SYNC, cred);
291                                                   return error;
292                                         }
293                                         if (ioflag & IO_SYNC) {
                                                      /*
                                                       * NOTE(review): no matching
                                                       * rw_exit() is visible here;
                                                       * presumably VOP_PUTPAGES()
                                                       * releases the lock - confirm.
                                                       * Its return value is discarded,
                                                       * unlike the call in the shrink
                                                       * path further down.
                                                       */
294                                                   rw_enter(ovp->v_uobj.vmobjlock, RW_WRITER);
295                                                   VOP_PUTPAGES(ovp,
296                                                       trunc_page(osize & lfs_sb_getbmask(fs)),
297                                                       round_page(eob),
298                                                       PGO_CLEANIT | PGO_SYNCIO);
299                                         }
300                               }
301                               uvm_vnp_setwritesize(ovp, length);
302                               error = ulfs_balloc_range(ovp, length - 1, 1, cred,
303                                                              aflags);
304                               if (error) {
305                                         (void) lfs_truncate(ovp, osize,
306                                                                 ioflag & IO_SYNC, cred);
307                                         return error;
308                               }
309                               uvm_vnp_setsize(ovp, length);
310                               oip->i_state |= IN_CHANGE | IN_UPDATE;
311                               KASSERT(ovp->v_size == oip->i_size);
312                               oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
313                               return (lfs_update(ovp, NULL, NULL, 0));
314                     } else {
                                  /*
                                   * Reserve space around the allocation.  The
                                   * second lfs_reserve() call hands the same
                                   * amount back (negated) whether or not
                                   * lfs_balloc() succeeded.
                                   */
315                               error = lfs_reserve(fs, ovp, NULL,
316                                   lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs)));
317                               if (error)
318                                         return (error);
319                               error = lfs_balloc(ovp, length - 1, 1, cred,
320                                                      aflags, &bp);
321                               lfs_reserve(fs, ovp, NULL,
322                                   -lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs)));
323                               if (error)
324                                         return (error);
325                               oip->i_size = length;
326                               lfs_dino_setsize(fs, oip->i_din, oip->i_size);
327                               uvm_vnp_setsize(ovp, length);
328                               (void) VOP_BWRITE(bp->b_vp, bp);
329                               oip->i_state |= IN_CHANGE | IN_UPDATE;
330                               oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
331                               return (lfs_update(ovp, NULL, NULL, 0));
332                     }
333           }
334 
              /*
               * Shrink path.  The reservation taken here is given back with
               * a negated lfs_reserve() call on every exit below: before each
               * "goto errout" and again just ahead of genfs_node_unlock().
               */
335           if ((error = lfs_reserve(fs, ovp, NULL,
336               lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)))) != 0)
337                     return (error);
338 
339           /*
340            * Shorten the size of the file. If the file is not being
341            * truncated to a block boundary, the contents of the
342            * partial block following the end of the file must be
343            * zero'ed in case it ever becomes accessible again because
344            * of subsequent file growth. Directories however are not
345            * zero'ed as they should grow back initialized to empty.
346            */
347           offset = lfs_blkoff(fs, length);
348           lastseg = -1;
349           bc = 0;
350 
              /*
               * Take the segment lock unless this is fs->lfs_ivnode; the
               * errout code drops it under the same test.
               */
351           if (ovp != fs->lfs_ivnode)
352                     lfs_seglock(fs, SEGM_PROT);
353           if (offset == 0) {
354                     oip->i_size = length;
355                     lfs_dino_setsize(fs, oip->i_din, oip->i_size);
356           } else if (!usepc) {
357                     lbn = lfs_lblkno(fs, length);
358                     aflags = B_CLRBUF;
359                     if (ioflag & IO_SYNC)
360                               aflags |= B_SYNC;
361                     error = lfs_balloc(ovp, length - 1, 1, cred, aflags, &bp);
362                     if (error) {
363                               lfs_reserve(fs, ovp, NULL,
364                                   -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
365                               goto errout;
366                     }
                        /*
                         * Remember the buffer's size before it is resized so
                         * the locked-queue byte count and the superblock
                         * "avail" figure can be corrected afterwards.
                         */
367                     obufsize = bp->b_bufsize;
368                     odb = lfs_btofsb(fs, bp->b_bcount);
369                     oip->i_size = length;
370                     lfs_dino_setsize(fs, oip->i_din, oip->i_size);
371                     size = lfs_blksize(fs, oip, lbn);
372                     if (ovp->v_type != VDIR)
373                               memset((char *)bp->b_data + offset, 0,
374                                      (u_int)(size - offset));
375                     allocbuf(bp, size, 1);
376                     if ((bp->b_flags & B_LOCKED) != 0 && bp->b_iodone == NULL) {
377                               mutex_enter(&lfs_lock);
378                               locked_queue_bytes -= obufsize - bp->b_bufsize;
379                               mutex_exit(&lfs_lock);
380                     }
381                     if (bp->b_oflags & BO_DELWRI) {
382                               lfs_sb_addavail(fs, odb - lfs_btofsb(fs, size));
383                               /* XXX shouldn't this wake up on lfs_availsleep? */
384                     }
385                     (void) VOP_BWRITE(bp->b_vp, bp);
386           } else { /* vp->v_type == VREG && length < osize && offset != 0 */
387                     /*
388                      * When truncating a regular file down to a non-block-aligned
389                      * size, we must zero the part of last block which is past
390                      * the new EOF.  We must synchronously flush the zeroed pages
391                      * to disk since the new pages will be invalidated as soon
392                      * as we inform the VM system of the new, smaller size.
393                      * We must do this before acquiring the GLOCK, since fetching
394                      * the pages will acquire the GLOCK internally.
395                      * So there is a window where another thread could see a whole
396                      * zeroed page past EOF, but that's life.
397                      */
398                     daddr_t xlbn;
399                     voff_t eoz;
400 
401                     aflags = ioflag & IO_SYNC ? B_SYNC : 0;
402                     error = ulfs_balloc_range(ovp, length - 1, 1, cred, aflags);
403                     if (error) {
404                               lfs_reserve(fs, ovp, NULL,
405                                             -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
406                               goto errout;
407                     }
408                     xlbn = lfs_lblkno(fs, length);
409                     size = lfs_blksize(fs, oip, xlbn);
                        /*
                         * eoz: end of the range to zero - the end of the block
                         * holding the new EOF, capped at the old file size.
                         */
410                     eoz = MIN(lfs_lblktosize(fs, xlbn) + size, osize);
411                     ubc_zerorange(&ovp->v_uobj, length, eoz - length,
412                         UBC_VNODE_FLAGS(ovp));
413                     if (round_page(eoz) > round_page(length)) {
414                               rw_enter(ovp->v_uobj.vmobjlock, RW_WRITER);
415                               error = VOP_PUTPAGES(ovp, round_page(length),
416                                   round_page(eoz),
417                                   PGO_CLEANIT | PGO_DEACTIVATE |
418                                   ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0));
419                               if (error) {
420                                         lfs_reserve(fs, ovp, NULL,
421                                                       -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
422                                         goto errout;
423                               }
424                     }
425           }
426 
              /*
               * The genfs node lock is held from here until the
               * genfs_node_unlock() call just before the errout label.
               */
427           genfs_node_wrlock(ovp);
428 
429           oip->i_size = length;
430           lfs_dino_setsize(fs, oip->i_din, oip->i_size);
431           uvm_vnp_setsize(ovp, length);
432 
433           /*
434            * Calculate index into inode's block list of
435            * last direct and indirect blocks (if any)
436            * which we want to keep.  Lastblock is -1 when
437            * the file is truncated to 0.
438            */
439           /* Avoid sign overflow - XXX assumes that off_t is a quad_t. */
440           if (length > QUAD_MAX - lfs_sb_getbsize(fs))
441                     lastblock = lfs_lblkno(fs, QUAD_MAX - lfs_sb_getbsize(fs));
442           else
443                     lastblock = lfs_lblkno(fs, length + lfs_sb_getbsize(fs) - 1) - 1;
444           lastiblock[SINGLE] = lastblock - ULFS_NDADDR;
445           lastiblock[DOUBLE] = lastiblock[SINGLE] - LFS_NINDIR(fs);
446           lastiblock[TRIPLE] = lastiblock[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs);
              /* nblocks: size of one full block, as converted by lfs_btofsb(). */
447           nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs));
448           /*
449            * Record changed file and block pointers before we start
450            * freeing blocks.  lastiblock values are also normalized to -1
451            * for calls to lfs_indirtrunc below.
452            */
453           for (i=0; i<ULFS_NDADDR; i++) {
454                     newblks[i] = lfs_dino_getdb(fs, oip->i_din, i);
455           }
456           for (i=0; i<ULFS_NIADDR; i++) {
457                     newblks[ULFS_NDADDR + i] = lfs_dino_getib(fs, oip->i_din, i);
458           }
459           for (level = TRIPLE; level >= SINGLE; level--)
460                     if (lastiblock[level] < 0) {
461                               newblks[ULFS_NDADDR+level] = 0;
462                               lastiblock[level] = -1;
463                     }
464           for (i = ULFS_NDADDR - 1; i > lastblock; i--)
465                     newblks[i] = 0;
466 
              /*
               * Run the block-freeing code with the old size in the inode.
               * The new size is reinstated at the "Put back the real size"
               * step below (or earlier, in the last-direct-block case).
               */
467           oip->i_size = osize;
468           lfs_dino_setsize(fs, oip->i_din, oip->i_size);
469           error = lfs_vtruncbuf(ovp, lastblock + 1, false, 0);
470           if (error && !allerror)
471                     allerror = error;
472 
473           /*
474            * Indirect blocks first.
475            */
476           indir_lbn[SINGLE] = -ULFS_NDADDR;
477           indir_lbn[DOUBLE] = indir_lbn[SINGLE] - LFS_NINDIR(fs) - 1;
478           indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs) - 1;
479           for (level = TRIPLE; level >= SINGLE; level--) {
480                     bn = lfs_dino_getib(fs, oip->i_din, level);
481                     if (bn != 0) {
482                               error = lfs_indirtrunc(oip, indir_lbn[level],
483                                                          bn, lastiblock[level],
484                                                          level, &count, &rcount,
485                                                          &lastseg, &bc);
486                               if (error)
487                                         allerror = error;
488                               real_released += rcount;
489                               blocksreleased += count;
                                  /*
                                   * Nothing is kept at this level, so the
                                   * indirect block itself is released too.
                                   */
490                               if (lastiblock[level] < 0) {
491                                         if (lfs_dino_getib(fs, oip->i_din, level) > 0)
492                                                   real_released += nblocks;
493                                         blocksreleased += nblocks;
494                                         lfs_dino_setib(fs, oip->i_din, level, 0);
495                                         lfs_blkfree(fs, oip, bn, lfs_sb_getbsize(fs),
496                                                       &lastseg, &bc);
497                               lfs_deregister_block(ovp, bn);
498                               }
499                     }
                        /*
                         * Something at this indirection level is being kept,
                         * so lower levels and the direct blocks are left alone.
                         */
500                     if (lastiblock[level] >= 0)
501                               goto done;
502           }
503 
504           /*
505            * All whole direct blocks or frags.
506            */
507           for (i = ULFS_NDADDR - 1; i > lastblock; i--) {
508                     long bsize, obsize;
509 
510                     bn = lfs_dino_getdb(fs, oip->i_din, i);
511                     if (bn == 0)
512                               continue;
513                     bsize = lfs_blksize(fs, oip, i);
                        /*
                         * A positive address adds the recorded fragment size
                         * to real_released; otherwise obsize is 0 and only
                         * blocksreleased grows.
                         */
514                     if (lfs_dino_getdb(fs, oip->i_din, i) > 0) {
515                               /* Check for fragment size changes */
516                               obsize = oip->i_lfs_fragsize[i];
517                               real_released += lfs_btofsb(fs, obsize);
518                               oip->i_lfs_fragsize[i] = 0;
519                     } else
520                               obsize = 0;
521                     blocksreleased += lfs_btofsb(fs, bsize);
522                     lfs_dino_setdb(fs, oip->i_din, i, 0);
523                     lfs_blkfree(fs, oip, bn, obsize, &lastseg, &bc);
524           lfs_deregister_block(ovp, bn);
525           }
526           if (lastblock < 0)
527                     goto done;
528 
529           /*
530            * Finally, look for a change in size of the
531            * last direct block; release any frags.
532            */
533           bn = lfs_dino_getdb(fs, oip->i_din, lastblock);
534           if (bn != 0) {
535                     long oldspace, newspace;
536 #if 0
537                     long olddspace;
538 #endif
539 
540                     /*
541                      * Calculate amount of space we're giving
542                      * back as old block size minus new block size.
543                      */
544                     oldspace = lfs_blksize(fs, oip, lastblock);
545 #if 0
546                     olddspace = oip->i_lfs_fragsize[lastblock];
547 #endif
548 
549                     oip->i_size = length;
550                     lfs_dino_setsize(fs, oip->i_din, oip->i_size);
551                     newspace = lfs_blksize(fs, oip, lastblock);
552                     if (newspace == 0)
553                               panic("itrunc: newspace");
554                     if (oldspace - newspace > 0) {
555                               blocksreleased += lfs_btofsb(fs, oldspace - newspace);
556                     }
557 #if 0
558                     if (bn > 0 && olddspace - newspace > 0) {
559                               /* No segment accounting here, just vnode */
560                               real_released += lfs_btofsb(fs, olddspace - newspace);
561                     }
562 #endif
563           }
564 
565 done:
566           /* Finish segment accounting corrections */
567           lfs_update_seguse(fs, oip, lastseg, bc);
              /*
               * Diagnostic cross-check: each pointer recorded as zero in
               * newblks must now be zero in the inode, and vice versa.
               */
568           for (level = SINGLE; level <= TRIPLE; level++)
569                     KASSERTMSG(((newblks[ULFS_NDADDR + level] == 0) ==
570                               (lfs_dino_getib(fs, oip->i_din, level) == 0)),
571                         "lfs itrunc1");
572           for (i = 0; i < ULFS_NDADDR; i++)
573                     KASSERTMSG(((newblks[i] == 0) ==
574                               (lfs_dino_getdb(fs, oip->i_din, i) == 0)),
575                         "lfs itrunc2");
576           KASSERTMSG((length != 0 || LIST_EMPTY(&ovp->v_cleanblkhd)),
577               "lfs itrunc3a");
578           KASSERTMSG((length != 0 || LIST_EMPTY(&ovp->v_dirtyblkhd)),
579               "lfs itrunc3b");
580 
581           /*
582            * Put back the real size.
583            */
584           oip->i_size = length;
585           lfs_dino_setsize(fs, oip->i_din, oip->i_size);
              /*
               * blocksreleased adjusts i_lfs_effnblks and the superblock
               * free count; real_released adjusts the dinode block count.
               */
586           oip->i_lfs_effnblks -= blocksreleased;
587 
588           mutex_enter(&lfs_lock);
589           lfs_dino_setblocks(fs, oip->i_din,
590               lfs_dino_getblocks(fs, oip->i_din) - real_released);
591           lfs_sb_addbfree(fs, blocksreleased);
592 
593           KASSERTMSG((oip->i_size != 0 ||
594                     lfs_dino_getblocks(fs, oip->i_din) == 0),
595               "ino %llu truncate to 0 but %jd blks/%jd effblks",
596               (unsigned long long) oip->i_number,
597               lfs_dino_getblocks(fs, oip->i_din), oip->i_lfs_effnblks);
598           KASSERTMSG((oip->i_size != 0 || oip->i_lfs_effnblks == 0),
599               "ino %llu truncate to 0 but %jd blks/%jd effblks",
600               (unsigned long long) oip->i_number,
601               lfs_dino_getblocks(fs, oip->i_din), oip->i_lfs_effnblks);
602 
603           /*
604            * If we truncated to zero, take us off the paging queue.
605            */
606           if (oip->i_size == 0 && oip->i_state & IN_PAGING) {
607                     oip->i_state &= ~IN_PAGING;
608                     TAILQ_REMOVE(&fs->lfs_pchainhd, oip, i_lfs_pchain);
609           }
610           mutex_exit(&lfs_lock);
611 
612           oip->i_state |= IN_CHANGE;
613 #if defined(LFS_QUOTA) || defined(LFS_QUOTA2)
614           (void) lfs_chkdq(oip, -blocksreleased, NOCRED, 0);
615 #endif
616           lfs_reserve(fs, ovp, NULL,
617               -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
618           genfs_node_unlock(ovp);
              /*
               * Common exit.  It is also reached directly from the early
               * failures above, which have already released the reservation
               * and never took the genfs node lock.  Recompute i_lfs_hiblk
               * from the current i_size and drop the segment lock.
               */
619   errout:
620           oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
621           if (ovp != fs->lfs_ivnode)
622                     lfs_segunlock(fs);
623           return (allerror ? allerror : error);
624 }
625 
626 /* Update segment and avail usage information when removing a block. */
627 static int
lfs_blkfree(struct lfs * fs,struct inode * ip,daddr_t daddr,size_t bsize,long * lastseg,size_t * num)628 lfs_blkfree(struct lfs *fs, struct inode *ip, daddr_t daddr,
629               size_t bsize, long *lastseg, size_t *num)
630 {
631           long seg;
632           int error = 0;
633 
634           ASSERT_SEGLOCK(fs);
635           bsize = lfs_fragroundup(fs, bsize);
636           if (daddr > 0) {
637                     if (*lastseg != (seg = lfs_dtosn(fs, daddr))) {
638                               error = lfs_update_seguse(fs, ip, *lastseg, *num);
639                               *num = bsize;
640                               *lastseg = seg;
641                     } else
642                               *num += bsize;
643           }
644 
645           return error;
646 }
647 
648 /* Finish the accounting updates for a segment. */
649 static int
lfs_update_seguse(struct lfs * fs,struct inode * ip,long lastseg,size_t num)650 lfs_update_seguse(struct lfs *fs, struct inode *ip, long lastseg, size_t num)
651 {
652           struct segdelta *sd;
653 
654           ASSERT_SEGLOCK(fs);
655           if (lastseg < 0 || num == 0)
656                     return 0;
657 
658           LIST_FOREACH(sd, &ip->i_lfs_segdhd, list)
659                     if (sd->segnum == lastseg)
660                               break;
661           if (sd == NULL) {
662                     sd = malloc(sizeof(*sd), M_SEGMENT, M_WAITOK);
663                     sd->segnum = lastseg;
664                     sd->num = 0;
665                     LIST_INSERT_HEAD(&ip->i_lfs_segdhd, sd, list);
666           }
667           sd->num += num;
668 
669           return 0;
670 }
671 
672 static void
lfs_finalize_seguse(struct lfs * fs,void * v)673 lfs_finalize_seguse(struct lfs *fs, void *v)
674 {
675           SEGUSE *sup;
676           struct buf *bp;
677           struct segdelta *sd;
678           LIST_HEAD(, segdelta) *hd = v;
679 
680           ASSERT_SEGLOCK(fs);
681           while((sd = LIST_FIRST(hd)) != NULL) {
682                     LIST_REMOVE(sd, list);
683                     LFS_SEGENTRY(sup, fs, sd->segnum, bp);
684                     if (sd->num > sup->su_nbytes) {
685                               printf("lfs_finalize_seguse: segment %ld short by %ld\n",
686                                         sd->segnum, (long)(sd->num - sup->su_nbytes));
687                               panic("lfs_finalize_seguse: negative bytes");
688                               sup->su_nbytes = sd->num;
689                     }
690                     sup->su_nbytes -= sd->num;
691                     LFS_WRITESEGENTRY(sup, fs, sd->segnum, bp);
692                     free(sd, M_SEGMENT);
693           }
694 }
695 
696 /* Finish the accounting updates for a segment. */
697 void
lfs_finalize_ino_seguse(struct lfs * fs,struct inode * ip)698 lfs_finalize_ino_seguse(struct lfs *fs, struct inode *ip)
699 {
700           ASSERT_SEGLOCK(fs);
701           lfs_finalize_seguse(fs, &ip->i_lfs_segdhd);
702 }
703 
704 /* Finish the accounting updates for a segment. */
705 void
lfs_finalize_fs_seguse(struct lfs * fs)706 lfs_finalize_fs_seguse(struct lfs *fs)
707 {
708           ASSERT_SEGLOCK(fs);
709           lfs_finalize_seguse(fs, &fs->lfs_segdhd);
710 }
711 
712 /*
713  * Release blocks associated with the inode ip and stored in the indirect
714  * block bn.  Blocks are free'd in LIFO order up to (but not including)
715  * lastbn.  If level is greater than SINGLE, the block is an indirect block
716  * and recursive calls to indirtrunc must be used to cleanse other indirect
717  * blocks.
718  *
719  * NB: triple indirect blocks are untested.
720  */
721 static int
lfs_indirtrunc(struct inode * ip,daddr_t lbn,daddr_t dbn,daddr_t lastbn,int level,daddr_t * countp,daddr_t * rcountp,long * lastsegp,size_t * bcp)722 lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn,
723                  daddr_t lastbn, int level, daddr_t *countp,
724                  daddr_t *rcountp, long *lastsegp, size_t *bcp)
725 {
726           int i;
727           struct buf *bp;
728           struct lfs *fs = ip->i_lfs;
729           void *bap;
730           bool bap_needs_free;
731           struct vnode *vp;
732           daddr_t nb, nlbn, last;
733           daddr_t blkcount, rblkcount, factor;
734           int nblocks;
735           daddr_t blocksreleased = 0, real_released = 0;
736           int error = 0, allerror = 0;
737 
738           ASSERT_SEGLOCK(fs);
739           /*
740            * Calculate index in current block of last
741            * block to be kept.  -1 indicates the entire
742            * block so we need not calculate the index.
743            */
744           factor = 1;
745           for (i = SINGLE; i < level; i++)
746                     factor *= LFS_NINDIR(fs);
747           last = lastbn;
748           if (lastbn > 0)
749                     last /= factor;
750           nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs));
751           /*
752            * Get buffer of block pointers, zero those entries corresponding
753            * to blocks to be free'd, and update on disk copy first.  Since
754            * double(triple) indirect before single(double) indirect, calls
755            * to bmap on these blocks will fail.  However, we already have
756            * the on disk address, so we have to set the b_blkno field
757            * explicitly instead of letting bread do everything for us.
758            */
759           vp = ITOV(ip);
760           bp = getblk(vp, lbn, lfs_sb_getbsize(fs), 0, 0);
761           if (bp->b_oflags & (BO_DONE | BO_DELWRI)) {
762                     /* Braces must be here in case trace evaluates to nothing. */
763                     trace(TR_BREADHIT, pack(vp, lfs_sb_getbsize(fs)), lbn);
764           } else {
765                     trace(TR_BREADMISS, pack(vp, lfs_sb_getbsize(fs)), lbn);
766                     curlwp->l_ru.ru_inblock++; /* pay for read */
767                     bp->b_flags |= B_READ;
768                     if (bp->b_bcount > bp->b_bufsize)
769                               panic("lfs_indirtrunc: bad buffer size");
770                     bp->b_blkno = LFS_FSBTODB(fs, dbn);
771                     VOP_STRATEGY(vp, bp);
772                     error = biowait(bp);
773           }
774           if (error) {
775                     brelse(bp, 0);
776                     *countp = *rcountp = 0;
777                     return (error);
778           }
779 
780           if (lastbn >= 0) {
781                     /*
782                      * We still need this block, so copy the data for
783                      * subsequent processing; then in the original block,
784                      * zero out the dying block pointers and send it off.
785                      */
786                     bap = lfs_malloc(fs, lfs_sb_getbsize(fs), LFS_NB_IBLOCK);
787                     memcpy(bap, bp->b_data, lfs_sb_getbsize(fs));
788                     bap_needs_free = true;
789 
790                     for (i = last + 1; i < LFS_NINDIR(fs); i++) {
791                               lfs_iblock_set(fs, bp->b_data, i, 0);
792                     }
793                     error = VOP_BWRITE(bp->b_vp, bp);
794                     if (error)
795                               allerror = error;
796           } else {
797                     bap = bp->b_data;
798                     bap_needs_free = false;
799           }
800 
801           /*
802            * Recursively free totally unused blocks.
803            */
804           for (i = LFS_NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
805               i--, nlbn += factor) {
806                     nb = lfs_iblock_get(fs, bap, i);
807                     if (nb == 0)
808                               continue;
809                     if (level > SINGLE) {
810                               error = lfs_indirtrunc(ip, nlbn, nb,
811                                                          (daddr_t)-1, level - 1,
812                                                          &blkcount, &rblkcount,
813                                                          lastsegp, bcp);
814                               if (error)
815                                         allerror = error;
816                               blocksreleased += blkcount;
817                               real_released += rblkcount;
818                     }
819                     lfs_blkfree(fs, ip, nb, lfs_sb_getbsize(fs), lastsegp, bcp);
820                     if (lfs_iblock_get(fs, bap, i) > 0)
821                               real_released += nblocks;
822                     blocksreleased += nblocks;
823           }
824 
825           /*
826            * Recursively free last partial block.
827            */
828           if (level > SINGLE && lastbn >= 0) {
829                     last = lastbn % factor;
830                     nb = lfs_iblock_get(fs, bap, i);
831                     if (nb != 0) {
832                               error = lfs_indirtrunc(ip, nlbn, nb,
833                                                          last, level - 1, &blkcount,
834                                                          &rblkcount, lastsegp, bcp);
835                               if (error)
836                                         allerror = error;
837                               real_released += rblkcount;
838                               blocksreleased += blkcount;
839                     }
840           }
841 
842           if (bap_needs_free) {
843                     lfs_free(fs, bap, LFS_NB_IBLOCK);
844           } else {
845                     mutex_enter(&bufcache_lock);
846                     if (bp->b_oflags & BO_DELWRI) {
847                               LFS_UNLOCK_BUF(bp);
848                               lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount));
849                               wakeup(&fs->lfs_availsleep);
850                     }
851                     brelsel(bp, BC_INVAL);
852                     mutex_exit(&bufcache_lock);
853           }
854 
855           *countp = blocksreleased;
856           *rcountp = real_released;
857           return (allerror);
858 }
859 
860 /*
861  * Destroy any in core blocks past the truncation length.
862  * Inlined from vtruncbuf, so that lfs_avail could be updated.
863  * We take the seglock to prevent cleaning from occurring while we are
864  * invalidating blocks.
865  */
866 static int
lfs_vtruncbuf(struct vnode * vp,daddr_t lbn,bool catch,int slptimeo)867 lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
868 {
869           struct buf *bp, *nbp;
870           int error = 0;
871           struct lfs *fs;
872           voff_t off;
873 
874           off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
875           rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
876           error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
877           if (error)
878                     return error;
879 
880           fs = VTOI(vp)->i_lfs;
881 
882           ASSERT_SEGLOCK(fs);
883 
884           mutex_enter(&bufcache_lock);
885 restart:
886           for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
887                     nbp = LIST_NEXT(bp, b_vnbufs);
888                     if (bp->b_lblkno < lbn)
889                               continue;
890                     error = bbusy(bp, catch, slptimeo, NULL);
891                     if (error == EPASSTHROUGH)
892                               goto restart;
893                     if (error)
894                               goto exit;
895 
896                     mutex_enter(bp->b_objlock);
897                     if (bp->b_oflags & BO_DELWRI) {
898                               bp->b_oflags &= ~BO_DELWRI;
899                               lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount));
900                               wakeup(&fs->lfs_availsleep);
901                     }
902                     mutex_exit(bp->b_objlock);
903                     LFS_UNLOCK_BUF(bp);
904                     brelsel(bp, BC_INVAL | BC_VFLUSH);
905           }
906 
907           for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
908                     nbp = LIST_NEXT(bp, b_vnbufs);
909                     if (bp->b_lblkno < lbn)
910                               continue;
911                     error = bbusy(bp, catch, slptimeo, NULL);
912                     if (error == EPASSTHROUGH)
913                               goto restart;
914                     if (error)
915                               goto exit;
916 
917                     mutex_enter(bp->b_objlock);
918                     if (bp->b_oflags & BO_DELWRI) {
919                               bp->b_oflags &= ~BO_DELWRI;
920                               lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount));
921                               wakeup(&fs->lfs_availsleep);
922                     }
923                     mutex_exit(bp->b_objlock);
924                     LFS_UNLOCK_BUF(bp);
925                     brelsel(bp, BC_INVAL | BC_VFLUSH);
926           }
927 exit:
928           mutex_exit(&bufcache_lock);
929 
930           return error;
931 }
932 
933