1 /*
2 * Copyright (C) 1984-2002 Mark Nudelman
3 *
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
6 *
7 * For more information about less, or for information on how to
8 * contact the author, see the README file.
9 */
10
11
12 /*
13 * Low level character input from the input file.
14 * We use these special purpose routines which optimize moving
15 * both forward and backward from the current read pointer.
16 */
17
18 #include "less.h"
19 #if MSDOS_COMPILER==WIN32C
20 #include <errno.h>
21 #include <windows.h>
22 #endif
23
/* Number of an LBUFSIZE-byte block of the input file (block 0 starts at offset 0). */
typedef POSITION BLOCKNUM;

/*
 * When nonzero, end of input is not treated as final: ch_length() reports
 * the length as unknown and fch_get() waits and retries instead of
 * returning EOI on a zero-length read.
 */
public int ignore_eoi;
27
28 /*
29 * Pool of buffers holding the most recently used blocks of the input file.
30 * The buffer pool is kept as a doubly-linked circular list,
31 * in order from most- to least-recently used.
32 * The circular list is anchored by the file state "thisfile".
33 */
34 #define LBUFSIZE 8192
35 struct buf {
36 struct buf *next, *prev;
37 struct buf *hnext, *hprev;
38 BLOCKNUM block;
39 unsigned int datasize;
40 unsigned char data[LBUFSIZE];
41 };
42
/*
 * Anchor of one hash chain.
 * It is addressed through a (struct buf *) cast, so its members must
 * mirror the leading pointer members of struct buf, in the same order.
 */
struct buflist {
	struct buf *buf_next;	/* mirrors buf.next */
	struct buf *buf_prev;	/* mirrors buf.prev */
	struct buf *buf_hnext;	/* mirrors buf.hnext: first buffer on the chain */
	struct buf *buf_hprev;	/* mirrors buf.hprev: last buffer on the chain */
};
48
49 /*
50 * The file state is maintained in a filestate structure.
51 * A pointer to the filestate is kept in the ifile structure.
52 */
53 #define BUFHASH_SIZE 64
54 struct filestate {
55 struct buf *buf_next, *buf_prev;
56 struct buflist hashtbl[BUFHASH_SIZE];
57 int file;
58 int flags;
59 POSITION fpos;
60 int nbufs;
61 BLOCKNUM block;
62 unsigned int offset;
63 POSITION fsize;
64 };
65
/*
 * Shorthand for the fields of the current file state.
 */
#define ch_bufhead thisfile->buf_next
#define ch_buftail thisfile->buf_prev
#define ch_nbufs thisfile->nbufs
#define ch_block thisfile->block
#define ch_offset thisfile->offset
#define ch_fpos thisfile->fpos
#define ch_fsize thisfile->fsize
#define ch_flags thisfile->flags
#define ch_file thisfile->file

/*
 * List sentinels: the file state (resp. a hash table slot) viewed as a
 * struct buf.  This relies on the leading pointer members having the
 * same layout in all three structures.
 */
#define END_OF_CHAIN ((struct buf *)&thisfile->buf_next)
#define END_OF_HCHAIN(h) ((struct buf *)&thisfile->hashtbl[h])
/* Hash of a block number; BUFHASH_SIZE must be a power of two. */
#define BUFHASH(blk) ((blk) & (BUFHASH_SIZE-1))

/*
 * Iterate bp over every buffer on hash chain h.
 */
#define FOR_BUFS_IN_CHAIN(h,bp) \
	for ((bp) = thisfile->hashtbl[h].buf_hnext; \
	     (bp) != END_OF_HCHAIN(h); (bp) = (bp)->hnext)

/*
 * Unlink bp from its hash chain.
 * Wrapped in do/while(0) so that it behaves as a single statement
 * (e.g. as the body of an unbraced if/else).
 */
#define HASH_RM(bp) \
	do { \
		(bp)->hnext->hprev = (bp)->hprev; \
		(bp)->hprev->hnext = (bp)->hnext; \
	} while (0)

/*
 * Insert bp at the head of hash chain h.
 * Wrapped in do/while(0) for the same reason as HASH_RM.
 */
#define HASH_INS(bp,h) \
	do { \
		(bp)->hnext = thisfile->hashtbl[h].buf_hnext; \
		(bp)->hprev = END_OF_HCHAIN(h); \
		thisfile->hashtbl[h].buf_hnext->hprev = (bp); \
		thisfile->hashtbl[h].buf_hnext = (bp); \
	} while (0)
93
/* State of the file currently being read; set by ch_init(). */
static struct filestate *thisfile;
/* Character pushed back by ch_ungetchar(), or -1 if there is none. */
static int ch_ungotchar = -1;
/* Maximum number of buffers per file (see ch_setbufspace); negative means no limit. */
static int maxbufs = -1;

/* Variables defined in other source files. */
extern int autobuf;
extern int sigs;
extern int secure;
extern IFILE curr_ifile;
#if LOGFILE
extern int logfile;
extern char *namelogfile;
#endif

/* Defined below; needed by fch_get(). */
static int ch_addbuf();
108
109
110 /*
111 * Get the character pointed to by the read pointer.
112 * ch_get() is a macro which is more efficient to call
113 * than fch_get (the function), in the usual case
114 * that the block desired is at the head of the chain.
115 */
116 #define ch_get() ((ch_block == ch_bufhead->block && \
117 ch_offset < ch_bufhead->datasize) ? \
118 ch_bufhead->data[ch_offset] : fch_get())
119 int
fch_get()120 fch_get()
121 {
122 register struct buf *bp;
123 register int n;
124 register int slept;
125 register int h;
126 POSITION pos;
127 POSITION len;
128
129 slept = FALSE;
130
131 /*
132 * Look for a buffer holding the desired block.
133 */
134 h = BUFHASH(ch_block);
135 FOR_BUFS_IN_CHAIN(h, bp)
136 {
137 if (bp->block == ch_block)
138 {
139 if (ch_offset >= bp->datasize)
140 /*
141 * Need more data in this buffer.
142 */
143 goto read_more;
144 goto found;
145 }
146 }
147 /*
148 * Block is not in a buffer.
149 * Take the least recently used buffer
150 * and read the desired block into it.
151 * If the LRU buffer has data in it,
152 * then maybe allocate a new buffer.
153 */
154 if (ch_buftail == END_OF_CHAIN || ch_buftail->block != -1)
155 {
156 /*
157 * There is no empty buffer to use.
158 * Allocate a new buffer if:
159 * 1. We can't seek on this file and -b is not in effect; or
160 * 2. We haven't allocated the max buffers for this file yet.
161 */
162 if ((autobuf && !(ch_flags & CH_CANSEEK)) ||
163 (maxbufs < 0 || ch_nbufs < maxbufs))
164 if (ch_addbuf())
165 /*
166 * Allocation failed: turn off autobuf.
167 */
168 autobuf = OPT_OFF;
169 }
170 bp = ch_buftail;
171 HASH_RM(bp); /* Remove from old hash chain. */
172 bp->block = ch_block;
173 bp->datasize = 0;
174 HASH_INS(bp, h); /* Insert into new hash chain. */
175
176 read_more:
177 pos = (ch_block * LBUFSIZE) + bp->datasize;
178 if ((len = ch_length()) != NULL_POSITION && pos >= len)
179 /*
180 * At end of file.
181 */
182 return (EOI);
183
184 if (pos != ch_fpos)
185 {
186 /*
187 * Not at the correct position: must seek.
188 * If input is a pipe, we're in trouble (can't seek on a pipe).
189 * Some data has been lost: just return "?".
190 */
191 if (!(ch_flags & CH_CANSEEK))
192 return ('?');
193 if (lseek(ch_file, (off_t)pos, 0) == BAD_LSEEK)
194 {
195 error("seek error", NULL_PARG);
196 clear_eol();
197 return (EOI);
198 }
199 ch_fpos = pos;
200 }
201
202 /*
203 * Read the block.
204 * If we read less than a full block, that's ok.
205 * We use partial block and pick up the rest next time.
206 */
207 if (ch_ungotchar != -1)
208 {
209 bp->data[bp->datasize] = ch_ungotchar;
210 n = 1;
211 ch_ungotchar = -1;
212 } else
213 {
214 n = iread(ch_file, &bp->data[bp->datasize],
215 (unsigned int)(LBUFSIZE - bp->datasize));
216 }
217
218 if (n == READ_INTR)
219 return (EOI);
220 if (n < 0)
221 {
222 #if MSDOS_COMPILER==WIN32C
223 if (errno != EPIPE)
224 #endif
225 {
226 error("read error", NULL_PARG);
227 clear_eol();
228 }
229 n = 0;
230 }
231
232 #if LOGFILE
233 /*
234 * If we have a log file, write the new data to it.
235 */
236 if (!secure && logfile >= 0 && n > 0)
237 write(logfile, (char *) &bp->data[bp->datasize], n);
238 #endif
239
240 ch_fpos += n;
241 bp->datasize += n;
242
243 /*
244 * If we have read to end of file, set ch_fsize to indicate
245 * the position of the end of file.
246 */
247 if (n == 0)
248 {
249 ch_fsize = pos;
250 if (ignore_eoi)
251 {
252 /*
253 * We are ignoring EOF.
254 * Wait a while, then try again.
255 */
256 if (!slept)
257 {
258 PARG parg;
259 parg.p_string = wait_message();
260 ierror("%s", &parg);
261 }
262 #if !MSDOS_COMPILER
263 sleep(1);
264 #else
265 #if MSDOS_COMPILER==WIN32C
266 Sleep(1000);
267 #endif
268 #endif
269 slept = TRUE;
270 }
271 if (sigs)
272 return (EOI);
273 }
274
275 found:
276 if (ch_bufhead != bp)
277 {
278 /*
279 * Move the buffer to the head of the buffer chain.
280 * This orders the buffer chain, most- to least-recently used.
281 */
282 bp->next->prev = bp->prev;
283 bp->prev->next = bp->next;
284 bp->next = ch_bufhead;
285 bp->prev = END_OF_CHAIN;
286 ch_bufhead->prev = bp;
287 ch_bufhead = bp;
288
289 /*
290 * Move to head of hash chain too.
291 */
292 HASH_RM(bp);
293 HASH_INS(bp, h);
294 }
295
296 if (ch_offset >= bp->datasize)
297 /*
298 * After all that, we still don't have enough data.
299 * Go back and try again.
300 */
301 goto read_more;
302
303 return (bp->data[ch_offset]);
304 }
305
306 /*
307 * ch_ungetchar is a rather kludgy and limited way to push
308 * a single char onto an input file descriptor.
309 */
310 public void
ch_ungetchar(c)311 ch_ungetchar(c)
312 int c;
313 {
314 if (c != -1 && ch_ungotchar != -1)
315 error("ch_ungetchar overrun", NULL_PARG);
316 ch_ungotchar = c;
317 }
318
319 #if LOGFILE
320 /*
321 * Close the logfile.
322 * If we haven't read all of standard input into it, do that now.
323 */
324 public void
end_logfile()325 end_logfile()
326 {
327 static int tried = FALSE;
328
329 if (logfile < 0)
330 return;
331 if (!tried && ch_fsize == NULL_POSITION)
332 {
333 tried = TRUE;
334 ierror("Finishing logfile", NULL_PARG);
335 while (ch_forw_get() != EOI)
336 if (ABORT_SIGS())
337 break;
338 }
339 close(logfile);
340 logfile = -1;
341 namelogfile = NULL;
342 }
343
344 /*
345 * Start a log file AFTER less has already been running.
346 * Invoked from the - command; see toggle_option().
347 * Write all the existing buffered data to the log file.
348 */
349 public void
sync_logfile()350 sync_logfile()
351 {
352 register struct buf *bp;
353 int warned = FALSE;
354 BLOCKNUM block;
355 BLOCKNUM nblocks;
356
357 nblocks = (ch_fpos + LBUFSIZE - 1) / LBUFSIZE;
358 for (block = 0; block < nblocks; block++)
359 {
360 for (bp = ch_bufhead; ; bp = bp->next)
361 {
362 if (bp == END_OF_CHAIN)
363 {
364 if (!warned)
365 {
366 error("Warning: log file is incomplete",
367 NULL_PARG);
368 warned = TRUE;
369 }
370 break;
371 }
372 if (bp->block == block)
373 {
374 write(logfile, (char *) bp->data, bp->datasize);
375 break;
376 }
377 }
378 }
379 }
380
381 #endif
382
383 /*
384 * Determine if a specific block is currently in one of the buffers.
385 */
386 static int
buffered(block)387 buffered(block)
388 BLOCKNUM block;
389 {
390 register struct buf *bp;
391 register int h;
392
393 h = BUFHASH(block);
394 FOR_BUFS_IN_CHAIN(h, bp)
395 {
396 if (bp->block == block)
397 return (TRUE);
398 }
399 return (FALSE);
400 }
401
402 /*
403 * Seek to a specified position in the file.
404 * Return 0 if successful, non-zero if can't seek there.
405 */
406 public int
ch_seek(pos)407 ch_seek(pos)
408 register POSITION pos;
409 {
410 BLOCKNUM new_block;
411 POSITION len;
412
413 len = ch_length();
414 if (pos < ch_zero() || (len != NULL_POSITION && pos > len))
415 return (1);
416
417 new_block = pos / LBUFSIZE;
418 if (!(ch_flags & CH_CANSEEK) && pos != ch_fpos && !buffered(new_block))
419 {
420 if (ch_fpos > pos)
421 return (1);
422 while (ch_fpos < pos)
423 {
424 if (ch_forw_get() == EOI)
425 return (1);
426 if (ABORT_SIGS())
427 return (1);
428 }
429 return (0);
430 }
431 /*
432 * Set read pointer.
433 */
434 ch_block = new_block;
435 ch_offset = pos % LBUFSIZE;
436 return (0);
437 }
438
439 /*
440 * Seek to the end of the file.
441 */
442 public int
ch_end_seek()443 ch_end_seek()
444 {
445 POSITION len;
446
447 if (ch_flags & CH_CANSEEK)
448 ch_fsize = filesize(ch_file);
449
450 len = ch_length();
451 if (len != NULL_POSITION)
452 return (ch_seek(len));
453
454 /*
455 * Do it the slow way: read till end of data.
456 */
457 while (ch_forw_get() != EOI)
458 if (ABORT_SIGS())
459 return (1);
460 return (0);
461 }
462
463 /*
464 * Seek to the beginning of the file, or as close to it as we can get.
465 * We may not be able to seek there if input is a pipe and the
466 * beginning of the pipe is no longer buffered.
467 */
468 public int
ch_beg_seek()469 ch_beg_seek()
470 {
471 register struct buf *bp, *firstbp;
472
473 /*
474 * Try a plain ch_seek first.
475 */
476 if (ch_seek(ch_zero()) == 0)
477 return (0);
478
479 /*
480 * Can't get to position 0.
481 * Look thru the buffers for the one closest to position 0.
482 */
483 firstbp = bp = ch_bufhead;
484 if (bp == END_OF_CHAIN)
485 return (1);
486 while ((bp = bp->next) != END_OF_CHAIN)
487 if (bp->block < firstbp->block)
488 firstbp = bp;
489 ch_block = firstbp->block;
490 ch_offset = 0;
491 return (0);
492 }
493
494 /*
495 * Return the length of the file, if known.
496 */
497 public POSITION
ch_length()498 ch_length()
499 {
500 if (ignore_eoi)
501 return (NULL_POSITION);
502 return (ch_fsize);
503 }
504
505 /*
506 * Return the current position in the file.
507 */
508 public POSITION
ch_tell()509 ch_tell()
510 {
511 return (ch_block * LBUFSIZE) + ch_offset;
512 }
513
514 /*
515 * Get the current char and post-increment the read pointer.
516 */
517 public int
ch_forw_get()518 ch_forw_get()
519 {
520 register int c;
521
522 c = ch_get();
523 if (c == EOI)
524 return (EOI);
525 if (ch_offset < LBUFSIZE-1)
526 ch_offset++;
527 else
528 {
529 ch_block ++;
530 ch_offset = 0;
531 }
532 return (c);
533 }
534
535 /*
536 * Pre-decrement the read pointer and get the new current char.
537 */
538 public int
ch_back_get()539 ch_back_get()
540 {
541 if (ch_offset > 0)
542 ch_offset --;
543 else
544 {
545 if (ch_block <= 0)
546 return (EOI);
547 if (!(ch_flags & CH_CANSEEK) && !buffered(ch_block-1))
548 return (EOI);
549 ch_block--;
550 ch_offset = LBUFSIZE-1;
551 }
552 return (ch_get());
553 }
554
555 /*
556 * Set max amount of buffer space.
557 * bufspace is in units of 1024 bytes. -1 mean no limit.
558 */
559 public void
ch_setbufspace(bufspace)560 ch_setbufspace(bufspace)
561 int bufspace;
562 {
563 if (bufspace < 0)
564 maxbufs = -1;
565 else
566 {
567 maxbufs = ((bufspace * 1024) + LBUFSIZE-1) / LBUFSIZE;
568 if (maxbufs < 1)
569 maxbufs = 1;
570 }
571 }
572
573 /*
574 * Flush (discard) any saved file state, including buffer contents.
575 */
576 public void
ch_flush()577 ch_flush()
578 {
579 register struct buf *bp;
580
581 if (!(ch_flags & CH_CANSEEK))
582 {
583 /*
584 * If input is a pipe, we don't flush buffer contents,
585 * since the contents can't be recovered.
586 */
587 ch_fsize = NULL_POSITION;
588 return;
589 }
590
591 /*
592 * Initialize all the buffers.
593 */
594 for (bp = ch_bufhead; bp != END_OF_CHAIN; bp = bp->next)
595 bp->block = -1;
596
597 /*
598 * Figure out the size of the file, if we can.
599 */
600 ch_fsize = filesize(ch_file);
601
602 /*
603 * Seek to a known position: the beginning of the file.
604 */
605 ch_fpos = 0;
606 ch_block = 0; /* ch_fpos / LBUFSIZE; */
607 ch_offset = 0; /* ch_fpos % LBUFSIZE; */
608
609 #if 1
610 /*
611 * This is a kludge to workaround a Linux kernel bug: files in
612 * /proc have a size of 0 according to fstat() but have readable
613 * data. They are sometimes, but not always, seekable.
614 * Force them to be non-seekable here.
615 */
616 if (ch_fsize == 0)
617 {
618 ch_fsize = NULL_POSITION;
619 ch_flags &= ~CH_CANSEEK;
620 }
621 #endif
622
623 if (lseek(ch_file, (off_t)0, 0) == BAD_LSEEK)
624 {
625 /*
626 * Warning only; even if the seek fails for some reason,
627 * there's a good chance we're at the beginning anyway.
628 * {{ I think this is bogus reasoning. }}
629 */
630 error("seek error to 0", NULL_PARG);
631 }
632 }
633
634 /*
635 * Allocate a new buffer.
636 * The buffer is added to the tail of the buffer chain.
637 */
638 static int
ch_addbuf()639 ch_addbuf()
640 {
641 register struct buf *bp;
642
643 /*
644 * Allocate and initialize a new buffer and link it
645 * onto the tail of the buffer list.
646 */
647 bp = (struct buf *) calloc(1, sizeof(struct buf));
648 if (bp == NULL)
649 return (1);
650 ch_nbufs++;
651 bp->block = -1;
652 bp->next = END_OF_CHAIN;
653 bp->prev = ch_buftail;
654 ch_buftail->next = bp;
655 ch_buftail = bp;
656 HASH_INS(bp, 0);
657 return (0);
658 }
659
660 /*
661 *
662 */
663 static void
init_hashtbl()664 init_hashtbl()
665 {
666 register int h;
667
668 for (h = 0; h < BUFHASH_SIZE; h++)
669 {
670 thisfile->hashtbl[h].buf_hnext = END_OF_HCHAIN(h);
671 thisfile->hashtbl[h].buf_hprev = END_OF_HCHAIN(h);
672 }
673 }
674
675 /*
676 * Delete all buffers for this file.
677 */
678 static void
ch_delbufs()679 ch_delbufs()
680 {
681 register struct buf *bp;
682
683 while (ch_bufhead != END_OF_CHAIN)
684 {
685 bp = ch_bufhead;
686 bp->next->prev = bp->prev;
687 bp->prev->next = bp->next;
688 free(bp);
689 }
690 ch_nbufs = 0;
691 init_hashtbl();
692 }
693
694 /*
695 * Is it possible to seek on a file descriptor?
696 */
697 public int
seekable(f)698 seekable(f)
699 int f;
700 {
701 #if MSDOS_COMPILER
702 extern int fd0;
703 if (f == fd0 && !isatty(fd0))
704 {
705 /*
706 * In MS-DOS, pipes are seekable. Check for
707 * standard input, and pretend it is not seekable.
708 */
709 return (0);
710 }
711 #endif
712 return (lseek(f, (off_t)1, 0) != BAD_LSEEK);
713 }
714
715 /*
716 * Initialize file state for a new file.
717 */
718 public void
ch_init(f,flags)719 ch_init(f, flags)
720 int f;
721 int flags;
722 {
723 /*
724 * See if we already have a filestate for this file.
725 */
726 thisfile = (struct filestate *) get_filestate(curr_ifile);
727 if (thisfile == NULL)
728 {
729 /*
730 * Allocate and initialize a new filestate.
731 */
732 thisfile = (struct filestate *)
733 calloc(1, sizeof(struct filestate));
734 thisfile->buf_next = thisfile->buf_prev = END_OF_CHAIN;
735 thisfile->nbufs = 0;
736 thisfile->flags = 0;
737 thisfile->fpos = 0;
738 thisfile->block = 0;
739 thisfile->offset = 0;
740 thisfile->file = -1;
741 thisfile->fsize = NULL_POSITION;
742 ch_flags = flags;
743 init_hashtbl();
744 /*
745 * Try to seek; set CH_CANSEEK if it works.
746 */
747 if ((flags & CH_CANSEEK) && !seekable(f))
748 ch_flags &= ~CH_CANSEEK;
749 set_filestate(curr_ifile, (void *) thisfile);
750 }
751 if (thisfile->file == -1)
752 thisfile->file = f;
753 ch_flush();
754 }
755
756 /*
757 * Close a filestate.
758 */
759 public void
ch_close()760 ch_close()
761 {
762 int keepstate = FALSE;
763
764 if (ch_flags & (CH_CANSEEK|CH_POPENED))
765 {
766 /*
767 * We can seek or re-open, so we don't need to keep buffers.
768 */
769 ch_delbufs();
770 } else
771 keepstate = TRUE;
772 if (!(ch_flags & CH_KEEPOPEN))
773 {
774 /*
775 * We don't need to keep the file descriptor open
776 * (because we can re-open it.)
777 * But don't really close it if it was opened via popen(),
778 * because pclose() wants to close it.
779 */
780 if (!(ch_flags & CH_POPENED))
781 close(ch_file);
782 ch_file = -1;
783 } else
784 keepstate = TRUE;
785 if (!keepstate)
786 {
787 /*
788 * We don't even need to keep the filestate structure.
789 */
790 free(thisfile);
791 thisfile = NULL;
792 set_filestate(curr_ifile, (void *) NULL);
793 }
794 }
795
796 /*
797 * Return ch_flags for the current file.
798 */
799 public int
ch_getflags()800 ch_getflags()
801 {
802 return (ch_flags);
803 }
804
805 #if 0
806 public void
807 ch_dump(struct filestate *fs)
808 {
809 struct buf *bp;
810 unsigned char *s;
811
812 if (fs == NULL)
813 {
814 printf(" --no filestate\n");
815 return;
816 }
817 printf(" file %d, flags %x, fpos %x, fsize %x, blk/off %x/%x\n",
818 fs->file, fs->flags, fs->fpos,
819 fs->fsize, fs->block, fs->offset);
820 printf(" %d bufs:\n", fs->nbufs);
821 for (bp = fs->buf_next; bp != (struct buf *)fs; bp = bp->next)
822 {
823 printf("%x: blk %x, size %x \"",
824 bp, bp->block, bp->datasize);
825 for (s = bp->data; s < bp->data + 30; s++)
826 if (*s >= ' ' && *s < 0x7F)
827 printf("%c", *s);
828 else
829 printf(".");
830 printf("\"\n");
831 }
832 }
833 #endif
834