1 /** $MirOS: src/usr.bin/col/col.c,v 1.13 2009/07/03 18:15:18 tg Exp $ */
2 /* $OpenBSD: col.c,v 1.9 2003/06/10 22:20:45 deraadt Exp $ */
3 /* $NetBSD: col.c,v 1.7 1995/09/02 05:48:50 jtc Exp $ */
4
5 /*-
6 * Copyright (c) 2007, 2008, 2009
7 * Thorsten Glaser <tg@mirbsd.org>
8 * Copyright (c) 1990, 1993, 1994
9 * The Regents of the University of California. All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * Michael Rendell of the Memorial University of Newfoundland.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
#include <sys/cdefs.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
50
51 __COPYRIGHT("@(#) Copyright (c) 1990, 1993, 1994\n\
52 The Regents of the University of California. All rights reserved.\n");
53 __SCCSID("@(#)col.c 8.5 (Berkeley) 5/4/95");
54 __RCSID("$MirOS: src/usr.bin/col/col.c,v 1.13 2009/07/03 18:15:18 tg Exp $");
55
/* Control characters recognised in the input stream. */
#define BS	'\b'	/* backspace: step one cell to the left */
#define TAB	'\t'	/* horizontal tab: advance to next multiple of 8 */
#define SPACE	' '	/* blank: advance one column */
#define NL	'\n'	/* newline: advance one full line, column 0 */
#define CR	'\r'	/* carriage return: back to column 0 */
#define ESC	'\033'	/* escape: introduces one of RLF/RHLF/FHLF */
#define SI	'\017'	/* shift in: select the normal character set */
#define SO	'\016'	/* shift out: select the alternate character set */
#define VT	'\013'	/* vertical tab: treated as a reverse line feed */

/* Second byte of the escape sequences handled after an ESC. */
#define RLF	'\007'	/* ESC-07: reverse (full) line feed */
#define RHLF	'\010'	/* ESC-010: reverse half-line feed */
#define FHLF	'\011'	/* ESC-011: forward half-line feed */

/* Extra half-lines accumulated beyond the limit before a flush happens. */
#define BUFFER_MARGIN	32
71
/* Identifier of the character set a cell was written in. */
typedef char CSET;

/*
 * One stored character cell of a buffered line.
 *
 * c_column is an int because it is assigned from the int column counter
 * of the reader and later used as an array index when a line is sorted;
 * a narrower type would truncate (and possibly turn negative) on very
 * long lines.
 */
typedef struct char_str {
#define CS_NORMAL	1
#define CS_ALTERNATE	2
	int	c_column;	/* column character is in */
	CSET	c_set;		/* character set (currently only 2) */
	int8_t	c_width;	/* width of character, in columns */
	wchar_t	c_char;		/* character in question */
} CHAR;
82
83 typedef struct line_str LINE;
84 struct line_str {
85 CHAR *l_line; /* characters on the line */
86 LINE *l_prev; /* previous line */
87 LINE *l_next; /* next line */
88 int l_lsize; /* allocated sizeof l_line */
89 int l_line_len; /* strlen(l_line) */
90 int l_needs_sort; /* set if chars went in out of order */
91 int l_max_col; /* max column in the line */
92 };
93
94 static LINE *alloc_line(void);
95 static void dowarn(int);
96 static void flush_line(LINE *);
97 static void flush_lines(int);
98 static void flush_blanks(void);
99 static void free_line(LINE *);
100 static __dead void usage(void);
101 static void *xmalloc(void *, size_t);
102
103 #ifdef COL_SLAVE
104 int do_col(int, int); /* -b (0/1), -f (0/1) */
105 #endif
106
107 static CSET last_set; /* char_set of last char printed */
108 static LINE *lines;
109 static int compress_spaces; /* if doing space -> tab conversion */
110 static int fine; /* if 'fine' resolution (half lines) */
111 static int max_bufd_lines; /* max # lines to keep in memory */
112 static int nblank_lines; /* # blanks after last flushed line */
113 static int no_backspaces; /* if not to output any backspaces */
114
/*
 * Write one wide character to stdout and terminate the program via
 * err(1, "stdout") if that fails.  Wrapped in do { } while (0) so that
 * "PUTC(x);" is exactly one statement and is safe in if/else bodies.
 */
#define PUTC(ch) do {						\
	if (putwchar(ch) == WEOF)				\
		err(1, "stdout");				\
} while (0)
118
119 int
120 #ifdef COL_SLAVE
do_col(int ex_b,int ex_f)121 do_col(int ex_b, int ex_f)
122 #else
123 main(int argc, char *argv[])
124 #endif
125 {
126 wint_t ch;
127 CHAR *c;
128 CSET cur_set; /* current character set */
129 LINE *l; /* current line */
130 int extra_lines; /* # of lines above first line */
131 int cur_col; /* current column */
132 int cur_line; /* line number of current position */
133 int max_line; /* max value of cur_line */
134 int this_line; /* line l points to */
135 int this_dwc; /* ever had a non-1-width wchar? */
136 int lastc_col; /* last column (for combining) */
137 int nflushd_lines; /* number of lines that were flushed */
138 int adjust, warned;
139 #ifndef COL_SLAVE
140 int opt;
141 #endif
142
143 max_bufd_lines = 256;
144 compress_spaces = 1; /* compress spaces into tabs */
145
146 #ifdef COL_SLAVE
147 compress_spaces = 0;
148 no_backspaces = ex_b;
149 fine = ex_f;
150 #else
151 #ifndef __MirBSD__
152 setlocale(LC_ALL, "");
153 #endif
154
155 while ((opt = getopt(argc, argv, "bfhl:x")) != -1)
156 switch (opt) {
157 case 'b': /* do not output backspaces */
158 no_backspaces = 1;
159 break;
160 case 'f': /* allow half forward line feeds */
161 fine = 1;
162 break;
163 case 'h': /* compress spaces into tabs */
164 compress_spaces = 1;
165 break;
166 case 'l': /* buffered line count */
167 if ((max_bufd_lines = atoi(optarg)) <= 0) {
168 fprintf(stderr,
169 "col: bad -l argument %s.\n", optarg);
170 return (1);
171 }
172 break;
173 case 'x': /* do not compress spaces into tabs */
174 compress_spaces = 0;
175 break;
176 case '?':
177 default:
178 usage();
179 }
180
181 if (optind != argc)
182 usage();
183 #endif
184
185 /* this value is in half lines */
186 max_bufd_lines *= 2;
187
188 adjust = cur_col = extra_lines = warned = 0;
189 cur_line = max_line = nflushd_lines = this_line = this_dwc = 0;
190 cur_set = last_set = CS_NORMAL;
191 lastc_col = -1;
192 lines = l = alloc_line();
193
194 for (;;) {
195 if ((ch = getwchar()) == WEOF) {
196 if (!ferror(stdin) || (errno != EILSEQ))
197 break;
198 clearerr(stdin);
199 ch = 0xFFFD;
200 }
201 if (!iswgraph(ch) && !iswoctet(ch)) {
202 lastc_col = -1;
203 switch (ch) {
204 case BS: /* can't go back further */
205 if (cur_col == 0)
206 continue;
207 if (this_dwc && l->l_line &&
208 (cur_line == this_line - adjust)) {
209 /* we had double-width, complicated */
210 CHAR *tc = l->l_line, *tend = NULL;
211 int nchars = l->l_line_len;
212
213 while (nchars--) {
214 if (!tc->c_set)
215 continue;
216 if (cur_col == (tc->c_column +
217 tc->c_width))
218 tend = tc;
219 ++tc;
220 }
221 cur_col -= tend == NULL ? 1 :
222 tend->c_width;
223 } else
224 --cur_col;
225 continue;
226 case CR:
227 cur_col = 0;
228 continue;
229 case ESC: /* just ignore WEOF */
230 switch(getwchar()) {
231 case RLF:
232 cur_line -= 2;
233 break;
234 case RHLF:
235 cur_line--;
236 break;
237 case FHLF:
238 cur_line++;
239 if (cur_line > max_line)
240 max_line = cur_line;
241 }
242 continue;
243 case NL:
244 cur_line += 2;
245 if (cur_line > max_line)
246 max_line = cur_line;
247 cur_col = 0;
248 continue;
249 case SPACE:
250 ++cur_col;
251 continue;
252 case SI:
253 cur_set = CS_NORMAL;
254 continue;
255 case SO:
256 cur_set = CS_ALTERNATE;
257 continue;
258 case TAB: /* adjust column */
259 cur_col |= 7;
260 ++cur_col;
261 continue;
262 case VT:
263 cur_line -= 2;
264 continue;
265 }
266 continue;
267 }
268
269 /* Must stuff ch in a line - are we at the right one? */
270 if (cur_line != this_line - adjust) {
271 LINE *lnew;
272 int nmove;
273
274 adjust = 0;
275 nmove = cur_line - this_line;
276 if (!fine) {
277 /* round up to next line */
278 if (cur_line & 1) {
279 adjust = 1;
280 nmove++;
281 }
282 }
283 if (nmove < 0) {
284 for (; nmove < 0 && l->l_prev; nmove++)
285 l = l->l_prev;
286 if (nmove) {
287 if (nflushd_lines == 0) {
288 /*
289 * Allow backup past first
290 * line if nothing has been
291 * flushed yet.
292 */
293 for (; nmove < 0; nmove++) {
294 lnew = alloc_line();
295 l->l_prev = lnew;
296 lnew->l_next = l;
297 l = lines = lnew;
298 extra_lines++;
299 }
300 } else {
301 if (!warned++)
302 dowarn(cur_line);
303 cur_line -= nmove;
304 }
305 }
306 } else {
307 /* may need to allocate here */
308 for (; nmove > 0 && l->l_next; nmove--)
309 l = l->l_next;
310 for (; nmove > 0; nmove--) {
311 lnew = alloc_line();
312 lnew->l_prev = l;
313 l->l_next = lnew;
314 l = lnew;
315 }
316 }
317 this_line = cur_line + adjust;
318 this_dwc = 0;
319 lastc_col = -1;
320 nmove = this_line - nflushd_lines;
321 if (nmove >= max_bufd_lines + BUFFER_MARGIN) {
322 nflushd_lines += nmove - max_bufd_lines;
323 flush_lines(nmove - max_bufd_lines);
324 }
325 }
326 /* grow line's buffer? */
327 if (l->l_line_len + 1 >= l->l_lsize) {
328 int need;
329
330 need = l->l_lsize ? l->l_lsize * 2 : 90;
331 l->l_line = (CHAR *)xmalloc((void *) l->l_line,
332 (unsigned) need * sizeof(CHAR));
333 l->l_lsize = need;
334 }
335 c = &l->l_line[l->l_line_len++];
336 c->c_char = ch;
337 if (this_dwc && c->c_char == L'_') {
338 /* we had double-width, scan if this is \b_ */
339 CHAR *tc = l->l_line, *tend = NULL;
340 int nchars = l->l_line_len;
341
342 while (nchars--) {
343 if (!tc->c_set)
344 continue;
345 if ((tc->c_column == cur_col) && tc->c_width)
346 tend = tc;
347 ++tc;
348 }
349 c->c_width = tend == NULL ? 1 : tend->c_width;
350 } else
351 c->c_width = wcwidth(ch);
352 c->c_set = cur_set;
353 lastc_col = c->c_column = (c->c_width == 0) &&
354 (lastc_col != -1) ? lastc_col : cur_col;
355 /*
356 * If things are put in out of order, they will need sorting
357 * when it is flushed.
358 */
359 if (cur_col < l->l_max_col)
360 l->l_needs_sort = 1;
361 else
362 l->l_max_col = cur_col;
363 cur_col += c->c_width;
364 if (c->c_width != 1)
365 this_dwc = 1;
366 }
367 if (max_line == 0)
368 return (0); /* no lines, so just exit */
369
370 /* goto the last line that had a character on it */
371 for (; l->l_next; l = l->l_next)
372 this_line++;
373 flush_lines(this_line - nflushd_lines + extra_lines + 1);
374
375 /* make sure we leave things in a sane state */
376 if (last_set != CS_NORMAL)
377 PUTC(L'\017');
378
379 /* flush out the last few blank lines */
380 nblank_lines = max_line - this_line;
381 if (max_line & 1)
382 nblank_lines++;
383 else if (!nblank_lines)
384 /* missing a \n on the last line? */
385 nblank_lines = 2;
386 flush_blanks();
387 return (0);
388 }
389
390 static void
flush_lines(int nflush)391 flush_lines(int nflush)
392 {
393 LINE *l;
394
395 while (--nflush >= 0) {
396 l = lines;
397 lines = l->l_next;
398 if (l->l_line) {
399 flush_blanks();
400 flush_line(l);
401 }
402 nblank_lines++;
403 if (l->l_line)
404 free((void *)l->l_line);
405 free_line(l);
406 }
407 if (lines)
408 lines->l_prev = NULL;
409 }
410
411 /*
412 * Print a number of newline/half newlines. If fine flag is set, nblank_lines
413 * is the number of half line feeds, otherwise it is the number of whole line
414 * feeds.
415 */
416 static void
flush_blanks(void)417 flush_blanks(void)
418 {
419 int half, i, nb;
420
421 half = 0;
422 nb = nblank_lines;
423 if (nb & 1) {
424 if (fine)
425 half = 1;
426 else
427 nb++;
428 }
429 nb /= 2;
430 for (i = nb; --i >= 0;)
431 PUTC(L'\n');
432 if (half) {
433 PUTC(L'\033');
434 PUTC(L'9');
435 if (!nb)
436 PUTC(L'\r');
437 }
438 nblank_lines = 0;
439 }
440
441 /*
442 * Write a line to stdout taking care of space to tab conversion (-h flag)
443 * and character set shifts.
444 */
445 static void
flush_line(LINE * l)446 flush_line(LINE *l)
447 {
448 CHAR *c, *endc, *lastc;
449 int nchars, last_col, this_col;
450
451 last_col = 0;
452 nchars = l->l_line_len;
453
454 if (l->l_needs_sort) {
455 static CHAR *sorted;
456 static int count_size, *count, i, save, sorted_size, tot;
457
458 /*
459 * Do an O(n) sort on l->l_line by column being careful to
460 * preserve the order of characters in the same column.
461 */
462 if (l->l_lsize > sorted_size) {
463 sorted_size = l->l_lsize;
464 sorted = (CHAR *)xmalloc((void *)sorted,
465 (unsigned)sizeof(CHAR) * sorted_size);
466 }
467 if (l->l_max_col >= count_size) {
468 count_size = l->l_max_col + 1;
469 count = (int *)xmalloc((void *)count,
470 (unsigned)sizeof(int) * count_size);
471 }
472 memset((char *)count, 0, sizeof(int) * l->l_max_col + 1);
473 for (i = nchars, c = l->l_line; --i >= 0; c++)
474 count[c->c_column]++;
475
476 /*
477 * calculate running total (shifted down by 1) to use as
478 * indices into new line.
479 */
480 for (tot = 0, i = 0; i <= l->l_max_col; i++) {
481 save = count[i];
482 count[i] = tot;
483 tot += save;
484 }
485
486 for (i = nchars, c = l->l_line; --i >= 0; c++)
487 sorted[count[c->c_column]++] = *c;
488 c = sorted;
489 } else
490 c = l->l_line;
491 while (nchars > 0) {
492 this_col = c->c_column;
493 lastc = endc = c;
494 do {
495 if (endc->c_width > 0)
496 lastc = endc;
497 ++endc;
498 } while (--nchars > 0 && this_col == endc->c_column);
499
500 /* if -b only print last character */
501 if (no_backspaces)
502 c = lastc;
503
504 if (this_col > last_col) {
505 int nspace = this_col - last_col;
506
507 if (compress_spaces && nspace > 1) {
508 int ntabs;
509
510 ntabs = ((last_col % 8) + nspace) / 8;
511 if (ntabs) {
512 nspace -= (ntabs * 8) - (last_col % 8);
513 while (--ntabs >= 0)
514 PUTC(L'\t');
515 }
516 }
517 while (--nspace >= 0)
518 PUTC(L' ');
519 last_col = this_col;
520 }
521 last_col += lastc->c_width;
522
523 for (;;) {
524 if (c->c_set != last_set) {
525 switch (c->c_set) {
526 case CS_NORMAL:
527 PUTC(L'\017');
528 break;
529 case CS_ALTERNATE:
530 PUTC(L'\016');
531 }
532 last_set = c->c_set;
533 }
534 PUTC(c->c_char);
535 if (++c >= endc)
536 break;
537 if (c->c_width)
538 PUTC(L'\b');
539 }
540 }
541 }
542
543 #define NALLOC 64
544
545 static LINE *line_freelist;
546
547 static LINE *
alloc_line(void)548 alloc_line(void)
549 {
550 LINE *l;
551 int i;
552
553 if (!line_freelist) {
554 l = (LINE *)xmalloc(NULL, sizeof(LINE) * NALLOC);
555 line_freelist = l;
556 for (i = 1; i < NALLOC; i++, l++)
557 l->l_next = l + 1;
558 l->l_next = NULL;
559 }
560 l = line_freelist;
561 line_freelist = l->l_next;
562
563 memset(l, 0, sizeof(LINE));
564 return (l);
565 }
566
567 static void
free_line(LINE * l)568 free_line(LINE *l)
569 {
570
571 l->l_next = line_freelist;
572 line_freelist = l;
573 }
574
/*
 * Resize the allocation p to size bytes with realloc(); p may be NULL
 * to request a new allocation.  Never returns NULL: when realloc()
 * yields NULL the program exits through err(1, ...).
 */
static void *
xmalloc(void *p, size_t size)
{
	void *grown = realloc(p, size);

	if (grown == NULL)
		err(1, "realloc failed");
	return (grown);
}
583
584 static __dead void
usage(void)585 usage(void)
586 {
587 fprintf(stderr, "usage: col [-bfhx] [-l num]\n");
588 exit(1);
589 }
590
/*
 * Warn through warnx() that a reverse line feed could not be honoured.
 * A negative line selects the "past first line" wording; any other
 * value reports that the line was already flushed.
 */
static void
dowarn(int line)
{
	const char *why;

	if (line < 0)
		why = "past first line";
	else
		why = "-- line already flushed";
	warnx("warning: can't back up %s", why);
}
597