1 /* $OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */
2
3 /*
4 * Written by Raymond Lai <ray@cyth.net>.
5 * Public domain.
6 */
7
8 #include <sys/cdefs.h>
9 #include <sys/param.h>
10 #include <sys/queue.h>
11 #include <sys/stat.h>
12 #include <sys/wait.h>
13
14 #include <ctype.h>
15 #include <err.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <getopt.h>
19 #include <limits.h>
20 #include <paths.h>
21 #include <stdbool.h>
22 #include <stdint.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27
28 #include "extern.h"
29
/* Default diff program; main() replaces it when --diff-program is given. */
static char diff_path[] = "/usr/bin/diff";

/* Default total output width, used when -w is not given. */
#define WIDTH 126
/*
 * Each column must be at least one character wide, plus three
 * characters between the columns (space, [<|>], space).
 */
#define WIDTH_MIN 5

/*
 * Bound on how many leading characters istextfile() reads while
 * looking for a NUL byte (768 characters, not 3 kilobytes).
 */
#define MAX_CHECK 768
41
/*
 * A single diff line: one row of the side-by-side output.  Either
 * column pointer may be NULL, meaning that side is empty.  Both
 * strings are released by freediff().
 */
struct diffline {
	STAILQ_ENTRY(diffline) diffentries;	/* link in the diffhead queue */
	char *left;				/* left column text, or NULL */
	char div;				/* divider shown between columns */
	char *right;				/* right column text, or NULL */
};
49
/* Forward declarations of the file-local helpers defined in this file. */
static void astrcat(char **, const char *);
static void enqueue(char *, char, char *);
static char *mktmpcpy(const char *);
static int istextfile(FILE *);
static int bindiff(FILE *, char *, FILE *, char *);
static void freediff(struct diffline *);
static void int_usage(void);
static int parsecmd(FILE *, FILE *, FILE *);
static void printa(FILE *, size_t);
static void printc(FILE *, size_t, FILE *, size_t);
static void printcol(const char *, size_t *, const size_t);
static void printd(FILE *, size_t);
static void println(const char *, const char, const char *);
static void processq(void);
static void prompt(const char *, const char *);
/* usage() exits and never returns. */
static void usage(void) __dead2;
static char *xfgets(FILE *);
67
/* Queue of diff lines filled by enqueue() and drained by processq(). */
static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead);
static size_t line_width;	/* width of a line (two columns and divider) */
static size_t width;		/* width of each column */
static size_t file1ln, file2ln;	/* line number of file1 and file2 */
static bool Iflag;		/* ignore sets matching regexp */
static bool lflag;		/* print only left column for identical lines */
static bool sflag;		/* skip identical lines */
static bool tflag;		/* expand tabs */
static int tabsize = 8;		/* tab size */
/*
 * NOTE(review): the next two are not static, presumably because other
 * translation units reach them through extern.h -- confirm.
 */
FILE *outfp;			/* file to save changes to (NULL without -o) */
const char *tmpdir;		/* TMPDIR or /tmp */
79
/*
 * Values getopt_long() returns for options that have no single-letter
 * form.  Numbering starts above CHAR_MAX so that none of them can
 * collide with a short option character.  NORMAL_OPT is not referenced
 * by longopts or by the option switch in main().
 */
enum {
	HELP_OPT = CHAR_MAX + 1,
	NORMAL_OPT,
	FCASE_SENSITIVE_OPT,
	FCASE_IGNORE_OPT,
	STRIPCR_OPT,
	TSIZE_OPT,
	DIFFPROG_OPT,
};
89
/* Long option table handed to getopt_long() in main(). */
static struct option longopts[] = {
	/* options only processed in sdiff */
	{ "suppress-common-lines",	no_argument,		NULL,	's' },
	{ "width",			required_argument,	NULL,	'w' },

	{ "output",			required_argument,	NULL,	'o' },
	{ "diff-program",		required_argument,	NULL,	DIFFPROG_OPT },

	/*
	 * Mostly options meant for diff.  A few entries below (help,
	 * tabsize, left-column, expand-tabs) are consumed by the option
	 * switch in main() itself rather than being forwarded.
	 */
	{ "ignore-file-name-case",	no_argument,		NULL,	FCASE_IGNORE_OPT },
	{ "no-ignore-file-name-case",	no_argument,		NULL,	FCASE_SENSITIVE_OPT },
	{ "strip-trailing-cr",		no_argument,		NULL,	STRIPCR_OPT },
	{ "tabsize",			required_argument,	NULL,	TSIZE_OPT },
	{ "help",			no_argument,		NULL,	HELP_OPT },
	{ "text",			no_argument,		NULL,	'a' },
	{ "ignore-blank-lines",		no_argument,		NULL,	'B' },
	{ "ignore-space-change",	no_argument,		NULL,	'b' },
	{ "minimal",			no_argument,		NULL,	'd' },
	{ "ignore-tab-expansion",	no_argument,		NULL,	'E' },
	{ "ignore-matching-lines",	required_argument,	NULL,	'I' },
	{ "ignore-case",		no_argument,		NULL,	'i' },
	{ "left-column",		no_argument,		NULL,	'l' },
	{ "expand-tabs",		no_argument,		NULL,	't' },
	{ "speed-large-files",		no_argument,		NULL,	'H' },
	{ "ignore-all-space",		no_argument,		NULL,	'W' },

	/* Terminating all-zero entry. */
	{ NULL,				0,			NULL,	'\0'}
};
118
/*
 * Text printed for --help.  main() prints each element followed by a
 * newline and stops at the NULL terminator.
 *
 * The long names shown here must match the longopts table: -b is
 * --ignore-space-change (it was previously mislabelled as
 * --ignore-trailing-cr, which is not an option at all), and the
 * synopsis lists -H just like usage() does.
 */
static const char *help_msg[] = {
	"usage: sdiff [-abdilstHW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
	"-l, --left-column: only print the left column for identical lines.",
	"-o OUTFILE, --output=OUTFILE: interactively merge file1 and file2 into outfile.",
	"-s, --suppress-common-lines: skip identical lines.",
	"-w WIDTH, --width=WIDTH: print a maximum of WIDTH characters on each line.",
	"",
	"Options passed to diff(1) are:",
	"\t-a, --text: treat file1 and file2 as text files.",
	"\t-b, --ignore-space-change: ignore trailing blank spaces.",
	"\t-d, --minimal: minimize diff size.",
	"\t-I RE, --ignore-matching-lines=RE: ignore changes whose line matches RE.",
	"\t-i, --ignore-case: do a case-insensitive comparison.",
	"\t-t, --expand-tabs: expand tabs to spaces.",
	"\t-W, --ignore-all-space: ignore all whitespace.",
	"\t--speed-large-files: assume large file with scattered changes.",
	"\t--strip-trailing-cr: strip trailing carriage return.",
	"\t--ignore-file-name-case: ignore case of file names.",
	"\t--no-ignore-file-name-case: do not ignore file name case",
	"\t--tabsize NUM: change size of tabs (default 8.)",

	NULL,
};
142
143 /*
144 * Create temporary file if source_file is not a regular file.
145 * Returns temporary file name if one was malloced, NULL if unnecessary.
146 */
147 static char *
mktmpcpy(const char * source_file)148 mktmpcpy(const char *source_file)
149 {
150 struct stat sb;
151 ssize_t rcount;
152 int ifd, ofd;
153 u_char buf[BUFSIZ];
154 char *target_file;
155
156 /* Open input and output. */
157 ifd = open(source_file, O_RDONLY, 0);
158 /* File was opened successfully. */
159 if (ifd != -1) {
160 if (fstat(ifd, &sb) == -1)
161 err(2, "error getting file status from %s", source_file);
162
163 /* Regular file. */
164 if (S_ISREG(sb.st_mode)) {
165 close(ifd);
166 return (NULL);
167 }
168 } else {
169 /* If ``-'' does not exist the user meant stdin. */
170 if (errno == ENOENT && strcmp(source_file, "-") == 0)
171 ifd = STDIN_FILENO;
172 else
173 err(2, "error opening %s", source_file);
174 }
175
176 /* Not a regular file, so copy input into temporary file. */
177 if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
178 err(2, "asprintf");
179 if ((ofd = mkstemp(target_file)) == -1) {
180 warn("error opening %s", target_file);
181 goto FAIL;
182 }
183 while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
184 rcount != 0) {
185 ssize_t wcount;
186
187 wcount = write(ofd, buf, (size_t)rcount);
188 if (-1 == wcount || rcount != wcount) {
189 warn("error writing to %s", target_file);
190 goto FAIL;
191 }
192 }
193 if (rcount == -1) {
194 warn("error reading from %s", source_file);
195 goto FAIL;
196 }
197
198 close(ifd);
199 close(ofd);
200
201 return (target_file);
202
203 FAIL:
204 unlink(target_file);
205 exit(2);
206 }
207
208 int
main(int argc,char ** argv)209 main(int argc, char **argv)
210 {
211 FILE *diffpipe, *file1, *file2;
212 size_t diffargc = 0, flagc = 0, wval = WIDTH;
213 int ch, fd[2], i, ret, status;
214 pid_t pid;
215 const char *errstr, *outfile = NULL;
216 char **diffargv, *diffprog = diff_path, *flagv;
217 char *filename1, *filename2, *tmp1, *tmp2, *s1, *s2;
218 char I_arg[] = "-I";
219 char speed_lf[] = "--speed-large-files";
220
221 /*
222 * Process diff flags.
223 */
224 /*
225 * Allocate memory for diff arguments and NULL.
226 * Each flag has at most one argument, so doubling argc gives an
227 * upper limit of how many diff args can be passed. argv[0],
228 * file1, and file2 won't have arguments so doubling them will
229 * waste some memory; however we need an extra space for the
230 * NULL at the end, so it sort of works out.
231 */
232 if ((diffargv = calloc(argc, sizeof(char *) * 2)) == NULL)
233 err(2, NULL);
234
235 /* Add first argument, the program name. */
236 diffargv[diffargc++] = diffprog;
237
238 /* create a dynamic string for merging single-character options */
239 if ((flagv = malloc(flagc + 2)) == NULL)
240 err(2, NULL);
241 flagv[flagc] = '-';
242 flagv[flagc + 1] = '\0';
243 diffargv[diffargc++] = flagv;
244
245 while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
246 longopts, NULL)) != -1) {
247 switch (ch) {
248 /* only compatible --long-name-form with diff */
249 case FCASE_IGNORE_OPT:
250 case FCASE_SENSITIVE_OPT:
251 case STRIPCR_OPT:
252 case 'S':
253 break;
254 /* combine no-arg single switches */
255 case 'a':
256 case 'B':
257 case 'b':
258 case 'd':
259 case 'E':
260 case 'i':
261 case 'W':
262 flagc++;
263 flagv = realloc(flagv, flagc + 2);
264 /*
265 * In diff, the 'W' option is 'w' and the 'w' is 'W'.
266 */
267 flagv[flagc] = ch == 'W' ? 'w' : ch;
268 flagv[flagc + 1] = '\0';
269 break;
270 case 'H':
271 diffargv[diffargc++] = speed_lf;
272 break;
273 case DIFFPROG_OPT:
274 diffargv[0] = diffprog = optarg;
275 break;
276 case 'I':
277 Iflag = true;
278 diffargv[diffargc++] = I_arg;
279 diffargv[diffargc++] = optarg;
280 break;
281 case 'l':
282 lflag = true;
283 break;
284 case 'o':
285 outfile = optarg;
286 break;
287 case 's':
288 sflag = true;
289 break;
290 case 't':
291 tflag = true;
292 break;
293 case 'w':
294 wval = strtonum(optarg, WIDTH_MIN,
295 INT_MAX, &errstr);
296 if (errstr)
297 errx(2, "width is %s: %s", errstr, optarg);
298 break;
299 case HELP_OPT:
300 for (i = 0; help_msg[i] != NULL; i++)
301 printf("%s\n", help_msg[i]);
302 exit(0);
303 break;
304 case TSIZE_OPT:
305 tabsize = strtonum(optarg, 1, INT_MAX, &errstr);
306 if (errstr)
307 errx(2, "tabsize is %s: %s", errstr, optarg);
308 break;
309 default:
310 usage();
311 break;
312 }
313 }
314
315 /* no single-character options were used */
316 if (flagc == 0) {
317 memmove(diffargv + 1, diffargv + 2,
318 sizeof(char *) * (diffargc - 2));
319 diffargc--;
320 free(flagv);
321 }
322
323 argc -= optind;
324 argv += optind;
325
326 if (argc != 2)
327 usage();
328
329 if (outfile && (outfp = fopen(outfile, "w")) == NULL)
330 err(2, "could not open: %s", optarg);
331
332 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
333 tmpdir = _PATH_TMP;
334
335 filename1 = argv[0];
336 filename2 = argv[1];
337
338 /*
339 * Create temporary files for diff and sdiff to share if file1
340 * or file2 are not regular files. This allows sdiff and diff
341 * to read the same inputs if one or both inputs are stdin.
342 *
343 * If any temporary files were created, their names would be
344 * saved in tmp1 or tmp2. tmp1 should never equal tmp2.
345 */
346 tmp1 = tmp2 = NULL;
347 /* file1 and file2 are the same, so copy to same temp file. */
348 if (strcmp(filename1, filename2) == 0) {
349 if ((tmp1 = mktmpcpy(filename1)))
350 filename1 = filename2 = tmp1;
351 /* Copy file1 and file2 into separate temp files. */
352 } else {
353 if ((tmp1 = mktmpcpy(filename1)))
354 filename1 = tmp1;
355 if ((tmp2 = mktmpcpy(filename2)))
356 filename2 = tmp2;
357 }
358
359 if ((file1 = fopen(filename1, "r")) == NULL)
360 err(2, "could not open %s", filename1);
361 if ((file2 = fopen(filename2, "r")) == NULL)
362 err(2, "could not open %s", filename2);
363 if (!istextfile(file1) || !istextfile(file2)) {
364 ret = bindiff(file1, filename1, file2, filename2);
365 goto done;
366 }
367
368 diffargv[diffargc++] = filename1;
369 diffargv[diffargc++] = filename2;
370 /* Add NULL to end of array to indicate end of array. */
371 diffargv[diffargc++] = NULL;
372
373 /* Subtract column divider and divide by two. */
374 width = (wval - 3) / 2;
375 /* Make sure line_width can fit in size_t. */
376 if (width > (SIZE_MAX - 3) / 2)
377 errx(2, "width is too large: %zu", width);
378 line_width = width * 2 + 3;
379
380 if (pipe(fd))
381 err(2, "pipe");
382
383 if ((pid = fork()) < 0)
384 err(1, "fork()");
385 if (pid == 0) {
386 /* child */
387 /* We don't read from the pipe. */
388 close(fd[0]);
389 if (dup2(fd[1], STDOUT_FILENO) != STDOUT_FILENO)
390 _exit(2);
391 /* Free unused descriptor. */
392 close(fd[1]);
393 execvp(diffprog, diffargv);
394 _exit(2);
395 }
396
397 /* parent */
398 /* We don't write to the pipe. */
399 close(fd[1]);
400
401 /* Open pipe to diff command. */
402 if ((diffpipe = fdopen(fd[0], "r")) == NULL)
403 err(2, "could not open diff pipe");
404
405 /* Line numbers start at one. */
406 file1ln = file2ln = 1;
407
408 /* Read and parse diff output. */
409 while (parsecmd(diffpipe, file1, file2) != EOF)
410 ;
411 fclose(diffpipe);
412
413 /* Wait for diff to exit. */
414 if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
415 WEXITSTATUS(status) >= 2)
416 errx(2, "diff exited abnormally");
417 ret = WEXITSTATUS(status);
418
419 /* No more diffs, so enqueue common lines. */
420 if (lflag)
421 while ((s1 = xfgets(file1)))
422 enqueue(s1, ' ', NULL);
423 else
424 for (;;) {
425 s1 = xfgets(file1);
426 s2 = xfgets(file2);
427 if (s1 || s2)
428 enqueue(s1, ' ', s2);
429 else
430 break;
431 }
432 fclose(file1);
433 fclose(file2);
434 /* Process unmodified lines. */
435 processq();
436
437 done:
438 /* Delete and free unneeded temporary files. */
439 if (tmp1 != NULL) {
440 if (unlink(tmp1) != 0)
441 warn("failed to delete %s", tmp1);
442 free(tmp1);
443 }
444 if (tmp2 != NULL) {
445 if (unlink(tmp2) != 0)
446 warn("failed to delete %s", tmp2);
447 free(tmp2);
448 }
449
450 /* Return diff exit status. */
451 free(diffargv);
452 if (flagc > 0)
453 free(flagv);
454 return (ret);
455 }
456
/*
 * Byte-wise comparison used when sdiff detects a binary file as input.
 *
 * Reads f1 and f2 in lock step from their current positions until the
 * bytes differ or either stream ends.  fn1 and fn2 are the names used
 * in messages.
 *
 * Returns 0 if both streams ended together without a mismatch, 1 if
 * they differ (after printing "Binary files ... differ"), and 2 if a
 * read error was flagged on either stream.
 */
static int
bindiff(FILE *f1, char *fn1, FILE *f2, char *fn2)
{
	int a, b;

	flockfile(f1);
	flockfile(f2);
	for (;;) {
		a = getc_unlocked(f1);
		b = getc_unlocked(f2);
		if (a == EOF || b == EOF || a != b)
			break;
	}
	funlockfile(f2);
	funlockfile(f1);

	if (ferror(f1)) {
		warn("%s", fn1);
		return (2);
	}
	if (ferror(f2)) {
		warn("%s", fn2);
		return (2);
	}

	/* Identical only when both sides hit end-of-file together. */
	if (a == EOF && b == EOF)
		return (0);

	printf("Binary files %s and %s differ\n", fn1, fn2);
	return (1);
}
487
488 /*
489 * Checks whether a file appears to be a text file.
490 */
491 static int
istextfile(FILE * f)492 istextfile(FILE *f)
493 {
494 int ch, i;
495
496 if (f == NULL)
497 return (1);
498 rewind(f);
499 for (i = 0; i <= MAX_CHECK; i++) {
500 ch = fgetc(f);
501 if (ch == '\0') {
502 rewind(f);
503 return (0);
504 }
505 if (ch == EOF)
506 break;
507 }
508 rewind(f);
509 return (1);
510 }
511
512 /*
513 * Prints an individual column (left or right), taking into account
514 * that tabs are variable-width. Takes a string, the current column
515 * the cursor is on the screen, and the maximum value of the column.
516 * The column value is updated as we go along.
517 */
518 static void
printcol(const char * s,size_t * col,const size_t col_max)519 printcol(const char *s, size_t *col, const size_t col_max)
520 {
521
522 for (; *s && *col < col_max; ++s) {
523 size_t new_col;
524
525 switch (*s) {
526 case '\t':
527 /*
528 * If rounding to next multiple of eight causes
529 * an integer overflow, just return.
530 */
531 if (*col > SIZE_MAX - tabsize)
532 return;
533
534 /* Round to next multiple of eight. */
535 new_col = (*col / tabsize + 1) * tabsize;
536
537 /*
538 * If printing the tab goes past the column
539 * width, don't print it and just quit.
540 */
541 if (new_col > col_max)
542 return;
543
544 if (tflag) {
545 do {
546 putchar(' ');
547 } while (++*col < new_col);
548 } else {
549 putchar(*s);
550 *col = new_col;
551 }
552 break;
553 default:
554 ++*col;
555 putchar(*s);
556 }
557 }
558 }
559
560 /*
561 * Prompts user to either choose between two strings or edit one, both,
562 * or neither.
563 */
564 static void
prompt(const char * s1,const char * s2)565 prompt(const char *s1, const char *s2)
566 {
567 char *cmd;
568
569 /* Print command prompt. */
570 putchar('%');
571
572 /* Get user input. */
573 for (; (cmd = xfgets(stdin)); free(cmd)) {
574 const char *p;
575
576 /* Skip leading whitespace. */
577 for (p = cmd; isspace((unsigned char)*p); ++p)
578 ;
579 switch (*p) {
580 case 'e':
581 /* Skip `e'. */
582 ++p;
583 if (eparse(p, s1, s2) == -1)
584 goto USAGE;
585 break;
586 case 'l':
587 case '1':
588 /* Choose left column as-is. */
589 if (s1 != NULL)
590 fprintf(outfp, "%s\n", s1);
591 /* End of command parsing. */
592 break;
593 case 'q':
594 goto QUIT;
595 case 'r':
596 case '2':
597 /* Choose right column as-is. */
598 if (s2 != NULL)
599 fprintf(outfp, "%s\n", s2);
600 /* End of command parsing. */
601 break;
602 case 's':
603 sflag = true;
604 goto PROMPT;
605 case 'v':
606 sflag = false;
607 /* FALLTHROUGH */
608 default:
609 /* Interactive usage help. */
610 USAGE:
611 int_usage();
612 PROMPT:
613 putchar('%');
614
615 /* Prompt user again. */
616 continue;
617 }
618 free(cmd);
619 return;
620 }
621
622 /*
623 * If there was no error, we received an EOF from stdin, so we
624 * should quit.
625 */
626 QUIT:
627 fclose(outfp);
628 exit(0);
629 }
630
631 /*
632 * Takes two strings, separated by a column divider. NULL strings are
633 * treated as empty columns. If the divider is the ` ' character, the
634 * second column is not printed (-l flag). In this case, the second
635 * string must be NULL. When the second column is NULL, the divider
636 * does not print the trailing space following the divider character.
637 *
638 * Takes into account that tabs can take multiple columns.
639 */
640 static void
println(const char * s1,const char divider,const char * s2)641 println(const char *s1, const char divider, const char *s2)
642 {
643 size_t col;
644
645 /* Print first column. Skips if s1 == NULL. */
646 col = 0;
647 if (s1) {
648 /* Skip angle bracket and space. */
649 printcol(s1, &col, width);
650
651 }
652
653 /* Otherwise, we pad this column up to width. */
654 for (; col < width; ++col)
655 putchar(' ');
656
657 /* Only print left column. */
658 if (divider == ' ' && !s2) {
659 printf(" (\n");
660 return;
661 }
662
663 /*
664 * Print column divider. If there is no second column, we don't
665 * need to add the space for padding.
666 */
667 if (!s2) {
668 printf(" %c\n", divider);
669 return;
670 }
671 printf(" %c ", divider);
672 col += 3;
673
674 /* Skip angle bracket and space. */
675 printcol(s2, &col, line_width);
676
677 putchar('\n');
678 }
679
/*
 * Reads a line from file and returns it as a newly allocated string
 * with the trailing newline, if any, removed.  The returned string
 * must be freed by the caller.
 *
 * Returns NULL when no more input is available.  A read error
 * terminates the program with status 2.  The stream's error and
 * end-of-file indicators are cleared before reading.
 */
static char *
xfgets(FILE *file)
{
	size_t linecap = 0;
	ssize_t l;
	char *s = NULL;

	clearerr(file);

	if ((l = getline(&s, &linecap, file)) == -1) {
		if (ferror(file))
			err(2, "error reading file");
		/*
		 * getline() may have allocated a buffer even though it
		 * returned no line; release it so hitting end-of-file
		 * does not leak.
		 */
		free(s);
		return (NULL);
	}

	/* Strip the line terminator. */
	if (l > 0 && s[l - 1] == '\n')
		s[l - 1] = '\0';

	return (s);
}
706
/*
 * Parse one ed-style command from diffpipe -- of the form
 * N[,M]{a|c|d}N[,M] -- and print lines from file1 (lines to change or
 * delete) or file2 (lines to add or change).
 *
 * Lines of file1/file2 that precede the command's ranges are read and
 * queued first, then the command itself is handled by printa(),
 * printc() or printd().  Finally the lines that follow the command in
 * diff's output are read from diffpipe and discarded.
 *
 * Returns EOF when diffpipe has no more commands, 0 otherwise.
 * Malformed input terminates the program with status 2.
 *
 * The command line is split in place by overwriting each delimiter
 * with a NUL, so once parsing has started `line' only holds the first
 * number; error messages printing `line' show just that part.
 */
static int
parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
{
	size_t file1start, file1end, file2start, file2end, n;
	/* ed command line and pointer to characters in line */
	char *line, *p, *q;
	const char *errstr;
	char c, cmd;

	/* Read ed command. */
	if (!(line = xfgets(diffpipe)))
		return (EOF);

	p = line;
	/* Go to character after line number. */
	while (isdigit((unsigned char)*p))
		++p;
	/* Remember the delimiter, then terminate the number in place. */
	c = *p;
	*p++ = 0;
	file1start = strtonum(line, 0, INT_MAX, &errstr);
	if (errstr)
		errx(2, "file1 start is %s: %s", errstr, line);

	/* A range is specified for file1. */
	if (c == ',') {
		q = p;
		/* Go to character after file1end. */
		while (isdigit((unsigned char)*p))
			++p;
		c = *p;
		*p++ = 0;
		file1end = strtonum(q, 0, INT_MAX, &errstr);
		if (errstr)
			errx(2, "file1 end is %s: %s", errstr, line);
		if (file1start > file1end)
			errx(2, "invalid line range in file1: %s", line);
	} else
		file1end = file1start;

	/* The delimiter after the file1 range is the command letter. */
	cmd = c;
	/* Check that cmd is valid. */
	if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
		errx(2, "ed command not recognized: %c: %s", cmd, line);

	q = p;
	/* Go to character after line number. */
	while (isdigit((unsigned char)*p))
		++p;
	c = *p;
	*p++ = 0;
	file2start = strtonum(q, 0, INT_MAX, &errstr);
	if (errstr)
		errx(2, "file2 start is %s: %s", errstr, line);

	/*
	 * There should either be a comma signifying a second line
	 * number or the line should just end here.
	 */
	if (c != ',' && c != '\0')
		errx(2, "invalid line range in file2: %c: %s", c, line);

	if (c == ',') {

		/* The rest of the line is the end of the file2 range. */
		file2end = strtonum(p, 0, INT_MAX, &errstr);
		if (errstr)
			errx(2, "file2 end is %s: %s", errstr, line);
		if (file2start >= file2end)
			errx(2, "invalid line range in file2: %s", line);
	} else
		file2end = file2start;

	/* Appends happen _after_ stated line. */
	if (cmd == 'a') {
		if (file1start != file1end)
			errx(2, "append cannot have a file1 range: %s",
			    line);
		if (file1start == SIZE_MAX)
			errx(2, "file1 line range too high: %s", line);
		file1start = ++file1end;
	}
	/*
	 * Deletes are handled symmetrically: the file2 number is moved
	 * to the following line, mirroring what is done to the file1
	 * number for appends.
	 */
	else if (cmd == 'd') {
		if (file2start != file2end)
			errx(2, "delete cannot have a file2 range: %s",
			    line);
		if (file2start == SIZE_MAX)
			errx(2, "file2 line range too high: %s", line);
		file2start = ++file2end;
	}

	/*
	 * Continue reading file1 and file2 until we reach line numbers
	 * specified by diff.  Should only happen with -I flag.
	 */
	for (; file1ln < file1start && file2ln < file2start;
	    ++file1ln, ++file2ln) {
		char *s1, *s2;

		if (!(s1 = xfgets(file1)))
			errx(2, "file1 shorter than expected");
		if (!(s2 = xfgets(file2)))
			errx(2, "file2 shorter than expected");

		/* If the -l flag was specified, print only left column. */
		if (lflag) {
			free(s2);
			/*
			 * XXX - If -l and -I are both specified, all
			 * unchanged or ignored lines are shown with a
			 * `(' divider.  This matches GNU sdiff, but I
			 * believe it is a bug.  Just check out:
			 * gsdiff -l -I '^$' samefile samefile.
			 */
			if (Iflag)
				enqueue(s1, '(', NULL);
			else
				enqueue(s1, ' ', NULL);
		} else
			enqueue(s1, ' ', s2);
	}
	/* Ignore deleted lines. */
	for (; file1ln < file1start; ++file1ln) {
		char *s;

		if (!(s = xfgets(file1)))
			errx(2, "file1 shorter than expected");

		enqueue(s, '(', NULL);
	}
	/* Ignore added lines. */
	for (; file2ln < file2start; ++file2ln) {
		char *s;

		if (!(s = xfgets(file2)))
			errx(2, "file2 shorter than expected");

		/* If -l flag was given, don't print right column. */
		if (lflag)
			free(s);
		else
			enqueue(NULL, ')', s);
	}

	/* Process unmodified or skipped lines. */
	processq();

	/*
	 * Handle the command itself; n becomes the number of lines
	 * after the command in diff's output that must be skipped.
	 */
	switch (cmd) {
	case 'a':
		printa(file2, file2end);
		n = file2end - file2start + 1;
		break;
	case 'c':
		printc(file1, file1end, file2, file2end);
		/* file1 lines, one extra line in between, file2 lines. */
		n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
		break;
	case 'd':
		printd(file1, file1end);
		n = file1end - file1start + 1;
		break;
	default:
		errx(2, "invalid diff command: %c: %s", cmd, line);
	}
	free(line);

	/* Skip to next ed line. */
	while (n--) {
		if (!(line = xfgets(diffpipe)))
			errx(2, "diff ended early");
		free(line);
	}

	return (0);
}
888
889 /*
890 * Queues up a diff line.
891 */
892 static void
enqueue(char * left,char divider,char * right)893 enqueue(char *left, char divider, char *right)
894 {
895 struct diffline *diffp;
896
897 if (!(diffp = malloc(sizeof(struct diffline))))
898 err(2, "enqueue");
899 diffp->left = left;
900 diffp->div = divider;
901 diffp->right = right;
902 STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries);
903 }
904
905 /*
906 * Free a diffline structure and its elements.
907 */
908 static void
freediff(struct diffline * diffp)909 freediff(struct diffline *diffp)
910 {
911
912 free(diffp->left);
913 free(diffp->right);
914 free(diffp);
915 }
916
/*
 * Append `append' to the heap string *s, separated by a newline.
 *
 * If *s is NULL it simply becomes a copy of append (no newline is
 * added).  Otherwise *s is reallocated and may move.  The length of
 * the most recently handled string is remembered between calls, so
 * repeated appends to the same string do not rescan it and each call
 * costs O(strlen(append)).  Exits with status 2 when out of memory.
 */
static void
astrcat(char **s, const char *append)
{
	/* String handled by the previous call, and its length. */
	static const char *cached_str = NULL;
	static size_t cached_len = 0;
	const size_t add_len = strlen(append);
	size_t total;
	char *grown;

	/* Nothing to append to yet: start the string off as a copy. */
	if (*s == NULL) {
		grown = malloc(add_len + 1);
		if (grown == NULL)
			err(2, "astrcat");
		memcpy(grown, append, add_len + 1);
		*s = grown;

		/* Keep track of string. */
		cached_len = add_len;
		cached_str = grown;
		return;
	}

	/* A different string than last time: measure it afresh. */
	if (cached_str != *s) {
		cached_len = strlen(*s);
		cached_str = *s;
	}

	/* Old text + '\n' + appended text + '\0'. */
	total = cached_len + 1 + add_len + 1;

	grown = realloc(*s, total);
	if (grown == NULL)
		err(2, "astrcat");
	*s = grown;

	/* Write the separator and the new text at the old end. */
	grown[cached_len] = '\n';
	memcpy(grown + cached_len + 1, append, add_len + 1);

	/* The string is now exactly total - 1 characters long. */
	cached_len = total - 1;
	cached_str = grown;
}
981
982 /*
983 * Process diff set queue, printing, prompting, and saving each diff
984 * line stored in queue.
985 */
986 static void
processq(void)987 processq(void)
988 {
989 struct diffline *diffp;
990 char divc, *left, *right;
991
992 /* Don't process empty queue. */
993 if (STAILQ_EMPTY(&diffhead))
994 return;
995
996 /* Remember the divider. */
997 divc = STAILQ_FIRST(&diffhead)->div;
998
999 left = NULL;
1000 right = NULL;
1001 /*
1002 * Go through set of diffs, concatenating each line in left or
1003 * right column into two long strings, `left' and `right'.
1004 */
1005 STAILQ_FOREACH(diffp, &diffhead, diffentries) {
1006 /*
1007 * Print changed lines if -s was given,
1008 * print all lines if -s was not given.
1009 */
1010 if (!sflag || diffp->div == '|' || diffp->div == '<' ||
1011 diffp->div == '>')
1012 println(diffp->left, diffp->div, diffp->right);
1013
1014 /* Append new lines to diff set. */
1015 if (diffp->left)
1016 astrcat(&left, diffp->left);
1017 if (diffp->right)
1018 astrcat(&right, diffp->right);
1019 }
1020
1021 /* Empty queue and free each diff line and its elements. */
1022 while (!STAILQ_EMPTY(&diffhead)) {
1023 diffp = STAILQ_FIRST(&diffhead);
1024 STAILQ_REMOVE_HEAD(&diffhead, diffentries);
1025 freediff(diffp);
1026 }
1027
1028 /* Write to outfp, prompting user if lines are different. */
1029 if (outfp)
1030 switch (divc) {
1031 case ' ': case '(': case ')':
1032 fprintf(outfp, "%s\n", left);
1033 break;
1034 case '|': case '<': case '>':
1035 prompt(left, right);
1036 break;
1037 default:
1038 errx(2, "invalid divider: %c", divc);
1039 }
1040
1041 /* Free left and right. */
1042 free(left);
1043 free(right);
1044 }
1045
1046 /*
1047 * Print lines following an (a)ppend command.
1048 */
1049 static void
printa(FILE * file,size_t line2)1050 printa(FILE *file, size_t line2)
1051 {
1052 char *line;
1053
1054 for (; file2ln <= line2; ++file2ln) {
1055 if (!(line = xfgets(file)))
1056 errx(2, "append ended early");
1057 enqueue(NULL, '>', line);
1058 }
1059 processq();
1060 }
1061
1062 /*
1063 * Print lines following a (c)hange command, from file1ln to file1end
1064 * and from file2ln to file2end.
1065 */
1066 static void
printc(FILE * file1,size_t file1end,FILE * file2,size_t file2end)1067 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
1068 {
1069 struct fileline {
1070 STAILQ_ENTRY(fileline) fileentries;
1071 char *line;
1072 };
1073 STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead);
1074
1075 /* Read lines to be deleted. */
1076 for (; file1ln <= file1end; ++file1ln) {
1077 struct fileline *linep;
1078 char *line1;
1079
1080 /* Read lines from both. */
1081 if (!(line1 = xfgets(file1)))
1082 errx(2, "error reading file1 in delete in change");
1083
1084 /* Add to delete queue. */
1085 if (!(linep = malloc(sizeof(struct fileline))))
1086 err(2, "printc");
1087 linep->line = line1;
1088 STAILQ_INSERT_TAIL(&delqhead, linep, fileentries);
1089 }
1090
1091 /* Process changed lines.. */
1092 for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end;
1093 ++file2ln) {
1094 struct fileline *del;
1095 char *add;
1096
1097 /* Get add line. */
1098 if (!(add = xfgets(file2)))
1099 errx(2, "error reading add in change");
1100
1101 del = STAILQ_FIRST(&delqhead);
1102 enqueue(del->line, '|', add);
1103 STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1104 /*
1105 * Free fileline structure but not its elements since
1106 * they are queued up.
1107 */
1108 free(del);
1109 }
1110 processq();
1111
1112 /* Process remaining lines to add. */
1113 for (; file2ln <= file2end; ++file2ln) {
1114 char *add;
1115
1116 /* Get add line. */
1117 if (!(add = xfgets(file2)))
1118 errx(2, "error reading add in change");
1119
1120 enqueue(NULL, '>', add);
1121 }
1122 processq();
1123
1124 /* Process remaining lines to delete. */
1125 while (!STAILQ_EMPTY(&delqhead)) {
1126 struct fileline *filep;
1127
1128 filep = STAILQ_FIRST(&delqhead);
1129 enqueue(filep->line, '<', NULL);
1130 STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1131 free(filep);
1132 }
1133 processq();
1134 }
1135
1136 /*
1137 * Print deleted lines from file, from file1ln to file1end.
1138 */
1139 static void
printd(FILE * file1,size_t file1end)1140 printd(FILE *file1, size_t file1end)
1141 {
1142 char *line1;
1143
1144 /* Print out lines file1ln to line2. */
1145 for (; file1ln <= file1end; ++file1ln) {
1146 if (!(line1 = xfgets(file1)))
1147 errx(2, "file1 ended early in delete");
1148 enqueue(line1, '<', NULL);
1149 }
1150 processq();
1151 }
1152
/*
 * Print the list of interactive-mode commands to stdout.
 */
static void
int_usage(void)
{
	static const char commands[] =
	    "e:\tedit blank diff\n"
	    "eb:\tedit both diffs concatenated\n"
	    "el:\tedit left diff\n"
	    "er:\tedit right diff\n"
	    "l | 1:\tchoose left diff\n"
	    "r | 2:\tchoose right diff\n"
	    "s:\tsilent mode--don't print identical lines\n"
	    "v:\tverbose mode--print identical lines\n"
	    "q:\tquit";

	/* The text has no final newline of its own, so add one. */
	fputs(commands, stdout);
	putchar('\n');
}
1170
/*
 * Print the command-line synopsis to stderr and exit with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: sdiff [-abdilstHW] [-I regexp] [-o outfile] [-w width] file1"
	    " file2\n";

	fputs(synopsis, stderr);
	exit(2);
}
1180