xref: /freebsd-14-stable/usr.bin/sdiff/sdiff.c (revision 094f58809682a693918efe6aba5d33d6dae33c25)
1 /*	$OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */
2 
3 /*
4  * Written by Raymond Lai <ray@cyth.net>.
5  * Public domain.
6  */
7 
8 #include <sys/cdefs.h>
9 #include <sys/param.h>
10 #include <sys/queue.h>
11 #include <sys/stat.h>
12 #include <sys/wait.h>
13 
14 #include <ctype.h>
15 #include <err.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <getopt.h>
19 #include <limits.h>
20 #include <paths.h>
21 #include <stdbool.h>
22 #include <stdint.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27 
28 #include "extern.h"
29 
/* Default diff program to run; main() replaces it when --diff-program is given. */
static char diff_path[] = "/usr/bin/diff";

/* Default total output width, used by main() when -w is not given. */
#define WIDTH 126
/*
 * Each column must be at least one character wide, plus three
 * characters between the columns (space, [<|>], space).
 */
#define WIDTH_MIN 5

/*
 * Bound on how many leading characters istextfile() reads while looking
 * for a NUL byte.
 */
#define MAX_CHECK 768
41 
/*
 * A single diff line: the text of the left and right columns plus the
 * divider character printed between them.  Either column may be NULL.
 * freediff() releases both strings together with the structure.
 */
struct diffline {
	STAILQ_ENTRY(diffline) diffentries;
	char	*left;
	char	 div;
	char	*right;
};
49 
/* Prototypes for the file-local helpers defined later in this file. */
static void astrcat(char **, const char *);
static void enqueue(char *, char, char *);
static char *mktmpcpy(const char *);
static int istextfile(FILE *);
static int bindiff(FILE *, char *, FILE *, char *);
static void freediff(struct diffline *);
static void int_usage(void);
static int parsecmd(FILE *, FILE *, FILE *);
static void printa(FILE *, size_t);
static void printc(FILE *, size_t, FILE *, size_t);
static void printcol(const char *, size_t *, const size_t);
static void printd(FILE *, size_t);
static void println(const char *, const char, const char *);
static void processq(void);
static void prompt(const char *, const char *);
static void usage(void) __dead2;
static char *xfgets(FILE *);
67 
/* Queue of diff lines filled by enqueue() and drained by processq(). */
static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead);
static size_t line_width;	/* width of a line (two columns and divider) */
static size_t width;		/* width of each column */
static size_t file1ln, file2ln;	/* number of the next line to read from file1 and file2 */
static bool Iflag;		/* -I was given; the regexp itself is handed to diff */
static bool lflag;		/* print only left column for identical lines */
static bool sflag;		/* skip identical lines */
static bool tflag;		/* expand tabs */
static int tabsize = 8;		/* tab size */
/* The next two are not static; presumably shared through extern.h -- TODO confirm. */
FILE *outfp;			/* file to save changes to */
const char *tmpdir;		/* TMPDIR or /tmp */
79 
/*
 * getopt_long() return values for the long options that have no
 * single-character form.  They start above CHAR_MAX so they cannot
 * collide with any short option character.
 */
enum {
	HELP_OPT = CHAR_MAX + 1,
	NORMAL_OPT,
	FCASE_SENSITIVE_OPT,
	FCASE_IGNORE_OPT,
	STRIPCR_OPT,
	TSIZE_OPT,
	DIFFPROG_OPT,
};
89 
/*
 * Long options accepted by getopt_long() in main().  An entry whose value
 * is a character shares the handling of the matching short option; the
 * remaining entries use the *_OPT enumerators.
 *
 * Note that main() handles left-column, expand-tabs and tabsize itself,
 * and accepts but discards ignore-file-name-case,
 * no-ignore-file-name-case and strip-trailing-cr, even though they are
 * listed in the diff group below.
 */
static struct option longopts[] = {
	/* options only processed in sdiff */
	{ "suppress-common-lines",	no_argument,		NULL,	's' },
	{ "width",			required_argument,	NULL,	'w' },

	{ "output",			required_argument,	NULL,	'o' },
	{ "diff-program",		required_argument,	NULL,	DIFFPROG_OPT },

	/* Options processed by diff. */
	{ "ignore-file-name-case",	no_argument,		NULL,	FCASE_IGNORE_OPT },
	{ "no-ignore-file-name-case",	no_argument,		NULL,	FCASE_SENSITIVE_OPT },
	{ "strip-trailing-cr",		no_argument,		NULL,	STRIPCR_OPT },
	{ "tabsize",			required_argument,	NULL,	TSIZE_OPT },
	{ "help",			no_argument,		NULL,	HELP_OPT },
	{ "text",			no_argument,		NULL,	'a' },
	{ "ignore-blank-lines",		no_argument,		NULL,	'B' },
	{ "ignore-space-change",	no_argument,		NULL,	'b' },
	{ "minimal",			no_argument,		NULL,	'd' },
	{ "ignore-tab-expansion",	no_argument,		NULL,	'E' },
	{ "ignore-matching-lines",	required_argument,	NULL,	'I' },
	{ "ignore-case",		no_argument,		NULL,	'i' },
	{ "left-column",		no_argument,		NULL,	'l' },
	{ "expand-tabs",		no_argument,		NULL,	't' },
	{ "speed-large-files",		no_argument,		NULL,	'H' },
	{ "ignore-all-space",		no_argument,		NULL,	'W' },

	/* Terminator required by getopt_long(). */
	{ NULL,				0,			NULL,	'\0'}
};
118 
/*
 * Text printed by --help, one array element per output line.  The list
 * is NULL-terminated.  Option names must match the longopts table.
 */
static const char *help_msg[] = {
	"usage: sdiff [-abdilstHW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
	"-l, --left-column: only print the left column for identical lines.",
	"-o OUTFILE, --output=OUTFILE: interactively merge file1 and file2 into outfile.",
	"-s, --suppress-common-lines: skip identical lines.",
	"-w WIDTH, --width=WIDTH: print a maximum of WIDTH characters on each line.",
	"",
	"Options passed to diff(1) are:",
	"\t-a, --text: treat file1 and file2 as text files.",
	/* The long form of -b is ignore-space-change, not ignore-trailing-cr. */
	"\t-b, --ignore-space-change: ignore trailing blank spaces.",
	"\t-d, --minimal: minimize diff size.",
	"\t-I RE, --ignore-matching-lines=RE: ignore changes whose line matches RE.",
	"\t-i, --ignore-case: do a case-insensitive comparison.",
	"\t-t, --expand-tabs: expand tabs to spaces.",
	"\t-W, --ignore-all-space: ignore all whitespace.",
	"\t-H, --speed-large-files: assume large file with scattered changes.",
	"\t--strip-trailing-cr: strip trailing carriage return.",
	"\t--ignore-file-name-case: ignore case of file names.",
	"\t--no-ignore-file-name-case: do not ignore file name case",
	"\t--tabsize NUM: change size of tabs (default 8.)",

	NULL,
};
142 
143 /*
144  * Create temporary file if source_file is not a regular file.
145  * Returns temporary file name if one was malloced, NULL if unnecessary.
146  */
147 static char *
mktmpcpy(const char * source_file)148 mktmpcpy(const char *source_file)
149 {
150 	struct stat sb;
151 	ssize_t rcount;
152 	int ifd, ofd;
153 	u_char buf[BUFSIZ];
154 	char *target_file;
155 
156 	/* Open input and output. */
157 	ifd = open(source_file, O_RDONLY, 0);
158 	/* File was opened successfully. */
159 	if (ifd != -1) {
160 		if (fstat(ifd, &sb) == -1)
161 			err(2, "error getting file status from %s", source_file);
162 
163 		/* Regular file. */
164 		if (S_ISREG(sb.st_mode)) {
165 			close(ifd);
166 			return (NULL);
167 		}
168 	} else {
169 		/* If ``-'' does not exist the user meant stdin. */
170 		if (errno == ENOENT && strcmp(source_file, "-") == 0)
171 			ifd = STDIN_FILENO;
172 		else
173 			err(2, "error opening %s", source_file);
174 	}
175 
176 	/* Not a regular file, so copy input into temporary file. */
177 	if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
178 		err(2, "asprintf");
179 	if ((ofd = mkstemp(target_file)) == -1) {
180 		warn("error opening %s", target_file);
181 		goto FAIL;
182 	}
183 	while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
184 	    rcount != 0) {
185 		ssize_t wcount;
186 
187 		wcount = write(ofd, buf, (size_t)rcount);
188 		if (-1 == wcount || rcount != wcount) {
189 			warn("error writing to %s", target_file);
190 			goto FAIL;
191 		}
192 	}
193 	if (rcount == -1) {
194 		warn("error reading from %s", source_file);
195 		goto FAIL;
196 	}
197 
198 	close(ifd);
199 	close(ofd);
200 
201 	return (target_file);
202 
203 FAIL:
204 	unlink(target_file);
205 	exit(2);
206 }
207 
208 int
main(int argc,char ** argv)209 main(int argc, char **argv)
210 {
211 	FILE *diffpipe, *file1, *file2;
212 	size_t diffargc = 0, flagc = 0, wval = WIDTH;
213 	int ch, fd[2], i, ret, status;
214 	pid_t pid;
215 	const char *errstr, *outfile = NULL;
216 	char **diffargv, *diffprog = diff_path, *flagv;
217 	char *filename1, *filename2, *tmp1, *tmp2, *s1, *s2;
218 	char I_arg[] = "-I";
219 	char speed_lf[] = "--speed-large-files";
220 
221 	/*
222 	 * Process diff flags.
223 	 */
224 	/*
225 	 * Allocate memory for diff arguments and NULL.
226 	 * Each flag has at most one argument, so doubling argc gives an
227 	 * upper limit of how many diff args can be passed.  argv[0],
228 	 * file1, and file2 won't have arguments so doubling them will
229 	 * waste some memory; however we need an extra space for the
230 	 * NULL at the end, so it sort of works out.
231 	 */
232 	if ((diffargv = calloc(argc, sizeof(char *) * 2)) == NULL)
233 		err(2, NULL);
234 
235 	/* Add first argument, the program name. */
236 	diffargv[diffargc++] = diffprog;
237 
238 	/* create a dynamic string for merging single-character options */
239 	if ((flagv = malloc(flagc + 2)) == NULL)
240 		err(2, NULL);
241 	flagv[flagc] = '-';
242 	flagv[flagc + 1] = '\0';
243 	diffargv[diffargc++] = flagv;
244 
245 	while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
246 	    longopts, NULL)) != -1) {
247 		switch (ch) {
248 		/* only compatible --long-name-form with diff */
249 		case FCASE_IGNORE_OPT:
250 		case FCASE_SENSITIVE_OPT:
251 		case STRIPCR_OPT:
252 		case 'S':
253 		break;
254 		/* combine no-arg single switches */
255 		case 'a':
256 		case 'B':
257 		case 'b':
258 		case 'd':
259 		case 'E':
260 		case 'i':
261 		case 'W':
262 			flagc++;
263 			flagv = realloc(flagv, flagc + 2);
264 			/*
265 			 * In diff, the 'W' option is 'w' and the 'w' is 'W'.
266 			 */
267 			flagv[flagc] = ch == 'W' ? 'w' : ch;
268 			flagv[flagc + 1] = '\0';
269 			break;
270 		case 'H':
271 			diffargv[diffargc++] = speed_lf;
272 			break;
273 		case DIFFPROG_OPT:
274 			diffargv[0] = diffprog = optarg;
275 			break;
276 		case 'I':
277 			Iflag = true;
278 			diffargv[diffargc++] = I_arg;
279 			diffargv[diffargc++] = optarg;
280 			break;
281 		case 'l':
282 			lflag = true;
283 			break;
284 		case 'o':
285 			outfile = optarg;
286 			break;
287 		case 's':
288 			sflag = true;
289 			break;
290 		case 't':
291 			tflag = true;
292 			break;
293 		case 'w':
294 			wval = strtonum(optarg, WIDTH_MIN,
295 			    INT_MAX, &errstr);
296 			if (errstr)
297 				errx(2, "width is %s: %s", errstr, optarg);
298 			break;
299 		case HELP_OPT:
300 			for (i = 0; help_msg[i] != NULL; i++)
301 				printf("%s\n", help_msg[i]);
302 			exit(0);
303 			break;
304 		case TSIZE_OPT:
305 			tabsize = strtonum(optarg, 1, INT_MAX, &errstr);
306 			if (errstr)
307 				errx(2, "tabsize is %s: %s", errstr, optarg);
308 			break;
309 		default:
310 			usage();
311 			break;
312 		}
313 	}
314 
315 	/* no single-character options were used */
316 	if (flagc == 0) {
317 		memmove(diffargv + 1, diffargv + 2,
318 		    sizeof(char *) * (diffargc - 2));
319 		diffargc--;
320 		free(flagv);
321 	}
322 
323 	argc -= optind;
324 	argv += optind;
325 
326 	if (argc != 2)
327 		usage();
328 
329 	if (outfile && (outfp = fopen(outfile, "w")) == NULL)
330 		err(2, "could not open: %s", optarg);
331 
332 	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
333 		tmpdir = _PATH_TMP;
334 
335 	filename1 = argv[0];
336 	filename2 = argv[1];
337 
338 	/*
339 	 * Create temporary files for diff and sdiff to share if file1
340 	 * or file2 are not regular files.  This allows sdiff and diff
341 	 * to read the same inputs if one or both inputs are stdin.
342 	 *
343 	 * If any temporary files were created, their names would be
344 	 * saved in tmp1 or tmp2.  tmp1 should never equal tmp2.
345 	 */
346 	tmp1 = tmp2 = NULL;
347 	/* file1 and file2 are the same, so copy to same temp file. */
348 	if (strcmp(filename1, filename2) == 0) {
349 		if ((tmp1 = mktmpcpy(filename1)))
350 			filename1 = filename2 = tmp1;
351 	/* Copy file1 and file2 into separate temp files. */
352 	} else {
353 		if ((tmp1 = mktmpcpy(filename1)))
354 			filename1 = tmp1;
355 		if ((tmp2 = mktmpcpy(filename2)))
356 			filename2 = tmp2;
357 	}
358 
359 	if ((file1 = fopen(filename1, "r")) == NULL)
360 		err(2, "could not open %s", filename1);
361 	if ((file2 = fopen(filename2, "r")) == NULL)
362 		err(2, "could not open %s", filename2);
363 	if (!istextfile(file1) || !istextfile(file2)) {
364 		ret = bindiff(file1, filename1, file2, filename2);
365 		goto done;
366 	}
367 
368 	diffargv[diffargc++] = filename1;
369 	diffargv[diffargc++] = filename2;
370 	/* Add NULL to end of array to indicate end of array. */
371 	diffargv[diffargc++] = NULL;
372 
373 	/* Subtract column divider and divide by two. */
374 	width = (wval - 3) / 2;
375 	/* Make sure line_width can fit in size_t. */
376 	if (width > (SIZE_MAX - 3) / 2)
377 		errx(2, "width is too large: %zu", width);
378 	line_width = width * 2 + 3;
379 
380 	if (pipe(fd))
381 		err(2, "pipe");
382 
383 	if ((pid = fork()) < 0)
384 		err(1, "fork()");
385 	if (pid == 0) {
386 		/* child */
387 		/* We don't read from the pipe. */
388 		close(fd[0]);
389 		if (dup2(fd[1], STDOUT_FILENO) != STDOUT_FILENO)
390 			_exit(2);
391 		/* Free unused descriptor. */
392 		close(fd[1]);
393 		execvp(diffprog, diffargv);
394 		_exit(2);
395 	}
396 
397 	/* parent */
398 	/* We don't write to the pipe. */
399 	close(fd[1]);
400 
401 	/* Open pipe to diff command. */
402 	if ((diffpipe = fdopen(fd[0], "r")) == NULL)
403 		err(2, "could not open diff pipe");
404 
405 	/* Line numbers start at one. */
406 	file1ln = file2ln = 1;
407 
408 	/* Read and parse diff output. */
409 	while (parsecmd(diffpipe, file1, file2) != EOF)
410 		;
411 	fclose(diffpipe);
412 
413 	/* Wait for diff to exit. */
414 	if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
415 	    WEXITSTATUS(status) >= 2)
416 		errx(2, "diff exited abnormally");
417 	ret = WEXITSTATUS(status);
418 
419 	/* No more diffs, so enqueue common lines. */
420 	if (lflag)
421 		while ((s1 = xfgets(file1)))
422 			enqueue(s1, ' ', NULL);
423 	else
424 		for (;;) {
425 			s1 = xfgets(file1);
426 			s2 = xfgets(file2);
427 			if (s1 || s2)
428 				enqueue(s1, ' ', s2);
429 			else
430 				break;
431 		}
432 	fclose(file1);
433 	fclose(file2);
434 	/* Process unmodified lines. */
435 	processq();
436 
437 done:
438 	/* Delete and free unneeded temporary files. */
439 	if (tmp1 != NULL) {
440 		if (unlink(tmp1) != 0)
441 			warn("failed to delete %s", tmp1);
442 		free(tmp1);
443 	}
444 	if (tmp2 != NULL) {
445 		if (unlink(tmp2) != 0)
446 			warn("failed to delete %s", tmp2);
447 		free(tmp2);
448 	}
449 
450 	/* Return diff exit status. */
451 	free(diffargv);
452 	if (flagc > 0)
453 		free(flagv);
454 	return (ret);
455 }
456 
/*
 * Byte-wise comparison used when sdiff detects a binary input.  Both
 * streams are read from their current position.
 *
 * Returns 0 when the remaining contents are identical, 1 when they
 * differ (a message naming fn1 and fn2 is printed), and 2 when reading
 * either stream failed.
 */
static int
bindiff(FILE *f1, char *fn1, FILE *f2, char *fn2)
{
	int a, b;

	/* Lock once around the loop so the unlocked getc variant is safe. */
	flockfile(f1);
	flockfile(f2);
	for (;;) {
		a = getc_unlocked(f1);
		b = getc_unlocked(f2);
		/* Stop at the first mismatch or when either side runs out. */
		if (a == EOF || b == EOF || a != b)
			break;
	}
	funlockfile(f2);
	funlockfile(f1);

	/* EOF may also mean a read error; check the left file first. */
	if (ferror(f1)) {
		warn("%s", fn1);
		return (2);
	}
	if (ferror(f2)) {
		warn("%s", fn2);
		return (2);
	}

	/* Only a simultaneous EOF means the contents matched. */
	if (a == EOF && b == EOF)
		return (0);

	printf("Binary files %s and %s differ\n", fn1, fn2);
	return (1);
}
487 
488 /*
489  * Checks whether a file appears to be a text file.
490  */
491 static int
istextfile(FILE * f)492 istextfile(FILE *f)
493 {
494 	int	ch, i;
495 
496 	if (f == NULL)
497 		return (1);
498 	rewind(f);
499 	for (i = 0; i <= MAX_CHECK; i++) {
500 		ch = fgetc(f);
501 		if (ch == '\0') {
502 			rewind(f);
503 			return (0);
504 		}
505 		if (ch == EOF)
506 			break;
507 	}
508 	rewind(f);
509 	return (1);
510 }
511 
512 /*
513  * Prints an individual column (left or right), taking into account
514  * that tabs are variable-width.  Takes a string, the current column
515  * the cursor is on the screen, and the maximum value of the column.
516  * The column value is updated as we go along.
517  */
518 static void
printcol(const char * s,size_t * col,const size_t col_max)519 printcol(const char *s, size_t *col, const size_t col_max)
520 {
521 
522 	for (; *s && *col < col_max; ++s) {
523 		size_t new_col;
524 
525 		switch (*s) {
526 		case '\t':
527 			/*
528 			 * If rounding to next multiple of eight causes
529 			 * an integer overflow, just return.
530 			 */
531 			if (*col > SIZE_MAX - tabsize)
532 				return;
533 
534 			/* Round to next multiple of eight. */
535 			new_col = (*col / tabsize + 1) * tabsize;
536 
537 			/*
538 			 * If printing the tab goes past the column
539 			 * width, don't print it and just quit.
540 			 */
541 			if (new_col > col_max)
542 				return;
543 
544 			if (tflag) {
545 				do {
546 					putchar(' ');
547 				} while (++*col < new_col);
548 			} else {
549 				putchar(*s);
550 				*col = new_col;
551 			}
552 			break;
553 		default:
554 			++*col;
555 			putchar(*s);
556 		}
557 	}
558 }
559 
560 /*
561  * Prompts user to either choose between two strings or edit one, both,
562  * or neither.
563  */
564 static void
prompt(const char * s1,const char * s2)565 prompt(const char *s1, const char *s2)
566 {
567 	char *cmd;
568 
569 	/* Print command prompt. */
570 	putchar('%');
571 
572 	/* Get user input. */
573 	for (; (cmd = xfgets(stdin)); free(cmd)) {
574 		const char *p;
575 
576 		/* Skip leading whitespace. */
577 		for (p = cmd; isspace((unsigned char)*p); ++p)
578 			;
579 		switch (*p) {
580 		case 'e':
581 			/* Skip `e'. */
582 			++p;
583 			if (eparse(p, s1, s2) == -1)
584 				goto USAGE;
585 			break;
586 		case 'l':
587 		case '1':
588 			/* Choose left column as-is. */
589 			if (s1 != NULL)
590 				fprintf(outfp, "%s\n", s1);
591 			/* End of command parsing. */
592 			break;
593 		case 'q':
594 			goto QUIT;
595 		case 'r':
596 		case '2':
597 			/* Choose right column as-is. */
598 			if (s2 != NULL)
599 				fprintf(outfp, "%s\n", s2);
600 			/* End of command parsing. */
601 			break;
602 		case 's':
603 			sflag = true;
604 			goto PROMPT;
605 		case 'v':
606 			sflag = false;
607 			/* FALLTHROUGH */
608 		default:
609 			/* Interactive usage help. */
610 USAGE:
611 			int_usage();
612 PROMPT:
613 			putchar('%');
614 
615 			/* Prompt user again. */
616 			continue;
617 		}
618 		free(cmd);
619 		return;
620 	}
621 
622 	/*
623 	 * If there was no error, we received an EOF from stdin, so we
624 	 * should quit.
625 	 */
626 QUIT:
627 	fclose(outfp);
628 	exit(0);
629 }
630 
631 /*
632  * Takes two strings, separated by a column divider.  NULL strings are
633  * treated as empty columns.  If the divider is the ` ' character, the
634  * second column is not printed (-l flag).  In this case, the second
635  * string must be NULL.  When the second column is NULL, the divider
636  * does not print the trailing space following the divider character.
637  *
638  * Takes into account that tabs can take multiple columns.
639  */
640 static void
println(const char * s1,const char divider,const char * s2)641 println(const char *s1, const char divider, const char *s2)
642 {
643 	size_t col;
644 
645 	/* Print first column.  Skips if s1 == NULL. */
646 	col = 0;
647 	if (s1) {
648 		/* Skip angle bracket and space. */
649 		printcol(s1, &col, width);
650 
651 	}
652 
653 	/* Otherwise, we pad this column up to width. */
654 	for (; col < width; ++col)
655 		putchar(' ');
656 
657 	/* Only print left column. */
658 	if (divider == ' ' && !s2) {
659 		printf(" (\n");
660 		return;
661 	}
662 
663 	/*
664 	 * Print column divider.  If there is no second column, we don't
665 	 * need to add the space for padding.
666 	 */
667 	if (!s2) {
668 		printf(" %c\n", divider);
669 		return;
670 	}
671 	printf(" %c ", divider);
672 	col += 3;
673 
674 	/* Skip angle bracket and space. */
675 	printcol(s2, &col, line_width);
676 
677 	putchar('\n');
678 }
679 
/*
 * Reads a line from file and returns it as a newly allocated string
 * with the trailing newline, if any, removed.  The stream's error and
 * EOF indicators are cleared first, so a stream that hit EOF earlier is
 * read again.
 *
 * Returns NULL at end of file.  A read error is fatal (exit status 2).
 * The returned string must be freed by the caller.
 */
static char *
xfgets(FILE *file)
{
	size_t linecap = 0;
	ssize_t l;
	char *s = NULL;

	clearerr(file);

	if ((l = getline(&s, &linecap, file)) == -1) {
		if (ferror(file))
			err(2, "error reading file");
		/*
		 * getline() may have allocated a buffer even though it
		 * returned no line; release it so EOF does not leak.
		 */
		free(s);
		return (NULL);
	}

	/* Strip the line terminator; the last line may lack one. */
	if (l > 0 && s[l - 1] == '\n')
		s[l - 1] = '\0';

	return (s);
}
706 
707 /*
708  * Parse ed commands from diffpipe and print lines from file1 (lines
709  * to change or delete) or file2 (lines to add or change).
710  * Returns EOF or 0.
711  */
712 static int
parsecmd(FILE * diffpipe,FILE * file1,FILE * file2)713 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
714 {
715 	size_t file1start, file1end, file2start, file2end, n;
716 	/* ed command line and pointer to characters in line */
717 	char *line, *p, *q;
718 	const char *errstr;
719 	char c, cmd;
720 
721 	/* Read ed command. */
722 	if (!(line = xfgets(diffpipe)))
723 		return (EOF);
724 
725 	p = line;
726 	/* Go to character after line number. */
727 	while (isdigit((unsigned char)*p))
728 		++p;
729 	c = *p;
730 	*p++ = 0;
731 	file1start = strtonum(line, 0, INT_MAX, &errstr);
732 	if (errstr)
733 		errx(2, "file1 start is %s: %s", errstr, line);
734 
735 	/* A range is specified for file1. */
736 	if (c == ',') {
737 		q = p;
738 		/* Go to character after file2end. */
739 		while (isdigit((unsigned char)*p))
740 			++p;
741 		c = *p;
742 		*p++ = 0;
743 		file1end = strtonum(q, 0, INT_MAX, &errstr);
744 		if (errstr)
745 			errx(2, "file1 end is %s: %s", errstr, line);
746 		if (file1start > file1end)
747 			errx(2, "invalid line range in file1: %s", line);
748 	} else
749 		file1end = file1start;
750 
751 	cmd = c;
752 	/* Check that cmd is valid. */
753 	if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
754 		errx(2, "ed command not recognized: %c: %s", cmd, line);
755 
756 	q = p;
757 	/* Go to character after line number. */
758 	while (isdigit((unsigned char)*p))
759 		++p;
760 	c = *p;
761 	*p++ = 0;
762 	file2start = strtonum(q, 0, INT_MAX, &errstr);
763 	if (errstr)
764 		errx(2, "file2 start is %s: %s", errstr, line);
765 
766 	/*
767 	 * There should either be a comma signifying a second line
768 	 * number or the line should just end here.
769 	 */
770 	if (c != ',' && c != '\0')
771 		errx(2, "invalid line range in file2: %c: %s", c, line);
772 
773 	if (c == ',') {
774 
775 		file2end = strtonum(p, 0, INT_MAX, &errstr);
776 		if (errstr)
777 			errx(2, "file2 end is %s: %s", errstr, line);
778 		if (file2start >= file2end)
779 			errx(2, "invalid line range in file2: %s", line);
780 	} else
781 		file2end = file2start;
782 
783 	/* Appends happen _after_ stated line. */
784 	if (cmd == 'a') {
785 		if (file1start != file1end)
786 			errx(2, "append cannot have a file1 range: %s",
787 			    line);
788 		if (file1start == SIZE_MAX)
789 			errx(2, "file1 line range too high: %s", line);
790 		file1start = ++file1end;
791 	}
792 	/*
793 	 * I'm not sure what the deal is with the line numbers for
794 	 * deletes, though.
795 	 */
796 	else if (cmd == 'd') {
797 		if (file2start != file2end)
798 			errx(2, "delete cannot have a file2 range: %s",
799 			    line);
800 		if (file2start == SIZE_MAX)
801 			errx(2, "file2 line range too high: %s", line);
802 		file2start = ++file2end;
803 	}
804 
805 	/*
806 	 * Continue reading file1 and file2 until we reach line numbers
807 	 * specified by diff.  Should only happen with -I flag.
808 	 */
809 	for (; file1ln < file1start && file2ln < file2start;
810 	    ++file1ln, ++file2ln) {
811 		char *s1, *s2;
812 
813 		if (!(s1 = xfgets(file1)))
814 			errx(2, "file1 shorter than expected");
815 		if (!(s2 = xfgets(file2)))
816 			errx(2, "file2 shorter than expected");
817 
818 		/* If the -l flag was specified, print only left column. */
819 		if (lflag) {
820 			free(s2);
821 			/*
822 			 * XXX - If -l and -I are both specified, all
823 			 * unchanged or ignored lines are shown with a
824 			 * `(' divider.  This matches GNU sdiff, but I
825 			 * believe it is a bug.  Just check out:
826 			 * gsdiff -l -I '^$' samefile samefile.
827 			 */
828 			if (Iflag)
829 				enqueue(s1, '(', NULL);
830 			else
831 				enqueue(s1, ' ', NULL);
832 		} else
833 			enqueue(s1, ' ', s2);
834 	}
835 	/* Ignore deleted lines. */
836 	for (; file1ln < file1start; ++file1ln) {
837 		char *s;
838 
839 		if (!(s = xfgets(file1)))
840 			errx(2, "file1 shorter than expected");
841 
842 		enqueue(s, '(', NULL);
843 	}
844 	/* Ignore added lines. */
845 	for (; file2ln < file2start; ++file2ln) {
846 		char *s;
847 
848 		if (!(s = xfgets(file2)))
849 			errx(2, "file2 shorter than expected");
850 
851 		/* If -l flag was given, don't print right column. */
852 		if (lflag)
853 			free(s);
854 		else
855 			enqueue(NULL, ')', s);
856 	}
857 
858 	/* Process unmodified or skipped lines. */
859 	processq();
860 
861 	switch (cmd) {
862 	case 'a':
863 		printa(file2, file2end);
864 		n = file2end - file2start + 1;
865 		break;
866 	case 'c':
867 		printc(file1, file1end, file2, file2end);
868 		n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
869 		break;
870 	case 'd':
871 		printd(file1, file1end);
872 		n = file1end - file1start + 1;
873 		break;
874 	default:
875 		errx(2, "invalid diff command: %c: %s", cmd, line);
876 	}
877 	free(line);
878 
879 	/* Skip to next ed line. */
880 	while (n--) {
881 		if (!(line = xfgets(diffpipe)))
882 			errx(2, "diff ended early");
883 		free(line);
884 	}
885 
886 	return (0);
887 }
888 
889 /*
890  * Queues up a diff line.
891  */
892 static void
enqueue(char * left,char divider,char * right)893 enqueue(char *left, char divider, char *right)
894 {
895 	struct diffline *diffp;
896 
897 	if (!(diffp = malloc(sizeof(struct diffline))))
898 		err(2, "enqueue");
899 	diffp->left = left;
900 	diffp->div = divider;
901 	diffp->right = right;
902 	STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries);
903 }
904 
905 /*
906  * Free a diffline structure and its elements.
907  */
908 static void
freediff(struct diffline * diffp)909 freediff(struct diffline *diffp)
910 {
911 
912 	free(diffp->left);
913 	free(diffp->right);
914 	free(diffp);
915 }
916 
/*
 * Append `append' to the heap string *s, separated by a newline.  When
 * *s is NULL it simply becomes a copy of `append'.  *s is reallocated
 * as needed and may therefore change.
 *
 * The length of the string handled by the previous call is cached, so
 * repeated appends to the same string do not rescan it and cost O(n),
 * where n = strlen(append).  Allocation failure is fatal.
 */
static void
astrcat(char **s, const char *append)
{
	/* Length and address of the string produced by the last call. */
	static size_t offset = 0;
	static const char *oldstr = NULL;
	size_t addlen, newsiz;
	char *grown;

	addlen = strlen(append);

	/* Nothing to append to yet: start the string with a copy. */
	if (*s == NULL) {
		if ((*s = malloc(addlen + 1)) == NULL)
			err(2, "astrcat");
		memcpy(*s, append, addlen + 1);

		offset = addlen;
		oldstr = *s;
		return;
	}

	/* The cached length is only valid for the string we saw last. */
	if (*s != oldstr)
		offset = strlen(*s);

	/* Old text + '\n' + appended text + '\0'. */
	newsiz = offset + 1 + addlen + 1;
	grown = realloc(*s, newsiz);
	if (grown == NULL)
		err(2, "astrcat");

	/* Write the separator and the new text after the old end. */
	grown[offset] = '\n';
	memcpy(grown + offset + 1, append, addlen + 1);
	*s = grown;

	/* Remember the result for the next call. */
	offset = newsiz - 1;
	oldstr = grown;
}
981 
982 /*
983  * Process diff set queue, printing, prompting, and saving each diff
984  * line stored in queue.
985  */
986 static void
processq(void)987 processq(void)
988 {
989 	struct diffline *diffp;
990 	char divc, *left, *right;
991 
992 	/* Don't process empty queue. */
993 	if (STAILQ_EMPTY(&diffhead))
994 		return;
995 
996 	/* Remember the divider. */
997 	divc = STAILQ_FIRST(&diffhead)->div;
998 
999 	left = NULL;
1000 	right = NULL;
1001 	/*
1002 	 * Go through set of diffs, concatenating each line in left or
1003 	 * right column into two long strings, `left' and `right'.
1004 	 */
1005 	STAILQ_FOREACH(diffp, &diffhead, diffentries) {
1006 		/*
1007 		 * Print changed lines if -s was given,
1008 		 * print all lines if -s was not given.
1009 		 */
1010 		if (!sflag || diffp->div == '|' || diffp->div == '<' ||
1011 		    diffp->div == '>')
1012 			println(diffp->left, diffp->div, diffp->right);
1013 
1014 		/* Append new lines to diff set. */
1015 		if (diffp->left)
1016 			astrcat(&left, diffp->left);
1017 		if (diffp->right)
1018 			astrcat(&right, diffp->right);
1019 	}
1020 
1021 	/* Empty queue and free each diff line and its elements. */
1022 	while (!STAILQ_EMPTY(&diffhead)) {
1023 		diffp = STAILQ_FIRST(&diffhead);
1024 		STAILQ_REMOVE_HEAD(&diffhead, diffentries);
1025 		freediff(diffp);
1026 	}
1027 
1028 	/* Write to outfp, prompting user if lines are different. */
1029 	if (outfp)
1030 		switch (divc) {
1031 		case ' ': case '(': case ')':
1032 			fprintf(outfp, "%s\n", left);
1033 			break;
1034 		case '|': case '<': case '>':
1035 			prompt(left, right);
1036 			break;
1037 		default:
1038 			errx(2, "invalid divider: %c", divc);
1039 		}
1040 
1041 	/* Free left and right. */
1042 	free(left);
1043 	free(right);
1044 }
1045 
1046 /*
1047  * Print lines following an (a)ppend command.
1048  */
1049 static void
printa(FILE * file,size_t line2)1050 printa(FILE *file, size_t line2)
1051 {
1052 	char *line;
1053 
1054 	for (; file2ln <= line2; ++file2ln) {
1055 		if (!(line = xfgets(file)))
1056 			errx(2, "append ended early");
1057 		enqueue(NULL, '>', line);
1058 	}
1059 	processq();
1060 }
1061 
1062 /*
1063  * Print lines following a (c)hange command, from file1ln to file1end
1064  * and from file2ln to file2end.
1065  */
1066 static void
printc(FILE * file1,size_t file1end,FILE * file2,size_t file2end)1067 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
1068 {
1069 	struct fileline {
1070 		STAILQ_ENTRY(fileline)	 fileentries;
1071 		char			*line;
1072 	};
1073 	STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead);
1074 
1075 	/* Read lines to be deleted. */
1076 	for (; file1ln <= file1end; ++file1ln) {
1077 		struct fileline *linep;
1078 		char *line1;
1079 
1080 		/* Read lines from both. */
1081 		if (!(line1 = xfgets(file1)))
1082 			errx(2, "error reading file1 in delete in change");
1083 
1084 		/* Add to delete queue. */
1085 		if (!(linep = malloc(sizeof(struct fileline))))
1086 			err(2, "printc");
1087 		linep->line = line1;
1088 		STAILQ_INSERT_TAIL(&delqhead, linep, fileentries);
1089 	}
1090 
1091 	/* Process changed lines.. */
1092 	for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end;
1093 	    ++file2ln) {
1094 		struct fileline *del;
1095 		char *add;
1096 
1097 		/* Get add line. */
1098 		if (!(add = xfgets(file2)))
1099 			errx(2, "error reading add in change");
1100 
1101 		del = STAILQ_FIRST(&delqhead);
1102 		enqueue(del->line, '|', add);
1103 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1104 		/*
1105 		 * Free fileline structure but not its elements since
1106 		 * they are queued up.
1107 		 */
1108 		free(del);
1109 	}
1110 	processq();
1111 
1112 	/* Process remaining lines to add. */
1113 	for (; file2ln <= file2end; ++file2ln) {
1114 		char *add;
1115 
1116 		/* Get add line. */
1117 		if (!(add = xfgets(file2)))
1118 			errx(2, "error reading add in change");
1119 
1120 		enqueue(NULL, '>', add);
1121 	}
1122 	processq();
1123 
1124 	/* Process remaining lines to delete. */
1125 	while (!STAILQ_EMPTY(&delqhead)) {
1126 		struct fileline *filep;
1127 
1128 		filep = STAILQ_FIRST(&delqhead);
1129 		enqueue(filep->line, '<', NULL);
1130 		STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1131 		free(filep);
1132 	}
1133 	processq();
1134 }
1135 
1136 /*
1137  * Print deleted lines from file, from file1ln to file1end.
1138  */
1139 static void
printd(FILE * file1,size_t file1end)1140 printd(FILE *file1, size_t file1end)
1141 {
1142 	char *line1;
1143 
1144 	/* Print out lines file1ln to line2. */
1145 	for (; file1ln <= file1end; ++file1ln) {
1146 		if (!(line1 = xfgets(file1)))
1147 			errx(2, "file1 ended early in delete");
1148 		enqueue(line1, '<', NULL);
1149 	}
1150 	processq();
1151 }
1152 
/*
 * Print the summary of interactive merge commands on stdout.
 */
static void
int_usage(void)
{
	static const char commands[] =
	    "e:\tedit blank diff\n"
	    "eb:\tedit both diffs concatenated\n"
	    "el:\tedit left diff\n"
	    "er:\tedit right diff\n"
	    "l | 1:\tchoose left diff\n"
	    "r | 2:\tchoose right diff\n"
	    "s:\tsilent mode--don't print identical lines\n"
	    "v:\tverbose mode--print identical lines\n"
	    "q:\tquit";

	/* puts() supplies the final newline. */
	puts(commands);
}
1170 
/*
 * Print the command line synopsis on stderr and exit with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: sdiff [-abdilstHW] [-I regexp] [-o outfile] [-w width] file1"
	    " file2\n";

	fputs(synopsis, stderr);
	exit(2);
}
1180