xref: /trueos/gnu/usr.bin/rcs/lib/rcssyn.c (revision c5342bffee8407caeac0b651549206d534dfccb9)
1 /* RCS file syntactic analysis */
2 
3 /******************************************************************************
4  *                       Syntax Analysis.
5  *                       Keyword table
6  *                       Testprogram: define SYNTEST
7  *                       Compatibility with Release 2: define COMPAT2=1
8  ******************************************************************************
9  */
10 
11 /* Copyright 1982, 1988, 1989 Walter Tichy
12    Copyright 1990, 1991, 1992, 1993, 1994, 1995 Paul Eggert
13    Distributed under license by the Free Software Foundation, Inc.
14 
15 This file is part of RCS.
16 
17 RCS is free software; you can redistribute it and/or modify
18 it under the terms of the GNU General Public License as published by
19 the Free Software Foundation; either version 2, or (at your option)
20 any later version.
21 
22 RCS is distributed in the hope that it will be useful,
23 but WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25 GNU General Public License for more details.
26 
27 You should have received a copy of the GNU General Public License
28 along with RCS; see the file COPYING.
29 If not, write to the Free Software Foundation,
30 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
31 
32 Report problems and direct all questions to:
33 
34     rcs-bugs@cs.purdue.edu
35 
36 */
37 
38 /*
39  * Revision 5.15  1995/06/16 06:19:24  eggert
40  * Update FSF address.
41  *
42  * Revision 5.14  1995/06/01 16:23:43  eggert
43  * (expand_names): Add "b" for -kb.
44  * (getdelta): Don't strip leading "19" from MKS RCS dates; see cmpdate.
45  *
46  * Revision 5.13  1994/03/20 04:52:58  eggert
47  * Remove lint.
48  *
49  * Revision 5.12  1993/11/03 17:42:27  eggert
50  * Parse MKS RCS dates; ignore \r in diff control lines.
51  * Don't discard ignored phrases.  Improve quality of diagnostics.
52  *
53  * Revision 5.11  1992/07/28  16:12:44  eggert
54  * Avoid `unsigned'.  Statement macro names now end in _.
55  *
56  * Revision 5.10  1992/01/24  18:44:19  eggert
57  * Move put routines to rcsgen.c.
58  *
59  * Revision 5.9  1992/01/06  02:42:34  eggert
60  * ULONG_MAX/10 -> ULONG_MAX_OVER_10
61  * while (E) ; -> while (E) continue;
62  *
63  * Revision 5.8  1991/08/19  03:13:55  eggert
64  * Tune.
65  *
66  * Revision 5.7  1991/04/21  11:58:29  eggert
67  * Disambiguate names on shortname hosts.
68  * Fix errno bug.  Add MS-DOS support.
69  *
70  * Revision 5.6  1991/02/28  19:18:51  eggert
71  * Fix null termination bug in reporting keyword expansion.
72  *
73  * Revision 5.5  1991/02/25  07:12:44  eggert
74  * Check diff output more carefully; avoid overflow.
75  *
76  * Revision 5.4  1990/11/01  05:28:48  eggert
77  * When ignoring unknown phrases, copy them to the output RCS file.
78  * Permit arbitrary data in logs and comment leaders.
79  * Don't check for nontext on initial checkin.
80  *
81  * Revision 5.3  1990/09/20  07:58:32  eggert
82  * Remove the test for non-text bytes; it caused more pain than it cured.
83  *
84  * Revision 5.2  1990/09/04  08:02:30  eggert
85  * Parse RCS files with no revisions.
86  * Don't strip leading white space from diff commands.  Count RCS lines better.
87  *
88  * Revision 5.1  1990/08/29  07:14:06  eggert
89  * Add -kkvl.  Clean old log messages too.
90  *
91  * Revision 5.0  1990/08/22  08:13:44  eggert
92  * Try to parse future RCS formats without barfing.
93  * Add -k.  Don't require final newline.
94  * Remove compile-time limits; use malloc instead.
95  * Don't output branch keyword if there's no default branch,
96  * because RCS version 3 doesn't understand it.
97  * Tune.  Remove lint.
98  * Add support for ISO 8859.  Ansify and Posixate.
99  * Check that a newly checked-in file is acceptable as input to 'diff'.
100  * Check diff's output.
101  *
102  * Revision 4.6  89/05/01  15:13:32  narten
103  * changed copyright header to reflect current distribution rules
104  *
105  * Revision 4.5  88/08/09  19:13:21  eggert
106  * Allow cc -R; remove lint.
107  *
108  * Revision 4.4  87/12/18  11:46:16  narten
109  * more lint cleanups (Guy Harris)
110  *
111  * Revision 4.3  87/10/18  10:39:36  narten
112  * Updating version numbers. Changes relative to 1.1 actually relative to
113  * 4.1
114  *
115  * Revision 1.3  87/09/24  14:00:49  narten
116  * Sources now pass through lint (if you ignore printf/sprintf/fprintf
117  * warnings)
118  *
119  * Revision 1.2  87/03/27  14:22:40  jenkins
120  * Port to suns
121  *
122  * Revision 4.1  83/03/28  11:38:49  wft
123  * Added parsing and printing of default branch.
124  *
125  * Revision 3.6  83/01/15  17:46:50  wft
126  * Changed readdelta() to initialize selector and log-pointer.
127  * Changed puttree to check for selector==DELETE; putdtext() uses DELNUMFORM.
128  *
129  * Revision 3.5  82/12/08  21:58:58  wft
130  * renamed Commentleader to Commleader.
131  *
132  * Revision 3.4  82/12/04  13:24:40  wft
133  * Added routine gettree(), which updates keeplock after reading the
134  * delta tree.
135  *
136  * Revision 3.3  82/11/28  21:30:11  wft
137  * Reading and printing of Suffix removed; version COMPAT2 skips the
138  * Suffix for files of release 2 format. Fixed problems with printing nil.
139  *
140  * Revision 3.2  82/10/18  21:18:25  wft
141  * renamed putdeltatext to putdtext.
142  *
143  * Revision 3.1  82/10/11  19:45:11  wft
144  * made sure getc() returns into an integer.
145  */
146 
147 
148 
149 /* version COMPAT2 reads files of the format of release 2 and 3, but
150  * generates files of release 3 format. Need not be defined if no
151  * old RCS files generated with release 2 exist.
152  */
153 
154 #include "rcsbase.h"
155 
156 libId(synId, "$FreeBSD$")
157 
/* Forward declarations of the file-local helpers defined further down.
 * badDiffOutput and diffLineNumberTooLarge carry the `exiting' marker.  */
158 static char const *getkeyval P((char const*,enum tokens,int));
159 static int getdelta P((void));
160 static int strn2expmode P((char const*,size_t));
161 static struct hshentry *getdnum P((void));
162 static void badDiffOutput P((char const*)) exiting;
163 static void diffLineNumberTooLarge P((char const*)) exiting;
164 static void getsemi P((char const*));
165 
166 /* keyword table */
167 
/* Spellings of the RCS file keywords with external linkage.  */
168 char const
169 	Kaccess[]   = "access",
170 	Kauthor[]   = "author",
171 	Kbranch[]   = "branch",
172 	Kcomment[]  = "comment",
173 	Kdate[]     = "date",
174 	Kdesc[]     = "desc",
175 	Kexpand[]   = "expand",
176 	Khead[]     = "head",
177 	Klocks[]    = "locks",
178 	Klog[]      = "log",
179 	Knext[]     = "next",
180 	Kstate[]    = "state",
181 	Kstrict[]   = "strict",
182 	Ksymbols[]  = "symbols",
183 	Ktext[]     = "text";
184 
/* Keywords that are private to this file; Ksuffix exists only when
 * COMPAT2 is enabled.  */
185 static char const
186 #if COMPAT2
187 	Ksuffix[]   = "suffix",
188 #endif
189 	K_branches[]= "branches";
190 
/* Buffer handed to savestring() when getadmin() reads the comment leader.  */
191 static struct buf Commleader;
/* Results of parsing the <admin> section; all are assigned by getadmin().  */
192 struct cbuf Comment;
/* Result of getphrases(Kdesc) at the end of getadmin().  */
193 struct cbuf Ignored;
/* Heads of the singly linked access, symbol and lock lists.  */
194 struct access   * AccessList;
195 struct assoc    * Symbols;
196 struct rcslock *Locks;
/* Keyword expansion mode: KEYVAL_EXPAND unless an `expand' phrase names
 * another entry of expand_names.  */
197 int		  Expand;
/* Result of getkeyopt(Kstrict), i.e. whether a `strict' phrase was present.  */
198 int               StrictLocks;
/* Delta named by the `head' phrase (may be 0).  */
199 struct hshentry * Head;
/* Number string of the default branch, or 0 if there is none.  */
200 char const      * Dbranch;
/* Reset to 0 by getadmin() and incremented once per delta by getdelta().  */
201 int TotalDeltas;
202 
203 
204 	static void
getsemi(key)205 getsemi(key)
206 	char const *key;
207 /* Get a semicolon to finish off a phrase started by KEY.  */
208 {
209 	if (!getlex(SEMI))
210 		fatserror("missing ';' after '%s'", key);
211 }
212 
213 	static struct hshentry *
getdnum()214 getdnum()
215 /* Get a delta number.  */
216 {
217 	register struct hshentry *delta = getnum();
218 	if (delta && countnumflds(delta->num)&1)
219 		fatserror("%s isn't a delta number", delta->num);
220 	return delta;
221 }
222 
223 
224 	void
getadmin()225 getadmin()
226 /* Read an <admin> and initialize the appropriate global variables.  */
227 {
228 	register char const *id;
229         struct access   * newaccess;
230         struct assoc    * newassoc;
231 	struct rcslock *newlock;
232         struct hshentry * delta;
233 	struct access **LastAccess;
234 	struct assoc **LastSymbol;
235 	struct rcslock **LastLock;
236 	struct buf b;
237 	struct cbuf cb;
238 
239         TotalDeltas=0;
240 
241 	getkey(Khead);
242 	Head = getdnum();
243 	getsemi(Khead);
244 
245 	Dbranch = 0;
246 	if (getkeyopt(Kbranch)) {
247 		if ((delta = getnum()))
248 			Dbranch = delta->num;
249 		getsemi(Kbranch);
250         }
251 
252 
253 #if COMPAT2
254         /* read suffix. Only in release 2 format */
255 	if (getkeyopt(Ksuffix)) {
256                 if (nexttok==STRING) {
257 			readstring(); nextlex(); /* Throw away the suffix.  */
258 		} else if (nexttok==ID) {
259                         nextlex();
260                 }
261 		getsemi(Ksuffix);
262         }
263 #endif
264 
265 	getkey(Kaccess);
266 	LastAccess = &AccessList;
267 	while ((id = getid())) {
268 		newaccess = ftalloc(struct access);
269                 newaccess->login = id;
270 		*LastAccess = newaccess;
271 		LastAccess = &newaccess->nextaccess;
272         }
273 	*LastAccess = 0;
274 	getsemi(Kaccess);
275 
276 	getkey(Ksymbols);
277 	LastSymbol = &Symbols;
278         while ((id = getid())) {
279                 if (!getlex(COLON))
280 			fatserror("missing ':' in symbolic name definition");
281                 if (!(delta=getnum())) {
282 			fatserror("missing number in symbolic name definition");
283                 } else { /*add new pair to association list*/
284 			newassoc = ftalloc(struct assoc);
285                         newassoc->symbol=id;
286 			newassoc->num = delta->num;
287 			*LastSymbol = newassoc;
288 			LastSymbol = &newassoc->nextassoc;
289                 }
290         }
291 	*LastSymbol = 0;
292 	getsemi(Ksymbols);
293 
294 	getkey(Klocks);
295 	LastLock = &Locks;
296         while ((id = getid())) {
297                 if (!getlex(COLON))
298 			fatserror("missing ':' in lock");
299 		if (!(delta=getdnum())) {
300 			fatserror("missing number in lock");
301                 } else { /*add new pair to lock list*/
302 			newlock = ftalloc(struct rcslock);
303                         newlock->login=id;
304                         newlock->delta=delta;
305 			*LastLock = newlock;
306 			LastLock = &newlock->nextlock;
307                 }
308         }
309 	*LastLock = 0;
310 	getsemi(Klocks);
311 
312 	if ((StrictLocks = getkeyopt(Kstrict)))
313 		getsemi(Kstrict);
314 
315 	clear_buf(&Comment);
316 	if (getkeyopt(Kcomment)) {
317 		if (nexttok==STRING) {
318 			Comment = savestring(&Commleader);
319 			nextlex();
320 		}
321 		getsemi(Kcomment);
322         }
323 
324 	Expand = KEYVAL_EXPAND;
325 	if (getkeyopt(Kexpand)) {
326 		if (nexttok==STRING) {
327 			bufautobegin(&b);
328 			cb = savestring(&b);
329 			if ((Expand = strn2expmode(cb.string,cb.size)) < 0)
330 			    fatserror("unknown expand mode %.*s",
331 				(int)cb.size, cb.string
332 			    );
333 			bufautoend(&b);
334 			nextlex();
335 		}
336 		getsemi(Kexpand);
337         }
338 	Ignored = getphrases(Kdesc);
339 }
340 
char const *const expand_names[] = {
	/* These must agree with *_EXPAND in rcsbase.h.  */
	"kv", "kvl", "k", "v", "o", "b",
	0
};

/* Yield the index in expand_names of the mode spelled by the N bytes
 * starting at S (S need not be null-terminated), or -1 if no entry
 * matches.  */
	static int
strn2expmode(s, n)
	char const *s;
	size_t n;
{
	char const *const *p;

	for (p = expand_names;  *p;  ++p)
		/*
		 * Check the length before comparing, so that neither memcmp
		 * nor an index ever reaches past the end of a table entry
		 * that is shorter than N.
		 */
		if (strlen(*p) == n  &&  memcmp(*p, s, n) == 0)
			return p - expand_names;
	return -1;
}

/* Yield expand mode corresponding to the null-terminated string S,
 * or -1 if bad.  */
	int
str2expmode(s)
	char const *s;
{
	return strn2expmode(s, strlen(s));
}
367 
368 
369 	void
ignorephrases(key)370 ignorephrases(key)
371 	const char *key;
372 /*
373 * Ignore a series of phrases that do not start with KEY.
374 * Stop when the next phrase starts with a token that is not an identifier,
375 * or is KEY.
376 */
377 {
378 	for (;;) {
379 		nextlex();
380 		if (nexttok != ID  ||  strcmp(NextString,key) == 0)
381 			break;
382 		warnignore();
383 		hshenter=false;
384 		for (;; nextlex()) {
385 			switch (nexttok) {
386 				case SEMI: hshenter=true; break;
387 				case ID:
388 				case NUM: ffree1(NextString); continue;
389 				case STRING: readstring(); continue;
390 				default: continue;
391 			}
392 			break;
393 		}
394 	}
395 }
396 
397 
398 	static int
getdelta()399 getdelta()
400 /* Function: reads a delta block.
401  * returns false if the current block does not start with a number.
402  */
403 {
404         register struct hshentry * Delta, * num;
405 	struct branchhead **LastBranch, *NewBranch;
406 
407 	if (!(Delta = getdnum()))
408 		return false;
409 
410         hshenter = false; /*Don't enter dates into hashtable*/
411 	Delta->date = getkeyval(Kdate, NUM, false);
412         hshenter=true;    /*reset hshenter for revision numbers.*/
413 
414         Delta->author = getkeyval(Kauthor, ID, false);
415 
416         Delta->state = getkeyval(Kstate, ID, true);
417 
418 	getkey(K_branches);
419 	LastBranch = &Delta->branches;
420 	while ((num = getdnum())) {
421 		NewBranch = ftalloc(struct branchhead);
422                 NewBranch->hsh = num;
423 		*LastBranch = NewBranch;
424 		LastBranch = &NewBranch->nextbranch;
425         }
426 	*LastBranch = 0;
427 	getsemi(K_branches);
428 
429 	getkey(Knext);
430 	Delta->next = num = getdnum();
431 	getsemi(Knext);
432 	Delta->lockedby = 0;
433 	Delta->log.string = 0;
434 	Delta->selector = true;
435 	Delta->ig = getphrases(Kdesc);
436         TotalDeltas++;
437         return (true);
438 }
439 
440 
441 	void
gettree()442 gettree()
443 /* Function: Reads in the delta tree with getdelta(), then
444  * updates the lockedby fields.
445  */
446 {
447 	struct rcslock const *currlock;
448 
449 	while (getdelta())
450 		continue;
451         currlock=Locks;
452         while (currlock) {
453                 currlock->delta->lockedby = currlock->login;
454                 currlock = currlock->nextlock;
455         }
456 }
457 
458 
459 	void
getdesc(prdesc)460 getdesc(prdesc)
461 int  prdesc;
462 /* Function: read in descriptive text
463  * nexttok is not advanced afterwards.
464  * If prdesc is set, the text is printed to stdout.
465  */
466 {
467 
468 	getkeystring(Kdesc);
469         if (prdesc)
470                 printstring();  /*echo string*/
471         else    readstring();   /*skip string*/
472 }
473 
474 
475 
476 
477 
478 
479 	static char const *
getkeyval(keyword,token,optional)480 getkeyval(keyword, token, optional)
481 	char const *keyword;
482 	enum tokens token;
483 	int optional;
484 /* reads a pair of the form
485  * <keyword> <token> ;
486  * where token is one of <id> or <num>. optional indicates whether
487  * <token> is optional. A pointer to
488  * the actual character string of <id> or <num> is returned.
489  */
490 {
491 	register char const *val = 0;
492 
493 	getkey(keyword);
494         if (nexttok==token) {
495                 val = NextString;
496                 nextlex();
497         } else {
498 		if (!optional)
499 			fatserror("missing %s", keyword);
500         }
501 	getsemi(keyword);
502         return(val);
503 }
504 
505 
/* Report, via rcsfaterror(), that diff output ended prematurely.  */
	void
unexpected_EOF()
{
	rcsfaterror("unexpected EOF in diff output");
}
511 
512 	void
initdiffcmd(dc)513 initdiffcmd(dc)
514 	register struct diffcmd *dc;
515 /* Initialize *dc suitably for getdiffcmd(). */
516 {
517 	dc->adprev = 0;
518 	dc->dafter = 0;
519 }
520 
/* Report, via rcsfaterror(), that the diff command line BUF is malformed.  */
	static void
badDiffOutput(buf)
	char const *buf;
{
	rcsfaterror("bad diff output line: %s", buf);
}
527 
/* Report, via rcsfaterror(), that a line number in the diff command
 * line BUF is too large to handle.  */
	static void
diffLineNumberTooLarge(buf)
	char const *buf;
{
	rcsfaterror("diff line number too large: %s", buf);
}
534 
535 	int
getdiffcmd(finfile,delimiter,foutfile,dc)536 getdiffcmd(finfile, delimiter, foutfile, dc)
537 	RILE *finfile;
538 	FILE *foutfile;
539 	int delimiter;
540 	struct diffcmd *dc;
541 /* Get a editing command output by 'diff -n' from fin.
542  * The input is delimited by SDELIM if delimiter is set, EOF otherwise.
543  * Copy a clean version of the command to fout (if nonnull).
544  * Yield 0 for 'd', 1 for 'a', and -1 for EOF.
545  * Store the command's line number and length into dc->line1 and dc->nlines.
546  * Keep dc->adprev and dc->dafter up to date.
547  */
548 {
549 	register int c;
550 	declarecache;
551 	register FILE *fout;
552 	register char *p;
553 	register RILE *fin;
554 	long line1, nlines, t;
555 	char buf[BUFSIZ];
556 
557 	fin = finfile;
558 	fout = foutfile;
559 	setupcache(fin); cache(fin);
560 	cachegeteof_(c, { if (delimiter) unexpected_EOF(); return -1; } )
561 	if (delimiter) {
562 		if (c==SDELIM) {
563 			cacheget_(c)
564 			if (c==SDELIM) {
565 				buf[0] = c;
566 				buf[1] = 0;
567 				badDiffOutput(buf);
568 			}
569 			uncache(fin);
570 			nextc = c;
571 			if (fout)
572 				aprintf(fout, "%c%c", SDELIM, c);
573 			return -1;
574 		}
575 	}
576 	p = buf;
577 	do {
578 		if (buf+BUFSIZ-2 <= p) {
579 			rcsfaterror("diff output command line too long");
580 		}
581 		*p++ = c;
582 		cachegeteof_(c, unexpected_EOF();)
583 	} while (c != '\n');
584 	uncache(fin);
585 	if (delimiter)
586 		++rcsline;
587 	*p = '\0';
588 	for (p = buf+1;  (c = *p++) == ' ';  )
589 		continue;
590 	line1 = 0;
591 	while (isdigit(c)) {
592 		if (
593 			LONG_MAX/10 < line1  ||
594 			(t = line1 * 10,   (line1 = t + (c - '0'))  <  t)
595 		)
596 			diffLineNumberTooLarge(buf);
597 		c = *p++;
598 	}
599 	while (c == ' ')
600 		c = *p++;
601 	nlines = 0;
602 	while (isdigit(c)) {
603 		if (
604 			LONG_MAX/10 < nlines  ||
605 			(t = nlines * 10,   (nlines = t + (c - '0'))  <  t)
606 		)
607 			diffLineNumberTooLarge(buf);
608 		c = *p++;
609 	}
610 	if (c == '\r')
611 		c = *p++;
612 	if (c || !nlines) {
613 		badDiffOutput(buf);
614 	}
615 	if (line1+nlines < line1)
616 		diffLineNumberTooLarge(buf);
617 	switch (buf[0]) {
618 	    case 'a':
619 		if (line1 < dc->adprev) {
620 		    rcsfaterror("backward insertion in diff output: %s", buf);
621 		}
622 		dc->adprev = line1 + 1;
623 		break;
624 	    case 'd':
625 		if (line1 < dc->adprev  ||  line1 < dc->dafter) {
626 		    rcsfaterror("backward deletion in diff output: %s", buf);
627 		}
628 		dc->adprev = line1;
629 		dc->dafter = line1 + nlines;
630 		break;
631 	    default:
632 		badDiffOutput(buf);
633 	}
634 	if (fout) {
635 		aprintf(fout, "%s\n", buf);
636 	}
637 	dc->line1 = line1;
638 	dc->nlines = nlines;
639 	return buf[0] == 'a';
640 }
641 
642 
643 
#ifdef SYNTEST

/* Test driver: read the RCS file named by the first argument, parse its
 * admin section, delta tree and description, and require EOF after that.
 * putadmin() is called after getadmin() with fdlock set to STDOUT_FILENO.  */

char const cmdid[] = "syntest";

	int
main(argc,argv)
	int argc;
	char *argv[];
{
	if (argc < 2) {
		aputs("No input file\n",stderr);
		exitmain(EXIT_FAILURE);
	}
	finptr = Iopen(argv[1], FOPEN_R, (struct stat*)0);
	if (!finptr) {
		faterror("can't open input file %s", argv[1]);
	}

	Lexinit();
	getadmin();
	fdlock = STDOUT_FILENO;
	putadmin();

	gettree();
	getdesc(true);
	nextlex();

	if (!eoflex()) {
		fatserror("expecting EOF");
	}
	exitmain(EXIT_SUCCESS);
}

/* Exit hook for the test program: terminate at once with a failure status.  */
void exiterr() { _exit(EXIT_FAILURE); }

#endif
682