xref: /dragonfly/contrib/mdocml/roff.c (revision 1e4d43f9c96723e4e55543d240f182e1aac9a4c2)
1 /* $Id: roff.c,v 1.378 2021/08/10 12:55:04 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Implementation of the roff(7) parser for mandoc(1).
19  */
20 #include "config.h"
21 
22 #include <sys/types.h>
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
42 
43 /*
44  * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
45  * that an escape sequence resulted from copy-in processing and
46  * needs to be checked or interpolated.  As it is used nowhere
47  * else, it is defined here rather than in a header file.
48  */
49 #define   ASCII_ESC 27 /* decimal 27 is the ASCII ESC control character */
50 
51 /* Maximum number of string expansions per line, to break infinite loops. */
52 #define   EXPAND_LIMIT        1000
53 
54 /* Types of definitions of macros and strings; each kind is a distinct bit so kinds can be OR-ed into a mask. */
55 #define   ROFFDEF_USER        (1 << 1)  /* User-defined. */
56 #define   ROFFDEF_PRE         (1 << 2)  /* Predefined. */
57 #define   ROFFDEF_REN         (1 << 3)  /* Renamed standard macro. */
58 #define   ROFFDEF_STD         (1 << 4)  /* mdoc(7) or man(7) macro. */
59 #define   ROFFDEF_ANY         (ROFFDEF_USER | ROFFDEF_PRE | \
60                                ROFFDEF_REN | ROFFDEF_STD)  /* Mask of the four kinds above. */
61 #define   ROFFDEF_UNDEF       (1 << 5)  /* Completely undefined; not included in ROFFDEF_ANY. */
62 
63 /* --- data types --------------------------------------------------------- */
64 
65 /*
66  * An incredibly-simple string buffer: a pointer paired with its cached length.
67  */
68 struct    roffstr {
69           char                *p; /* nil-terminated buffer */
70           size_t               sz; /* saved strlen(p), so the terminator is not counted */
71 };
72 
73 /*
74  * A key-value roffstr pair as part of a singly-linked list (struct roff keeps strtab, rentab and xmbtab as such lists).
75  */
76 struct    roffkv {
77           struct roffstr       key; /* lookup key */
78           struct roffstr       val; /* value stored under key */
79           struct roffkv       *next; /* next in list */
80 };
81 
82 /*
83  * A single number register as part of a singly-linked list.
84  */
85 struct    roffreg {
86           struct roffstr       key; /* register name */
87           int                  val; /* register value */
88           int                  step; /* NOTE(review): presumably the auto-increment amount -- confirm against roff_setregn() */
89           struct roffreg      *next; /* next in list */
90 };
91 
92 /*
93  * Association of request and macro names with token IDs.
94  */
95 struct    roffreq {
96           enum roff_tok        tok; /* token ID */
97           char                 name[]; /* flexible array member holding the name; must remain the last member */
98 };
99 
100 /*
101  * A macro processing context.
102  * More than one is needed when macro calls are nested.
103  */
104 struct    mctx {
105           char                **argv; /* array of argument strings */
106           int                  argc; /* NOTE(review): presumably number of argv entries in use -- confirm */
107           int                  argsz; /* NOTE(review): presumably allocated capacity of argv -- confirm */
108 };
109 
110 struct    roff { /* the parse context; handed to every handler as `r' (see ROFF_ARGS) */
111           struct roff_man     *man; /* mdoc or man parser */
112           struct roffnode     *last; /* leaf of the roffnode stack */
113           struct mctx         *mstack; /* stack of macro contexts */
114           int                 *rstack; /* stack of inverted `ie' values */
115           struct ohash        *reqtab; /* request lookup table */
116           struct roffreg      *regtab; /* number registers (linked list) */
117           struct roffkv       *strtab; /* user-defined strings & macros (linked list) */
118           struct roffkv       *rentab; /* renamed strings & macros (linked list) */
119           struct roffkv       *xmbtab; /* multi-byte trans table (`tr') */
120           struct roffstr      *xtab; /* single-byte trans table (`tr') */
121           const char          *current_string; /* value of last called user macro */
122           struct tbl_node     *first_tbl; /* first table parsed */
123           struct tbl_node     *last_tbl; /* last table parsed */
124           struct tbl_node     *tbl; /* current table being parsed */
125           struct eqn_node     *last_eqn; /* equation parser */
126           struct eqn_node     *eqn; /* active equation parser */
127           int                  eqn_inline; /* current equation is inline */
128           int                  options; /* parse options */
129           int                  mstacksz; /* current size of mstack */
130           int                  mstackpos; /* position in mstack */
131           int                  rstacksz; /* current size limit of rstack */
132           int                  rstackpos; /* position in rstack */
133           int                  format; /* current file in mdoc or man format */
134           char                 control; /* control character */
135           char                 escape; /* escape character */
136 };
137 
138 /*
139  * A macro definition, condition, or ignored block; nodes are stacked through `parent'.
140  */
141 struct    roffnode {
142           enum roff_tok        tok; /* type of node */
143           struct roffnode     *parent; /* up one in stack */
144           int                  line; /* parse line */
145           int                  col; /* parse col */
146           char                *name; /* node name, e.g. macro name */
147           char                *end; /* custom end macro of the block */
148           int                  endspan; /* scope to: 1=eol 2=next line -1=\} */
149           int                  rule; /* content is: 1=evaluated 0=skipped */
150 };
151 
152 #define   ROFF_ARGS  struct roff *r, /* parse ctx */ \
153                                enum roff_tok tok, /* tok of macro */ \
154                                struct buf *buf, /* input buffer */ \
155                                int ln, /* parse line */ \
156                                int ppos, /* original pos in buffer */ \
157                                int pos, /* current pos in buffer */ \
158                                int *offs /* reset offset of buffer data */
159 
160 typedef   int (*roffproc)(ROFF_ARGS); /* handler type: takes the ROFF_ARGS parameter list and returns int */
161 
162 struct    roffmac { /* the handlers for one request; roffs[] holds one of these per token */
163           roffproc   proc; /* process new macro */
164           roffproc   text; /* process as child text of macro; NULL in most roffs[] entries */
165           roffproc   sub; /* process as child of macro; NULL in most roffs[] entries */
166           int                  flags; /* 0 or ROFFMAC_STRUCT */
167 #define   ROFFMAC_STRUCT      (1 << 0) /* always interpret */
168 };
170 struct    predef { /* one predefined name and the text that replaces it */
171           const char          *name; /* predefined input name */
172           const char          *str; /* replacement symbol */
173 };
174 
175 #define   PREDEF(__name, __str) \
176           { (__name), (__str) }, /* expands to one struct predef initializer, trailing comma included */
178 /* --- function prototypes ------------------------------------------------ */
179 
180 static    int                  roffnode_cleanscope(struct roff *);
181 static    int                  roffnode_pop(struct roff *);
182 static    void                 roffnode_push(struct roff *, enum roff_tok,
183                                         const char *, int, int);
184 static    void                 roff_addtbl(struct roff_man *, int, struct tbl_node *);
185 static    int                  roff_als(ROFF_ARGS); /* every prototype taking ROFF_ARGS matches the roffproc handler type */
186 static    int                  roff_block(ROFF_ARGS);
187 static    int                  roff_block_text(ROFF_ARGS);
188 static    int                  roff_block_sub(ROFF_ARGS);
189 static    int                  roff_break(ROFF_ARGS);
190 static    int                  roff_cblock(ROFF_ARGS);
191 static    int                  roff_cc(ROFF_ARGS);
192 static    int                  roff_ccond(struct roff *, int, int);
193 static    int                  roff_char(ROFF_ARGS);
194 static    int                  roff_cond(ROFF_ARGS);
195 static    int                  roff_cond_checkend(ROFF_ARGS);
196 static    int                  roff_cond_text(ROFF_ARGS);
197 static    int                  roff_cond_sub(ROFF_ARGS);
198 static    int                  roff_ds(ROFF_ARGS);
199 static    int                  roff_ec(ROFF_ARGS);
200 static    int                  roff_eo(ROFF_ARGS);
201 static    int                  roff_eqndelim(struct roff *, struct buf *, int);
202 static    int                  roff_evalcond(struct roff *, int, char *, int *);
203 static    int                  roff_evalnum(struct roff *, int,
204                                         const char *, int *, int *, int);
205 static    int                  roff_evalpar(struct roff *, int,
206                                         const char *, int *, int *, int);
207 static    int                  roff_evalstrcond(const char *, int *);
208 static    int                  roff_expand(struct roff *, struct buf *,
209                                         int, int, char);
210 static    void                 roff_free1(struct roff *);
211 static    void                 roff_freereg(struct roffreg *);
212 static    void                 roff_freestr(struct roffkv *);
213 static    size_t               roff_getname(struct roff *, char **, int, int);
214 static    int                  roff_getnum(const char *, int *, int *, int);
215 static    int                  roff_getop(const char *, int *, char *);
216 static    int                  roff_getregn(struct roff *,
217                                         const char *, size_t, char);
218 static    int                  roff_getregro(const struct roff *,
219                                         const char *name);
220 static    const char          *roff_getstrn(struct roff *,
221                                         const char *, size_t, int *);
222 static    int                  roff_hasregn(const struct roff *,
223                                         const char *, size_t);
224 static    int                  roff_insec(ROFF_ARGS);
225 static    int                  roff_it(ROFF_ARGS);
226 static    int                  roff_line_ignore(ROFF_ARGS);
227 static    void                 roff_man_alloc1(struct roff_man *);
228 static    void                 roff_man_free1(struct roff_man *);
229 static    int                  roff_manyarg(ROFF_ARGS);
230 static    int                  roff_noarg(ROFF_ARGS);
231 static    int                  roff_nop(ROFF_ARGS);
232 static    int                  roff_nr(ROFF_ARGS);
233 static    int                  roff_onearg(ROFF_ARGS);
234 static    enum roff_tok        roff_parse(struct roff *, char *, int *,
235                                         int, int);
236 static    int                  roff_parsetext(struct roff *, struct buf *,
237                                         int, int *);
238 static    int                  roff_renamed(ROFF_ARGS);
239 static    int                  roff_return(ROFF_ARGS);
240 static    int                  roff_rm(ROFF_ARGS);
241 static    int                  roff_rn(ROFF_ARGS);
242 static    int                  roff_rr(ROFF_ARGS);
243 static    void                 roff_setregn(struct roff *, const char *,
244                                         size_t, int, char, int);
245 static    void                 roff_setstr(struct roff *,
246                                         const char *, const char *, int);
247 static    void                 roff_setstrn(struct roffkv **, const char *,
248                                         size_t, const char *, size_t, int);
249 static    int                  roff_shift(ROFF_ARGS);
250 static    int                  roff_so(ROFF_ARGS);
251 static    int                  roff_tr(ROFF_ARGS);
252 static    int                  roff_Dd(ROFF_ARGS);
253 static    int                  roff_TE(ROFF_ARGS);
254 static    int                  roff_TS(ROFF_ARGS);
255 static    int                  roff_EQ(ROFF_ARGS);
256 static    int                  roff_EN(ROFF_ARGS);
257 static    int                  roff_T_(ROFF_ARGS);
258 static    int                  roff_unsupp(ROFF_ARGS);
259 static    int                  roff_userdef(ROFF_ARGS);
260 
261 /* --- constant data ------------------------------------------------------ */
262 
263 #define   ROFFNUM_SCALE       (1 << 0)  /* Flag bit: honour scaling in roff_getnum(). */
264 #define   ROFFNUM_WHITE       (1 << 1)  /* Flag bit: skip whitespace in roff_evalnum(). */
265 
266 const char *__roff_name[MAN_MAX + 1] = { /* request and macro names; the leading entries follow the same order as the roffs[] handler table. NOTE(review): identifiers containing "__" are reserved for the implementation in C */
267           "br",               "ce",               "fi",               "ft",
268           "ll",               "mc",               "nf",
269           "po",               "rj",               "sp",
270           "ta",               "ti",               NULL, /* NULL: the slot commented as ROFF_MAX in roffs[] */
271           "ab",               "ad",               "af",               "aln",
272           "als",              "am",               "am1",              "ami",
273           "ami1",             "as",               "as1",              "asciify",
274           "backtrace",        "bd",               "bleedat",          "blm",
275         "box",                "boxa",             "bp",               "BP",
276           "break",  "breakchar",        "brnl",             "brp",
277           "brpnl",  "c2",               "cc",
278           "cf",               "cflags", "ch",               "char",
279           "chop",             "class",  "close",  "CL",
280           "color",  "composite",        "continue",         "cp",
281           "cropat", "cs",               "cu",               "da",
282           "dch",              "Dd",               "de",               "de1",
283           "defcolor",         "dei",              "dei1",             "device",
284           "devicem",          "di",               "do",               "ds",
285           "ds1",              "dwh",              "dt",               "ec",
286           "ecr",              "ecs",              "el",               "em",
287           "EN",               "eo",               "EP",               "EQ",
288           "errprint",         "ev",               "evc",              "ex",
289           "fallback",         "fam",              "fc",               "fchar",
290           "fcolor", "fdeferlig",        "feature",          "fkern",
291           "fl",               "flig",             "fp",               "fps",
292           "fschar", "fspacewidth",      "fspecial",         "ftr",
293           "fzoom",  "gcolor", "hc",               "hcode",
294           "hidechar",         "hla",              "hlm",              "hpf",
295           "hpfa",             "hpfcode",          "hw",               "hy",
296           "hylang", "hylen",  "hym",              "hypp",
297           "hys",              "ie",               "if",               "ig",
298           "index",  "it",               "itc",              "IX",
299           "kern",             "kernafter",        "kernbefore",       "kernpair",
300           "lc",               "lc_ctype",         "lds",              "length",
301           "letadj", "lf",               "lg",               "lhang",
302           "linetabs",         "lnr",              "lnrf",             "lpfx",
303           "ls",               "lsm",              "lt",
304           "mediasize",        "minss",  "mk",               "mso",
305           "na",               "ne",               "nh",               "nhychar",
306           "nm",               "nn",               "nop",              "nr",
307           "nrf",              "nroff",  "ns",               "nx",
308           "open",             "opena",  "os",               "output",
309           "padj",             "papersize",        "pc",               "pev",
310           "pi",               "PI",               "pl",               "pm",
311           "pn",               "pnr",              "ps",
312           "psbb",             "pshape", "pso",              "ptr",
313           "pvs",              "rchar",  "rd",               "recursionlimit",
314           "return", "rfschar",          "rhang",
315           "rm",               "rn",               "rnn",              "rr",
316           "rs",               "rt",               "schar",  "sentchar",
317           "shc",              "shift",  "sizes",  "so",
318           "spacewidth",       "special",          "spreadwarn",       "ss",
319           "sty",              "substring",        "sv",               "sy",
320           "T&",               "tc",               "TE",
321           "TH",               "tkf",              "tl",
322           "tm",               "tm1",              "tmc",              "tr",
323           "track",  "transchar",        "trf",              "trimat",
324           "trin",             "trnt",             "troff",  "TS",
325           "uf",               "ul",               "unformat",         "unwatch",
326           "unwatchn",         "vpt",              "vs",               "warn",
327           "warnscale",        "watch",  "watchlength",      "watchn",
328           "wh",               "while",  "write",  "writec",
329           "writem", "xflag",  ".",                NULL, /* NOTE(review): the NULLs here and on the next line are presumably tokens without a fixed name -- confirm against enum roff_tok */
330           NULL,               "text",
331           "Dd",               "Dt",               "Os",               "Sh",
332           "Ss",               "Pp",               "D1",               "Dl",
333           "Bd",               "Ed",               "Bl",               "El",
334           "It",               "Ad",               "An",               "Ap",
335           "Ar",               "Cd",               "Cm",               "Dv",
336           "Er",               "Ev",               "Ex",               "Fa",
337           "Fd",               "Fl",               "Fn",               "Ft",
338           "Ic",               "In",               "Li",               "Nd",
339           "Nm",               "Op",               "Ot",               "Pa",
340           "Rv",               "St",               "Va",               "Vt",
341           "Xr",               "%A",               "%B",               "%D",
342           "%I",               "%J",               "%N",               "%O",
343           "%P",               "%R",               "%T",               "%V",
344           "Ac",               "Ao",               "Aq",               "At",
345           "Bc",               "Bf",               "Bo",               "Bq",
346           "Bsx",              "Bx",               "Db",               "Dc",
347           "Do",               "Dq",               "Ec",               "Ef",
348           "Em",               "Eo",               "Fx",               "Ms",
349           "No",               "Ns",               "Nx",               "Ox",
350           "Pc",               "Pf",               "Po",               "Pq",
351           "Qc",               "Ql",               "Qo",               "Qq",
352           "Re",               "Rs",               "Sc",               "So",
353           "Sq",               "Sm",               "Sx",               "Sy",
354           "Tn",               "Ux",               "Xc",               "Xo",
355           "Fo",               "Fc",               "Oo",               "Oc",
356           "Bk",               "Ek",               "Bt",               "Hf",
357           "Fr",               "Ud",               "Lb",               "Lp",
358           "Lk",               "Mt",               "Brq",              "Bro",
359           "Brc",              "%C",               "Es",               "En",
360           "Dx",               "%Q",               "%U",               "Ta",
361           "Tg",               NULL, /* NOTE(review): presumably the end-of-mdoc sentinel slot -- confirm against enum roff_tok */
362           "TH",               "SH",               "SS",               "TP",
363           "TQ",
364           "LP",               "PP",               "P",                "IP",
365           "HP",               "SM",               "SB",               "BI",
366           "IB",               "BR",               "RB",               "R",
367           "B",                "I",                "IR",               "RI",
368           "RE",               "RS",               "DT",               "UC",
369           "PD",               "AT",               "in",
370           "SY",               "YS",               "OP",
371           "EX",               "EE",               "UR",
372           "UE",               "MT",               "ME",               NULL /* NOTE(review): presumably index MAN_MAX, the last slot of the array -- confirm */
373 };
374 const     char *const *roff_name = __roff_name; /* read-only pointer through which the table is exposed */
375 
376 static    struct roffmac       roffs[TOKEN_NONE] = {
377           { roff_noarg, NULL, NULL, 0 },  /* br */
378           { roff_onearg, NULL, NULL, 0 },  /* ce */
379           { roff_noarg, NULL, NULL, 0 },  /* fi */
380           { roff_onearg, NULL, NULL, 0 },  /* ft */
381           { roff_onearg, NULL, NULL, 0 },  /* ll */
382           { roff_onearg, NULL, NULL, 0 },  /* mc */
383           { roff_noarg, NULL, NULL, 0 },  /* nf */
384           { roff_onearg, NULL, NULL, 0 },  /* po */
385           { roff_onearg, NULL, NULL, 0 },  /* rj */
386           { roff_onearg, NULL, NULL, 0 },  /* sp */
387           { roff_manyarg, NULL, NULL, 0 },  /* ta */
388           { roff_onearg, NULL, NULL, 0 },  /* ti */
389           { NULL, NULL, NULL, 0 },  /* ROFF_MAX */
390           { roff_unsupp, NULL, NULL, 0 },  /* ab */
391           { roff_line_ignore, NULL, NULL, 0 },  /* ad */
392           { roff_line_ignore, NULL, NULL, 0 },  /* af */
393           { roff_unsupp, NULL, NULL, 0 },  /* aln */
394           { roff_als, NULL, NULL, 0 },  /* als */
395           { roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
396           { roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
397           { roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
398           { roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
399           { roff_ds, NULL, NULL, 0 },  /* as */
400           { roff_ds, NULL, NULL, 0 },  /* as1 */
401           { roff_unsupp, NULL, NULL, 0 },  /* asciify */
402           { roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
403           { roff_line_ignore, NULL, NULL, 0 },  /* bd */
404           { roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
405           { roff_unsupp, NULL, NULL, 0 },  /* blm */
406           { roff_unsupp, NULL, NULL, 0 },  /* box */
407           { roff_unsupp, NULL, NULL, 0 },  /* boxa */
408           { roff_line_ignore, NULL, NULL, 0 },  /* bp */
409           { roff_unsupp, NULL, NULL, 0 },  /* BP */
410           { roff_break, NULL, NULL, 0 },  /* break */
411           { roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
412           { roff_line_ignore, NULL, NULL, 0 },  /* brnl */
413           { roff_noarg, NULL, NULL, 0 },  /* brp */
414           { roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
415           { roff_unsupp, NULL, NULL, 0 },  /* c2 */
416           { roff_cc, NULL, NULL, 0 },  /* cc */
417           { roff_insec, NULL, NULL, 0 },  /* cf */
418           { roff_line_ignore, NULL, NULL, 0 },  /* cflags */
419           { roff_line_ignore, NULL, NULL, 0 },  /* ch */
420           { roff_char, NULL, NULL, 0 },  /* char */
421           { roff_unsupp, NULL, NULL, 0 },  /* chop */
422           { roff_line_ignore, NULL, NULL, 0 },  /* class */
423           { roff_insec, NULL, NULL, 0 },  /* close */
424           { roff_unsupp, NULL, NULL, 0 },  /* CL */
425           { roff_line_ignore, NULL, NULL, 0 },  /* color */
426           { roff_unsupp, NULL, NULL, 0 },  /* composite */
427           { roff_unsupp, NULL, NULL, 0 },  /* continue */
428           { roff_line_ignore, NULL, NULL, 0 },  /* cp */
429           { roff_line_ignore, NULL, NULL, 0 },  /* cropat */
430           { roff_line_ignore, NULL, NULL, 0 },  /* cs */
431           { roff_line_ignore, NULL, NULL, 0 },  /* cu */
432           { roff_unsupp, NULL, NULL, 0 },  /* da */
433           { roff_unsupp, NULL, NULL, 0 },  /* dch */
434           { roff_Dd, NULL, NULL, 0 },  /* Dd */
435           { roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
436           { roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
437           { roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
438           { roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
439           { roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
440           { roff_unsupp, NULL, NULL, 0 },  /* device */
441           { roff_unsupp, NULL, NULL, 0 },  /* devicem */
442           { roff_unsupp, NULL, NULL, 0 },  /* di */
443           { roff_unsupp, NULL, NULL, 0 },  /* do */
444           { roff_ds, NULL, NULL, 0 },  /* ds */
445           { roff_ds, NULL, NULL, 0 },  /* ds1 */
446           { roff_unsupp, NULL, NULL, 0 },  /* dwh */
447           { roff_unsupp, NULL, NULL, 0 },  /* dt */
448           { roff_ec, NULL, NULL, 0 },  /* ec */
449           { roff_unsupp, NULL, NULL, 0 },  /* ecr */
450           { roff_unsupp, NULL, NULL, 0 },  /* ecs */
451           { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
452           { roff_unsupp, NULL, NULL, 0 },  /* em */
453           { roff_EN, NULL, NULL, 0 },  /* EN */
454           { roff_eo, NULL, NULL, 0 },  /* eo */
455           { roff_unsupp, NULL, NULL, 0 },  /* EP */
456           { roff_EQ, NULL, NULL, 0 },  /* EQ */
457           { roff_line_ignore, NULL, NULL, 0 },  /* errprint */
458           { roff_unsupp, NULL, NULL, 0 },  /* ev */
459           { roff_unsupp, NULL, NULL, 0 },  /* evc */
460           { roff_unsupp, NULL, NULL, 0 },  /* ex */
461           { roff_line_ignore, NULL, NULL, 0 },  /* fallback */
462           { roff_line_ignore, NULL, NULL, 0 },  /* fam */
463           { roff_unsupp, NULL, NULL, 0 },  /* fc */
464           { roff_unsupp, NULL, NULL, 0 },  /* fchar */
465           { roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
466           { roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
467           { roff_line_ignore, NULL, NULL, 0 },  /* feature */
468           { roff_line_ignore, NULL, NULL, 0 },  /* fkern */
469           { roff_line_ignore, NULL, NULL, 0 },  /* fl */
470           { roff_line_ignore, NULL, NULL, 0 },  /* flig */
471           { roff_line_ignore, NULL, NULL, 0 },  /* fp */
472           { roff_line_ignore, NULL, NULL, 0 },  /* fps */
473           { roff_unsupp, NULL, NULL, 0 },  /* fschar */
474           { roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
475           { roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
476           { roff_line_ignore, NULL, NULL, 0 },  /* ftr */
477           { roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
478           { roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
479           { roff_line_ignore, NULL, NULL, 0 },  /* hc */
480           { roff_line_ignore, NULL, NULL, 0 },  /* hcode */
481           { roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
482           { roff_line_ignore, NULL, NULL, 0 },  /* hla */
483           { roff_line_ignore, NULL, NULL, 0 },  /* hlm */
484           { roff_line_ignore, NULL, NULL, 0 },  /* hpf */
485           { roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
486           { roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
487           { roff_line_ignore, NULL, NULL, 0 },  /* hw */
488           { roff_line_ignore, NULL, NULL, 0 },  /* hy */
489           { roff_line_ignore, NULL, NULL, 0 },  /* hylang */
490           { roff_line_ignore, NULL, NULL, 0 },  /* hylen */
491           { roff_line_ignore, NULL, NULL, 0 },  /* hym */
492           { roff_line_ignore, NULL, NULL, 0 },  /* hypp */
493           { roff_line_ignore, NULL, NULL, 0 },  /* hys */
494           { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
495           { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
496           { roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
497           { roff_unsupp, NULL, NULL, 0 },  /* index */
498           { roff_it, NULL, NULL, 0 },  /* it */
499           { roff_unsupp, NULL, NULL, 0 },  /* itc */
500           { roff_line_ignore, NULL, NULL, 0 },  /* IX */
501           { roff_line_ignore, NULL, NULL, 0 },  /* kern */
502           { roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
503           { roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
504           { roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
505           { roff_unsupp, NULL, NULL, 0 },  /* lc */
506           { roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
507           { roff_unsupp, NULL, NULL, 0 },  /* lds */
508           { roff_unsupp, NULL, NULL, 0 },  /* length */
509           { roff_line_ignore, NULL, NULL, 0 },  /* letadj */
510           { roff_insec, NULL, NULL, 0 },  /* lf */
511           { roff_line_ignore, NULL, NULL, 0 },  /* lg */
512           { roff_line_ignore, NULL, NULL, 0 },  /* lhang */
513           { roff_unsupp, NULL, NULL, 0 },  /* linetabs */
514           { roff_unsupp, NULL, NULL, 0 },  /* lnr */
515           { roff_unsupp, NULL, NULL, 0 },  /* lnrf */
516           { roff_unsupp, NULL, NULL, 0 },  /* lpfx */
517           { roff_line_ignore, NULL, NULL, 0 },  /* ls */
518           { roff_unsupp, NULL, NULL, 0 },  /* lsm */
519           { roff_line_ignore, NULL, NULL, 0 },  /* lt */
520           { roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
521           { roff_line_ignore, NULL, NULL, 0 },  /* minss */
522           { roff_line_ignore, NULL, NULL, 0 },  /* mk */
523           { roff_insec, NULL, NULL, 0 },  /* mso */
524           { roff_line_ignore, NULL, NULL, 0 },  /* na */
525           { roff_line_ignore, NULL, NULL, 0 },  /* ne */
526           { roff_line_ignore, NULL, NULL, 0 },  /* nh */
527           { roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
528           { roff_unsupp, NULL, NULL, 0 },  /* nm */
529           { roff_unsupp, NULL, NULL, 0 },  /* nn */
530           { roff_nop, NULL, NULL, 0 },  /* nop */
531           { roff_nr, NULL, NULL, 0 },  /* nr */
532           { roff_unsupp, NULL, NULL, 0 },  /* nrf */
533           { roff_line_ignore, NULL, NULL, 0 },  /* nroff */
534           { roff_line_ignore, NULL, NULL, 0 },  /* ns */
535           { roff_insec, NULL, NULL, 0 },  /* nx */
536           { roff_insec, NULL, NULL, 0 },  /* open */
537           { roff_insec, NULL, NULL, 0 },  /* opena */
538           { roff_line_ignore, NULL, NULL, 0 },  /* os */
539           { roff_unsupp, NULL, NULL, 0 },  /* output */
540           { roff_line_ignore, NULL, NULL, 0 },  /* padj */
541           { roff_line_ignore, NULL, NULL, 0 },  /* papersize */
542           { roff_line_ignore, NULL, NULL, 0 },  /* pc */
543           { roff_line_ignore, NULL, NULL, 0 },  /* pev */
544           { roff_insec, NULL, NULL, 0 },  /* pi */
545           { roff_unsupp, NULL, NULL, 0 },  /* PI */
546           { roff_line_ignore, NULL, NULL, 0 },  /* pl */
547           { roff_line_ignore, NULL, NULL, 0 },  /* pm */
548           { roff_line_ignore, NULL, NULL, 0 },  /* pn */
549           { roff_line_ignore, NULL, NULL, 0 },  /* pnr */
550           { roff_line_ignore, NULL, NULL, 0 },  /* ps */
551           { roff_unsupp, NULL, NULL, 0 },  /* psbb */
552           { roff_unsupp, NULL, NULL, 0 },  /* pshape */
553           { roff_insec, NULL, NULL, 0 },  /* pso */
554           { roff_line_ignore, NULL, NULL, 0 },  /* ptr */
555           { roff_line_ignore, NULL, NULL, 0 },  /* pvs */
556           { roff_unsupp, NULL, NULL, 0 },  /* rchar */
557           { roff_line_ignore, NULL, NULL, 0 },  /* rd */
558           { roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
559           { roff_return, NULL, NULL, 0 },  /* return */
560           { roff_unsupp, NULL, NULL, 0 },  /* rfschar */
561           { roff_line_ignore, NULL, NULL, 0 },  /* rhang */
562           { roff_rm, NULL, NULL, 0 },  /* rm */
563           { roff_rn, NULL, NULL, 0 },  /* rn */
564           { roff_unsupp, NULL, NULL, 0 },  /* rnn */
565           { roff_rr, NULL, NULL, 0 },  /* rr */
566           { roff_line_ignore, NULL, NULL, 0 },  /* rs */
567           { roff_line_ignore, NULL, NULL, 0 },  /* rt */
568           { roff_unsupp, NULL, NULL, 0 },  /* schar */
569           { roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
570           { roff_line_ignore, NULL, NULL, 0 },  /* shc */
571           { roff_shift, NULL, NULL, 0 },  /* shift */
572           { roff_line_ignore, NULL, NULL, 0 },  /* sizes */
573           { roff_so, NULL, NULL, 0 },  /* so */
574           { roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
575           { roff_line_ignore, NULL, NULL, 0 },  /* special */
576           { roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
577           { roff_line_ignore, NULL, NULL, 0 },  /* ss */
578           { roff_line_ignore, NULL, NULL, 0 },  /* sty */
579           { roff_unsupp, NULL, NULL, 0 },  /* substring */
580           { roff_line_ignore, NULL, NULL, 0 },  /* sv */
581           { roff_insec, NULL, NULL, 0 },  /* sy */
582           { roff_T_, NULL, NULL, 0 },  /* T& */
583           { roff_unsupp, NULL, NULL, 0 },  /* tc */
584           { roff_TE, NULL, NULL, 0 },  /* TE */
585           { roff_Dd, NULL, NULL, 0 },  /* TH */
586           { roff_line_ignore, NULL, NULL, 0 },  /* tkf */
587           { roff_unsupp, NULL, NULL, 0 },  /* tl */
588           { roff_line_ignore, NULL, NULL, 0 },  /* tm */
589           { roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
590           { roff_line_ignore, NULL, NULL, 0 },  /* tmc */
591           { roff_tr, NULL, NULL, 0 },  /* tr */
592           { roff_line_ignore, NULL, NULL, 0 },  /* track */
593           { roff_line_ignore, NULL, NULL, 0 },  /* transchar */
594           { roff_insec, NULL, NULL, 0 },  /* trf */
595           { roff_line_ignore, NULL, NULL, 0 },  /* trimat */
596           { roff_unsupp, NULL, NULL, 0 },  /* trin */
597           { roff_unsupp, NULL, NULL, 0 },  /* trnt */
598           { roff_line_ignore, NULL, NULL, 0 },  /* troff */
599           { roff_TS, NULL, NULL, 0 },  /* TS */
600           { roff_line_ignore, NULL, NULL, 0 },  /* uf */
601           { roff_line_ignore, NULL, NULL, 0 },  /* ul */
602           { roff_unsupp, NULL, NULL, 0 },  /* unformat */
603           { roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
604           { roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
605           { roff_line_ignore, NULL, NULL, 0 },  /* vpt */
606           { roff_line_ignore, NULL, NULL, 0 },  /* vs */
607           { roff_line_ignore, NULL, NULL, 0 },  /* warn */
608           { roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
609           { roff_line_ignore, NULL, NULL, 0 },  /* watch */
610           { roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
611           { roff_line_ignore, NULL, NULL, 0 },  /* watchn */
612           { roff_unsupp, NULL, NULL, 0 },  /* wh */
613           { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
614           { roff_insec, NULL, NULL, 0 },  /* write */
615           { roff_insec, NULL, NULL, 0 },  /* writec */
616           { roff_insec, NULL, NULL, 0 },  /* writem */
617           { roff_line_ignore, NULL, NULL, 0 },  /* xflag */
618           { roff_cblock, NULL, NULL, 0 },  /* . */
619           { roff_renamed, NULL, NULL, 0 },
620           { roff_userdef, NULL, NULL, 0 }
621 };
622 
/*
 * Array of injected predefined strings.
 * The initializer entries are pulled in from predefs.in;
 * PREDEFS_MAX is the declared number of array elements.
 */
#define   PREDEFS_MAX          38
static    const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
628 
/* File-scope parser state; all four variables are cleared by roff_reset(). */
static    int        roffce_lines;      /* number of input lines to center */
static    struct roff_node *roffce_node;  /* active request */
static    int        roffit_lines;  /* number of lines to delay */
static    char      *roffit_macro;  /* NUL-terminated macro line */
633 
634 
635 /* --- request table ------------------------------------------------------ */
636 
637 struct ohash *
roffhash_alloc(enum roff_tok mintok,enum roff_tok maxtok)638 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
639 {
640           struct ohash        *htab;
641           struct roffreq      *req;
642           enum roff_tok        tok;
643           size_t               sz;
644           unsigned int         slot;
645 
646           htab = mandoc_malloc(sizeof(*htab));
647           mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
648 
649           for (tok = mintok; tok < maxtok; tok++) {
650                     if (roff_name[tok] == NULL)
651                               continue;
652                     sz = strlen(roff_name[tok]);
653                     req = mandoc_malloc(sizeof(*req) + sz + 1);
654                     req->tok = tok;
655                     memcpy(req->name, roff_name[tok], sz + 1);
656                     slot = ohash_qlookup(htab, req->name);
657                     ohash_insert(htab, slot, req);
658           }
659           return htab;
660 }
661 
/*
 * Free all entries of a request hash table and then the table
 * itself.  Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
676 
677 enum roff_tok
roffhash_find(struct ohash * htab,const char * name,size_t sz)678 roffhash_find(struct ohash *htab, const char *name, size_t sz)
679 {
680           struct roffreq      *req;
681           const char          *end;
682 
683           if (sz) {
684                     end = name + sz;
685                     req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
686           } else
687                     req = ohash_find(htab, ohash_qlookup(htab, name));
688           return req == NULL ? TOKEN_NONE : req->tok;
689 }
690 
691 /* --- stack of request blocks -------------------------------------------- */
692 
693 /*
694  * Pop the current node off of the stack of roff instructions currently
695  * pending.  Return 1 if it is a loop or 0 otherwise.
696  */
697 static int
roffnode_pop(struct roff * r)698 roffnode_pop(struct roff *r)
699 {
700           struct roffnode     *p;
701           int                  inloop;
702 
703           p = r->last;
704           inloop = p->tok == ROFF_while;
705           r->last = p->parent;
706           free(p->name);
707           free(p->end);
708           free(p);
709           return inloop;
710 }
711 
712 /*
713  * Push a roff node onto the instruction stack.  This must later be
714  * removed with roffnode_pop().
715  */
716 static void
roffnode_push(struct roff * r,enum roff_tok tok,const char * name,int line,int col)717 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
718                     int line, int col)
719 {
720           struct roffnode     *p;
721 
722           p = mandoc_calloc(1, sizeof(struct roffnode));
723           p->tok = tok;
724           if (name)
725                     p->name = mandoc_strdup(name);
726           p->parent = r->last;
727           p->line = line;
728           p->col = col;
729           p->rule = p->parent ? p->parent->rule : 0;
730 
731           r->last = p;
732 }
733 
734 /* --- roff parser state data management ---------------------------------- */
735 
736 static void
roff_free1(struct roff * r)737 roff_free1(struct roff *r)
738 {
739           int                  i;
740 
741           tbl_free(r->first_tbl);
742           r->first_tbl = r->last_tbl = r->tbl = NULL;
743 
744           eqn_free(r->last_eqn);
745           r->last_eqn = r->eqn = NULL;
746 
747           while (r->mstackpos >= 0)
748                     roff_userret(r);
749 
750           while (r->last)
751                     roffnode_pop(r);
752 
753           free (r->rstack);
754           r->rstack = NULL;
755           r->rstacksz = 0;
756           r->rstackpos = -1;
757 
758           roff_freereg(r->regtab);
759           r->regtab = NULL;
760 
761           roff_freestr(r->strtab);
762           roff_freestr(r->rentab);
763           roff_freestr(r->xmbtab);
764           r->strtab = r->rentab = r->xmbtab = NULL;
765 
766           if (r->xtab)
767                     for (i = 0; i < 128; i++)
768                               free(r->xtab[i].p);
769           free(r->xtab);
770           r->xtab = NULL;
771 }
772 
773 void
roff_reset(struct roff * r)774 roff_reset(struct roff *r)
775 {
776           roff_free1(r);
777           r->options |= MPARSE_COMMENT;
778           r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
779           r->control = '\0';
780           r->escape = '\\';
781           roffce_lines = 0;
782           roffce_node = NULL;
783           roffit_lines = 0;
784           roffit_macro = NULL;
785 }
786 
787 void
roff_free(struct roff * r)788 roff_free(struct roff *r)
789 {
790           int                  i;
791 
792           roff_free1(r);
793           for (i = 0; i < r->mstacksz; i++)
794                     free(r->mstack[i].argv);
795           free(r->mstack);
796           roffhash_free(r->reqtab);
797           free(r);
798 }
799 
800 struct roff *
roff_alloc(int options)801 roff_alloc(int options)
802 {
803           struct roff         *r;
804 
805           r = mandoc_calloc(1, sizeof(struct roff));
806           r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
807           r->options = options | MPARSE_COMMENT;
808           r->format = options & (MPARSE_MDOC | MPARSE_MAN);
809           r->mstackpos = -1;
810           r->rstackpos = -1;
811           r->escape = '\\';
812           return r;
813 }
814 
815 /* --- syntax tree state data management ---------------------------------- */
816 
817 static void
roff_man_free1(struct roff_man * man)818 roff_man_free1(struct roff_man *man)
819 {
820           if (man->meta.first != NULL)
821                     roff_node_delete(man, man->meta.first);
822           free(man->meta.msec);
823           free(man->meta.vol);
824           free(man->meta.os);
825           free(man->meta.arch);
826           free(man->meta.title);
827           free(man->meta.name);
828           free(man->meta.date);
829           free(man->meta.sodest);
830 }
831 
832 void
roff_state_reset(struct roff_man * man)833 roff_state_reset(struct roff_man *man)
834 {
835           man->last = man->meta.first;
836           man->last_es = NULL;
837           man->flags = 0;
838           man->lastsec = man->lastnamed = SEC_NONE;
839           man->next = ROFF_NEXT_CHILD;
840           roff_setreg(man->roff, "nS", 0, '=');
841 }
842 
843 static void
roff_man_alloc1(struct roff_man * man)844 roff_man_alloc1(struct roff_man *man)
845 {
846           memset(&man->meta, 0, sizeof(man->meta));
847           man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
848           man->meta.first->type = ROFFT_ROOT;
849           man->meta.macroset = MACROSET_NONE;
850           roff_state_reset(man);
851 }
852 
/*
 * Throw away the syntax tree and the meta data,
 * then set up a new, empty tree in the same object.
 */
void
roff_man_reset(struct roff_man *man)
{

	roff_man_free1(man);	/* old tree and meta strings */
	roff_man_alloc1(man);	/* fresh root node and state */
}
859 
/*
 * Release the syntax tree and the meta data,
 * then the roff_man object itself.
 */
void
roff_man_free(struct roff_man *man)
{

	roff_man_free1(man);	/* tree and meta strings */
	free(man);
}
866 
867 struct roff_man *
roff_man_alloc(struct roff * roff,const char * os_s,int quick)868 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
869 {
870           struct roff_man *man;
871 
872           man = mandoc_calloc(1, sizeof(*man));
873           man->roff = roff;
874           man->os_s = os_s;
875           man->quick = quick;
876           roff_man_alloc1(man);
877           roff->man = man;
878           return man;
879 }
880 
881 /* --- syntax tree handling ----------------------------------------------- */
882 
883 struct roff_node *
roff_node_alloc(struct roff_man * man,int line,int pos,enum roff_type type,int tok)884 roff_node_alloc(struct roff_man *man, int line, int pos,
885           enum roff_type type, int tok)
886 {
887           struct roff_node    *n;
888 
889           n = mandoc_calloc(1, sizeof(*n));
890           n->line = line;
891           n->pos = pos;
892           n->tok = tok;
893           n->type = type;
894           n->sec = man->lastsec;
895 
896           if (man->flags & MDOC_SYNOPSIS)
897                     n->flags |= NODE_SYNPRETTY;
898           else
899                     n->flags &= ~NODE_SYNPRETTY;
900           if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
901                     n->flags |= NODE_NOFILL;
902           else
903                     n->flags &= ~NODE_NOFILL;
904           if (man->flags & MDOC_NEWLINE)
905                     n->flags |= NODE_LINE;
906           man->flags &= ~MDOC_NEWLINE;
907 
908           return n;
909 }
910 
911 void
roff_node_append(struct roff_man * man,struct roff_node * n)912 roff_node_append(struct roff_man *man, struct roff_node *n)
913 {
914 
915           switch (man->next) {
916           case ROFF_NEXT_SIBLING:
917                     if (man->last->next != NULL) {
918                               n->next = man->last->next;
919                               man->last->next->prev = n;
920                     } else
921                               man->last->parent->last = n;
922                     man->last->next = n;
923                     n->prev = man->last;
924                     n->parent = man->last->parent;
925                     break;
926           case ROFF_NEXT_CHILD:
927                     if (man->last->child != NULL) {
928                               n->next = man->last->child;
929                               man->last->child->prev = n;
930                     } else
931                               man->last->last = n;
932                     man->last->child = n;
933                     n->parent = man->last;
934                     break;
935           default:
936                     abort();
937           }
938           man->last = n;
939 
940           switch (n->type) {
941           case ROFFT_HEAD:
942                     n->parent->head = n;
943                     break;
944           case ROFFT_BODY:
945                     if (n->end != ENDBODY_NOT)
946                               return;
947                     n->parent->body = n;
948                     break;
949           case ROFFT_TAIL:
950                     n->parent->tail = n;
951                     break;
952           default:
953                     return;
954           }
955 
956           /*
957            * Copy over the normalised-data pointer of our parent.  Not
958            * everybody has one, but copying a null pointer is fine.
959            */
960 
961           n->norm = n->parent->norm;
962           assert(n->parent->type == ROFFT_BLOCK);
963 }
964 
965 void
roff_word_alloc(struct roff_man * man,int line,int pos,const char * word)966 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
967 {
968           struct roff_node    *n;
969 
970           n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
971           n->string = roff_strdup(man->roff, word);
972           roff_node_append(man, n);
973           n->flags |= NODE_VALID | NODE_ENDED;
974           man->next = ROFF_NEXT_SIBLING;
975 }
976 
977 void
roff_word_append(struct roff_man * man,const char * word)978 roff_word_append(struct roff_man *man, const char *word)
979 {
980           struct roff_node    *n;
981           char                          *addstr, *newstr;
982 
983           n = man->last;
984           addstr = roff_strdup(man->roff, word);
985           mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
986           free(addstr);
987           free(n->string);
988           n->string = newstr;
989           man->next = ROFF_NEXT_SIBLING;
990 }
991 
992 void
roff_elem_alloc(struct roff_man * man,int line,int pos,int tok)993 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
994 {
995           struct roff_node    *n;
996 
997           n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
998           roff_node_append(man, n);
999           man->next = ROFF_NEXT_CHILD;
1000 }
1001 
1002 struct roff_node *
roff_block_alloc(struct roff_man * man,int line,int pos,int tok)1003 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1004 {
1005           struct roff_node    *n;
1006 
1007           n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1008           roff_node_append(man, n);
1009           man->next = ROFF_NEXT_CHILD;
1010           return n;
1011 }
1012 
1013 struct roff_node *
roff_head_alloc(struct roff_man * man,int line,int pos,int tok)1014 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1015 {
1016           struct roff_node    *n;
1017 
1018           n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1019           roff_node_append(man, n);
1020           man->next = ROFF_NEXT_CHILD;
1021           return n;
1022 }
1023 
1024 struct roff_node *
roff_body_alloc(struct roff_man * man,int line,int pos,int tok)1025 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1026 {
1027           struct roff_node    *n;
1028 
1029           n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1030           roff_node_append(man, n);
1031           man->next = ROFF_NEXT_CHILD;
1032           return n;
1033 }
1034 
1035 static void
roff_addtbl(struct roff_man * man,int line,struct tbl_node * tbl)1036 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1037 {
1038           struct roff_node    *n;
1039           struct tbl_span               *span;
1040 
1041           if (man->meta.macroset == MACROSET_MAN)
1042                     man_breakscope(man, ROFF_TS);
1043           while ((span = tbl_span(tbl)) != NULL) {
1044                     n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1045                     n->span = span;
1046                     roff_node_append(man, n);
1047                     n->flags |= NODE_VALID | NODE_ENDED;
1048                     man->next = ROFF_NEXT_SIBLING;
1049           }
1050 }
1051 
1052 void
roff_node_unlink(struct roff_man * man,struct roff_node * n)1053 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1054 {
1055 
1056           /* Adjust siblings. */
1057 
1058           if (n->prev)
1059                     n->prev->next = n->next;
1060           if (n->next)
1061                     n->next->prev = n->prev;
1062 
1063           /* Adjust parent. */
1064 
1065           if (n->parent != NULL) {
1066                     if (n->parent->child == n)
1067                               n->parent->child = n->next;
1068                     if (n->parent->last == n)
1069                               n->parent->last = n->prev;
1070           }
1071 
1072           /* Adjust parse point. */
1073 
1074           if (man == NULL)
1075                     return;
1076           if (man->last == n) {
1077                     if (n->prev == NULL) {
1078                               man->last = n->parent;
1079                               man->next = ROFF_NEXT_CHILD;
1080                     } else {
1081                               man->last = n->prev;
1082                               man->next = ROFF_NEXT_SIBLING;
1083                     }
1084           }
1085           if (man->meta.first == n)
1086                     man->meta.first = NULL;
1087 }
1088 
1089 void
roff_node_relink(struct roff_man * man,struct roff_node * n)1090 roff_node_relink(struct roff_man *man, struct roff_node *n)
1091 {
1092           roff_node_unlink(man, n);
1093           n->prev = n->next = NULL;
1094           roff_node_append(man, n);
1095 }
1096 
1097 void
roff_node_free(struct roff_node * n)1098 roff_node_free(struct roff_node *n)
1099 {
1100 
1101           if (n->args != NULL)
1102                     mdoc_argv_free(n->args);
1103           if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1104                     free(n->norm);
1105           eqn_box_free(n->eqn);
1106           free(n->string);
1107           free(n->tag);
1108           free(n);
1109 }
1110 
1111 void
roff_node_delete(struct roff_man * man,struct roff_node * n)1112 roff_node_delete(struct roff_man *man, struct roff_node *n)
1113 {
1114 
1115           while (n->child != NULL)
1116                     roff_node_delete(man, n->child);
1117           roff_node_unlink(man, n);
1118           roff_node_free(n);
1119 }
1120 
1121 int
roff_node_transparent(struct roff_node * n)1122 roff_node_transparent(struct roff_node *n)
1123 {
1124           if (n == NULL)
1125                     return 0;
1126           if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1127                     return 1;
1128           return roff_tok_transparent(n->tok);
1129 }
1130 
1131 int
roff_tok_transparent(enum roff_tok tok)1132 roff_tok_transparent(enum roff_tok tok)
1133 {
1134           switch (tok) {
1135           case ROFF_ft:
1136           case ROFF_ll:
1137           case ROFF_mc:
1138           case ROFF_po:
1139           case ROFF_ta:
1140           case MDOC_Db:
1141           case MDOC_Es:
1142           case MDOC_Sm:
1143           case MDOC_Tg:
1144           case MAN_DT:
1145           case MAN_UC:
1146           case MAN_PD:
1147           case MAN_AT:
1148                     return 1;
1149           default:
1150                     return 0;
1151           }
1152 }
1153 
1154 struct roff_node *
roff_node_child(struct roff_node * n)1155 roff_node_child(struct roff_node *n)
1156 {
1157           for (n = n->child; roff_node_transparent(n); n = n->next)
1158                     continue;
1159           return n;
1160 }
1161 
1162 struct roff_node *
roff_node_prev(struct roff_node * n)1163 roff_node_prev(struct roff_node *n)
1164 {
1165           do {
1166                     n = n->prev;
1167           } while (roff_node_transparent(n));
1168           return n;
1169 }
1170 
1171 struct roff_node *
roff_node_next(struct roff_node * n)1172 roff_node_next(struct roff_node *n)
1173 {
1174           do {
1175                     n = n->next;
1176           } while (roff_node_transparent(n));
1177           return n;
1178 }
1179 
1180 void
deroff(char ** dest,const struct roff_node * n)1181 deroff(char **dest, const struct roff_node *n)
1182 {
1183           char      *cp;
1184           size_t     sz;
1185 
1186           if (n->string == NULL) {
1187                     for (n = n->child; n != NULL; n = n->next)
1188                               deroff(dest, n);
1189                     return;
1190           }
1191 
1192           /* Skip leading whitespace. */
1193 
1194           for (cp = n->string; *cp != '\0'; cp++) {
1195                     if (cp[0] == '\\' && cp[1] != '\0' &&
1196                         strchr(" %&0^|~", cp[1]) != NULL)
1197                               cp++;
1198                     else if ( ! isspace((unsigned char)*cp))
1199                               break;
1200           }
1201 
1202           /* Skip trailing backslash. */
1203 
1204           sz = strlen(cp);
1205           if (sz > 0 && cp[sz - 1] == '\\')
1206                     sz--;
1207 
1208           /* Skip trailing whitespace. */
1209 
1210           for (; sz; sz--)
1211                     if ( ! isspace((unsigned char)cp[sz-1]))
1212                               break;
1213 
1214           /* Skip empty strings. */
1215 
1216           if (sz == 0)
1217                     return;
1218 
1219           if (*dest == NULL) {
1220                     *dest = mandoc_strndup(cp, sz);
1221                     return;
1222           }
1223 
1224           mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1225           free(*dest);
1226           *dest = cp;
1227 }
1228 
1229 /* --- main functions of the roff parser ---------------------------------- */
1230 
1231 /*
1232  * In the current line, expand escape sequences that produce parsable
1233  * input text.  Also check the syntax of the remaining escape sequences,
1234  * which typically produce output glyphs or change formatter state.
1235  */
1236 static int
roff_expand(struct roff * r,struct buf * buf,int ln,int pos,char newesc)1237 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
1238 {
1239           struct mctx         *ctx;     /* current macro call context */
1240           char                 ubuf[24]; /* buffer to print the number */
1241           struct roff_node *n;          /* used for header comments */
1242           const char          *start;   /* start of the string to process */
1243           char                *stesc;   /* start of an escape sequence ('\\') */
1244           const char          *esct;    /* type of esccape sequence */
1245           char                *ep;      /* end of comment string */
1246           const char          *stnam;   /* start of the name, after "[(*" */
1247           const char          *cp;      /* end of the name, e.g. before ']' */
1248           const char          *res;     /* the string to be substituted */
1249           char                *nbuf;    /* new buffer to copy buf->buf to */
1250           size_t               maxl;  /* expected length of the escape name */
1251           size_t               naml;    /* actual length of the escape name */
1252           size_t               asz;     /* length of the replacement */
1253           size_t               rsz;     /* length of the rest of the string */
1254           int                  inaml;   /* length returned from mandoc_escape() */
1255           int                  expand_count;      /* to avoid infinite loops */
1256           int                  npos;    /* position in numeric expression */
1257           int                  arg_complete; /* argument not interrupted by eol */
1258           int                  quote_args; /* true for \\$@, false for \\$* */
1259           int                  done;    /* no more input available */
1260           int                  deftype; /* type of definition to paste */
1261           int                  rcsid;   /* kind of RCS id seen */
1262           enum mandocerr       err;     /* for escape sequence problems */
1263           char                 sign;    /* increment number register */
1264           char                 term;    /* character terminating the escape */
1265 
1266           /* Search forward for comments. */
1267 
1268           done = 0;
1269           start = buf->buf + pos;
1270           for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1271                     if (stesc[0] != newesc || stesc[1] == '\0')
1272                               continue;
1273                     stesc++;
1274                     if (*stesc != '"' && *stesc != '#')
1275                               continue;
1276 
1277                     /* Comment found, look for RCS id. */
1278 
1279                     rcsid = 0;
1280                     if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1281                               rcsid = 1 << MANDOC_OS_OPENBSD;
1282                               cp += 8;
1283                     } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1284                               rcsid = 1 << MANDOC_OS_NETBSD;
1285                               cp += 7;
1286                     }
1287                     if (cp != NULL &&
1288                         isalnum((unsigned char)*cp) == 0 &&
1289                         strchr(cp, '$') != NULL) {
1290                               if (r->man->meta.rcsids & rcsid)
1291                                         mandoc_msg(MANDOCERR_RCS_REP, ln,
1292                                             (int)(stesc - buf->buf) + 1,
1293                                             "%s", stesc + 1);
1294                               r->man->meta.rcsids |= rcsid;
1295                     }
1296 
1297                     /* Handle trailing whitespace. */
1298 
1299                     ep = strchr(stesc--, '\0') - 1;
1300                     if (*ep == '\n') {
1301                               done = 1;
1302                               ep--;
1303                     }
1304                     if (*ep == ' ' || *ep == '\t')
1305                               mandoc_msg(MANDOCERR_SPACE_EOL,
1306                                   ln, (int)(ep - buf->buf), NULL);
1307 
1308                     /*
1309                      * Save comments preceding the title macro
1310                      * in the syntax tree.
1311                      */
1312 
1313                     if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
1314                               while (*ep == ' ' || *ep == '\t')
1315                                         ep--;
1316                               ep[1] = '\0';
1317                               n = roff_node_alloc(r->man,
1318                                   ln, stesc + 1 - buf->buf,
1319                                   ROFFT_COMMENT, TOKEN_NONE);
1320                               n->string = mandoc_strdup(stesc + 2);
1321                               roff_node_append(r->man, n);
1322                               n->flags |= NODE_VALID | NODE_ENDED;
1323                               r->man->next = ROFF_NEXT_SIBLING;
1324                     }
1325 
1326                     /* Line continuation with comment. */
1327 
1328                     if (stesc[1] == '#') {
1329                               *stesc = '\0';
1330                               return ROFF_IGN | ROFF_APPEND;
1331                     }
1332 
1333                     /* Discard normal comments. */
1334 
1335                     while (stesc > start && stesc[-1] == ' ' &&
1336                         (stesc == start + 1 || stesc[-2] != '\\'))
1337                               stesc--;
1338                     *stesc = '\0';
1339                     break;
1340           }
1341           if (stesc == start)
1342                     return ROFF_CONT;
1343           stesc--;
1344 
1345           /* Notice the end of the input. */
1346 
1347           if (*stesc == '\n') {
1348                     *stesc-- = '\0';
1349                     done = 1;
1350           }
1351 
1352           expand_count = 0;
1353           while (stesc >= start) {
1354                     if (*stesc != newesc) {
1355 
1356                               /*
1357                                * If we have a non-standard escape character,
1358                                * escape literal backslashes because all
1359                                * processing in subsequent functions uses
1360                                * the standard escaping rules.
1361                                */
1362 
1363                               if (newesc != ASCII_ESC && *stesc == '\\') {
1364                                         *stesc = '\0';
1365                                         buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1366                                             buf->buf, stesc + 1) + 1;
1367                                         start = nbuf + pos;
1368                                         stesc = nbuf + (stesc - buf->buf);
1369                                         free(buf->buf);
1370                                         buf->buf = nbuf;
1371                               }
1372 
1373                               /* Search backwards for the next escape. */
1374 
1375                               stesc--;
1376                               continue;
1377                     }
1378 
1379                     /* If it is escaped, skip it. */
1380 
1381                     for (cp = stesc - 1; cp >= start; cp--)
1382                               if (*cp != r->escape)
1383                                         break;
1384 
1385                     if ((stesc - cp) % 2 == 0) {
1386                               while (stesc > cp)
1387                                         *stesc-- = '\\';
1388                               continue;
1389                     } else if (stesc[1] != '\0') {
1390                               *stesc = '\\';
1391                     } else {
1392                               *stesc-- = '\0';
1393                               if (done)
1394                                         continue;
1395                               else
1396                                         return ROFF_IGN | ROFF_APPEND;
1397                     }
1398 
1399                     /* Decide whether to expand or to check only. */
1400 
1401                     term = '\0';
1402                     cp = stesc + 1;
1403                     if (*cp == 'E')
1404                               cp++;
1405                     esct = cp;
1406                     switch (*esct) {
1407                     case '*':
1408                     case '$':
1409                               res = NULL;
1410                               break;
1411                     case 'B':
1412                     case 'w':
1413                               term = cp[1];
1414                               /* FALLTHROUGH */
1415                     case 'n':
1416                               sign = cp[1];
1417                               if (sign == '+' || sign == '-')
1418                                         cp++;
1419                               res = ubuf;
1420                               break;
1421                     default:
1422                               err = MANDOCERR_OK;
1423                               switch(mandoc_escape(&cp, &stnam, &inaml)) {
1424                               case ESCAPE_SPECIAL:
1425                                         if (mchars_spec2cp(stnam, inaml) >= 0)
1426                                                   break;
1427                                         /* FALLTHROUGH */
1428                               case ESCAPE_ERROR:
1429                                         err = MANDOCERR_ESC_BAD;
1430                                         break;
1431                               case ESCAPE_UNDEF:
1432                                         err = MANDOCERR_ESC_UNDEF;
1433                                         break;
1434                               case ESCAPE_UNSUPP:
1435                                         err = MANDOCERR_ESC_UNSUPP;
1436                                         break;
1437                               default:
1438                                         break;
1439                               }
1440                               if (err != MANDOCERR_OK)
1441                                         mandoc_msg(err, ln, (int)(stesc - buf->buf),
1442                                             "%.*s", (int)(cp - stesc), stesc);
1443                               stesc--;
1444                               continue;
1445                     }
1446 
1447                     if (EXPAND_LIMIT < ++expand_count) {
1448                               mandoc_msg(MANDOCERR_ROFFLOOP,
1449                                   ln, (int)(stesc - buf->buf), NULL);
1450                               return ROFF_IGN;
1451                     }
1452 
1453                     /*
1454                      * The third character decides the length
1455                      * of the name of the string or register.
1456                      * Save a pointer to the name.
1457                      */
1458 
1459                     if (term == '\0') {
1460                               switch (*++cp) {
1461                               case '\0':
1462                                         maxl = 0;
1463                                         break;
1464                               case '(':
1465                                         cp++;
1466                                         maxl = 2;
1467                                         break;
1468                               case '[':
1469                                         cp++;
1470                                         term = ']';
1471                                         maxl = 0;
1472                                         break;
1473                               default:
1474                                         maxl = 1;
1475                                         break;
1476                               }
1477                     } else {
1478                               cp += 2;
1479                               maxl = 0;
1480                     }
1481                     stnam = cp;
1482 
1483                     /* Advance to the end of the name. */
1484 
1485                     naml = 0;
1486                     arg_complete = 1;
1487                     while (maxl == 0 || naml < maxl) {
1488                               if (*cp == '\0') {
1489                                         mandoc_msg(MANDOCERR_ESC_BAD, ln,
1490                                             (int)(stesc - buf->buf), "%s", stesc);
1491                                         arg_complete = 0;
1492                                         break;
1493                               }
1494                               if (maxl == 0 && *cp == term) {
1495                                         cp++;
1496                                         break;
1497                               }
1498                               if (*cp++ != '\\' || *esct != 'w') {
1499                                         naml++;
1500                                         continue;
1501                               }
1502                               switch (mandoc_escape(&cp, NULL, NULL)) {
1503                               case ESCAPE_SPECIAL:
1504                               case ESCAPE_UNICODE:
1505                               case ESCAPE_NUMBERED:
1506                               case ESCAPE_UNDEF:
1507                               case ESCAPE_OVERSTRIKE:
1508                                         naml++;
1509                                         break;
1510                               default:
1511                                         break;
1512                               }
1513                     }
1514 
1515                     /*
1516                      * Retrieve the replacement string; if it is
1517                      * undefined, resume searching for escapes.
1518                      */
1519 
1520                     switch (*esct) {
1521                     case '*':
1522                               if (arg_complete) {
1523                                         deftype = ROFFDEF_USER | ROFFDEF_PRE;
1524                                         res = roff_getstrn(r, stnam, naml, &deftype);
1525 
1526                                         /*
1527                                          * If not overridden, let \*(.T
1528                                          * through to the formatters.
1529                                          */
1530 
1531                                         if (res == NULL && naml == 2 &&
1532                                             stnam[0] == '.' && stnam[1] == 'T') {
1533                                                   roff_setstrn(&r->strtab,
1534                                                       ".T", 2, NULL, 0, 0);
1535                                                   stesc--;
1536                                                   continue;
1537                                         }
1538                               }
1539                               break;
1540                     case '$':
1541                               if (r->mstackpos < 0) {
1542                                         mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
1543                                             (int)(stesc - buf->buf), "%.3s", stesc);
1544                                         break;
1545                               }
1546                               ctx = r->mstack + r->mstackpos;
1547                               npos = esct[1] - '1';
1548                               if (npos >= 0 && npos <= 8) {
1549                                         res = npos < ctx->argc ?
1550                                             ctx->argv[npos] : "";
1551                                         break;
1552                               }
1553                               if (esct[1] == '*')
1554                                         quote_args = 0;
1555                               else if (esct[1] == '@')
1556                                         quote_args = 1;
1557                               else {
1558                                         mandoc_msg(MANDOCERR_ARG_NONUM, ln,
1559                                             (int)(stesc - buf->buf), "%.3s", stesc);
1560                                         break;
1561                               }
1562                               asz = 0;
1563                               for (npos = 0; npos < ctx->argc; npos++) {
1564                                         if (npos)
1565                                                   asz++;  /* blank */
1566                                         if (quote_args)
1567                                                   asz += 2;  /* quotes */
1568                                         asz += strlen(ctx->argv[npos]);
1569                               }
1570                               if (asz != 3) {
1571                                         rsz = buf->sz - (stesc - buf->buf) - 3;
1572                                         if (asz < 3)
1573                                                   memmove(stesc + asz, stesc + 3, rsz);
1574                                         buf->sz += asz - 3;
1575                                         nbuf = mandoc_realloc(buf->buf, buf->sz);
1576                                         start = nbuf + pos;
1577                                         stesc = nbuf + (stesc - buf->buf);
1578                                         buf->buf = nbuf;
1579                                         if (asz > 3)
1580                                                   memmove(stesc + asz, stesc + 3, rsz);
1581                               }
1582                               for (npos = 0; npos < ctx->argc; npos++) {
1583                                         if (npos)
1584                                                   *stesc++ = ' ';
1585                                         if (quote_args)
1586                                                   *stesc++ = '"';
1587                                         cp = ctx->argv[npos];
1588                                         while (*cp != '\0')
1589                                                   *stesc++ = *cp++;
1590                                         if (quote_args)
1591                                                   *stesc++ = '"';
1592                               }
1593                               continue;
1594                     case 'B':
1595                               npos = 0;
1596                               ubuf[0] = arg_complete &&
1597                                   roff_evalnum(r, ln, stnam, &npos,
1598                                     NULL, ROFFNUM_SCALE) &&
1599                                   stnam + npos + 1 == cp ? '1' : '0';
1600                               ubuf[1] = '\0';
1601                               break;
1602                     case 'n':
1603                               if (arg_complete)
1604                                         (void)snprintf(ubuf, sizeof(ubuf), "%d",
1605                                             roff_getregn(r, stnam, naml, sign));
1606                               else
1607                                         ubuf[0] = '\0';
1608                               break;
1609                     case 'w':
1610                               /* use even incomplete args */
1611                               (void)snprintf(ubuf, sizeof(ubuf), "%d",
1612                                   24 * (int)naml);
1613                               break;
1614                     }
1615 
1616                     if (res == NULL) {
1617                               if (*esct == '*')
1618                                         mandoc_msg(MANDOCERR_STR_UNDEF,
1619                                             ln, (int)(stesc - buf->buf),
1620                                             "%.*s", (int)naml, stnam);
1621                               res = "";
1622                     } else if (buf->sz + strlen(res) > SHRT_MAX) {
1623                               mandoc_msg(MANDOCERR_ROFFLOOP,
1624                                   ln, (int)(stesc - buf->buf), NULL);
1625                               return ROFF_IGN;
1626                     }
1627 
1628                     /* Replace the escape sequence by the string. */
1629 
1630                     *stesc = '\0';
1631                     buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1632                         buf->buf, res, cp) + 1;
1633 
1634                     /* Prepare for the next replacement. */
1635 
1636                     start = nbuf + pos;
1637                     stesc = nbuf + (stesc - buf->buf) + strlen(res);
1638                     free(buf->buf);
1639                     buf->buf = nbuf;
1640           }
1641           return ROFF_CONT;
1642 }
1643 
1644 /*
1645  * Parse a quoted or unquoted roff-style request or macro argument.
1646  * Return a pointer to the parsed argument, which is either the original
1647  * pointer or advanced by one byte in case the argument is quoted.
1648  * NUL-terminate the argument in place.
1649  * Collapse pairs of quotes inside quoted arguments.
1650  * Advance the argument pointer to the next argument,
1651  * or to the NUL byte terminating the argument line.
1652  */
1653 char *
roff_getarg(struct roff * r,char ** cpp,int ln,int * pos)1654 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1655 {
1656           struct buf           buf;
1657           char                *cp, *start;
1658           int                  newesc, pairs, quoted, white;
1659 
1660           /* Quoting can only start with a new word. */
1661           start = *cpp;
1662           quoted = 0;
1663           if ('"' == *start) {
1664                     quoted = 1;
1665                     start++;
1666           }
1667 
1668           newesc = pairs = white = 0;
1669           for (cp = start; '\0' != *cp; cp++) {
1670 
1671                     /*
1672                      * Move the following text left
1673                      * after quoted quotes and after "\\" and "\t".
1674                      */
1675                     if (pairs)
1676                               cp[-pairs] = cp[0];
1677 
1678                     if ('\\' == cp[0]) {
1679                               /*
1680                                * In copy mode, translate double to single
1681                                * backslashes and backslash-t to literal tabs.
1682                                */
1683                               switch (cp[1]) {
1684                               case 'a':
1685                               case 't':
1686                                         cp[-pairs] = '\t';
1687                                         pairs++;
1688                                         cp++;
1689                                         break;
1690                               case '\\':
1691                                         newesc = 1;
1692                                         cp[-pairs] = ASCII_ESC;
1693                                         pairs++;
1694                                         cp++;
1695                                         break;
1696                               case ' ':
1697                                         /* Skip escaped blanks. */
1698                                         if (0 == quoted)
1699                                                   cp++;
1700                                         break;
1701                               default:
1702                                         break;
1703                               }
1704                     } else if (0 == quoted) {
1705                               if (' ' == cp[0]) {
1706                                         /* Unescaped blanks end unquoted args. */
1707                                         white = 1;
1708                                         break;
1709                               }
1710                     } else if ('"' == cp[0]) {
1711                               if ('"' == cp[1]) {
1712                                         /* Quoted quotes collapse. */
1713                                         pairs++;
1714                                         cp++;
1715                               } else {
1716                                         /* Unquoted quotes end quoted args. */
1717                                         quoted = 2;
1718                                         break;
1719                               }
1720                     }
1721           }
1722 
1723           /* Quoted argument without a closing quote. */
1724           if (1 == quoted)
1725                     mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1726 
1727           /* NUL-terminate this argument and move to the next one. */
1728           if (pairs)
1729                     cp[-pairs] = '\0';
1730           if ('\0' != *cp) {
1731                     *cp++ = '\0';
1732                     while (' ' == *cp)
1733                               cp++;
1734           }
1735           *pos += (int)(cp - start) + (quoted ? 1 : 0);
1736           *cpp = cp;
1737 
1738           if ('\0' == *cp && (white || ' ' == cp[-1]))
1739                     mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1740 
1741           start = mandoc_strdup(start);
1742           if (newesc == 0)
1743                     return start;
1744 
1745           buf.buf = start;
1746           buf.sz = strlen(start) + 1;
1747           buf.next = NULL;
1748           if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
1749                     free(buf.buf);
1750                     buf.buf = mandoc_strdup("");
1751           }
1752           return buf.buf;
1753 }
1754 
1755 
1756 /*
1757  * Process text streams.
1758  */
1759 static int
roff_parsetext(struct roff * r,struct buf * buf,int pos,int * offs)1760 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1761 {
1762           size_t               sz;
1763           const char          *start;
1764           char                *p;
1765           int                  isz;
1766           enum mandoc_esc      esc;
1767 
1768           /* Spring the input line trap. */
1769 
1770           if (roffit_lines == 1) {
1771                     isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1772                     free(buf->buf);
1773                     buf->buf = p;
1774                     buf->sz = isz + 1;
1775                     *offs = 0;
1776                     free(roffit_macro);
1777                     roffit_lines = 0;
1778                     return ROFF_REPARSE;
1779           } else if (roffit_lines > 1)
1780                     --roffit_lines;
1781 
1782           if (roffce_node != NULL && buf->buf[pos] != '\0') {
1783                     if (roffce_lines < 1) {
1784                               r->man->last = roffce_node;
1785                               r->man->next = ROFF_NEXT_SIBLING;
1786                               roffce_lines = 0;
1787                               roffce_node = NULL;
1788                     } else
1789                               roffce_lines--;
1790           }
1791 
1792           /* Convert all breakable hyphens into ASCII_HYPH. */
1793 
1794           start = p = buf->buf + pos;
1795 
1796           while (*p != '\0') {
1797                     sz = strcspn(p, "-\\");
1798                     p += sz;
1799 
1800                     if (*p == '\0')
1801                               break;
1802 
1803                     if (*p == '\\') {
1804                               /* Skip over escapes. */
1805                               p++;
1806                               esc = mandoc_escape((const char **)&p, NULL, NULL);
1807                               if (esc == ESCAPE_ERROR)
1808                                         break;
1809                               while (*p == '-')
1810                                         p++;
1811                               continue;
1812                     } else if (p == start) {
1813                               p++;
1814                               continue;
1815                     }
1816 
1817                     if (isalpha((unsigned char)p[-1]) &&
1818                         isalpha((unsigned char)p[1]))
1819                               *p = ASCII_HYPH;
1820                     p++;
1821           }
1822           return ROFF_CONT;
1823 }
1824 
1825 int
roff_parseln(struct roff * r,int ln,struct buf * buf,int * offs,size_t len)1826 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
1827 {
1828           enum roff_tok        t;
1829           int                  e;
1830           int                  pos;     /* parse point */
1831           int                  spos;    /* saved parse point for messages */
1832           int                  ppos;    /* original offset in buf->buf */
1833           int                  ctl;     /* macro line (boolean) */
1834 
1835           ppos = pos = *offs;
1836 
1837           if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
1838               (r->man->flags & ROFF_NOFILL) == 0 &&
1839               strchr(" .\\", buf->buf[pos]) == NULL &&
1840               buf->buf[pos] != r->control &&
1841               strcspn(buf->buf, " ") < 80)
1842                     mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
1843                         "%.20s...", buf->buf + pos);
1844 
1845           /* Handle in-line equation delimiters. */
1846 
1847           if (r->tbl == NULL &&
1848               r->last_eqn != NULL && r->last_eqn->delim &&
1849               (r->eqn == NULL || r->eqn_inline)) {
1850                     e = roff_eqndelim(r, buf, pos);
1851                     if (e == ROFF_REPARSE)
1852                               return e;
1853                     assert(e == ROFF_CONT);
1854           }
1855 
1856           /* Expand some escape sequences. */
1857 
1858           e = roff_expand(r, buf, ln, pos, r->escape);
1859           if ((e & ROFF_MASK) == ROFF_IGN)
1860                     return e;
1861           assert(e == ROFF_CONT);
1862 
1863           ctl = roff_getcontrol(r, buf->buf, &pos);
1864 
1865           /*
1866            * First, if a scope is open and we're not a macro, pass the
1867            * text through the macro's filter.
1868            * Equations process all content themselves.
1869            * Tables process almost all content themselves, but we want
1870            * to warn about macros before passing it there.
1871            */
1872 
1873           if (r->last != NULL && ! ctl) {
1874                     t = r->last->tok;
1875                     e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1876                     if ((e & ROFF_MASK) == ROFF_IGN)
1877                               return e;
1878                     e &= ~ROFF_MASK;
1879           } else
1880                     e = ROFF_IGN;
1881           if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1882                     eqn_read(r->eqn, buf->buf + ppos);
1883                     return e;
1884           }
1885           if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1886                     tbl_read(r->tbl, ln, buf->buf, ppos);
1887                     roff_addtbl(r->man, ln, r->tbl);
1888                     return e;
1889           }
1890           if ( ! ctl) {
1891                     r->options &= ~MPARSE_COMMENT;
1892                     return roff_parsetext(r, buf, pos, offs) | e;
1893           }
1894 
1895           /* Skip empty request lines. */
1896 
1897           if (buf->buf[pos] == '"') {
1898                     mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1899                     return ROFF_IGN;
1900           } else if (buf->buf[pos] == '\0')
1901                     return ROFF_IGN;
1902 
1903           /*
1904            * If a scope is open, go to the child handler for that macro,
1905            * as it may want to preprocess before doing anything with it.
1906            * Don't do so if an equation is open.
1907            */
1908 
1909           if (r->last) {
1910                     t = r->last->tok;
1911                     return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1912           }
1913 
1914           /* No scope is open.  This is a new request or macro. */
1915 
1916           r->options &= ~MPARSE_COMMENT;
1917           spos = pos;
1918           t = roff_parse(r, buf->buf, &pos, ln, ppos);
1919 
1920           /* Tables ignore most macros. */
1921 
1922           if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1923               t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1924                     mandoc_msg(MANDOCERR_TBLMACRO,
1925                         ln, pos, "%s", buf->buf + spos);
1926                     if (t != TOKEN_NONE)
1927                               return ROFF_IGN;
1928                     while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1929                               pos++;
1930                     while (buf->buf[pos] == ' ')
1931                               pos++;
1932                     tbl_read(r->tbl, ln, buf->buf, pos);
1933                     roff_addtbl(r->man, ln, r->tbl);
1934                     return ROFF_IGN;
1935           }
1936 
1937           /* For now, let high level macros abort .ce mode. */
1938 
1939           if (ctl && roffce_node != NULL &&
1940               (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1941                t == ROFF_TH || t == ROFF_TS)) {
1942                     r->man->last = roffce_node;
1943                     r->man->next = ROFF_NEXT_SIBLING;
1944                     roffce_lines = 0;
1945                     roffce_node = NULL;
1946           }
1947 
1948           /*
1949            * This is neither a roff request nor a user-defined macro.
1950            * Let the standard macro set parsers handle it.
1951            */
1952 
1953           if (t == TOKEN_NONE)
1954                     return ROFF_CONT;
1955 
1956           /* Execute a roff request or a user defined macro. */
1957 
1958           return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1959 }
1960 
1961 /*
1962  * Internal interface function to tell the roff parser that execution
1963  * of the current macro ended.  This is required because macro
1964  * definitions usually do not end with a .return request.
1965  */
1966 void
roff_userret(struct roff * r)1967 roff_userret(struct roff *r)
1968 {
1969           struct mctx         *ctx;
1970           int                  i;
1971 
1972           assert(r->mstackpos >= 0);
1973           ctx = r->mstack + r->mstackpos;
1974           for (i = 0; i < ctx->argc; i++)
1975                     free(ctx->argv[i]);
1976           ctx->argc = 0;
1977           r->mstackpos--;
1978 }
1979 
1980 void
roff_endparse(struct roff * r)1981 roff_endparse(struct roff *r)
1982 {
1983           if (r->last != NULL)
1984                     mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1985                         r->last->col, "%s", roff_name[r->last->tok]);
1986 
1987           if (r->eqn != NULL) {
1988                     mandoc_msg(MANDOCERR_BLK_NOEND,
1989                         r->eqn->node->line, r->eqn->node->pos, "EQ");
1990                     eqn_parse(r->eqn);
1991                     r->eqn = NULL;
1992           }
1993 
1994           if (r->tbl != NULL) {
1995                     tbl_end(r->tbl, 1);
1996                     r->tbl = NULL;
1997           }
1998 }
1999 
2000 /*
2001  * Parse a roff node's type from the input buffer.  This must be in the
2002  * form of ".foo xxx" in the usual way.
2003  */
2004 static enum roff_tok
roff_parse(struct roff * r,char * buf,int * pos,int ln,int ppos)2005 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
2006 {
2007           char                *cp;
2008           const char          *mac;
2009           size_t               maclen;
2010           int                  deftype;
2011           enum roff_tok        t;
2012 
2013           cp = buf + *pos;
2014 
2015           if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2016                     return TOKEN_NONE;
2017 
2018           mac = cp;
2019           maclen = roff_getname(r, &cp, ln, ppos);
2020 
2021           deftype = ROFFDEF_USER | ROFFDEF_REN;
2022           r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2023           switch (deftype) {
2024           case ROFFDEF_USER:
2025                     t = ROFF_USERDEF;
2026                     break;
2027           case ROFFDEF_REN:
2028                     t = ROFF_RENAMED;
2029                     break;
2030           default:
2031                     t = roffhash_find(r->reqtab, mac, maclen);
2032                     break;
2033           }
2034           if (t != TOKEN_NONE)
2035                     *pos = cp - buf;
2036           else if (deftype == ROFFDEF_UNDEF) {
2037                     /* Using an undefined macro defines it to be empty. */
2038                     roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2039                     roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2040           }
2041           return t;
2042 }
2043 
2044 /* --- handling of request blocks ----------------------------------------- */
2045 
2046 /*
2047  * Close a macro definition block or an "ignore" block.
2048  */
2049 static int
roff_cblock(ROFF_ARGS)2050 roff_cblock(ROFF_ARGS)
2051 {
2052           int        rr;
2053 
2054           if (r->last == NULL) {
2055                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2056                     return ROFF_IGN;
2057           }
2058 
2059           switch (r->last->tok) {
2060           case ROFF_am:
2061           case ROFF_ami:
2062           case ROFF_de:
2063           case ROFF_dei:
2064           case ROFF_ig:
2065                     break;
2066           case ROFF_am1:
2067           case ROFF_de1:
2068                     /* Remapped in roff_block(). */
2069                     abort();
2070           default:
2071                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2072                     return ROFF_IGN;
2073           }
2074 
2075           roffnode_pop(r);
2076           roffnode_cleanscope(r);
2077 
2078           /*
2079            * If a conditional block with braces is still open,
2080            * check for "\}" block end markers.
2081            */
2082 
2083           if (r->last != NULL && r->last->endspan < 0) {
2084                     rr = 1;  /* If arguments follow "\}", warn about them. */
2085                     roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2086           }
2087 
2088           if (buf->buf[pos] != '\0')
2089                     mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2090                         ".. %s", buf->buf + pos);
2091 
2092           return ROFF_IGN;
2093 }
2094 
2095 /*
2096  * Pop all nodes ending at the end of the current input line.
2097  * Return the number of loops ended.
2098  */
2099 static int
roffnode_cleanscope(struct roff * r)2100 roffnode_cleanscope(struct roff *r)
2101 {
2102           int inloop;
2103 
2104           inloop = 0;
2105           while (r->last != NULL && r->last->endspan > 0) {
2106                     if (--r->last->endspan != 0)
2107                               break;
2108                     inloop += roffnode_pop(r);
2109           }
2110           return inloop;
2111 }
2112 
2113 /*
2114  * Handle the closing "\}" of a conditional block.
2115  * Apart from generating warnings, this only pops nodes.
2116  * Return the number of loops ended.
2117  */
2118 static int
roff_ccond(struct roff * r,int ln,int ppos)2119 roff_ccond(struct roff *r, int ln, int ppos)
2120 {
2121           if (NULL == r->last) {
2122                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2123                     return 0;
2124           }
2125 
2126           switch (r->last->tok) {
2127           case ROFF_el:
2128           case ROFF_ie:
2129           case ROFF_if:
2130           case ROFF_while:
2131                     break;
2132           default:
2133                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2134                     return 0;
2135           }
2136 
2137           if (r->last->endspan > -1) {
2138                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2139                     return 0;
2140           }
2141 
2142           return roffnode_pop(r) + roffnode_cleanscope(r);
2143 }
2144 
2145 static int
roff_block(ROFF_ARGS)2146 roff_block(ROFF_ARGS)
2147 {
2148           const char          *name, *value;
2149           char                *call, *cp, *iname, *rname;
2150           size_t               csz, namesz, rsz;
2151           int                  deftype;
2152 
2153           /* Ignore groff compatibility mode for now. */
2154 
2155           if (tok == ROFF_de1)
2156                     tok = ROFF_de;
2157           else if (tok == ROFF_dei1)
2158                     tok = ROFF_dei;
2159           else if (tok == ROFF_am1)
2160                     tok = ROFF_am;
2161           else if (tok == ROFF_ami1)
2162                     tok = ROFF_ami;
2163 
2164           /* Parse the macro name argument. */
2165 
2166           cp = buf->buf + pos;
2167           if (tok == ROFF_ig) {
2168                     iname = NULL;
2169                     namesz = 0;
2170           } else {
2171                     iname = cp;
2172                     namesz = roff_getname(r, &cp, ln, ppos);
2173                     iname[namesz] = '\0';
2174           }
2175 
2176           /* Resolve the macro name argument if it is indirect. */
2177 
2178           if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2179                     deftype = ROFFDEF_USER;
2180                     name = roff_getstrn(r, iname, namesz, &deftype);
2181                     if (name == NULL) {
2182                               mandoc_msg(MANDOCERR_STR_UNDEF,
2183                                   ln, (int)(iname - buf->buf),
2184                                   "%.*s", (int)namesz, iname);
2185                               namesz = 0;
2186                     } else
2187                               namesz = strlen(name);
2188           } else
2189                     name = iname;
2190 
2191           if (namesz == 0 && tok != ROFF_ig) {
2192                     mandoc_msg(MANDOCERR_REQ_EMPTY,
2193                         ln, ppos, "%s", roff_name[tok]);
2194                     return ROFF_IGN;
2195           }
2196 
2197           roffnode_push(r, tok, name, ln, ppos);
2198 
2199           /*
2200            * At the beginning of a `de' macro, clear the existing string
2201            * with the same name, if there is one.  New content will be
2202            * appended from roff_block_text() in multiline mode.
2203            */
2204 
2205           if (tok == ROFF_de || tok == ROFF_dei) {
2206                     roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2207                     roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2208           } else if (tok == ROFF_am || tok == ROFF_ami) {
2209                     deftype = ROFFDEF_ANY;
2210                     value = roff_getstrn(r, iname, namesz, &deftype);
2211                     switch (deftype) {  /* Before appending, ... */
2212                     case ROFFDEF_PRE: /* copy predefined to user-defined. */
2213                               roff_setstrn(&r->strtab, name, namesz,
2214                                   value, strlen(value), 0);
2215                               break;
2216                     case ROFFDEF_REN: /* call original standard macro. */
2217                               csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2218                                   (int)strlen(value), value);
2219                               roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2220                               roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2221                               free(call);
2222                               break;
2223                     case ROFFDEF_STD:  /* rename and call standard macro. */
2224                               rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2225                               roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2226                               csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2227                                   (int)rsz, rname);
2228                               roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2229                               free(call);
2230                               free(rname);
2231                               break;
2232                     default:
2233                               break;
2234                     }
2235           }
2236 
2237           if (*cp == '\0')
2238                     return ROFF_IGN;
2239 
2240           /* Get the custom end marker. */
2241 
2242           iname = cp;
2243           namesz = roff_getname(r, &cp, ln, ppos);
2244 
2245           /* Resolve the end marker if it is indirect. */
2246 
2247           if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2248                     deftype = ROFFDEF_USER;
2249                     name = roff_getstrn(r, iname, namesz, &deftype);
2250                     if (name == NULL) {
2251                               mandoc_msg(MANDOCERR_STR_UNDEF,
2252                                   ln, (int)(iname - buf->buf),
2253                                   "%.*s", (int)namesz, iname);
2254                               namesz = 0;
2255                     } else
2256                               namesz = strlen(name);
2257           } else
2258                     name = iname;
2259 
2260           if (namesz)
2261                     r->last->end = mandoc_strndup(name, namesz);
2262 
2263           if (*cp != '\0')
2264                     mandoc_msg(MANDOCERR_ARG_EXCESS,
2265                         ln, pos, ".%s ... %s", roff_name[tok], cp);
2266 
2267           return ROFF_IGN;
2268 }
2269 
2270 static int
roff_block_sub(ROFF_ARGS)2271 roff_block_sub(ROFF_ARGS)
2272 {
2273           enum roff_tok       t;
2274           int                 i, j;
2275 
2276           /*
2277            * First check whether a custom macro exists at this level.  If
2278            * it does, then check against it.  This is some of groff's
2279            * stranger behaviours.  If we encountered a custom end-scope
2280            * tag and that tag also happens to be a "real" macro, then we
2281            * need to try interpreting it again as a real macro.  If it's
2282            * not, then return ignore.  Else continue.
2283            */
2284 
2285           if (r->last->end) {
2286                     for (i = pos, j = 0; r->last->end[j]; j++, i++)
2287                               if (buf->buf[i] != r->last->end[j])
2288                                         break;
2289 
2290                     if (r->last->end[j] == '\0' &&
2291                         (buf->buf[i] == '\0' ||
2292                          buf->buf[i] == ' ' ||
2293                          buf->buf[i] == '\t')) {
2294                               roffnode_pop(r);
2295                               roffnode_cleanscope(r);
2296 
2297                               while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2298                                         i++;
2299 
2300                               pos = i;
2301                               if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2302                                   TOKEN_NONE)
2303                                         return ROFF_RERUN;
2304                               return ROFF_IGN;
2305                     }
2306           }
2307 
2308           /*
2309            * If we have no custom end-query or lookup failed, then try
2310            * pulling it out of the hashtable.
2311            */
2312 
2313           t = roff_parse(r, buf->buf, &pos, ln, ppos);
2314 
2315           if (t != ROFF_cblock) {
2316                     if (tok != ROFF_ig)
2317                               roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2318                     return ROFF_IGN;
2319           }
2320 
2321           return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2322 }
2323 
2324 static int
roff_block_text(ROFF_ARGS)2325 roff_block_text(ROFF_ARGS)
2326 {
2327 
2328           if (tok != ROFF_ig)
2329                     roff_setstr(r, r->last->name, buf->buf + pos, 2);
2330 
2331           return ROFF_IGN;
2332 }
2333 
2334 /*
2335  * Check for a closing "\}" and handle it.
2336  * In this function, the final "int *offs" argument is used for
2337  * different purposes than elsewhere:
2338  * Input: *offs == 0: caller wants to discard arguments following \}
2339  *        *offs == 1: caller wants to preserve text following \}
2340  * Output: *offs = 0: tell caller to discard input line
2341  *         *offs = 1: tell caller to use input line
2342  */
2343 static int
roff_cond_checkend(ROFF_ARGS)2344 roff_cond_checkend(ROFF_ARGS)
2345 {
2346           char                *ep;
2347           int                  endloop, irc, rr;
2348 
2349           irc = ROFF_IGN;
2350           rr = r->last->rule;
2351           endloop = tok != ROFF_while ? ROFF_IGN :
2352               rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2353           if (roffnode_cleanscope(r))
2354                     irc |= endloop;
2355 
2356           /*
2357            * If "\}" occurs on a macro line without a preceding macro or
2358            * a text line contains nothing else, drop the line completely.
2359            */
2360 
2361           ep = buf->buf + pos;
2362           if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
2363                     rr = 0;
2364 
2365           /*
2366            * The closing delimiter "\}" rewinds the conditional scope
2367            * but is otherwise ignored when interpreting the line.
2368            */
2369 
2370           while ((ep = strchr(ep, '\\')) != NULL) {
2371                     switch (ep[1]) {
2372                     case '}':
2373                               if (ep[2] == '\0')
2374                                         ep[0] = '\0';
2375                               else if (rr)
2376                                         ep[1] = '&';
2377                               else
2378                                         memmove(ep, ep + 2, strlen(ep + 2) + 1);
2379                               if (roff_ccond(r, ln, ep - buf->buf))
2380                                         irc |= endloop;
2381                               break;
2382                     case '\0':
2383                               ++ep;
2384                               break;
2385                     default:
2386                               ep += 2;
2387                               break;
2388                     }
2389           }
2390           *offs = rr;
2391           return irc;
2392 }
2393 
2394 /*
2395  * Parse and process a request or macro line in conditional scope.
2396  */
2397 static int
roff_cond_sub(ROFF_ARGS)2398 roff_cond_sub(ROFF_ARGS)
2399 {
2400           struct roffnode     *bl;
2401           int                  irc, rr;
2402           enum roff_tok        t;
2403 
2404           rr = 0;  /* If arguments follow "\}", skip them. */
2405           irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2406           t = roff_parse(r, buf->buf, &pos, ln, ppos);
2407 
2408           /* For now, let high level macros abort .ce mode. */
2409 
2410           if (roffce_node != NULL &&
2411               (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
2412              t == ROFF_TH || t == ROFF_TS)) {
2413                     r->man->last = roffce_node;
2414                     r->man->next = ROFF_NEXT_SIBLING;
2415                     roffce_lines = 0;
2416                     roffce_node = NULL;
2417           }
2418 
2419           /*
2420            * Fully handle known macros when they are structurally
2421            * required or when the conditional evaluated to true.
2422            */
2423 
2424           if (t == ROFF_break) {
2425                     if (irc & ROFF_LOOPMASK)
2426                               irc = ROFF_IGN | ROFF_LOOPEXIT;
2427                     else if (rr) {
2428                               for (bl = r->last; bl != NULL; bl = bl->parent) {
2429                                         bl->rule = 0;
2430                                         if (bl->tok == ROFF_while)
2431                                                   break;
2432                               }
2433                     }
2434           } else if (t != TOKEN_NONE &&
2435               (rr || roffs[t].flags & ROFFMAC_STRUCT))
2436                     irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2437           else
2438                     irc |= rr ? ROFF_CONT : ROFF_IGN;
2439           return irc;
2440 }
2441 
2442 /*
2443  * Parse and process a text line in conditional scope.
2444  */
2445 static int
roff_cond_text(ROFF_ARGS)2446 roff_cond_text(ROFF_ARGS)
2447 {
2448           int        irc, rr;
2449 
2450           rr = 1;  /* If arguments follow "\}", preserve them. */
2451           irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2452           if (rr)
2453                     irc |= ROFF_CONT;
2454           return irc;
2455 }
2456 
2457 /* --- handling of numeric and conditional expressions -------------------- */
2458 
/*
 * Parse a single signed integer number.  Stop at the first non-digit.
 * An optional sign may precede the digits; with ROFFNUM_WHITE,
 * whitespace between the sign and the digits is skipped.
 * If there is at least one digit, return 1 and advance the parse
 * point *pos, else return 0 and leave the parse point unchanged.
 * In particular, a sign that is not followed by a digit
 * is not a number.
 * The result is stored in *res unless res is NULL.
 * Ignore overflows, treat them just like the C language.
 *
 * One scaling unit directly following the digits is consumed
 * in any case, but only applied to the result with ROFFNUM_SCALE.
 */
static int
roff_getnum(const char *v, int *pos, int *res, int flags)
{
	int	 myres, scaled, n, p, digits;

	if (NULL == res)
		res = &myres;

	p = *pos;
	n = v[p] == '-';
	if (n || v[p] == '+')
		p++;

	if (flags & ROFFNUM_WHITE)
		while (isspace((unsigned char)v[p]))
			p++;

	/*
	 * Remember where the digits are supposed to start.
	 * Comparing with the original parse point is not good
	 * enough because a plus sign or whitespace may already
	 * have been skipped.
	 */

	digits = p;
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';
	if (p == digits)
		return 0;

	if (n)
		*res = -*res;

	/* Each number may be followed by one optional scaling unit. */

	switch (v[p]) {
	case 'f':
		scaled = *res * 65536;
		break;
	case 'i':
		scaled = *res * 240;
		break;
	case 'c':
		scaled = *res * 240 / 2.54;
		break;
	case 'v':
	case 'P':
		scaled = *res * 40;
		break;
	case 'm':
	case 'n':
		scaled = *res * 24;
		break;
	case 'p':
		scaled = *res * 10 / 3;
		break;
	case 'u':
		scaled = *res;
		break;
	case 'M':
		scaled = *res * 6 / 25;
		break;
	default:
		/* Not a scaling unit: do not consume the character. */
		scaled = *res;
		p--;
		break;
	}
	if (flags & ROFFNUM_SCALE)
		*res = scaled;

	*pos = p + 1;
	return 1;
}
2530 
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * At the end of the input, there is no delimiter:
 * fail without moving the cursor.
 * Return 1 on a match and 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/* Never look beyond the terminating NUL byte. */

	if (*s1 == '\0')
		return 0;

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (NULL == s3)		/* found no middle delimiter */
		goto out;

	while ('\0' != *++s3) {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (NULL == s3)
		s3 = strchr(s2, '\0');	/* no delimiter: go to the end */
	else if (*s3 != '\0')
		s3++;			/* step past the final delimiter */
	*pos = s3 - v;
	return match;
}
2573 
2574 /*
2575  * Evaluate an optionally negated single character, numerical,
2576  * or string condition.
2577  */
2578 static int
roff_evalcond(struct roff * r,int ln,char * v,int * pos)2579 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2580 {
2581           const char          *start, *end;
2582           char                *cp, *name;
2583           size_t               sz;
2584           int                  deftype, len, number, savepos, istrue, wanttrue;
2585 
2586           if ('!' == v[*pos]) {
2587                     wanttrue = 0;
2588                     (*pos)++;
2589           } else
2590                     wanttrue = 1;
2591 
2592           switch (v[*pos]) {
2593           case '\0':
2594                     return 0;
2595           case 'n':
2596           case 'o':
2597                     (*pos)++;
2598                     return wanttrue;
2599           case 'e':
2600           case 't':
2601           case 'v':
2602                     (*pos)++;
2603                     return !wanttrue;
2604           case 'c':
2605                     do {
2606                               (*pos)++;
2607                     } while (v[*pos] == ' ');
2608 
2609                     /*
2610                      * Quirk for groff compatibility:
2611                      * The horizontal tab is neither available nor unavailable.
2612                      */
2613 
2614                     if (v[*pos] == '\t') {
2615                               (*pos)++;
2616                               return 0;
2617                     }
2618 
2619                     /* Printable ASCII characters are available. */
2620 
2621                     if (v[*pos] != '\\') {
2622                               (*pos)++;
2623                               return wanttrue;
2624                     }
2625 
2626                     end = v + ++*pos;
2627                     switch (mandoc_escape(&end, &start, &len)) {
2628                     case ESCAPE_SPECIAL:
2629                               istrue = mchars_spec2cp(start, len) != -1;
2630                               break;
2631                     case ESCAPE_UNICODE:
2632                               istrue = 1;
2633                               break;
2634                     case ESCAPE_NUMBERED:
2635                               istrue = mchars_num2char(start, len) != -1;
2636                               break;
2637                     default:
2638                               istrue = !wanttrue;
2639                               break;
2640                     }
2641                     *pos = end - v;
2642                     return istrue == wanttrue;
2643           case 'd':
2644           case 'r':
2645                     cp = v + *pos + 1;
2646                     while (*cp == ' ')
2647                               cp++;
2648                     name = cp;
2649                     sz = roff_getname(r, &cp, ln, cp - v);
2650                     if (sz == 0)
2651                               istrue = 0;
2652                     else if (v[*pos] == 'r')
2653                               istrue = roff_hasregn(r, name, sz);
2654                     else {
2655                               deftype = ROFFDEF_ANY;
2656                             roff_getstrn(r, name, sz, &deftype);
2657                               istrue = !!deftype;
2658                     }
2659                     *pos = (name + sz) - v;
2660                     return istrue == wanttrue;
2661           default:
2662                     break;
2663           }
2664 
2665           savepos = *pos;
2666           if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2667                     return (number > 0) == wanttrue;
2668           else if (*pos == savepos)
2669                     return roff_evalstrcond(v, pos) == wanttrue;
2670           else
2671                     return 0;
2672 }
2673 
2674 static int
roff_line_ignore(ROFF_ARGS)2675 roff_line_ignore(ROFF_ARGS)
2676 {
2677 
2678           return ROFF_IGN;
2679 }
2680 
2681 static int
roff_insec(ROFF_ARGS)2682 roff_insec(ROFF_ARGS)
2683 {
2684 
2685           mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2686           return ROFF_IGN;
2687 }
2688 
2689 static int
roff_unsupp(ROFF_ARGS)2690 roff_unsupp(ROFF_ARGS)
2691 {
2692 
2693           mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2694           return ROFF_IGN;
2695 }
2696 
2697 static int
roff_cond(ROFF_ARGS)2698 roff_cond(ROFF_ARGS)
2699 {
2700           int        irc;
2701 
2702           roffnode_push(r, tok, NULL, ln, ppos);
2703 
2704           /*
2705            * An `.el' has no conditional body: it will consume the value
2706            * of the current rstack entry set in prior `ie' calls or
2707            * defaults to DENY.
2708            *
2709            * If we're not an `el', however, then evaluate the conditional.
2710            */
2711 
2712           r->last->rule = tok == ROFF_el ?
2713               (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2714               roff_evalcond(r, ln, buf->buf, &pos);
2715 
2716           /*
2717            * An if-else will put the NEGATION of the current evaluated
2718            * conditional into the stack of rules.
2719            */
2720 
2721           if (tok == ROFF_ie) {
2722                     if (r->rstackpos + 1 == r->rstacksz) {
2723                               r->rstacksz += 16;
2724                               r->rstack = mandoc_reallocarray(r->rstack,
2725                                   r->rstacksz, sizeof(int));
2726                     }
2727                     r->rstack[++r->rstackpos] = !r->last->rule;
2728           }
2729 
2730           /* If the parent has false as its rule, then so do we. */
2731 
2732           if (r->last->parent && !r->last->parent->rule)
2733                     r->last->rule = 0;
2734 
2735           /*
2736            * Determine scope.
2737            * If there is nothing on the line after the conditional,
2738            * not even whitespace, use next-line scope.
2739            * Except that .while does not support next-line scope.
2740            */
2741 
2742           if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2743                     r->last->endspan = 2;
2744                     goto out;
2745           }
2746 
2747           while (buf->buf[pos] == ' ')
2748                     pos++;
2749 
2750           /* An opening brace requests multiline scope. */
2751 
2752           if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2753                     r->last->endspan = -1;
2754                     pos += 2;
2755                     while (buf->buf[pos] == ' ')
2756                               pos++;
2757                     goto out;
2758           }
2759 
2760           /*
2761            * Anything else following the conditional causes
2762            * single-line scope.  Warn if the scope contains
2763            * nothing but trailing whitespace.
2764            */
2765 
2766           if (buf->buf[pos] == '\0')
2767                     mandoc_msg(MANDOCERR_COND_EMPTY,
2768                         ln, ppos, "%s", roff_name[tok]);
2769 
2770           r->last->endspan = 1;
2771 
2772 out:
2773           *offs = pos;
2774           irc = ROFF_RERUN;
2775           if (tok == ROFF_while)
2776                     irc |= ROFF_WHILE;
2777           return irc;
2778 }
2779 
2780 static int
roff_ds(ROFF_ARGS)2781 roff_ds(ROFF_ARGS)
2782 {
2783           char                *string;
2784           const char          *name;
2785           size_t               namesz;
2786 
2787           /* Ignore groff compatibility mode for now. */
2788 
2789           if (tok == ROFF_ds1)
2790                     tok = ROFF_ds;
2791           else if (tok == ROFF_as1)
2792                     tok = ROFF_as;
2793 
2794           /*
2795            * The first word is the name of the string.
2796            * If it is empty or terminated by an escape sequence,
2797            * abort the `ds' request without defining anything.
2798            */
2799 
2800           name = string = buf->buf + pos;
2801           if (*name == '\0')
2802                     return ROFF_IGN;
2803 
2804           namesz = roff_getname(r, &string, ln, pos);
2805           switch (name[namesz]) {
2806           case '\\':
2807                     return ROFF_IGN;
2808           case '\t':
2809                     string = buf->buf + pos + namesz;
2810                     break;
2811           default:
2812                     break;
2813           }
2814 
2815           /* Read past the initial double-quote, if any. */
2816           if (*string == '"')
2817                     string++;
2818 
2819           /* The rest is the value. */
2820           roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2821               ROFF_as == tok);
2822           roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2823           return ROFF_IGN;
2824 }
2825 
/*
 * Parse a single operator, one or two characters long,
 * starting at v[*pos].
 * If the operator is recognized, store its one-character code
 * in *res, advance the parse point past it, and return the code.
 * Two-character operators are encoded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g',
 * ">?" as 'a', and "==" as '='.
 * Otherwise, return 0 and leave the parse point unchanged.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 oplen;

	cp = v + *pos;
	*res = cp[0];
	oplen = 1;

	switch (cp[0]) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		if (cp[1] == '=') {
			*res = 'l';
			oplen = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			oplen = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			oplen = 2;
		}
		break;
	case '>':
		if (cp[1] == '=') {
			*res = 'g';
			oplen = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			oplen = 2;
		}
		break;
	case '=':
		if (cp[1] == '=')
			oplen = 2;
		break;
	default:
		return 0;
	}

	*pos += oplen;
	return *res;
}
2889 
2890 /*
2891  * Evaluate either a parenthesized numeric expression
2892  * or a single signed integer number.
2893  */
2894 static int
roff_evalpar(struct roff * r,int ln,const char * v,int * pos,int * res,int flags)2895 roff_evalpar(struct roff *r, int ln,
2896           const char *v, int *pos, int *res, int flags)
2897 {
2898 
2899           if ('(' != v[*pos])
2900                     return roff_getnum(v, pos, res, flags);
2901 
2902           (*pos)++;
2903           if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2904                     return 0;
2905 
2906           /*
2907            * Omission of the closing parenthesis
2908            * is an error in validation mode,
2909            * but ignored in evaluation mode.
2910            */
2911 
2912           if (')' == v[*pos])
2913                     (*pos)++;
2914           else if (NULL == res)
2915                     return 0;
2916 
2917           return 1;
2918 }
2919 
2920 /*
2921  * Evaluate a complete numeric expression.
2922  * Proceed left to right, there is no concept of precedence.
2923  */
2924 static int
roff_evalnum(struct roff * r,int ln,const char * v,int * pos,int * res,int flags)2925 roff_evalnum(struct roff *r, int ln, const char *v,
2926           int *pos, int *res, int flags)
2927 {
2928           int                  mypos, operand2;
2929           char                 operator;
2930 
2931           if (NULL == pos) {
2932                     mypos = 0;
2933                     pos = &mypos;
2934           }
2935 
2936           if (flags & ROFFNUM_WHITE)
2937                     while (isspace((unsigned char)v[*pos]))
2938                               (*pos)++;
2939 
2940           if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2941                     return 0;
2942 
2943           while (1) {
2944                     if (flags & ROFFNUM_WHITE)
2945                               while (isspace((unsigned char)v[*pos]))
2946                                         (*pos)++;
2947 
2948                     if ( ! roff_getop(v, pos, &operator))
2949                               break;
2950 
2951                     if (flags & ROFFNUM_WHITE)
2952                               while (isspace((unsigned char)v[*pos]))
2953                                         (*pos)++;
2954 
2955                     if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2956                               return 0;
2957 
2958                     if (flags & ROFFNUM_WHITE)
2959                               while (isspace((unsigned char)v[*pos]))
2960                                         (*pos)++;
2961 
2962                     if (NULL == res)
2963                               continue;
2964 
2965                     switch (operator) {
2966                     case '+':
2967                               *res += operand2;
2968                               break;
2969                     case '-':
2970                               *res -= operand2;
2971                               break;
2972                     case '*':
2973                               *res *= operand2;
2974                               break;
2975                     case '/':
2976                               if (operand2 == 0) {
2977                                         mandoc_msg(MANDOCERR_DIVZERO,
2978                                                   ln, *pos, "%s", v);
2979                                         *res = 0;
2980                                         break;
2981                               }
2982                               *res /= operand2;
2983                               break;
2984                     case '%':
2985                               if (operand2 == 0) {
2986                                         mandoc_msg(MANDOCERR_DIVZERO,
2987                                                   ln, *pos, "%s", v);
2988                                         *res = 0;
2989                                         break;
2990                               }
2991                               *res %= operand2;
2992                               break;
2993                     case '<':
2994                               *res = *res < operand2;
2995                               break;
2996                     case '>':
2997                               *res = *res > operand2;
2998                               break;
2999                     case 'l':
3000                               *res = *res <= operand2;
3001                               break;
3002                     case 'g':
3003                               *res = *res >= operand2;
3004                               break;
3005                     case '=':
3006                               *res = *res == operand2;
3007                               break;
3008                     case '!':
3009                               *res = *res != operand2;
3010                               break;
3011                     case '&':
3012                               *res = *res && operand2;
3013                               break;
3014                     case ':':
3015                               *res = *res || operand2;
3016                               break;
3017                     case 'i':
3018                               if (operand2 < *res)
3019                                         *res = operand2;
3020                               break;
3021                     case 'a':
3022                               if (operand2 > *res)
3023                                         *res = operand2;
3024                               break;
3025                     default:
3026                               abort();
3027                     }
3028           }
3029           return 1;
3030 }
3031 
3032 /* --- register management ------------------------------------------------ */
3033 
/*
 * Set the number register called name, a NUL-terminated string.
 * The value and sign are handed to roff_setregn() together with
 * a step of INT_MIN.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	const size_t	 namesz = strlen(name);

	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3039 
3040 static void
roff_setregn(struct roff * r,const char * name,size_t len,int val,char sign,int step)3041 roff_setregn(struct roff *r, const char *name, size_t len,
3042     int val, char sign, int step)
3043 {
3044           struct roffreg      *reg;
3045 
3046           /* Search for an existing register with the same name. */
3047           reg = r->regtab;
3048 
3049           while (reg != NULL && (reg->key.sz != len ||
3050               strncmp(reg->key.p, name, len) != 0))
3051                     reg = reg->next;
3052 
3053           if (NULL == reg) {
3054                     /* Create a new register. */
3055                     reg = mandoc_malloc(sizeof(struct roffreg));
3056                     reg->key.p = mandoc_strndup(name, len);
3057                     reg->key.sz = len;
3058                     reg->val = 0;
3059                     reg->step = 0;
3060                     reg->next = r->regtab;
3061                     r->regtab = reg;
3062           }
3063 
3064           if ('+' == sign)
3065                     reg->val += val;
3066           else if ('-' == sign)
3067                     reg->val -= val;
3068           else
3069                     reg->val = val;
3070           if (step != INT_MIN)
3071                     reg->step = step;
3072 }
3073 
3074 /*
3075  * Handle some predefined read-only number registers.
3076  * For now, return -1 if the requested register is not predefined;
3077  * in case a predefined read-only register having the value -1
3078  * were to turn up, another special value would have to be chosen.
3079  */
3080 static int
roff_getregro(const struct roff * r,const char * name)3081 roff_getregro(const struct roff *r, const char *name)
3082 {
3083 
3084           switch (*name) {
3085           case '$':  /* Number of arguments of the last macro evaluated. */
3086                     return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3087           case 'A':  /* ASCII approximation mode is always off. */
3088                     return 0;
3089           case 'g':  /* Groff compatibility mode is always on. */
3090                     return 1;
3091           case 'H':  /* Fixed horizontal resolution. */
3092                     return 24;
3093           case 'j':  /* Always adjust left margin only. */
3094                     return 0;
3095           case 'T':  /* Some output device is always defined. */
3096                     return 1;
3097           case 'V':  /* Fixed vertical resolution. */
3098                     return 40;
3099           default:
3100                     return -1;
3101           }
3102 }
3103 
/*
 * Return the value of the number register called name,
 * a NUL-terminated string, by asking roff_getregn()
 * with a sign of '\0'.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	const size_t	 namesz = strlen(name);

	return roff_getregn(r, name, namesz, '\0');
}
3109 
3110 static int
roff_getregn(struct roff * r,const char * name,size_t len,char sign)3111 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3112 {
3113           struct roffreg      *reg;
3114           int                  val;
3115 
3116           if ('.' == name[0] && 2 == len) {
3117                     val = roff_getregro(r, name + 1);
3118                     if (-1 != val)
3119                               return val;
3120           }
3121 
3122           for (reg = r->regtab; reg; reg = reg->next) {
3123                     if (len == reg->key.sz &&
3124                         0 == strncmp(name, reg->key.p, len)) {
3125                               switch (sign) {
3126                               case '+':
3127                                         reg->val += reg->step;
3128                                         break;
3129                               case '-':
3130                                         reg->val -= reg->step;
3131                                         break;
3132                               default:
3133                                         break;
3134                               }
3135                               return reg->val;
3136                     }
3137           }
3138 
3139           roff_setregn(r, name, len, 0, '\0', INT_MIN);
3140           return 0;
3141 }
3142 
3143 static int
roff_hasregn(const struct roff * r,const char * name,size_t len)3144 roff_hasregn(const struct roff *r, const char *name, size_t len)
3145 {
3146           struct roffreg      *reg;
3147           int                  val;
3148 
3149           if ('.' == name[0] && 2 == len) {
3150                     val = roff_getregro(r, name + 1);
3151                     if (-1 != val)
3152                               return 1;
3153           }
3154 
3155           for (reg = r->regtab; reg; reg = reg->next)
3156                     if (len == reg->key.sz &&
3157                         0 == strncmp(name, reg->key.p, len))
3158                               return 1;
3159 
3160           return 0;
3161 }
3162 
3163 static void
roff_freereg(struct roffreg * reg)3164 roff_freereg(struct roffreg *reg)
3165 {
3166           struct roffreg      *old_reg;
3167 
3168           while (NULL != reg) {
3169                     free(reg->key.p);
3170                     old_reg = reg;
3171                     reg = reg->next;
3172                     free(old_reg);
3173           }
3174 }
3175 
3176 static int
roff_nr(ROFF_ARGS)3177 roff_nr(ROFF_ARGS)
3178 {
3179           char                *key, *val, *step;
3180           size_t               keysz;
3181           int                  iv, is, len;
3182           char                 sign;
3183 
3184           key = val = buf->buf + pos;
3185           if (*key == '\0')
3186                     return ROFF_IGN;
3187 
3188           keysz = roff_getname(r, &val, ln, pos);
3189           if (key[keysz] == '\\' || key[keysz] == '\t')
3190                     return ROFF_IGN;
3191 
3192           sign = *val;
3193           if (sign == '+' || sign == '-')
3194                     val++;
3195 
3196           len = 0;
3197           if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3198                     return ROFF_IGN;
3199 
3200           step = val + len;
3201           while (isspace((unsigned char)*step))
3202                     step++;
3203           if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3204                     is = INT_MIN;
3205 
3206           roff_setregn(r, key, keysz, iv, sign, is);
3207           return ROFF_IGN;
3208 }
3209 
3210 static int
roff_rr(ROFF_ARGS)3211 roff_rr(ROFF_ARGS)
3212 {
3213           struct roffreg      *reg, **prev;
3214           char                *name, *cp;
3215           size_t               namesz;
3216 
3217           name = cp = buf->buf + pos;
3218           if (*name == '\0')
3219                     return ROFF_IGN;
3220           namesz = roff_getname(r, &cp, ln, pos);
3221           name[namesz] = '\0';
3222 
3223           prev = &r->regtab;
3224           while (1) {
3225                     reg = *prev;
3226                     if (reg == NULL || !strcmp(name, reg->key.p))
3227                               break;
3228                     prev = &reg->next;
3229           }
3230           if (reg != NULL) {
3231                     *prev = reg->next;
3232                     free(reg->key.p);
3233                     free(reg);
3234           }
3235           return ROFF_IGN;
3236 }
3237 
3238 /* --- handler functions for roff requests -------------------------------- */
3239 
3240 static int
roff_rm(ROFF_ARGS)3241 roff_rm(ROFF_ARGS)
3242 {
3243           const char           *name;
3244           char                 *cp;
3245           size_t                namesz;
3246 
3247           cp = buf->buf + pos;
3248           while (*cp != '\0') {
3249                     name = cp;
3250                     namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3251                     roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3252                     roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3253                     if (name[namesz] == '\\' || name[namesz] == '\t')
3254                               break;
3255           }
3256           return ROFF_IGN;
3257 }
3258 
3259 static int
roff_it(ROFF_ARGS)3260 roff_it(ROFF_ARGS)
3261 {
3262           int                  iv;
3263 
3264           /* Parse the number of lines. */
3265 
3266           if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3267                     mandoc_msg(MANDOCERR_IT_NONUM,
3268                         ln, ppos, "%s", buf->buf + 1);
3269                     return ROFF_IGN;
3270           }
3271 
3272           while (isspace((unsigned char)buf->buf[pos]))
3273                     pos++;
3274 
3275           /*
3276            * Arm the input line trap.
3277            * Special-casing "an-trap" is an ugly workaround to cope
3278            * with DocBook stupidly fiddling with man(7) internals.
3279            */
3280 
3281           roffit_lines = iv;
3282           roffit_macro = mandoc_strdup(iv != 1 ||
3283               strcmp(buf->buf + pos, "an-trap") ?
3284               buf->buf + pos : "br");
3285           return ROFF_IGN;
3286 }
3287 
3288 static int
roff_Dd(ROFF_ARGS)3289 roff_Dd(ROFF_ARGS)
3290 {
3291           int                  mask;
3292           enum roff_tok        t, te;
3293 
3294           switch (tok) {
3295           case ROFF_Dd:
3296                     tok = MDOC_Dd;
3297                     te = MDOC_MAX;
3298                     if (r->format == 0)
3299                               r->format = MPARSE_MDOC;
3300                     mask = MPARSE_MDOC | MPARSE_QUICK;
3301                     break;
3302           case ROFF_TH:
3303                     tok = MAN_TH;
3304                     te = MAN_MAX;
3305                     if (r->format == 0)
3306                               r->format = MPARSE_MAN;
3307                     mask = MPARSE_QUICK;
3308                     break;
3309           default:
3310                     abort();
3311           }
3312           if ((r->options & mask) == 0)
3313                     for (t = tok; t < te; t++)
3314                               roff_setstr(r, roff_name[t], NULL, 0);
3315           return ROFF_CONT;
3316 }
3317 
3318 static int
roff_TE(ROFF_ARGS)3319 roff_TE(ROFF_ARGS)
3320 {
3321           r->man->flags &= ~ROFF_NONOFILL;
3322           if (r->tbl == NULL) {
3323                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3324                     return ROFF_IGN;
3325           }
3326           if (tbl_end(r->tbl, 0) == 0) {
3327                     r->tbl = NULL;
3328                     free(buf->buf);
3329                     buf->buf = mandoc_strdup(".sp");
3330                     buf->sz = 4;
3331                     *offs = 0;
3332                     return ROFF_REPARSE;
3333           }
3334           r->tbl = NULL;
3335           return ROFF_IGN;
3336 }
3337 
3338 static int
roff_T_(ROFF_ARGS)3339 roff_T_(ROFF_ARGS)
3340 {
3341 
3342           if (NULL == r->tbl)
3343                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3344           else
3345                     tbl_restart(ln, ppos, r->tbl);
3346 
3347           return ROFF_IGN;
3348 }
3349 
3350 /*
3351  * Handle in-line equation delimiters.
3352  */
3353 static int
roff_eqndelim(struct roff * r,struct buf * buf,int pos)3354 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3355 {
3356           char                *cp1, *cp2;
3357           const char          *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3358 
3359           /*
3360            * Outside equations, look for an opening delimiter.
3361            * If we are inside an equation, we already know it is
3362            * in-line, or this function wouldn't have been called;
3363            * so look for a closing delimiter.
3364            */
3365 
3366           cp1 = buf->buf + pos;
3367           cp2 = strchr(cp1, r->eqn == NULL ?
3368               r->last_eqn->odelim : r->last_eqn->cdelim);
3369           if (cp2 == NULL)
3370                     return ROFF_CONT;
3371 
3372           *cp2++ = '\0';
3373           bef_pr = bef_nl = aft_nl = aft_pr = "";
3374 
3375           /* Handle preceding text, protecting whitespace. */
3376 
3377           if (*buf->buf != '\0') {
3378                     if (r->eqn == NULL)
3379                               bef_pr = "\\&";
3380                     bef_nl = "\n";
3381           }
3382 
3383           /*
3384            * Prepare replacing the delimiter with an equation macro
3385            * and drop leading white space from the equation.
3386            */
3387 
3388           if (r->eqn == NULL) {
3389                     while (*cp2 == ' ')
3390                               cp2++;
3391                     mac = ".EQ";
3392           } else
3393                     mac = ".EN";
3394 
3395           /* Handle following text, protecting whitespace. */
3396 
3397           if (*cp2 != '\0') {
3398                     aft_nl = "\n";
3399                     if (r->eqn != NULL)
3400                               aft_pr = "\\&";
3401           }
3402 
3403           /* Do the actual replacement. */
3404 
3405           buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3406               bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3407           free(buf->buf);
3408           buf->buf = cp1;
3409 
3410           /* Toggle the in-line state of the eqn subsystem. */
3411 
3412           r->eqn_inline = r->eqn == NULL;
3413           return ROFF_REPARSE;
3414 }
3415 
3416 static int
roff_EQ(ROFF_ARGS)3417 roff_EQ(ROFF_ARGS)
3418 {
3419           struct roff_node    *n;
3420 
3421           if (r->man->meta.macroset == MACROSET_MAN)
3422                     man_breakscope(r->man, ROFF_EQ);
3423           n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3424           if (ln > r->man->last->line)
3425                     n->flags |= NODE_LINE;
3426           n->eqn = eqn_box_new();
3427           roff_node_append(r->man, n);
3428           r->man->next = ROFF_NEXT_SIBLING;
3429 
3430           assert(r->eqn == NULL);
3431           if (r->last_eqn == NULL)
3432                     r->last_eqn = eqn_alloc();
3433           else
3434                     eqn_reset(r->last_eqn);
3435           r->eqn = r->last_eqn;
3436           r->eqn->node = n;
3437 
3438           if (buf->buf[pos] != '\0')
3439                     mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3440                         ".EQ %s", buf->buf + pos);
3441 
3442           return ROFF_IGN;
3443 }
3444 
3445 static int
roff_EN(ROFF_ARGS)3446 roff_EN(ROFF_ARGS)
3447 {
3448           if (r->eqn != NULL) {
3449                     eqn_parse(r->eqn);
3450                     r->eqn = NULL;
3451           } else
3452                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3453           if (buf->buf[pos] != '\0')
3454                     mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3455                         "EN %s", buf->buf + pos);
3456           return ROFF_IGN;
3457 }
3458 
3459 static int
roff_TS(ROFF_ARGS)3460 roff_TS(ROFF_ARGS)
3461 {
3462           if (r->tbl != NULL) {
3463                     mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3464                     tbl_end(r->tbl, 0);
3465           }
3466           r->man->flags |= ROFF_NONOFILL;
3467           r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3468           if (r->last_tbl == NULL)
3469                     r->first_tbl = r->tbl;
3470           r->last_tbl = r->tbl;
3471           return ROFF_IGN;
3472 }
3473 
3474 static int
roff_noarg(ROFF_ARGS)3475 roff_noarg(ROFF_ARGS)
3476 {
3477           if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3478                     man_breakscope(r->man, tok);
3479           if (tok == ROFF_brp)
3480                     tok = ROFF_br;
3481           roff_elem_alloc(r->man, ln, ppos, tok);
3482           if (buf->buf[pos] != '\0')
3483                     mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3484                        "%s %s", roff_name[tok], buf->buf + pos);
3485           if (tok == ROFF_nf)
3486                     r->man->flags |= ROFF_NOFILL;
3487           else if (tok == ROFF_fi)
3488                     r->man->flags &= ~ROFF_NOFILL;
3489           r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3490           r->man->next = ROFF_NEXT_SIBLING;
3491           return ROFF_IGN;
3492 }
3493 
3494 static int
roff_onearg(ROFF_ARGS)3495 roff_onearg(ROFF_ARGS)
3496 {
3497           struct roff_node    *n;
3498           char                          *cp;
3499           int                            npos;
3500 
3501           if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3502               (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3503                tok == ROFF_ti))
3504                     man_breakscope(r->man, tok);
3505 
3506           if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3507                     r->man->last = roffce_node;
3508                     r->man->next = ROFF_NEXT_SIBLING;
3509           }
3510 
3511           roff_elem_alloc(r->man, ln, ppos, tok);
3512           n = r->man->last;
3513 
3514           cp = buf->buf + pos;
3515           if (*cp != '\0') {
3516                     while (*cp != '\0' && *cp != ' ')
3517                               cp++;
3518                     while (*cp == ' ')
3519                               *cp++ = '\0';
3520                     if (*cp != '\0')
3521                               mandoc_msg(MANDOCERR_ARG_EXCESS,
3522                                   ln, (int)(cp - buf->buf),
3523                                   "%s ... %s", roff_name[tok], cp);
3524                     roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3525           }
3526 
3527           if (tok == ROFF_ce || tok == ROFF_rj) {
3528                     if (r->man->last->type == ROFFT_ELEM) {
3529                               roff_word_alloc(r->man, ln, pos, "1");
3530                               r->man->last->flags |= NODE_NOSRC;
3531                     }
3532                     npos = 0;
3533                     if (roff_evalnum(r, ln, r->man->last->string, &npos,
3534                         &roffce_lines, 0) == 0) {
3535                               mandoc_msg(MANDOCERR_CE_NONUM,
3536                                   ln, pos, "ce %s", buf->buf + pos);
3537                               roffce_lines = 1;
3538                     }
3539                     if (roffce_lines < 1) {
3540                               r->man->last = r->man->last->parent;
3541                               roffce_node = NULL;
3542                               roffce_lines = 0;
3543                     } else
3544                               roffce_node = r->man->last->parent;
3545           } else {
3546                     n->flags |= NODE_VALID | NODE_ENDED;
3547                     r->man->last = n;
3548           }
3549           n->flags |= NODE_LINE;
3550           r->man->next = ROFF_NEXT_SIBLING;
3551           return ROFF_IGN;
3552 }
3553 
3554 static int
roff_manyarg(ROFF_ARGS)3555 roff_manyarg(ROFF_ARGS)
3556 {
3557           struct roff_node    *n;
3558           char                          *sp, *ep;
3559 
3560           roff_elem_alloc(r->man, ln, ppos, tok);
3561           n = r->man->last;
3562 
3563           for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3564                     while (*ep != '\0' && *ep != ' ')
3565                               ep++;
3566                     while (*ep == ' ')
3567                               *ep++ = '\0';
3568                     roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3569           }
3570 
3571           n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3572           r->man->last = n;
3573           r->man->next = ROFF_NEXT_SIBLING;
3574           return ROFF_IGN;
3575 }
3576 
3577 static int
roff_als(ROFF_ARGS)3578 roff_als(ROFF_ARGS)
3579 {
3580           char                *oldn, *newn, *end, *value;
3581           size_t               oldsz, newsz, valsz;
3582 
3583           newn = oldn = buf->buf + pos;
3584           if (*newn == '\0')
3585                     return ROFF_IGN;
3586 
3587           newsz = roff_getname(r, &oldn, ln, pos);
3588           if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3589                     return ROFF_IGN;
3590 
3591           end = oldn;
3592           oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3593           if (oldsz == 0)
3594                     return ROFF_IGN;
3595 
3596           valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3597               (int)oldsz, oldn);
3598           roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3599           roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3600           free(value);
3601           return ROFF_IGN;
3602 }
3603 
3604 /*
3605  * The .break request only makes sense inside conditionals,
3606  * and that case is already handled in roff_cond_sub().
3607  */
3608 static int
roff_break(ROFF_ARGS)3609 roff_break(ROFF_ARGS)
3610 {
3611           mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3612           return ROFF_IGN;
3613 }
3614 
3615 static int
roff_cc(ROFF_ARGS)3616 roff_cc(ROFF_ARGS)
3617 {
3618           const char          *p;
3619 
3620           p = buf->buf + pos;
3621 
3622           if (*p == '\0' || (r->control = *p++) == '.')
3623                     r->control = '\0';
3624 
3625           if (*p != '\0')
3626                     mandoc_msg(MANDOCERR_ARG_EXCESS,
3627                         ln, p - buf->buf, "cc ... %s", p);
3628 
3629           return ROFF_IGN;
3630 }
3631 
3632 static int
roff_char(ROFF_ARGS)3633 roff_char(ROFF_ARGS)
3634 {
3635           const char          *p, *kp, *vp;
3636           size_t               ksz, vsz;
3637           int                  font;
3638 
3639           /* Parse the character to be replaced. */
3640 
3641           kp = buf->buf + pos;
3642           p = kp + 1;
3643           if (*kp == '\0' || (*kp == '\\' &&
3644                mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3645               (*p != ' ' && *p != '\0')) {
3646                     mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3647                     return ROFF_IGN;
3648           }
3649           ksz = p - kp;
3650           while (*p == ' ')
3651                     p++;
3652 
3653           /*
3654            * If the replacement string contains a font escape sequence,
3655            * we have to restore the font at the end.
3656            */
3657 
3658           vp = p;
3659           vsz = strlen(p);
3660           font = 0;
3661           while (*p != '\0') {
3662                     if (*p++ != '\\')
3663                               continue;
3664                     switch (mandoc_escape(&p, NULL, NULL)) {
3665                     case ESCAPE_FONT:
3666                     case ESCAPE_FONTROMAN:
3667                     case ESCAPE_FONTITALIC:
3668                     case ESCAPE_FONTBOLD:
3669                     case ESCAPE_FONTBI:
3670                     case ESCAPE_FONTCR:
3671                     case ESCAPE_FONTCB:
3672                     case ESCAPE_FONTCI:
3673                     case ESCAPE_FONTPREV:
3674                               font++;
3675                               break;
3676                     default:
3677                               break;
3678                     }
3679           }
3680           if (font > 1)
3681                     mandoc_msg(MANDOCERR_CHAR_FONT,
3682                         ln, (int)(vp - buf->buf), "%s", vp);
3683 
3684           /*
3685            * Approximate the effect of .char using the .tr tables.
3686            * XXX In groff, .char and .tr interact differently.
3687            */
3688 
3689           if (ksz == 1) {
3690                     if (r->xtab == NULL)
3691                               r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3692                     assert((unsigned int)*kp < 128);
3693                     free(r->xtab[(int)*kp].p);
3694                     r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3695                         "%s%s", vp, font ? "\fP" : "");
3696           } else {
3697                     roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3698                     if (font)
3699                               roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3700           }
3701           return ROFF_IGN;
3702 }
3703 
3704 static int
roff_ec(ROFF_ARGS)3705 roff_ec(ROFF_ARGS)
3706 {
3707           const char          *p;
3708 
3709           p = buf->buf + pos;
3710           if (*p == '\0')
3711                     r->escape = '\\';
3712           else {
3713                     r->escape = *p;
3714                     if (*++p != '\0')
3715                               mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3716                                   (int)(p - buf->buf), "ec ... %s", p);
3717           }
3718           return ROFF_IGN;
3719 }
3720 
3721 static int
roff_eo(ROFF_ARGS)3722 roff_eo(ROFF_ARGS)
3723 {
3724           r->escape = '\0';
3725           if (buf->buf[pos] != '\0')
3726                     mandoc_msg(MANDOCERR_ARG_SKIP,
3727                         ln, pos, "eo %s", buf->buf + pos);
3728           return ROFF_IGN;
3729 }
3730 
3731 static int
roff_nop(ROFF_ARGS)3732 roff_nop(ROFF_ARGS)
3733 {
3734           while (buf->buf[pos] == ' ')
3735                     pos++;
3736           *offs = pos;
3737           return ROFF_RERUN;
3738 }
3739 
3740 static int
roff_tr(ROFF_ARGS)3741 roff_tr(ROFF_ARGS)
3742 {
3743           const char          *p, *first, *second;
3744           size_t               fsz, ssz;
3745           enum mandoc_esc      esc;
3746 
3747           p = buf->buf + pos;
3748 
3749           if (*p == '\0') {
3750                     mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3751                     return ROFF_IGN;
3752           }
3753 
3754           while (*p != '\0') {
3755                     fsz = ssz = 1;
3756 
3757                     first = p++;
3758                     if (*first == '\\') {
3759                               esc = mandoc_escape(&p, NULL, NULL);
3760                               if (esc == ESCAPE_ERROR) {
3761                                         mandoc_msg(MANDOCERR_ESC_BAD, ln,
3762                                             (int)(p - buf->buf), "%s", first);
3763                                         return ROFF_IGN;
3764                               }
3765                               fsz = (size_t)(p - first);
3766                     }
3767 
3768                     second = p++;
3769                     if (*second == '\\') {
3770                               esc = mandoc_escape(&p, NULL, NULL);
3771                               if (esc == ESCAPE_ERROR) {
3772                                         mandoc_msg(MANDOCERR_ESC_BAD, ln,
3773                                             (int)(p - buf->buf), "%s", second);
3774                                         return ROFF_IGN;
3775                               }
3776                               ssz = (size_t)(p - second);
3777                     } else if (*second == '\0') {
3778                               mandoc_msg(MANDOCERR_TR_ODD, ln,
3779                                   (int)(first - buf->buf), "tr %s", first);
3780                               second = " ";
3781                               p--;
3782                     }
3783 
3784                     if (fsz > 1) {
3785                               roff_setstrn(&r->xmbtab, first, fsz,
3786                                   second, ssz, 0);
3787                               continue;
3788                     }
3789 
3790                     if (r->xtab == NULL)
3791                               r->xtab = mandoc_calloc(128,
3792                                   sizeof(struct roffstr));
3793 
3794                     free(r->xtab[(int)*first].p);
3795                     r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3796                     r->xtab[(int)*first].sz = ssz;
3797           }
3798 
3799           return ROFF_IGN;
3800 }
3801 
3802 /*
3803  * Implementation of the .return request.
3804  * There is no need to call roff_userret() from here.
3805  * The read module will call that after rewinding the reader stack
3806  * to the place from where the current macro was called.
3807  */
3808 static int
roff_return(ROFF_ARGS)3809 roff_return(ROFF_ARGS)
3810 {
3811           if (r->mstackpos >= 0)
3812                     return ROFF_IGN | ROFF_USERRET;
3813 
3814           mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3815           return ROFF_IGN;
3816 }
3817 
3818 static int
roff_rn(ROFF_ARGS)3819 roff_rn(ROFF_ARGS)
3820 {
3821           const char          *value;
3822           char                *oldn, *newn, *end;
3823           size_t               oldsz, newsz;
3824           int                  deftype;
3825 
3826           oldn = newn = buf->buf + pos;
3827           if (*oldn == '\0')
3828                     return ROFF_IGN;
3829 
3830           oldsz = roff_getname(r, &newn, ln, pos);
3831           if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3832                     return ROFF_IGN;
3833 
3834           end = newn;
3835           newsz = roff_getname(r, &end, ln, newn - buf->buf);
3836           if (newsz == 0)
3837                     return ROFF_IGN;
3838 
3839           deftype = ROFFDEF_ANY;
3840           value = roff_getstrn(r, oldn, oldsz, &deftype);
3841           switch (deftype) {
3842           case ROFFDEF_USER:
3843                     roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3844                     roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3845                     roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3846                     break;
3847           case ROFFDEF_PRE:
3848                     roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3849                     roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3850                     break;
3851           case ROFFDEF_REN:
3852                     roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3853                     roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3854                     roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3855                     break;
3856           case ROFFDEF_STD:
3857                     roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3858                     roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3859                     break;
3860           default:
3861                     roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3862                     roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3863                     break;
3864           }
3865           return ROFF_IGN;
3866 }
3867 
3868 static int
roff_shift(ROFF_ARGS)3869 roff_shift(ROFF_ARGS)
3870 {
3871           struct mctx         *ctx;
3872           int                  levels, i;
3873 
3874           levels = 1;
3875           if (buf->buf[pos] != '\0' &&
3876               roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3877                     mandoc_msg(MANDOCERR_CE_NONUM,
3878                         ln, pos, "shift %s", buf->buf + pos);
3879                     levels = 1;
3880           }
3881           if (r->mstackpos < 0) {
3882                     mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3883                     return ROFF_IGN;
3884           }
3885           ctx = r->mstack + r->mstackpos;
3886           if (levels > ctx->argc) {
3887                     mandoc_msg(MANDOCERR_SHIFT,
3888                         ln, pos, "%d, but max is %d", levels, ctx->argc);
3889                     levels = ctx->argc;
3890           }
3891           if (levels == 0)
3892                     return ROFF_IGN;
3893           for (i = 0; i < levels; i++)
3894                     free(ctx->argv[i]);
3895           ctx->argc -= levels;
3896           for (i = 0; i < ctx->argc; i++)
3897                     ctx->argv[i] = ctx->argv[i + levels];
3898           return ROFF_IGN;
3899 }
3900 
3901 static int
roff_so(ROFF_ARGS)3902 roff_so(ROFF_ARGS)
3903 {
3904           char *name, *cp;
3905 
3906           name = buf->buf + pos;
3907           mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3908 
3909           /*
3910            * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3911            * opening anything that's not in our cwd or anything beneath
3912            * it.  Thus, explicitly disallow traversing up the file-system
3913            * or using absolute paths.
3914            */
3915 
3916           if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3917                     mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3918                     buf->sz = mandoc_asprintf(&cp,
3919                         ".sp\nSee the file %s.\n.sp", name) + 1;
3920                     free(buf->buf);
3921                     buf->buf = cp;
3922                     *offs = 0;
3923                     return ROFF_REPARSE;
3924           }
3925 
3926           *offs = pos;
3927           return ROFF_SO;
3928 }
3929 
3930 /* --- user defined strings and macros ------------------------------------ */
3931 
3932 static int
roff_userdef(ROFF_ARGS)3933 roff_userdef(ROFF_ARGS)
3934 {
3935           struct mctx          *ctx;
3936           char                 *arg, *ap, *dst, *src;
3937           size_t                sz;
3938 
3939           /* If the macro is empty, ignore it altogether. */
3940 
3941           if (*r->current_string == '\0')
3942                     return ROFF_IGN;
3943 
3944           /* Initialize a new macro stack context. */
3945 
3946           if (++r->mstackpos == r->mstacksz) {
3947                     r->mstack = mandoc_recallocarray(r->mstack,
3948                         r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3949                     r->mstacksz += 8;
3950           }
3951           ctx = r->mstack + r->mstackpos;
3952           ctx->argsz = 0;
3953           ctx->argc = 0;
3954           ctx->argv = NULL;
3955 
3956           /*
3957            * Collect pointers to macro argument strings,
3958            * NUL-terminating them and escaping quotes.
3959            */
3960 
3961           src = buf->buf + pos;
3962           while (*src != '\0') {
3963                     if (ctx->argc == ctx->argsz) {
3964                               ctx->argsz += 8;
3965                               ctx->argv = mandoc_reallocarray(ctx->argv,
3966                                   ctx->argsz, sizeof(*ctx->argv));
3967                     }
3968                     arg = roff_getarg(r, &src, ln, &pos);
3969                     sz = 1;  /* For the terminating NUL. */
3970                     for (ap = arg; *ap != '\0'; ap++)
3971                               sz += *ap == '"' ? 4 : 1;
3972                     ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3973                     for (ap = arg; *ap != '\0'; ap++) {
3974                               if (*ap == '"') {
3975                                         memcpy(dst, "\\(dq", 4);
3976                                         dst += 4;
3977                               } else
3978                                         *dst++ = *ap;
3979                     }
3980                     *dst = '\0';
3981                     free(arg);
3982           }
3983 
3984           /* Replace the macro invocation by the macro definition. */
3985 
3986           free(buf->buf);
3987           buf->buf = mandoc_strdup(r->current_string);
3988           buf->sz = strlen(buf->buf) + 1;
3989           *offs = 0;
3990 
3991           return buf->buf[buf->sz - 2] == '\n' ?
3992               ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3993 }
3994 
3995 /*
3996  * Calling a high-level macro that was renamed with .rn.
3997  * r->current_string has already been set up by roff_parse().
3998  */
3999 static int
roff_renamed(ROFF_ARGS)4000 roff_renamed(ROFF_ARGS)
4001 {
4002           char      *nbuf;
4003 
4004           buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4005               buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4006           free(buf->buf);
4007           buf->buf = nbuf;
4008           *offs = 0;
4009           return ROFF_CONT;
4010 }
4011 
4012 /*
4013  * Measure the length in bytes of the roff identifier at *cpp
4014  * and advance the pointer to the next word.
4015  */
4016 static size_t
roff_getname(struct roff * r,char ** cpp,int ln,int pos)4017 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4018 {
4019           char       *name, *cp;
4020           size_t      namesz;
4021 
4022           name = *cpp;
4023           if (*name == '\0')
4024                     return 0;
4025 
4026           /* Advance cp to the byte after the end of the name. */
4027 
4028           for (cp = name; 1; cp++) {
4029                     namesz = cp - name;
4030                     if (*cp == '\0')
4031                               break;
4032                     if (*cp == ' ' || *cp == '\t') {
4033                               cp++;
4034                               break;
4035                     }
4036                     if (*cp != '\\')
4037                               continue;
4038                     if (cp[1] == '{' || cp[1] == '}')
4039                               break;
4040                     if (*++cp == '\\')
4041                               continue;
4042                     mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4043                         "%.*s", (int)(cp - name + 1), name);
4044                     mandoc_escape((const char **)&cp, NULL, NULL);
4045                     break;
4046           }
4047 
4048           /* Read past spaces. */
4049 
4050           while (*cp == ' ')
4051                     cp++;
4052 
4053           *cpp = cp;
4054           return namesz;
4055 }
4056 
4057 /*
4058  * Store *string into the user-defined string called *name.
4059  * To clear an existing entry, call with (*r, *name, NULL, 0).
4060  * append == 0: replace mode
4061  * append == 1: single-line append mode
4062  * append == 2: multiline append mode, append '\n' after each call
4063  */
4064 static void
roff_setstr(struct roff * r,const char * name,const char * string,int append)4065 roff_setstr(struct roff *r, const char *name, const char *string,
4066           int append)
4067 {
4068           size_t     namesz;
4069 
4070           namesz = strlen(name);
4071           roff_setstrn(&r->strtab, name, namesz, string,
4072               string ? strlen(string) : 0, append);
4073           roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4074 }
4075 
4076 static void
roff_setstrn(struct roffkv ** r,const char * name,size_t namesz,const char * string,size_t stringsz,int append)4077 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4078                     const char *string, size_t stringsz, int append)
4079 {
4080           struct roffkv       *n;
4081           char                *c;
4082           int                  i;
4083           size_t               oldch, newch;
4084 
4085           /* Search for an existing string with the same name. */
4086           n = *r;
4087 
4088           while (n && (namesz != n->key.sz ||
4089                               strncmp(n->key.p, name, namesz)))
4090                     n = n->next;
4091 
4092           if (NULL == n) {
4093                     /* Create a new string table entry. */
4094                     n = mandoc_malloc(sizeof(struct roffkv));
4095                     n->key.p = mandoc_strndup(name, namesz);
4096                     n->key.sz = namesz;
4097                     n->val.p = NULL;
4098                     n->val.sz = 0;
4099                     n->next = *r;
4100                     *r = n;
4101           } else if (0 == append) {
4102                     free(n->val.p);
4103                     n->val.p = NULL;
4104                     n->val.sz = 0;
4105           }
4106 
4107           if (NULL == string)
4108                     return;
4109 
4110           /*
4111            * One additional byte for the '\n' in multiline mode,
4112            * and one for the terminating '\0'.
4113            */
4114           newch = stringsz + (1 < append ? 2u : 1u);
4115 
4116           if (NULL == n->val.p) {
4117                     n->val.p = mandoc_malloc(newch);
4118                     *n->val.p = '\0';
4119                     oldch = 0;
4120           } else {
4121                     oldch = n->val.sz;
4122                     n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4123           }
4124 
4125           /* Skip existing content in the destination buffer. */
4126           c = n->val.p + (int)oldch;
4127 
4128           /* Append new content to the destination buffer. */
4129           i = 0;
4130           while (i < (int)stringsz) {
4131                     /*
4132                      * Rudimentary roff copy mode:
4133                      * Handle escaped backslashes.
4134                      */
4135                     if ('\\' == string[i] && '\\' == string[i + 1])
4136                               i++;
4137                     *c++ = string[i++];
4138           }
4139 
4140           /* Append terminating bytes. */
4141           if (1 < append)
4142                     *c++ = '\n';
4143 
4144           *c = '\0';
4145           n->val.sz = (int)(c - n->val.p);
4146 }
4147 
4148 static const char *
roff_getstrn(struct roff * r,const char * name,size_t len,int * deftype)4149 roff_getstrn(struct roff *r, const char *name, size_t len,
4150     int *deftype)
4151 {
4152           const struct roffkv *n;
4153           int                            found, i;
4154           enum roff_tok                  tok;
4155 
4156           found = 0;
4157           for (n = r->strtab; n != NULL; n = n->next) {
4158                     if (strncmp(name, n->key.p, len) != 0 ||
4159                         n->key.p[len] != '\0' || n->val.p == NULL)
4160                               continue;
4161                     if (*deftype & ROFFDEF_USER) {
4162                               *deftype = ROFFDEF_USER;
4163                               return n->val.p;
4164                     } else {
4165                               found = 1;
4166                               break;
4167                     }
4168           }
4169           for (n = r->rentab; n != NULL; n = n->next) {
4170                     if (strncmp(name, n->key.p, len) != 0 ||
4171                         n->key.p[len] != '\0' || n->val.p == NULL)
4172                               continue;
4173                     if (*deftype & ROFFDEF_REN) {
4174                               *deftype = ROFFDEF_REN;
4175                               return n->val.p;
4176                     } else {
4177                               found = 1;
4178                               break;
4179                     }
4180           }
4181           for (i = 0; i < PREDEFS_MAX; i++) {
4182                     if (strncmp(name, predefs[i].name, len) != 0 ||
4183                         predefs[i].name[len] != '\0')
4184                               continue;
4185                     if (*deftype & ROFFDEF_PRE) {
4186                               *deftype = ROFFDEF_PRE;
4187                               return predefs[i].str;
4188                     } else {
4189                               found = 1;
4190                               break;
4191                     }
4192           }
4193           if (r->man->meta.macroset != MACROSET_MAN) {
4194                     for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4195                               if (strncmp(name, roff_name[tok], len) != 0 ||
4196                                   roff_name[tok][len] != '\0')
4197                                         continue;
4198                               if (*deftype & ROFFDEF_STD) {
4199                                         *deftype = ROFFDEF_STD;
4200                                         return NULL;
4201                               } else {
4202                                         found = 1;
4203                                         break;
4204                               }
4205                     }
4206           }
4207           if (r->man->meta.macroset != MACROSET_MDOC) {
4208                     for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4209                               if (strncmp(name, roff_name[tok], len) != 0 ||
4210                                   roff_name[tok][len] != '\0')
4211                                         continue;
4212                               if (*deftype & ROFFDEF_STD) {
4213                                         *deftype = ROFFDEF_STD;
4214                                         return NULL;
4215                               } else {
4216                                         found = 1;
4217                                         break;
4218                               }
4219                     }
4220           }
4221 
4222           if (found == 0 && *deftype != ROFFDEF_ANY) {
4223                     if (*deftype & ROFFDEF_REN) {
4224                               /*
4225                                * This might still be a request,
4226                                * so do not treat it as undefined yet.
4227                                */
4228                               *deftype = ROFFDEF_UNDEF;
4229                               return NULL;
4230                     }
4231 
4232                     /* Using an undefined string defines it to be empty. */
4233 
4234                     roff_setstrn(&r->strtab, name, len, "", 0, 0);
4235                     roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4236           }
4237 
4238           *deftype = 0;
4239           return NULL;
4240 }
4241 
4242 static void
roff_freestr(struct roffkv * r)4243 roff_freestr(struct roffkv *r)
4244 {
4245           struct roffkv        *n, *nn;
4246 
4247           for (n = r; n; n = nn) {
4248                     free(n->key.p);
4249                     free(n->val.p);
4250                     nn = n->next;
4251                     free(n);
4252           }
4253 }
4254 
4255 /* --- accessors and utility functions ------------------------------------ */
4256 
4257 /*
4258  * Duplicate an input string, making the appropriate character
4259  * conversations (as stipulated by `tr') along the way.
4260  * Returns a heap-allocated string with all the replacements made.
4261  */
4262 char *
roff_strdup(const struct roff * r,const char * p)4263 roff_strdup(const struct roff *r, const char *p)
4264 {
4265           const struct roffkv *cp;
4266           char                *res;
4267           const char          *pp;
4268           size_t               ssz, sz;
4269           enum mandoc_esc      esc;
4270 
4271           if (NULL == r->xmbtab && NULL == r->xtab)
4272                     return mandoc_strdup(p);
4273           else if ('\0' == *p)
4274                     return mandoc_strdup("");
4275 
4276           /*
4277            * Step through each character looking for term matches
4278            * (remember that a `tr' can be invoked with an escape, which is
4279            * a glyph but the escape is multi-character).
4280            * We only do this if the character hash has been initialised
4281            * and the string is >0 length.
4282            */
4283 
4284           res = NULL;
4285           ssz = 0;
4286 
4287           while ('\0' != *p) {
4288                     assert((unsigned int)*p < 128);
4289                     if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4290                               sz = r->xtab[(int)*p].sz;
4291                               res = mandoc_realloc(res, ssz + sz + 1);
4292                               memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4293                               ssz += sz;
4294                               p++;
4295                               continue;
4296                     } else if ('\\' != *p) {
4297                               res = mandoc_realloc(res, ssz + 2);
4298                               res[ssz++] = *p++;
4299                               continue;
4300                     }
4301 
4302                     /* Search for term matches. */
4303                     for (cp = r->xmbtab; cp; cp = cp->next)
4304                               if (0 == strncmp(p, cp->key.p, cp->key.sz))
4305                                         break;
4306 
4307                     if (NULL != cp) {
4308                               /*
4309                                * A match has been found.
4310                                * Append the match to the array and move
4311                                * forward by its keysize.
4312                                */
4313                               res = mandoc_realloc(res,
4314                                   ssz + cp->val.sz + 1);
4315                               memcpy(res + ssz, cp->val.p, cp->val.sz);
4316                               ssz += cp->val.sz;
4317                               p += (int)cp->key.sz;
4318                               continue;
4319                     }
4320 
4321                     /*
4322                      * Handle escapes carefully: we need to copy
4323                      * over just the escape itself, or else we might
4324                      * do replacements within the escape itself.
4325                      * Make sure to pass along the bogus string.
4326                      */
4327                     pp = p++;
4328                     esc = mandoc_escape(&p, NULL, NULL);
4329                     if (ESCAPE_ERROR == esc) {
4330                               sz = strlen(pp);
4331                               res = mandoc_realloc(res, ssz + sz + 1);
4332                               memcpy(res + ssz, pp, sz);
4333                               break;
4334                     }
4335                     /*
4336                      * We bail out on bad escapes.
4337                      * No need to warn: we already did so when
4338                      * roff_expand() was called.
4339                      */
4340                     sz = (int)(p - pp);
4341                     res = mandoc_realloc(res, ssz + sz + 1);
4342                     memcpy(res + ssz, pp, sz);
4343                     ssz += sz;
4344           }
4345 
4346           res[(int)ssz] = '\0';
4347           return res;
4348 }
4349 
4350 int
roff_getformat(const struct roff * r)4351 roff_getformat(const struct roff *r)
4352 {
4353 
4354           return r->format;
4355 }
4356 
4357 /*
4358  * Find out whether a line is a macro line or not.
4359  * If it is, adjust the current position and return one; if it isn't,
4360  * return zero and don't change the current position.
4361  * If the control character has been set with `.cc', then let that grain
4362  * precedence.
4363  * This is slighly contrary to groff, where using the non-breaking
4364  * control character when `cc' has been invoked will cause the
4365  * non-breaking macro contents to be printed verbatim.
4366  */
4367 int
roff_getcontrol(const struct roff * r,const char * cp,int * ppos)4368 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4369 {
4370           int                 pos;
4371 
4372           pos = *ppos;
4373 
4374           if (r->control != '\0' && cp[pos] == r->control)
4375                     pos++;
4376           else if (r->control != '\0')
4377                     return 0;
4378           else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4379                     pos += 2;
4380           else if ('.' == cp[pos] || '\'' == cp[pos])
4381                     pos++;
4382           else
4383                     return 0;
4384 
4385           while (' ' == cp[pos] || '\t' == cp[pos])
4386                     pos++;
4387 
4388           *ppos = pos;
4389           return 1;
4390 }
4391