1 /*        Id: roff.c,v 1.363 2019/02/06 21:11:43 schwarze Exp  */
2 /*
3  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
37 #include "roff_int.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
40 
/*
 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
 * that an escape sequence resulted from copy-in processing and
 * needs to be checked or interpolated.  As it is used nowhere
 * else, it is defined here rather than in a header file.
 * The value 27 is the ASCII code of the ESC control character.
 */
#define	ASCII_ESC	27
48 
/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000
51 
/*
 * Types of definitions of macros and strings.
 * These are single-bit flags that can be OR'ed together into a mask;
 * bit 0 is not used by any of them.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
/* Mask of all four kinds above; does not include ROFFDEF_UNDEF. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
60 
/* --- data types --------------------------------------------------------- */
62 
/*
 * An incredibly-simple string buffer.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};

/*
 * A key-value roffstr pair as part of a singly-linked list.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};

/*
 * A single number register as part of a singly-linked list.
 */
struct	roffreg {
	struct roffstr	 key; /* presumably the register name -- confirm */
	int		 val; /* presumably the current value -- confirm */
	int		 step; /* presumably the auto-increment step -- confirm */
	struct roffreg	*next; /* next in list */
};
89 
90 /*
91  * Association of request and macro names with token IDs.
92  */
93 struct    roffreq {
94           enum roff_tok        tok;
95           char                 name[];
96 };
97 
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 */
struct	mctx {
	char		**argv; /* presumably the macro arguments -- confirm */
	int		 argc; /* presumably the number of entries in use */
	int		 argsz; /* presumably the allocated size of argv */
};
107 
/*
 * The roff parser state: lookup tables, the stacks needed for nested
 * macro calls and conditionals, and the table and equation parsers.
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
135 
136 struct    roffnode {
137           enum roff_tok        tok; /* type of node */
138           struct roffnode     *parent; /* up one in stack */
139           int                  line; /* parse line */
140           int                  col; /* parse col */
141           char                *name; /* node name, e.g. macro name */
142           char                *end; /* end-rules: custom token */
143           int                  endspan; /* end-rules: next-line or infty */
144           int                  rule; /* current evaluation rule */
145 };
146 
/*
 * Parameter list shared by all request handler functions,
 * kept in a macro so that prototypes and definitions stay in sync.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler function. */
typedef	int (*roffproc)(ROFF_ARGS);

/*
 * Handlers and flags for one request or macro.
 * The text and sub handlers may be NULL.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
164 
/*
 * A predefined string: the name as it appears in the input
 * and the text that replaces it.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one struct predef initializer.  The trailing comma is
 * part of the expansion, so invocations are written one after another
 * without separators.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
172 
/* --- function prototypes ------------------------------------------------ */
174 
/*
 * Forward declarations of all file-local functions.
 * Those taking ROFF_ARGS have the signature required by roffproc
 * and can be stored in struct roffmac.
 */
static	int		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
static	int		 roff_als(ROFF_ARGS);
static	int		 roff_block(ROFF_ARGS);
static	int		 roff_block_text(ROFF_ARGS);
static	int		 roff_block_sub(ROFF_ARGS);
static	int		 roff_cblock(ROFF_ARGS);
static	int		 roff_cc(ROFF_ARGS);
static	int		 roff_ccond(struct roff *, int, int);
static	int		 roff_char(ROFF_ARGS);
static	int		 roff_cond(ROFF_ARGS);
static	int		 roff_cond_text(ROFF_ARGS);
static	int		 roff_cond_sub(ROFF_ARGS);
static	int		 roff_ds(ROFF_ARGS);
static	int		 roff_ec(ROFF_ARGS);
static	int		 roff_eo(ROFF_ARGS);
static	int		 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *r, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	int		 roff_expand(struct roff *, struct buf *,
				int, int, char);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	int		 roff_insec(ROFF_ARGS);
static	int		 roff_it(ROFF_ARGS);
static	int		 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	int		 roff_manyarg(ROFF_ARGS);
static	int		 roff_noarg(ROFF_ARGS);
static	int		 roff_nop(ROFF_ARGS);
static	int		 roff_nr(ROFF_ARGS);
static	int		 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	int		 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	int		 roff_renamed(ROFF_ARGS);
static	int		 roff_return(ROFF_ARGS);
static	int		 roff_rm(ROFF_ARGS);
static	int		 roff_rn(ROFF_ARGS);
static	int		 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	int		 roff_shift(ROFF_ARGS);
static	int		 roff_so(ROFF_ARGS);
static	int		 roff_tr(ROFF_ARGS);
static	int		 roff_Dd(ROFF_ARGS);
static	int		 roff_TE(ROFF_ARGS);
static	int		 roff_TS(ROFF_ARGS);
static	int		 roff_EQ(ROFF_ARGS);
static	int		 roff_EN(ROFF_ARGS);
static	int		 roff_T_(ROFF_ARGS);
static	int		 roff_unsupp(ROFF_ARGS);
static	int		 roff_userdef(ROFF_ARGS);
253 
/* --- constant data ------------------------------------------------------ */
255 
/* Single-bit option flags for the numeric parsing functions. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
258 
259 const char *__roff_name[MAN_MAX + 1] = {
260           "br",               "ce",               "fi",               "ft",
261           "ll",               "mc",               "nf",
262           "po",               "rj",               "sp",
263           "ta",               "ti",               NULL,
264           "ab",               "ad",               "af",               "aln",
265           "als",              "am",               "am1",              "ami",
266           "ami1",             "as",               "as1",              "asciify",
267           "backtrace",        "bd",               "bleedat",          "blm",
268         "box",                "boxa",             "bp",               "BP",
269           "break",  "breakchar",        "brnl",             "brp",
270           "brpnl",  "c2",               "cc",
271           "cf",               "cflags", "ch",               "char",
272           "chop",             "class",  "close",  "CL",
273           "color",  "composite",        "continue",         "cp",
274           "cropat", "cs",               "cu",               "da",
275           "dch",              "Dd",               "de",               "de1",
276           "defcolor",         "dei",              "dei1",             "device",
277           "devicem",          "di",               "do",               "ds",
278           "ds1",              "dwh",              "dt",               "ec",
279           "ecr",              "ecs",              "el",               "em",
280           "EN",               "eo",               "EP",               "EQ",
281           "errprint",         "ev",               "evc",              "ex",
282           "fallback",         "fam",              "fc",               "fchar",
283           "fcolor", "fdeferlig",        "feature",          "fkern",
284           "fl",               "flig",             "fp",               "fps",
285           "fschar", "fspacewidth",      "fspecial",         "ftr",
286           "fzoom",  "gcolor", "hc",               "hcode",
287           "hidechar",         "hla",              "hlm",              "hpf",
288           "hpfa",             "hpfcode",          "hw",               "hy",
289           "hylang", "hylen",  "hym",              "hypp",
290           "hys",              "ie",               "if",               "ig",
291           "index",  "it",               "itc",              "IX",
292           "kern",             "kernafter",        "kernbefore",       "kernpair",
293           "lc",               "lc_ctype",         "lds",              "length",
294           "letadj", "lf",               "lg",               "lhang",
295           "linetabs",         "lnr",              "lnrf",             "lpfx",
296           "ls",               "lsm",              "lt",
297           "mediasize",        "minss",  "mk",               "mso",
298           "na",               "ne",               "nh",               "nhychar",
299           "nm",               "nn",               "nop",              "nr",
300           "nrf",              "nroff",  "ns",               "nx",
301           "open",             "opena",  "os",               "output",
302           "padj",             "papersize",        "pc",               "pev",
303           "pi",               "PI",               "pl",               "pm",
304           "pn",               "pnr",              "ps",
305           "psbb",             "pshape", "pso",              "ptr",
306           "pvs",              "rchar",  "rd",               "recursionlimit",
307           "return", "rfschar",          "rhang",
308           "rm",               "rn",               "rnn",              "rr",
309           "rs",               "rt",               "schar",  "sentchar",
310           "shc",              "shift",  "sizes",  "so",
311           "spacewidth",       "special",          "spreadwarn",       "ss",
312           "sty",              "substring",        "sv",               "sy",
313           "T&",               "tc",               "TE",
314           "TH",               "tkf",              "tl",
315           "tm",               "tm1",              "tmc",              "tr",
316           "track",  "transchar",        "trf",              "trimat",
317           "trin",             "trnt",             "troff",  "TS",
318           "uf",               "ul",               "unformat",         "unwatch",
319           "unwatchn",         "vpt",              "vs",               "warn",
320           "warnscale",        "watch",  "watchlength",      "watchn",
321           "wh",               "while",  "write",  "writec",
322           "writem", "xflag",  ".",                NULL,
323           NULL,               "text",
324           "Dd",               "Dt",               "Os",               "Sh",
325           "Ss",               "Pp",               "D1",               "Dl",
326           "Bd",               "Ed",               "Bl",               "El",
327           "It",               "Ad",               "An",               "Ap",
328           "Ar",               "Cd",               "Cm",               "Dv",
329           "Er",               "Ev",               "Ex",               "Fa",
330           "Fd",               "Fl",               "Fn",               "Ft",
331           "Ic",               "In",               "Li",               "Nd",
332           "Nm",               "Op",               "Ot",               "Pa",
333           "Rv",               "St",               "Va",               "Vt",
334           "Xr",               "%A",               "%B",               "%D",
335           "%I",               "%J",               "%N",               "%O",
336           "%P",               "%R",               "%T",               "%V",
337           "Ac",               "Ao",               "Aq",               "At",
338           "Bc",               "Bf",               "Bo",               "Bq",
339           "Bsx",              "Bx",               "Db",               "Dc",
340           "Do",               "Dq",               "Ec",               "Ef",
341           "Em",               "Eo",               "Fx",               "Ms",
342           "No",               "Ns",               "Nx",               "Ox",
343           "Pc",               "Pf",               "Po",               "Pq",
344           "Qc",               "Ql",               "Qo",               "Qq",
345           "Re",               "Rs",               "Sc",               "So",
346           "Sq",               "Sm",               "Sx",               "Sy",
347           "Tn",               "Ux",               "Xc",               "Xo",
348           "Fo",               "Fc",               "Oo",               "Oc",
349           "Bk",               "Ek",               "Bt",               "Hf",
350           "Fr",               "Ud",               "Lb",               "Lp",
351           "Lk",               "Mt",               "Brq",              "Bro",
352           "Brc",              "%C",               "Es",               "En",
353           "Dx",               "%Q",               "%U",               "Ta",
354           NULL,
355           "TH",               "SH",               "SS",               "TP",
356           "TQ",
357           "LP",               "PP",               "P",                "IP",
358           "HP",               "SM",               "SB",               "BI",
359           "IB",               "BR",               "RB",               "R",
360           "B",                "I",                "IR",               "RI",
361           "RE",               "RS",               "DT",               "UC",
362           "PD",               "AT",               "in",
363           "SY",               "YS",               "OP",
364           "EX",               "EE",               "UR",
365           "UE",               "MT",               "ME",               NULL
366 };
367 const     char *const *roff_name = __roff_name;
368 
369 static    struct roffmac       roffs[TOKEN_NONE] = {
370           { roff_noarg, NULL, NULL, 0 },  /* br */
371           { roff_onearg, NULL, NULL, 0 },  /* ce */
372           { roff_noarg, NULL, NULL, 0 },  /* fi */
373           { roff_onearg, NULL, NULL, 0 },  /* ft */
374           { roff_onearg, NULL, NULL, 0 },  /* ll */
375           { roff_onearg, NULL, NULL, 0 },  /* mc */
376           { roff_noarg, NULL, NULL, 0 },  /* nf */
377           { roff_onearg, NULL, NULL, 0 },  /* po */
378           { roff_onearg, NULL, NULL, 0 },  /* rj */
379           { roff_onearg, NULL, NULL, 0 },  /* sp */
380           { roff_manyarg, NULL, NULL, 0 },  /* ta */
381           { roff_onearg, NULL, NULL, 0 },  /* ti */
382           { NULL, NULL, NULL, 0 },  /* ROFF_MAX */
383           { roff_unsupp, NULL, NULL, 0 },  /* ab */
384           { roff_line_ignore, NULL, NULL, 0 },  /* ad */
385           { roff_line_ignore, NULL, NULL, 0 },  /* af */
386           { roff_unsupp, NULL, NULL, 0 },  /* aln */
387           { roff_als, NULL, NULL, 0 },  /* als */
388           { roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
389           { roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
390           { roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
391           { roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
392           { roff_ds, NULL, NULL, 0 },  /* as */
393           { roff_ds, NULL, NULL, 0 },  /* as1 */
394           { roff_unsupp, NULL, NULL, 0 },  /* asciify */
395           { roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
396           { roff_line_ignore, NULL, NULL, 0 },  /* bd */
397           { roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
398           { roff_unsupp, NULL, NULL, 0 },  /* blm */
399           { roff_unsupp, NULL, NULL, 0 },  /* box */
400           { roff_unsupp, NULL, NULL, 0 },  /* boxa */
401           { roff_line_ignore, NULL, NULL, 0 },  /* bp */
402           { roff_unsupp, NULL, NULL, 0 },  /* BP */
403           { roff_unsupp, NULL, NULL, 0 },  /* break */
404           { roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
405           { roff_line_ignore, NULL, NULL, 0 },  /* brnl */
406           { roff_noarg, NULL, NULL, 0 },  /* brp */
407           { roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
408           { roff_unsupp, NULL, NULL, 0 },  /* c2 */
409           { roff_cc, NULL, NULL, 0 },  /* cc */
410           { roff_insec, NULL, NULL, 0 },  /* cf */
411           { roff_line_ignore, NULL, NULL, 0 },  /* cflags */
412           { roff_line_ignore, NULL, NULL, 0 },  /* ch */
413           { roff_char, NULL, NULL, 0 },  /* char */
414           { roff_unsupp, NULL, NULL, 0 },  /* chop */
415           { roff_line_ignore, NULL, NULL, 0 },  /* class */
416           { roff_insec, NULL, NULL, 0 },  /* close */
417           { roff_unsupp, NULL, NULL, 0 },  /* CL */
418           { roff_line_ignore, NULL, NULL, 0 },  /* color */
419           { roff_unsupp, NULL, NULL, 0 },  /* composite */
420           { roff_unsupp, NULL, NULL, 0 },  /* continue */
421           { roff_line_ignore, NULL, NULL, 0 },  /* cp */
422           { roff_line_ignore, NULL, NULL, 0 },  /* cropat */
423           { roff_line_ignore, NULL, NULL, 0 },  /* cs */
424           { roff_line_ignore, NULL, NULL, 0 },  /* cu */
425           { roff_unsupp, NULL, NULL, 0 },  /* da */
426           { roff_unsupp, NULL, NULL, 0 },  /* dch */
427           { roff_Dd, NULL, NULL, 0 },  /* Dd */
428           { roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
429           { roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
430           { roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
431           { roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
432           { roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
433           { roff_unsupp, NULL, NULL, 0 },  /* device */
434           { roff_unsupp, NULL, NULL, 0 },  /* devicem */
435           { roff_unsupp, NULL, NULL, 0 },  /* di */
436           { roff_unsupp, NULL, NULL, 0 },  /* do */
437           { roff_ds, NULL, NULL, 0 },  /* ds */
438           { roff_ds, NULL, NULL, 0 },  /* ds1 */
439           { roff_unsupp, NULL, NULL, 0 },  /* dwh */
440           { roff_unsupp, NULL, NULL, 0 },  /* dt */
441           { roff_ec, NULL, NULL, 0 },  /* ec */
442           { roff_unsupp, NULL, NULL, 0 },  /* ecr */
443           { roff_unsupp, NULL, NULL, 0 },  /* ecs */
444           { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
445           { roff_unsupp, NULL, NULL, 0 },  /* em */
446           { roff_EN, NULL, NULL, 0 },  /* EN */
447           { roff_eo, NULL, NULL, 0 },  /* eo */
448           { roff_unsupp, NULL, NULL, 0 },  /* EP */
449           { roff_EQ, NULL, NULL, 0 },  /* EQ */
450           { roff_line_ignore, NULL, NULL, 0 },  /* errprint */
451           { roff_unsupp, NULL, NULL, 0 },  /* ev */
452           { roff_unsupp, NULL, NULL, 0 },  /* evc */
453           { roff_unsupp, NULL, NULL, 0 },  /* ex */
454           { roff_line_ignore, NULL, NULL, 0 },  /* fallback */
455           { roff_line_ignore, NULL, NULL, 0 },  /* fam */
456           { roff_unsupp, NULL, NULL, 0 },  /* fc */
457           { roff_unsupp, NULL, NULL, 0 },  /* fchar */
458           { roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
459           { roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
460           { roff_line_ignore, NULL, NULL, 0 },  /* feature */
461           { roff_line_ignore, NULL, NULL, 0 },  /* fkern */
462           { roff_line_ignore, NULL, NULL, 0 },  /* fl */
463           { roff_line_ignore, NULL, NULL, 0 },  /* flig */
464           { roff_line_ignore, NULL, NULL, 0 },  /* fp */
465           { roff_line_ignore, NULL, NULL, 0 },  /* fps */
466           { roff_unsupp, NULL, NULL, 0 },  /* fschar */
467           { roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
468           { roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
469           { roff_line_ignore, NULL, NULL, 0 },  /* ftr */
470           { roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
471           { roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
472           { roff_line_ignore, NULL, NULL, 0 },  /* hc */
473           { roff_line_ignore, NULL, NULL, 0 },  /* hcode */
474           { roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
475           { roff_line_ignore, NULL, NULL, 0 },  /* hla */
476           { roff_line_ignore, NULL, NULL, 0 },  /* hlm */
477           { roff_line_ignore, NULL, NULL, 0 },  /* hpf */
478           { roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
479           { roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
480           { roff_line_ignore, NULL, NULL, 0 },  /* hw */
481           { roff_line_ignore, NULL, NULL, 0 },  /* hy */
482           { roff_line_ignore, NULL, NULL, 0 },  /* hylang */
483           { roff_line_ignore, NULL, NULL, 0 },  /* hylen */
484           { roff_line_ignore, NULL, NULL, 0 },  /* hym */
485           { roff_line_ignore, NULL, NULL, 0 },  /* hypp */
486           { roff_line_ignore, NULL, NULL, 0 },  /* hys */
487           { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
488           { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
489           { roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
490           { roff_unsupp, NULL, NULL, 0 },  /* index */
491           { roff_it, NULL, NULL, 0 },  /* it */
492           { roff_unsupp, NULL, NULL, 0 },  /* itc */
493           { roff_line_ignore, NULL, NULL, 0 },  /* IX */
494           { roff_line_ignore, NULL, NULL, 0 },  /* kern */
495           { roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
496           { roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
497           { roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
498           { roff_unsupp, NULL, NULL, 0 },  /* lc */
499           { roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
500           { roff_unsupp, NULL, NULL, 0 },  /* lds */
501           { roff_unsupp, NULL, NULL, 0 },  /* length */
502           { roff_line_ignore, NULL, NULL, 0 },  /* letadj */
503           { roff_insec, NULL, NULL, 0 },  /* lf */
504           { roff_line_ignore, NULL, NULL, 0 },  /* lg */
505           { roff_line_ignore, NULL, NULL, 0 },  /* lhang */
506           { roff_unsupp, NULL, NULL, 0 },  /* linetabs */
507           { roff_unsupp, NULL, NULL, 0 },  /* lnr */
508           { roff_unsupp, NULL, NULL, 0 },  /* lnrf */
509           { roff_unsupp, NULL, NULL, 0 },  /* lpfx */
510           { roff_line_ignore, NULL, NULL, 0 },  /* ls */
511           { roff_unsupp, NULL, NULL, 0 },  /* lsm */
512           { roff_line_ignore, NULL, NULL, 0 },  /* lt */
513           { roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
514           { roff_line_ignore, NULL, NULL, 0 },  /* minss */
515           { roff_line_ignore, NULL, NULL, 0 },  /* mk */
516           { roff_insec, NULL, NULL, 0 },  /* mso */
517           { roff_line_ignore, NULL, NULL, 0 },  /* na */
518           { roff_line_ignore, NULL, NULL, 0 },  /* ne */
519           { roff_line_ignore, NULL, NULL, 0 },  /* nh */
520           { roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
521           { roff_unsupp, NULL, NULL, 0 },  /* nm */
522           { roff_unsupp, NULL, NULL, 0 },  /* nn */
523           { roff_nop, NULL, NULL, 0 },  /* nop */
524           { roff_nr, NULL, NULL, 0 },  /* nr */
525           { roff_unsupp, NULL, NULL, 0 },  /* nrf */
526           { roff_line_ignore, NULL, NULL, 0 },  /* nroff */
527           { roff_line_ignore, NULL, NULL, 0 },  /* ns */
528           { roff_insec, NULL, NULL, 0 },  /* nx */
529           { roff_insec, NULL, NULL, 0 },  /* open */
530           { roff_insec, NULL, NULL, 0 },  /* opena */
531           { roff_line_ignore, NULL, NULL, 0 },  /* os */
532           { roff_unsupp, NULL, NULL, 0 },  /* output */
533           { roff_line_ignore, NULL, NULL, 0 },  /* padj */
534           { roff_line_ignore, NULL, NULL, 0 },  /* papersize */
535           { roff_line_ignore, NULL, NULL, 0 },  /* pc */
536           { roff_line_ignore, NULL, NULL, 0 },  /* pev */
537           { roff_insec, NULL, NULL, 0 },  /* pi */
538           { roff_unsupp, NULL, NULL, 0 },  /* PI */
539           { roff_line_ignore, NULL, NULL, 0 },  /* pl */
540           { roff_line_ignore, NULL, NULL, 0 },  /* pm */
541           { roff_line_ignore, NULL, NULL, 0 },  /* pn */
542           { roff_line_ignore, NULL, NULL, 0 },  /* pnr */
543           { roff_line_ignore, NULL, NULL, 0 },  /* ps */
544           { roff_unsupp, NULL, NULL, 0 },  /* psbb */
545           { roff_unsupp, NULL, NULL, 0 },  /* pshape */
546           { roff_insec, NULL, NULL, 0 },  /* pso */
547           { roff_line_ignore, NULL, NULL, 0 },  /* ptr */
548           { roff_line_ignore, NULL, NULL, 0 },  /* pvs */
549           { roff_unsupp, NULL, NULL, 0 },  /* rchar */
550           { roff_line_ignore, NULL, NULL, 0 },  /* rd */
551           { roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
552           { roff_return, NULL, NULL, 0 },  /* return */
553           { roff_unsupp, NULL, NULL, 0 },  /* rfschar */
554           { roff_line_ignore, NULL, NULL, 0 },  /* rhang */
555           { roff_rm, NULL, NULL, 0 },  /* rm */
556           { roff_rn, NULL, NULL, 0 },  /* rn */
557           { roff_unsupp, NULL, NULL, 0 },  /* rnn */
558           { roff_rr, NULL, NULL, 0 },  /* rr */
559           { roff_line_ignore, NULL, NULL, 0 },  /* rs */
560           { roff_line_ignore, NULL, NULL, 0 },  /* rt */
561           { roff_unsupp, NULL, NULL, 0 },  /* schar */
562           { roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
563           { roff_line_ignore, NULL, NULL, 0 },  /* shc */
564           { roff_shift, NULL, NULL, 0 },  /* shift */
565           { roff_line_ignore, NULL, NULL, 0 },  /* sizes */
566           { roff_so, NULL, NULL, 0 },  /* so */
567           { roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
568           { roff_line_ignore, NULL, NULL, 0 },  /* special */
569           { roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
570           { roff_line_ignore, NULL, NULL, 0 },  /* ss */
571           { roff_line_ignore, NULL, NULL, 0 },  /* sty */
572           { roff_unsupp, NULL, NULL, 0 },  /* substring */
573           { roff_line_ignore, NULL, NULL, 0 },  /* sv */
574           { roff_insec, NULL, NULL, 0 },  /* sy */
575           { roff_T_, NULL, NULL, 0 },  /* T& */
576           { roff_unsupp, NULL, NULL, 0 },  /* tc */
577           { roff_TE, NULL, NULL, 0 },  /* TE */
578           { roff_Dd, NULL, NULL, 0 },  /* TH */
579           { roff_line_ignore, NULL, NULL, 0 },  /* tkf */
580           { roff_unsupp, NULL, NULL, 0 },  /* tl */
581           { roff_line_ignore, NULL, NULL, 0 },  /* tm */
582           { roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
583           { roff_line_ignore, NULL, NULL, 0 },  /* tmc */
584           { roff_tr, NULL, NULL, 0 },  /* tr */
585           { roff_line_ignore, NULL, NULL, 0 },  /* track */
586           { roff_line_ignore, NULL, NULL, 0 },  /* transchar */
587           { roff_insec, NULL, NULL, 0 },  /* trf */
588           { roff_line_ignore, NULL, NULL, 0 },  /* trimat */
589           { roff_unsupp, NULL, NULL, 0 },  /* trin */
590           { roff_unsupp, NULL, NULL, 0 },  /* trnt */
591           { roff_line_ignore, NULL, NULL, 0 },  /* troff */
592           { roff_TS, NULL, NULL, 0 },  /* TS */
593           { roff_line_ignore, NULL, NULL, 0 },  /* uf */
594           { roff_line_ignore, NULL, NULL, 0 },  /* ul */
595           { roff_unsupp, NULL, NULL, 0 },  /* unformat */
596           { roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
597           { roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
598           { roff_line_ignore, NULL, NULL, 0 },  /* vpt */
599           { roff_line_ignore, NULL, NULL, 0 },  /* vs */
600           { roff_line_ignore, NULL, NULL, 0 },  /* warn */
601           { roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
602           { roff_line_ignore, NULL, NULL, 0 },  /* watch */
603           { roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
604           { roff_line_ignore, NULL, NULL, 0 },  /* watchn */
605           { roff_unsupp, NULL, NULL, 0 },  /* wh */
606           { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
607           { roff_insec, NULL, NULL, 0 },  /* write */
608           { roff_insec, NULL, NULL, 0 },  /* writec */
609           { roff_insec, NULL, NULL, 0 },  /* writem */
610           { roff_line_ignore, NULL, NULL, 0 },  /* xflag */
611           { roff_cblock, NULL, NULL, 0 },  /* . */
612           { roff_renamed, NULL, NULL, 0 },
613           { roff_userdef, NULL, NULL, 0 }
614 };
615 
616 /* Array of injected predefined strings. */
617 #define   PREDEFS_MAX          38
618 static    const struct predef predefs[PREDEFS_MAX] = {
619 #include "predefs.in"
620 };
621 
622 static    int        roffce_lines;      /* number of input lines to center */
623 static    struct roff_node *roffce_node;  /* active request */
624 static    int        roffit_lines;  /* number of lines to delay */
625 static    char      *roffit_macro;  /* nil-terminated macro line */
626 
627 
628 /* --- request table ------------------------------------------------------ */
629 
630 struct ohash *
roffhash_alloc(enum roff_tok mintok,enum roff_tok maxtok)631 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
632 {
633           struct ohash        *htab;
634           struct roffreq      *req;
635           enum roff_tok        tok;
636           size_t               sz;
637           unsigned int         slot;
638 
639           htab = mandoc_malloc(sizeof(*htab));
640           mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
641 
642           for (tok = mintok; tok < maxtok; tok++) {
643                     if (roff_name[tok] == NULL)
644                               continue;
645                     sz = strlen(roff_name[tok]);
646                     req = mandoc_malloc(sizeof(*req) + sz + 1);
647                     req->tok = tok;
648                     memcpy(req->name, roff_name[tok], sz + 1);
649                     slot = ohash_qlookup(htab, req->name);
650                     ohash_insert(htab, slot, req);
651           }
652           return htab;
653 }
654 
/*
 * Release a table built by roffhash_alloc(): free each entry,
 * then the hash table itself.  A NULL table is ignored.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
669 
670 enum roff_tok
roffhash_find(struct ohash * htab,const char * name,size_t sz)671 roffhash_find(struct ohash *htab, const char *name, size_t sz)
672 {
673           struct roffreq      *req;
674           const char          *end;
675 
676           if (sz) {
677                     end = name + sz;
678                     req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
679           } else
680                     req = ohash_find(htab, ohash_qlookup(htab, name));
681           return req == NULL ? TOKEN_NONE : req->tok;
682 }
683 
684 /* --- stack of request blocks -------------------------------------------- */
685 
686 /*
687  * Pop the current node off of the stack of roff instructions currently
688  * pending.
689  */
690 static int
roffnode_pop(struct roff * r)691 roffnode_pop(struct roff *r)
692 {
693           struct roffnode     *p;
694           int                  inloop;
695 
696           p = r->last;
697           inloop = p->tok == ROFF_while;
698           r->last = p->parent;
699           free(p->name);
700           free(p->end);
701           free(p);
702           return inloop;
703 }
704 
705 /*
706  * Push a roff node onto the instruction stack.  This must later be
707  * removed with roffnode_pop().
708  */
709 static void
roffnode_push(struct roff * r,enum roff_tok tok,const char * name,int line,int col)710 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
711                     int line, int col)
712 {
713           struct roffnode     *p;
714 
715           p = mandoc_calloc(1, sizeof(struct roffnode));
716           p->tok = tok;
717           if (name)
718                     p->name = mandoc_strdup(name);
719           p->parent = r->last;
720           p->line = line;
721           p->col = col;
722           p->rule = p->parent ? p->parent->rule : 0;
723 
724           r->last = p;
725 }
726 
727 /* --- roff parser state data management ---------------------------------- */
728 
729 static void
roff_free1(struct roff * r)730 roff_free1(struct roff *r)
731 {
732           int                  i;
733 
734           tbl_free(r->first_tbl);
735           r->first_tbl = r->last_tbl = r->tbl = NULL;
736 
737           eqn_free(r->last_eqn);
738           r->last_eqn = r->eqn = NULL;
739 
740           while (r->mstackpos >= 0)
741                     roff_userret(r);
742 
743           while (r->last)
744                     roffnode_pop(r);
745 
746           free (r->rstack);
747           r->rstack = NULL;
748           r->rstacksz = 0;
749           r->rstackpos = -1;
750 
751           roff_freereg(r->regtab);
752           r->regtab = NULL;
753 
754           roff_freestr(r->strtab);
755           roff_freestr(r->rentab);
756           roff_freestr(r->xmbtab);
757           r->strtab = r->rentab = r->xmbtab = NULL;
758 
759           if (r->xtab)
760                     for (i = 0; i < 128; i++)
761                               free(r->xtab[i].p);
762           free(r->xtab);
763           r->xtab = NULL;
764 }
765 
766 void
roff_reset(struct roff * r)767 roff_reset(struct roff *r)
768 {
769           roff_free1(r);
770           r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
771           r->control = '\0';
772           r->escape = '\\';
773           roffce_lines = 0;
774           roffce_node = NULL;
775           roffit_lines = 0;
776           roffit_macro = NULL;
777 }
778 
779 void
roff_free(struct roff * r)780 roff_free(struct roff *r)
781 {
782           int                  i;
783 
784           roff_free1(r);
785           for (i = 0; i < r->mstacksz; i++)
786                     free(r->mstack[i].argv);
787           free(r->mstack);
788           roffhash_free(r->reqtab);
789           free(r);
790 }
791 
792 struct roff *
roff_alloc(int options)793 roff_alloc(int options)
794 {
795           struct roff         *r;
796 
797           r = mandoc_calloc(1, sizeof(struct roff));
798           r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
799           r->options = options;
800           r->format = options & (MPARSE_MDOC | MPARSE_MAN);
801           r->mstackpos = -1;
802           r->rstackpos = -1;
803           r->escape = '\\';
804           return r;
805 }
806 
807 /* --- syntax tree state data management ---------------------------------- */
808 
809 static void
roff_man_free1(struct roff_man * man)810 roff_man_free1(struct roff_man *man)
811 {
812           if (man->meta.first != NULL)
813                     roff_node_delete(man, man->meta.first);
814           free(man->meta.msec);
815           free(man->meta.vol);
816           free(man->meta.os);
817           free(man->meta.arch);
818           free(man->meta.title);
819           free(man->meta.name);
820           free(man->meta.date);
821           free(man->meta.sodest);
822 }
823 
824 void
roff_state_reset(struct roff_man * man)825 roff_state_reset(struct roff_man *man)
826 {
827           man->last = man->meta.first;
828           man->last_es = NULL;
829           man->flags = 0;
830           man->lastsec = man->lastnamed = SEC_NONE;
831           man->next = ROFF_NEXT_CHILD;
832           roff_setreg(man->roff, "nS", 0, '=');
833 }
834 
835 static void
roff_man_alloc1(struct roff_man * man)836 roff_man_alloc1(struct roff_man *man)
837 {
838           memset(&man->meta, 0, sizeof(man->meta));
839           man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
840           man->meta.first->type = ROFFT_ROOT;
841           man->meta.macroset = MACROSET_NONE;
842           roff_state_reset(man);
843 }
844 
/*
 * Discard the content of a syntax tree and make it ready
 * for parsing another document.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Free the old tree and strings, then start over empty. */

	roff_man_free1(man);
	roff_man_alloc1(man);
}
851 
/*
 * Destroy a syntax tree: release everything its meta data owns,
 * then the roff_man structure itself.
 */
void
roff_man_free(struct roff_man *man)
{
	/* Content first, container last. */

	roff_man_free1(man);
	free(man);
}
858 
859 struct roff_man *
roff_man_alloc(struct roff * roff,const char * os_s,int quick)860 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
861 {
862           struct roff_man *man;
863 
864           man = mandoc_calloc(1, sizeof(*man));
865           man->roff = roff;
866           man->os_s = os_s;
867           man->quick = quick;
868           roff_man_alloc1(man);
869           roff->man = man;
870           return man;
871 }
872 
873 /* --- syntax tree handling ----------------------------------------------- */
874 
875 struct roff_node *
roff_node_alloc(struct roff_man * man,int line,int pos,enum roff_type type,int tok)876 roff_node_alloc(struct roff_man *man, int line, int pos,
877           enum roff_type type, int tok)
878 {
879           struct roff_node    *n;
880 
881           n = mandoc_calloc(1, sizeof(*n));
882           n->line = line;
883           n->pos = pos;
884           n->tok = tok;
885           n->type = type;
886           n->sec = man->lastsec;
887 
888           if (man->flags & MDOC_SYNOPSIS)
889                     n->flags |= NODE_SYNPRETTY;
890           else
891                     n->flags &= ~NODE_SYNPRETTY;
892           if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
893                     n->flags |= NODE_NOFILL;
894           else
895                     n->flags &= ~NODE_NOFILL;
896           if (man->flags & MDOC_NEWLINE)
897                     n->flags |= NODE_LINE;
898           man->flags &= ~MDOC_NEWLINE;
899 
900           return n;
901 }
902 
903 void
roff_node_append(struct roff_man * man,struct roff_node * n)904 roff_node_append(struct roff_man *man, struct roff_node *n)
905 {
906 
907           switch (man->next) {
908           case ROFF_NEXT_SIBLING:
909                     if (man->last->next != NULL) {
910                               n->next = man->last->next;
911                               man->last->next->prev = n;
912                     } else
913                               man->last->parent->last = n;
914                     man->last->next = n;
915                     n->prev = man->last;
916                     n->parent = man->last->parent;
917                     break;
918           case ROFF_NEXT_CHILD:
919                     if (man->last->child != NULL) {
920                               n->next = man->last->child;
921                               man->last->child->prev = n;
922                     } else
923                               man->last->last = n;
924                     man->last->child = n;
925                     n->parent = man->last;
926                     break;
927           default:
928                     abort();
929           }
930           man->last = n;
931 
932           switch (n->type) {
933           case ROFFT_HEAD:
934                     n->parent->head = n;
935                     break;
936           case ROFFT_BODY:
937                     if (n->end != ENDBODY_NOT)
938                               return;
939                     n->parent->body = n;
940                     break;
941           case ROFFT_TAIL:
942                     n->parent->tail = n;
943                     break;
944           default:
945                     return;
946           }
947 
948           /*
949            * Copy over the normalised-data pointer of our parent.  Not
950            * everybody has one, but copying a null pointer is fine.
951            */
952 
953           n->norm = n->parent->norm;
954           assert(n->parent->type == ROFFT_BLOCK);
955 }
956 
957 void
roff_word_alloc(struct roff_man * man,int line,int pos,const char * word)958 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
959 {
960           struct roff_node    *n;
961 
962           n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
963           n->string = roff_strdup(man->roff, word);
964           roff_node_append(man, n);
965           n->flags |= NODE_VALID | NODE_ENDED;
966           man->next = ROFF_NEXT_SIBLING;
967 }
968 
969 void
roff_word_append(struct roff_man * man,const char * word)970 roff_word_append(struct roff_man *man, const char *word)
971 {
972           struct roff_node    *n;
973           char                          *addstr, *newstr;
974 
975           n = man->last;
976           addstr = roff_strdup(man->roff, word);
977           mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
978           free(addstr);
979           free(n->string);
980           n->string = newstr;
981           man->next = ROFF_NEXT_SIBLING;
982 }
983 
984 void
roff_elem_alloc(struct roff_man * man,int line,int pos,int tok)985 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
986 {
987           struct roff_node    *n;
988 
989           n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
990           roff_node_append(man, n);
991           man->next = ROFF_NEXT_CHILD;
992 }
993 
994 struct roff_node *
roff_block_alloc(struct roff_man * man,int line,int pos,int tok)995 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
996 {
997           struct roff_node    *n;
998 
999           n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1000           roff_node_append(man, n);
1001           man->next = ROFF_NEXT_CHILD;
1002           return n;
1003 }
1004 
1005 struct roff_node *
roff_head_alloc(struct roff_man * man,int line,int pos,int tok)1006 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1007 {
1008           struct roff_node    *n;
1009 
1010           n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1011           roff_node_append(man, n);
1012           man->next = ROFF_NEXT_CHILD;
1013           return n;
1014 }
1015 
1016 struct roff_node *
roff_body_alloc(struct roff_man * man,int line,int pos,int tok)1017 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1018 {
1019           struct roff_node    *n;
1020 
1021           n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1022           roff_node_append(man, n);
1023           man->next = ROFF_NEXT_CHILD;
1024           return n;
1025 }
1026 
1027 static void
roff_addtbl(struct roff_man * man,int line,struct tbl_node * tbl)1028 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1029 {
1030           struct roff_node    *n;
1031           struct tbl_span               *span;
1032 
1033           if (man->meta.macroset == MACROSET_MAN)
1034                     man_breakscope(man, ROFF_TS);
1035           while ((span = tbl_span(tbl)) != NULL) {
1036                     n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1037                     n->span = span;
1038                     roff_node_append(man, n);
1039                     n->flags |= NODE_VALID | NODE_ENDED;
1040                     man->next = ROFF_NEXT_SIBLING;
1041           }
1042 }
1043 
1044 void
roff_node_unlink(struct roff_man * man,struct roff_node * n)1045 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1046 {
1047 
1048           /* Adjust siblings. */
1049 
1050           if (n->prev)
1051                     n->prev->next = n->next;
1052           if (n->next)
1053                     n->next->prev = n->prev;
1054 
1055           /* Adjust parent. */
1056 
1057           if (n->parent != NULL) {
1058                     if (n->parent->child == n)
1059                               n->parent->child = n->next;
1060                     if (n->parent->last == n)
1061                               n->parent->last = n->prev;
1062           }
1063 
1064           /* Adjust parse point. */
1065 
1066           if (man == NULL)
1067                     return;
1068           if (man->last == n) {
1069                     if (n->prev == NULL) {
1070                               man->last = n->parent;
1071                               man->next = ROFF_NEXT_CHILD;
1072                     } else {
1073                               man->last = n->prev;
1074                               man->next = ROFF_NEXT_SIBLING;
1075                     }
1076           }
1077           if (man->meta.first == n)
1078                     man->meta.first = NULL;
1079 }
1080 
1081 void
roff_node_relink(struct roff_man * man,struct roff_node * n)1082 roff_node_relink(struct roff_man *man, struct roff_node *n)
1083 {
1084           roff_node_unlink(man, n);
1085           n->prev = n->next = NULL;
1086           roff_node_append(man, n);
1087 }
1088 
1089 void
roff_node_free(struct roff_node * n)1090 roff_node_free(struct roff_node *n)
1091 {
1092 
1093           if (n->args != NULL)
1094                     mdoc_argv_free(n->args);
1095           if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1096                     free(n->norm);
1097           eqn_box_free(n->eqn);
1098           free(n->string);
1099           free(n);
1100 }
1101 
1102 void
roff_node_delete(struct roff_man * man,struct roff_node * n)1103 roff_node_delete(struct roff_man *man, struct roff_node *n)
1104 {
1105 
1106           while (n->child != NULL)
1107                     roff_node_delete(man, n->child);
1108           roff_node_unlink(man, n);
1109           roff_node_free(n);
1110 }
1111 
1112 void
deroff(char ** dest,const struct roff_node * n)1113 deroff(char **dest, const struct roff_node *n)
1114 {
1115           char      *cp;
1116           size_t     sz;
1117 
1118           if (n->type != ROFFT_TEXT) {
1119                     for (n = n->child; n != NULL; n = n->next)
1120                               deroff(dest, n);
1121                     return;
1122           }
1123 
1124           /* Skip leading whitespace. */
1125 
1126           for (cp = n->string; *cp != '\0'; cp++) {
1127                     if (cp[0] == '\\' && cp[1] != '\0' &&
1128                         strchr(" %&0^|~", cp[1]) != NULL)
1129                               cp++;
1130                     else if ( ! isspace((unsigned char)*cp))
1131                               break;
1132           }
1133 
1134           /* Skip trailing backslash. */
1135 
1136           sz = strlen(cp);
1137           if (sz > 0 && cp[sz - 1] == '\\')
1138                     sz--;
1139 
1140           /* Skip trailing whitespace. */
1141 
1142           for (; sz; sz--)
1143                     if ( ! isspace((unsigned char)cp[sz-1]))
1144                               break;
1145 
1146           /* Skip empty strings. */
1147 
1148           if (sz == 0)
1149                     return;
1150 
1151           if (*dest == NULL) {
1152                     *dest = mandoc_strndup(cp, sz);
1153                     return;
1154           }
1155 
1156           mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1157           free(*dest);
1158           *dest = cp;
1159 }
1160 
1161 /* --- main functions of the roff parser ---------------------------------- */
1162 
1163 /*
1164  * In the current line, expand escape sequences that produce parsable
1165  * input text.  Also check the syntax of the remaining escape sequences,
1166  * which typically produce output glyphs or change formatter state.
1167  */
1168 static int
roff_expand(struct roff * r,struct buf * buf,int ln,int pos,char newesc)1169 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
1170 {
1171           struct mctx         *ctx;     /* current macro call context */
1172           char                 ubuf[24]; /* buffer to print the number */
1173           struct roff_node *n;          /* used for header comments */
1174           const char          *start;   /* start of the string to process */
1175           char                *stesc;   /* start of an escape sequence ('\\') */
1176           const char          *esct;    /* type of esccape sequence */
1177           char                *ep;      /* end of comment string */
1178           const char          *stnam;   /* start of the name, after "[(*" */
1179           const char          *cp;      /* end of the name, e.g. before ']' */
1180           const char          *res;     /* the string to be substituted */
1181           char                *nbuf;    /* new buffer to copy buf->buf to */
1182           size_t               maxl;  /* expected length of the escape name */
1183           size_t               naml;    /* actual length of the escape name */
1184           size_t               asz;     /* length of the replacement */
1185           size_t               rsz;     /* length of the rest of the string */
1186           int                  inaml;   /* length returned from mandoc_escape() */
1187           int                  expand_count;      /* to avoid infinite loops */
1188           int                  npos;    /* position in numeric expression */
1189           int                  arg_complete; /* argument not interrupted by eol */
1190           int                  quote_args; /* true for \\$@, false for \\$* */
1191           int                  done;    /* no more input available */
1192           int                  deftype; /* type of definition to paste */
1193           int                  rcsid;   /* kind of RCS id seen */
1194           enum mandocerr       err;     /* for escape sequence problems */
1195           char                 sign = '\0';       /* increment number register */
1196           char                 term;    /* character terminating the escape */
1197 
1198           /* Search forward for comments. */
1199 
1200           done = 0;
1201           start = buf->buf + pos;
1202           for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
1203                     if (stesc[0] != newesc || stesc[1] == '\0')
1204                               continue;
1205                     stesc++;
1206                     if (*stesc != '"' && *stesc != '#')
1207                               continue;
1208 
1209                     /* Comment found, look for RCS id. */
1210 
1211                     rcsid = 0;
1212                     if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
1213                               rcsid = 1 << MANDOC_OS_OPENBSD;
1214                               cp += 8;
1215                     } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
1216                               rcsid = 1 << MANDOC_OS_NETBSD;
1217                               cp += 7;
1218                     }
1219                     if (cp != NULL &&
1220                         isalnum((unsigned char)*cp) == 0 &&
1221                         strchr(cp, '$') != NULL) {
1222                               if (r->man->meta.rcsids & rcsid)
1223                                         mandoc_msg(MANDOCERR_RCS_REP, ln,
1224                                             (int)(stesc - buf->buf) + 1,
1225                                             "%s", stesc + 1);
1226                               r->man->meta.rcsids |= rcsid;
1227                     }
1228 
1229                     /* Handle trailing whitespace. */
1230 
1231                     ep = strchr(stesc--, '\0') - 1;
1232                     if (*ep == '\n') {
1233                               done = 1;
1234                               ep--;
1235                     }
1236                     if (*ep == ' ' || *ep == '\t')
1237                               mandoc_msg(MANDOCERR_SPACE_EOL,
1238                                   ln, (int)(ep - buf->buf), NULL);
1239 
1240                     /*
1241                      * Save comments preceding the title macro
1242                      * in the syntax tree.
1243                      */
1244 
1245                     if (newesc != ASCII_ESC && r->format == 0) {
1246                               while (*ep == ' ' || *ep == '\t')
1247                                         ep--;
1248                               ep[1] = '\0';
1249                               n = roff_node_alloc(r->man,
1250                                   ln, stesc + 1 - buf->buf,
1251                                   ROFFT_COMMENT, TOKEN_NONE);
1252                               n->string = mandoc_strdup(stesc + 2);
1253                               roff_node_append(r->man, n);
1254                               n->flags |= NODE_VALID | NODE_ENDED;
1255                               r->man->next = ROFF_NEXT_SIBLING;
1256                     }
1257 
1258                     /* Line continuation with comment. */
1259 
1260                     if (stesc[1] == '#') {
1261                               *stesc = '\0';
1262                               return ROFF_IGN | ROFF_APPEND;
1263                     }
1264 
1265                     /* Discard normal comments. */
1266 
1267                     while (stesc > start && stesc[-1] == ' ' &&
1268                         (stesc == start + 1 || stesc[-2] != '\\'))
1269                               stesc--;
1270                     *stesc = '\0';
1271                     break;
1272           }
1273           if (stesc == start)
1274                     return ROFF_CONT;
1275           stesc--;
1276 
1277           /* Notice the end of the input. */
1278 
1279           if (*stesc == '\n') {
1280                     *stesc-- = '\0';
1281                     done = 1;
1282           }
1283 
1284           expand_count = 0;
1285           while (stesc >= start) {
1286                     if (*stesc != newesc) {
1287 
1288                               /*
1289                                * If we have a non-standard escape character,
1290                                * escape literal backslashes because all
1291                                * processing in subsequent functions uses
1292                                * the standard escaping rules.
1293                                */
1294 
1295                               if (newesc != ASCII_ESC && *stesc == '\\') {
1296                                         *stesc = '\0';
1297                                         buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
1298                                             buf->buf, stesc + 1) + 1;
1299                                         start = nbuf + pos;
1300                                         stesc = nbuf + (stesc - buf->buf);
1301                                         free(buf->buf);
1302                                         buf->buf = nbuf;
1303                               }
1304 
1305                               /* Search backwards for the next escape. */
1306 
1307                               stesc--;
1308                               continue;
1309                     }
1310 
1311                     /* If it is escaped, skip it. */
1312 
1313                     for (cp = stesc - 1; cp >= start; cp--)
1314                               if (*cp != r->escape)
1315                                         break;
1316 
1317                     if ((stesc - cp) % 2 == 0) {
1318                               while (stesc > cp)
1319                                         *stesc-- = '\\';
1320                               continue;
1321                     } else if (stesc[1] != '\0') {
1322                               *stesc = '\\';
1323                     } else {
1324                               *stesc-- = '\0';
1325                               if (done)
1326                                         continue;
1327                               else
1328                                         return ROFF_IGN | ROFF_APPEND;
1329                     }
1330 
1331                     /* Decide whether to expand or to check only. */
1332 
1333                     term = '\0';
1334                     cp = stesc + 1;
1335                     if (*cp == 'E')
1336                               cp++;
1337                     esct = cp;
1338                     switch (*esct) {
1339                     case '*':
1340                     case '$':
1341                               res = NULL;
1342                               break;
1343                     case 'B':
1344                     case 'w':
1345                               term = cp[1];
1346                               /* FALLTHROUGH */
1347                     case 'n':
1348                               sign = cp[1];
1349                               if (sign == '+' || sign == '-')
1350                                         cp++;
1351                               res = ubuf;
1352                               break;
1353                     default:
1354                               err = MANDOCERR_OK;
1355                               switch(mandoc_escape(&cp, &stnam, &inaml)) {
1356                               case ESCAPE_SPECIAL:
1357                                         if (mchars_spec2cp(stnam, inaml) >= 0)
1358                                                   break;
1359                                         /* FALLTHROUGH */
1360                               case ESCAPE_ERROR:
1361                                         err = MANDOCERR_ESC_BAD;
1362                                         break;
1363                               case ESCAPE_UNDEF:
1364                                         err = MANDOCERR_ESC_UNDEF;
1365                                         break;
1366                               case ESCAPE_UNSUPP:
1367                                         err = MANDOCERR_ESC_UNSUPP;
1368                                         break;
1369                               default:
1370                                         break;
1371                               }
1372                               if (err != MANDOCERR_OK)
1373                                         mandoc_msg(err, ln, (int)(stesc - buf->buf),
1374                                             "%.*s", (int)(cp - stesc), stesc);
1375                               stesc--;
1376                               continue;
1377                     }
1378 
1379                     if (EXPAND_LIMIT < ++expand_count) {
1380                               mandoc_msg(MANDOCERR_ROFFLOOP,
1381                                   ln, (int)(stesc - buf->buf), NULL);
1382                               return ROFF_IGN;
1383                     }
1384 
1385                     /*
1386                      * The third character decides the length
1387                      * of the name of the string or register.
1388                      * Save a pointer to the name.
1389                      */
1390 
1391                     if (term == '\0') {
1392                               switch (*++cp) {
1393                               case '\0':
1394                                         maxl = 0;
1395                                         break;
1396                               case '(':
1397                                         cp++;
1398                                         maxl = 2;
1399                                         break;
1400                               case '[':
1401                                         cp++;
1402                                         term = ']';
1403                                         maxl = 0;
1404                                         break;
1405                               default:
1406                                         maxl = 1;
1407                                         break;
1408                               }
1409                     } else {
1410                               cp += 2;
1411                               maxl = 0;
1412                     }
1413                     stnam = cp;
1414 
1415                     /* Advance to the end of the name. */
1416 
1417                     naml = 0;
1418                     arg_complete = 1;
1419                     while (maxl == 0 || naml < maxl) {
1420                               if (*cp == '\0') {
1421                                         mandoc_msg(MANDOCERR_ESC_BAD, ln,
1422                                             (int)(stesc - buf->buf), "%s", stesc);
1423                                         arg_complete = 0;
1424                                         break;
1425                               }
1426                               if (maxl == 0 && *cp == term) {
1427                                         cp++;
1428                                         break;
1429                               }
1430                               if (*cp++ != '\\' || *esct != 'w') {
1431                                         naml++;
1432                                         continue;
1433                               }
1434                               switch (mandoc_escape(&cp, NULL, NULL)) {
1435                               case ESCAPE_SPECIAL:
1436                               case ESCAPE_UNICODE:
1437                               case ESCAPE_NUMBERED:
1438                               case ESCAPE_UNDEF:
1439                               case ESCAPE_OVERSTRIKE:
1440                                         naml++;
1441                                         break;
1442                               default:
1443                                         break;
1444                               }
1445                     }
1446 
1447                     /*
1448                      * Retrieve the replacement string; if it is
1449                      * undefined, resume searching for escapes.
1450                      */
1451 
1452                     switch (*esct) {
1453                     case '*':
1454                               if (arg_complete) {
1455                                         deftype = ROFFDEF_USER | ROFFDEF_PRE;
1456                                         res = roff_getstrn(r, stnam, naml, &deftype);
1457 
1458                                         /*
1459                                          * If not overriden, let \*(.T
1460                                          * through to the formatters.
1461                                          */
1462 
1463                                         if (res == NULL && naml == 2 &&
1464                                             stnam[0] == '.' && stnam[1] == 'T') {
1465                                                   roff_setstrn(&r->strtab,
1466                                                       ".T", 2, NULL, 0, 0);
1467                                                   stesc--;
1468                                                   continue;
1469                                         }
1470                               }
1471                               break;
1472                     case '$':
1473                               if (r->mstackpos < 0) {
1474                                         mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
1475                                             (int)(stesc - buf->buf), "%.3s", stesc);
1476                                         break;
1477                               }
1478                               ctx = r->mstack + r->mstackpos;
1479                               npos = esct[1] - '1';
1480                               if (npos >= 0 && npos <= 8) {
1481                                         res = npos < ctx->argc ?
1482                                             ctx->argv[npos] : "";
1483                                         break;
1484                               }
1485                               if (esct[1] == '*')
1486                                         quote_args = 0;
1487                               else if (esct[1] == '@')
1488                                         quote_args = 1;
1489                               else {
1490                                         mandoc_msg(MANDOCERR_ARG_NONUM, ln,
1491                                             (int)(stesc - buf->buf), "%.3s", stesc);
1492                                         break;
1493                               }
1494                               asz = 0;
1495                               for (npos = 0; npos < ctx->argc; npos++) {
1496                                         if (npos)
1497                                                   asz++;  /* blank */
1498                                         if (quote_args)
1499                                                   asz += 2;  /* quotes */
1500                                         asz += strlen(ctx->argv[npos]);
1501                               }
1502                               if (asz != 3) {
1503                                         rsz = buf->sz - (stesc - buf->buf) - 3;
1504                                         if (asz < 3)
1505                                                   memmove(stesc + asz, stesc + 3, rsz);
1506                                         buf->sz += asz - 3;
1507                                         nbuf = mandoc_realloc(buf->buf, buf->sz);
1508                                         start = nbuf + pos;
1509                                         stesc = nbuf + (stesc - buf->buf);
1510                                         buf->buf = nbuf;
1511                                         if (asz > 3)
1512                                                   memmove(stesc + asz, stesc + 3, rsz);
1513                               }
1514                               for (npos = 0; npos < ctx->argc; npos++) {
1515                                         if (npos)
1516                                                   *stesc++ = ' ';
1517                                         if (quote_args)
1518                                                   *stesc++ = '"';
1519                                         cp = ctx->argv[npos];
1520                                         while (*cp != '\0')
1521                                                   *stesc++ = *cp++;
1522                                         if (quote_args)
1523                                                   *stesc++ = '"';
1524                               }
1525                               continue;
1526                     case 'B':
1527                               npos = 0;
1528                               ubuf[0] = arg_complete &&
1529                                   roff_evalnum(r, ln, stnam, &npos,
1530                                     NULL, ROFFNUM_SCALE) &&
1531                                   stnam + npos + 1 == cp ? '1' : '0';
1532                               ubuf[1] = '\0';
1533                               break;
1534                     case 'n':
1535                               if (arg_complete)
1536                                         (void)snprintf(ubuf, sizeof(ubuf), "%d",
1537                                             roff_getregn(r, stnam, naml, sign));
1538                               else
1539                                         ubuf[0] = '\0';
1540                               break;
1541                     case 'w':
1542                               /* use even incomplete args */
1543                               (void)snprintf(ubuf, sizeof(ubuf), "%d",
1544                                   24 * (int)naml);
1545                               break;
1546                     }
1547 
1548                     if (res == NULL) {
1549                               if (*esct == '*')
1550                                         mandoc_msg(MANDOCERR_STR_UNDEF,
1551                                             ln, (int)(stesc - buf->buf),
1552                                             "%.*s", (int)naml, stnam);
1553                               res = "";
1554                     } else if (buf->sz + strlen(res) > SHRT_MAX) {
1555                               mandoc_msg(MANDOCERR_ROFFLOOP,
1556                                   ln, (int)(stesc - buf->buf), NULL);
1557                               return ROFF_IGN;
1558                     }
1559 
1560                     /* Replace the escape sequence by the string. */
1561 
1562                     *stesc = '\0';
1563                     buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
1564                         buf->buf, res, cp) + 1;
1565 
1566                     /* Prepare for the next replacement. */
1567 
1568                     start = nbuf + pos;
1569                     stesc = nbuf + (stesc - buf->buf) + strlen(res);
1570                     free(buf->buf);
1571                     buf->buf = nbuf;
1572           }
1573           return ROFF_CONT;
1574 }
1575 
1576 /*
1577  * Parse a quoted or unquoted roff-style request or macro argument.
1578  * Return a pointer to the parsed argument, which is either the original
1579  * pointer or advanced by one byte in case the argument is quoted.
1580  * NUL-terminate the argument in place.
1581  * Collapse pairs of quotes inside quoted arguments.
1582  * Advance the argument pointer to the next argument,
1583  * or to the NUL byte terminating the argument line.
1584  */
1585 char *
roff_getarg(struct roff * r,char ** cpp,int ln,int * pos)1586 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1587 {
1588           struct buf           buf;
1589           char                *cp, *start;
1590           int                  newesc, pairs, quoted, white;
1591 
1592           /* Quoting can only start with a new word. */
1593           start = *cpp;
1594           quoted = 0;
1595           if ('"' == *start) {
1596                     quoted = 1;
1597                     start++;
1598           }
1599 
1600           newesc = pairs = white = 0;
1601           for (cp = start; '\0' != *cp; cp++) {
1602 
1603                     /*
1604                      * Move the following text left
1605                      * after quoted quotes and after "\\" and "\t".
1606                      */
1607                     if (pairs)
1608                               cp[-pairs] = cp[0];
1609 
1610                     if ('\\' == cp[0]) {
1611                               /*
1612                                * In copy mode, translate double to single
1613                                * backslashes and backslash-t to literal tabs.
1614                                */
1615                               switch (cp[1]) {
1616                               case 'a':
1617                               case 't':
1618                                         cp[-pairs] = '\t';
1619                                         pairs++;
1620                                         cp++;
1621                                         break;
1622                               case '\\':
1623                                         newesc = 1;
1624                                         cp[-pairs] = ASCII_ESC;
1625                                         pairs++;
1626                                         cp++;
1627                                         break;
1628                               case ' ':
1629                                         /* Skip escaped blanks. */
1630                                         if (0 == quoted)
1631                                                   cp++;
1632                                         break;
1633                               default:
1634                                         break;
1635                               }
1636                     } else if (0 == quoted) {
1637                               if (' ' == cp[0]) {
1638                                         /* Unescaped blanks end unquoted args. */
1639                                         white = 1;
1640                                         break;
1641                               }
1642                     } else if ('"' == cp[0]) {
1643                               if ('"' == cp[1]) {
1644                                         /* Quoted quotes collapse. */
1645                                         pairs++;
1646                                         cp++;
1647                               } else {
1648                                         /* Unquoted quotes end quoted args. */
1649                                         quoted = 2;
1650                                         break;
1651                               }
1652                     }
1653           }
1654 
1655           /* Quoted argument without a closing quote. */
1656           if (1 == quoted)
1657                     mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1658 
1659           /* NUL-terminate this argument and move to the next one. */
1660           if (pairs)
1661                     cp[-pairs] = '\0';
1662           if ('\0' != *cp) {
1663                     *cp++ = '\0';
1664                     while (' ' == *cp)
1665                               cp++;
1666           }
1667           *pos += (int)(cp - start) + (quoted ? 1 : 0);
1668           *cpp = cp;
1669 
1670           if ('\0' == *cp && (white || ' ' == cp[-1]))
1671                     mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1672 
1673           start = mandoc_strdup(start);
1674           if (newesc == 0)
1675                     return start;
1676 
1677           buf.buf = start;
1678           buf.sz = strlen(start) + 1;
1679           buf.next = NULL;
1680           if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
1681                     free(buf.buf);
1682                     buf.buf = mandoc_strdup("");
1683           }
1684           return buf.buf;
1685 }
1686 
1687 
1688 /*
1689  * Process text streams.
1690  */
1691 static int
roff_parsetext(struct roff * r,struct buf * buf,int pos,int * offs)1692 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1693 {
1694           size_t               sz;
1695           const char          *start;
1696           char                *p;
1697           int                  isz;
1698           enum mandoc_esc      esc;
1699 
1700           /* Spring the input line trap. */
1701 
1702           if (roffit_lines == 1) {
1703                     isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1704                     free(buf->buf);
1705                     buf->buf = p;
1706                     buf->sz = isz + 1;
1707                     *offs = 0;
1708                     free(roffit_macro);
1709                     roffit_lines = 0;
1710                     return ROFF_REPARSE;
1711           } else if (roffit_lines > 1)
1712                     --roffit_lines;
1713 
1714           if (roffce_node != NULL && buf->buf[pos] != '\0') {
1715                     if (roffce_lines < 1) {
1716                               r->man->last = roffce_node;
1717                               r->man->next = ROFF_NEXT_SIBLING;
1718                               roffce_lines = 0;
1719                               roffce_node = NULL;
1720                     } else
1721                               roffce_lines--;
1722           }
1723 
1724           /* Convert all breakable hyphens into ASCII_HYPH. */
1725 
1726           start = p = buf->buf + pos;
1727 
1728           while (*p != '\0') {
1729                     sz = strcspn(p, "-\\");
1730                     p += sz;
1731 
1732                     if (*p == '\0')
1733                               break;
1734 
1735                     if (*p == '\\') {
1736                               /* Skip over escapes. */
1737                               p++;
1738                               esc = mandoc_escape((const char **)(void *)&p, NULL, NULL);
1739                               if (esc == ESCAPE_ERROR)
1740                                         break;
1741                               while (*p == '-')
1742                                         p++;
1743                               continue;
1744                     } else if (p == start) {
1745                               p++;
1746                               continue;
1747                     }
1748 
1749                     if (isalpha((unsigned char)p[-1]) &&
1750                         isalpha((unsigned char)p[1]))
1751                               *p = ASCII_HYPH;
1752                     p++;
1753           }
1754           return ROFF_CONT;
1755 }
1756 
1757 int
roff_parseln(struct roff * r,int ln,struct buf * buf,int * offs)1758 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
1759 {
1760           enum roff_tok        t;
1761           int                  e;
1762           int                  pos;     /* parse point */
1763           int                  spos;    /* saved parse point for messages */
1764           int                  ppos;    /* original offset in buf->buf */
1765           int                  ctl;     /* macro line (boolean) */
1766 
1767           ppos = pos = *offs;
1768 
1769           /* Handle in-line equation delimiters. */
1770 
1771           if (r->tbl == NULL &&
1772               r->last_eqn != NULL && r->last_eqn->delim &&
1773               (r->eqn == NULL || r->eqn_inline)) {
1774                     e = roff_eqndelim(r, buf, pos);
1775                     if (e == ROFF_REPARSE)
1776                               return e;
1777                     assert(e == ROFF_CONT);
1778           }
1779 
1780           /* Expand some escape sequences. */
1781 
1782           e = roff_expand(r, buf, ln, pos, r->escape);
1783           if ((e & ROFF_MASK) == ROFF_IGN)
1784                     return e;
1785           assert(e == ROFF_CONT);
1786 
1787           ctl = roff_getcontrol(r, buf->buf, &pos);
1788 
1789           /*
1790            * First, if a scope is open and we're not a macro, pass the
1791            * text through the macro's filter.
1792            * Equations process all content themselves.
1793            * Tables process almost all content themselves, but we want
1794            * to warn about macros before passing it there.
1795            */
1796 
1797           if (r->last != NULL && ! ctl) {
1798                     t = r->last->tok;
1799                     e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1800                     if ((e & ROFF_MASK) == ROFF_IGN)
1801                               return e;
1802                     e &= ~ROFF_MASK;
1803           } else
1804                     e = ROFF_IGN;
1805           if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1806                     eqn_read(r->eqn, buf->buf + ppos);
1807                     return e;
1808           }
1809           if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1810                     tbl_read(r->tbl, ln, buf->buf, ppos);
1811                     roff_addtbl(r->man, ln, r->tbl);
1812                     return e;
1813           }
1814           if ( ! ctl)
1815                     return roff_parsetext(r, buf, pos, offs) | e;
1816 
1817           /* Skip empty request lines. */
1818 
1819           if (buf->buf[pos] == '"') {
1820                     mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1821                     return ROFF_IGN;
1822           } else if (buf->buf[pos] == '\0')
1823                     return ROFF_IGN;
1824 
1825           /*
1826            * If a scope is open, go to the child handler for that macro,
1827            * as it may want to preprocess before doing anything with it.
1828            * Don't do so if an equation is open.
1829            */
1830 
1831           if (r->last) {
1832                     t = r->last->tok;
1833                     return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1834           }
1835 
1836           /* No scope is open.  This is a new request or macro. */
1837 
1838           spos = pos;
1839           t = roff_parse(r, buf->buf, &pos, ln, ppos);
1840 
1841           /* Tables ignore most macros. */
1842 
1843           if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
1844               t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
1845                     mandoc_msg(MANDOCERR_TBLMACRO,
1846                         ln, pos, "%s", buf->buf + spos);
1847                     if (t != TOKEN_NONE)
1848                               return ROFF_IGN;
1849                     while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1850                               pos++;
1851                     while (buf->buf[pos] == ' ')
1852                               pos++;
1853                     tbl_read(r->tbl, ln, buf->buf, pos);
1854                     roff_addtbl(r->man, ln, r->tbl);
1855                     return ROFF_IGN;
1856           }
1857 
1858           /* For now, let high level macros abort .ce mode. */
1859 
1860           if (ctl && roffce_node != NULL &&
1861               (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
1862                t == ROFF_TH || t == ROFF_TS)) {
1863                     r->man->last = roffce_node;
1864                     r->man->next = ROFF_NEXT_SIBLING;
1865                     roffce_lines = 0;
1866                     roffce_node = NULL;
1867           }
1868 
1869           /*
1870            * This is neither a roff request nor a user-defined macro.
1871            * Let the standard macro set parsers handle it.
1872            */
1873 
1874           if (t == TOKEN_NONE)
1875                     return ROFF_CONT;
1876 
1877           /* Execute a roff request or a user defined macro. */
1878 
1879           return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
1880 }
1881 
1882 /*
1883  * Internal interface function to tell the roff parser that execution
1884  * of the current macro ended.  This is required because macro
1885  * definitions usually do not end with a .return request.
1886  */
1887 void
roff_userret(struct roff * r)1888 roff_userret(struct roff *r)
1889 {
1890           struct mctx         *ctx;
1891           int                  i;
1892 
1893           assert(r->mstackpos >= 0);
1894           ctx = r->mstack + r->mstackpos;
1895           for (i = 0; i < ctx->argc; i++)
1896                     free(ctx->argv[i]);
1897           ctx->argc = 0;
1898           r->mstackpos--;
1899 }
1900 
1901 void
roff_endparse(struct roff * r)1902 roff_endparse(struct roff *r)
1903 {
1904           if (r->last != NULL)
1905                     mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1906                         r->last->col, "%s", roff_name[r->last->tok]);
1907 
1908           if (r->eqn != NULL) {
1909                     mandoc_msg(MANDOCERR_BLK_NOEND,
1910                         r->eqn->node->line, r->eqn->node->pos, "EQ");
1911                     eqn_parse(r->eqn);
1912                     r->eqn = NULL;
1913           }
1914 
1915           if (r->tbl != NULL) {
1916                     tbl_end(r->tbl, 1);
1917                     r->tbl = NULL;
1918           }
1919 }
1920 
1921 /*
1922  * Parse a roff node's type from the input buffer.  This must be in the
1923  * form of ".foo xxx" in the usual way.
1924  */
1925 static enum roff_tok
roff_parse(struct roff * r,char * buf,int * pos,int ln,int ppos)1926 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1927 {
1928           char                *cp;
1929           const char          *mac;
1930           size_t               maclen;
1931           int                  deftype;
1932           enum roff_tok        t;
1933 
1934           cp = buf + *pos;
1935 
1936           if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1937                     return TOKEN_NONE;
1938 
1939           mac = cp;
1940           maclen = roff_getname(r, &cp, ln, ppos);
1941 
1942           deftype = ROFFDEF_USER | ROFFDEF_REN;
1943           r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1944           switch (deftype) {
1945           case ROFFDEF_USER:
1946                     t = ROFF_USERDEF;
1947                     break;
1948           case ROFFDEF_REN:
1949                     t = ROFF_RENAMED;
1950                     break;
1951           default:
1952                     t = roffhash_find(r->reqtab, mac, maclen);
1953                     break;
1954           }
1955           if (t != TOKEN_NONE)
1956                     *pos = cp - buf;
1957           else if (deftype == ROFFDEF_UNDEF) {
1958                     /* Using an undefined macro defines it to be empty. */
1959                     roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
1960                     roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
1961           }
1962           return t;
1963 }
1964 
1965 /* --- handling of request blocks ----------------------------------------- */
1966 
1967 static int
roff_cblock(ROFF_ARGS)1968 roff_cblock(ROFF_ARGS)
1969 {
1970 
1971           /*
1972            * A block-close `..' should only be invoked as a child of an
1973            * ignore macro, otherwise raise a warning and just ignore it.
1974            */
1975 
1976           if (r->last == NULL) {
1977                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
1978                     return ROFF_IGN;
1979           }
1980 
1981           switch (r->last->tok) {
1982           case ROFF_am:
1983                     /* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1984           case ROFF_ami:
1985           case ROFF_de:
1986                     /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1987           case ROFF_dei:
1988           case ROFF_ig:
1989                     break;
1990           default:
1991                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
1992                     return ROFF_IGN;
1993           }
1994 
1995           if (buf->buf[pos] != '\0')
1996                     mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
1997                         ".. %s", buf->buf + pos);
1998 
1999           roffnode_pop(r);
2000           roffnode_cleanscope(r);
2001           return ROFF_IGN;
2002 
2003 }
2004 
2005 static int
roffnode_cleanscope(struct roff * r)2006 roffnode_cleanscope(struct roff *r)
2007 {
2008           int inloop;
2009 
2010           inloop = 0;
2011           while (r->last != NULL) {
2012                     if (--r->last->endspan != 0)
2013                               break;
2014                     inloop += roffnode_pop(r);
2015           }
2016           return inloop;
2017 }
2018 
2019 static int
roff_ccond(struct roff * r,int ln,int ppos)2020 roff_ccond(struct roff *r, int ln, int ppos)
2021 {
2022           if (NULL == r->last) {
2023                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2024                     return 0;
2025           }
2026 
2027           switch (r->last->tok) {
2028           case ROFF_el:
2029           case ROFF_ie:
2030           case ROFF_if:
2031           case ROFF_while:
2032                     break;
2033           default:
2034                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2035                     return 0;
2036           }
2037 
2038           if (r->last->endspan > -1) {
2039                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2040                     return 0;
2041           }
2042 
2043           return roffnode_pop(r) + roffnode_cleanscope(r);
2044 }
2045 
2046 static int
roff_block(ROFF_ARGS)2047 roff_block(ROFF_ARGS)
2048 {
2049           const char          *name, *value;
2050           char                *call, *cp, *iname, *rname;
2051           size_t               csz, namesz, rsz;
2052           int                  deftype;
2053 
2054           /* Ignore groff compatibility mode for now. */
2055 
2056           if (tok == ROFF_de1)
2057                     tok = ROFF_de;
2058           else if (tok == ROFF_dei1)
2059                     tok = ROFF_dei;
2060           else if (tok == ROFF_am1)
2061                     tok = ROFF_am;
2062           else if (tok == ROFF_ami1)
2063                     tok = ROFF_ami;
2064 
2065           /* Parse the macro name argument. */
2066 
2067           cp = buf->buf + pos;
2068           if (tok == ROFF_ig) {
2069                     iname = NULL;
2070                     namesz = 0;
2071           } else {
2072                     iname = cp;
2073                     namesz = roff_getname(r, &cp, ln, ppos);
2074                     iname[namesz] = '\0';
2075           }
2076 
2077           /* Resolve the macro name argument if it is indirect. */
2078 
2079           if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2080                     deftype = ROFFDEF_USER;
2081                     name = roff_getstrn(r, iname, namesz, &deftype);
2082                     if (name == NULL) {
2083                               mandoc_msg(MANDOCERR_STR_UNDEF,
2084                                   ln, (int)(iname - buf->buf),
2085                                   "%.*s", (int)namesz, iname);
2086                               namesz = 0;
2087                     } else
2088                               namesz = strlen(name);
2089           } else
2090                     name = iname;
2091 
2092           if (namesz == 0 && tok != ROFF_ig) {
2093                     mandoc_msg(MANDOCERR_REQ_EMPTY,
2094                         ln, ppos, "%s", roff_name[tok]);
2095                     return ROFF_IGN;
2096           }
2097 
2098           roffnode_push(r, tok, name, ln, ppos);
2099 
2100           /*
2101            * At the beginning of a `de' macro, clear the existing string
2102            * with the same name, if there is one.  New content will be
2103            * appended from roff_block_text() in multiline mode.
2104            */
2105 
2106           if (tok == ROFF_de || tok == ROFF_dei) {
2107                     roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2108                     roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2109           } else if (tok == ROFF_am || tok == ROFF_ami) {
2110                     deftype = ROFFDEF_ANY;
2111                     value = roff_getstrn(r, iname, namesz, &deftype);
2112                     switch (deftype) {  /* Before appending, ... */
2113                     case ROFFDEF_PRE: /* copy predefined to user-defined. */
2114                               roff_setstrn(&r->strtab, name, namesz,
2115                                   value, strlen(value), 0);
2116                               break;
2117                     case ROFFDEF_REN: /* call original standard macro. */
2118                               csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2119                                   (int)strlen(value), value);
2120                               roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2121                               roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2122                               free(call);
2123                               break;
2124                     case ROFFDEF_STD:  /* rename and call standard macro. */
2125                               rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2126                               roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2127                               csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2128                                   (int)rsz, rname);
2129                               roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2130                               free(call);
2131                               free(rname);
2132                               break;
2133                     default:
2134                               break;
2135                     }
2136           }
2137 
2138           if (*cp == '\0')
2139                     return ROFF_IGN;
2140 
2141           /* Get the custom end marker. */
2142 
2143           iname = cp;
2144           namesz = roff_getname(r, &cp, ln, ppos);
2145 
2146           /* Resolve the end marker if it is indirect. */
2147 
2148           if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2149                     deftype = ROFFDEF_USER;
2150                     name = roff_getstrn(r, iname, namesz, &deftype);
2151                     if (name == NULL) {
2152                               mandoc_msg(MANDOCERR_STR_UNDEF,
2153                                   ln, (int)(iname - buf->buf),
2154                                   "%.*s", (int)namesz, iname);
2155                               namesz = 0;
2156                     } else
2157                               namesz = strlen(name);
2158           } else
2159                     name = iname;
2160 
2161           if (namesz)
2162                     r->last->end = mandoc_strndup(name, namesz);
2163 
2164           if (*cp != '\0')
2165                     mandoc_msg(MANDOCERR_ARG_EXCESS,
2166                         ln, pos, ".%s ... %s", roff_name[tok], cp);
2167 
2168           return ROFF_IGN;
2169 }
2170 
2171 static int
roff_block_sub(ROFF_ARGS)2172 roff_block_sub(ROFF_ARGS)
2173 {
2174           enum roff_tok       t;
2175           int                 i, j;
2176 
2177           /*
2178            * First check whether a custom macro exists at this level.  If
2179            * it does, then check against it.  This is some of groff's
2180            * stranger behaviours.  If we encountered a custom end-scope
2181            * tag and that tag also happens to be a "real" macro, then we
2182            * need to try interpreting it again as a real macro.  If it's
2183            * not, then return ignore.  Else continue.
2184            */
2185 
2186           if (r->last->end) {
2187                     for (i = pos, j = 0; r->last->end[j]; j++, i++)
2188                               if (buf->buf[i] != r->last->end[j])
2189                                         break;
2190 
2191                     if (r->last->end[j] == '\0' &&
2192                         (buf->buf[i] == '\0' ||
2193                          buf->buf[i] == ' ' ||
2194                          buf->buf[i] == '\t')) {
2195                               roffnode_pop(r);
2196                               roffnode_cleanscope(r);
2197 
2198                               while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2199                                         i++;
2200 
2201                               pos = i;
2202                               if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2203                                   TOKEN_NONE)
2204                                         return ROFF_RERUN;
2205                               return ROFF_IGN;
2206                     }
2207           }
2208 
2209           /*
2210            * If we have no custom end-query or lookup failed, then try
2211            * pulling it out of the hashtable.
2212            */
2213 
2214           t = roff_parse(r, buf->buf, &pos, ln, ppos);
2215 
2216           if (t != ROFF_cblock) {
2217                     if (tok != ROFF_ig)
2218                               roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2219                     return ROFF_IGN;
2220           }
2221 
2222           return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2223 }
2224 
2225 static int
roff_block_text(ROFF_ARGS)2226 roff_block_text(ROFF_ARGS)
2227 {
2228 
2229           if (tok != ROFF_ig)
2230                     roff_setstr(r, r->last->name, buf->buf + pos, 2);
2231 
2232           return ROFF_IGN;
2233 }
2234 
2235 static int
roff_cond_sub(ROFF_ARGS)2236 roff_cond_sub(ROFF_ARGS)
2237 {
2238           char                *ep;
2239           int                  endloop, irc, rr;
2240           enum roff_tok        t;
2241 
2242           irc = ROFF_IGN;
2243           rr = r->last->rule;
2244           endloop = tok != ROFF_while ? ROFF_IGN :
2245               rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2246           if (roffnode_cleanscope(r))
2247                     irc |= endloop;
2248 
2249           /*
2250            * If `\}' occurs on a macro line without a preceding macro,
2251            * drop the line completely.
2252            */
2253 
2254           ep = buf->buf + pos;
2255           if (ep[0] == '\\' && ep[1] == '}')
2256                     rr = 0;
2257 
2258           /*
2259            * The closing delimiter `\}' rewinds the conditional scope
2260            * but is otherwise ignored when interpreting the line.
2261            */
2262 
2263           while ((ep = strchr(ep, '\\')) != NULL) {
2264                     switch (ep[1]) {
2265                     case '}':
2266                               memmove(ep, ep + 2, strlen(ep + 2) + 1);
2267                               if (roff_ccond(r, ln, ep - buf->buf))
2268                                         irc |= endloop;
2269                               break;
2270                     case '\0':
2271                               ++ep;
2272                               break;
2273                     default:
2274                               ep += 2;
2275                               break;
2276                     }
2277           }
2278 
2279           /*
2280            * Fully handle known macros when they are structurally
2281            * required or when the conditional evaluated to true.
2282            */
2283 
2284           t = roff_parse(r, buf->buf, &pos, ln, ppos);
2285           irc |= t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT) ?
2286               (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) :
2287               rr ? ROFF_CONT : ROFF_IGN;
2288           return irc;
2289 }
2290 
2291 static int
roff_cond_text(ROFF_ARGS)2292 roff_cond_text(ROFF_ARGS)
2293 {
2294           char                *ep;
2295           int                  endloop, irc, rr;
2296 
2297           irc = ROFF_IGN;
2298           rr = r->last->rule;
2299           endloop = tok != ROFF_while ? ROFF_IGN :
2300               rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2301           if (roffnode_cleanscope(r))
2302                     irc |= endloop;
2303 
2304           /*
2305            * If `\}' occurs on a text line with neither preceding
2306            * nor following characters, drop the line completely.
2307            */
2308 
2309           ep = buf->buf + pos;
2310           if (strcmp(ep, "\\}") == 0)
2311                     rr = 0;
2312 
2313           /*
2314            * The closing delimiter `\}' rewinds the conditional scope
2315            * but is otherwise ignored when interpreting the line.
2316            */
2317 
2318           while ((ep = strchr(ep, '\\')) != NULL) {
2319                     switch (ep[1]) {
2320                     case '}':
2321                               memmove(ep, ep + 2, strlen(ep + 2) + 1);
2322                               if (roff_ccond(r, ln, ep - buf->buf))
2323                                         irc |= endloop;
2324                               break;
2325                     case '\0':
2326                               ++ep;
2327                               break;
2328                     default:
2329                               ep += 2;
2330                               break;
2331                     }
2332           }
2333           if (rr)
2334                     irc |= ROFF_CONT;
2335           return irc;
2336 }
2337 
2338 /* --- handling of numeric and conditional expressions -------------------- */
2339 
/*
 * Parse a single signed integer number, starting at v[*pos].
 * An optional sign comes first; with ROFFNUM_WHITE, white space
 * is skipped after it.  Parsing stops at the first non-digit.
 * One scaling unit directly following the digits is consumed, too,
 * but it only influences the result with ROFFNUM_SCALE.
 *
 * If there is at least one digit, store the result in *res unless
 * res is NULL, advance *pos behind the number, and return 1.
 * Otherwise, return 0 and leave *pos unchanged; a sign on its own
 * is not a number.
 *
 * Ignore overflows, treat them just like the C language.
 */
static int
roff_getnum(const char *v, int *pos, int *res, int flags)
{
	int	 myres, scaled, n, p, digits;

	/* In validation mode, the value is computed but discarded. */

	if (NULL == res)
		res = &myres;

	p = *pos;
	n = v[p] == '-';
	if (n || v[p] == '+')
		p++;

	if (flags & ROFFNUM_WHITE)
		while (isspace((unsigned char)v[p]))
			p++;

	/*
	 * Remember where the digits are supposed to start, such that
	 * neither a sign nor skipped white space can be mistaken
	 * for a digit when checking whether anything was parsed.
	 */

	digits = p;
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';
	if (p == digits)
		return 0;

	if (n)
		*res = -*res;

	/* Each number may be followed by one optional scaling unit. */

	switch (v[p]) {
	case 'f':
		scaled = *res * 65536;
		break;
	case 'i':
		scaled = *res * 240;
		break;
	case 'c':
		scaled = *res * 240 / 2.54;
		break;
	case 'v':
	case 'P':
		scaled = *res * 40;
		break;
	case 'm':
	case 'n':
		scaled = *res * 24;
		break;
	case 'p':
		scaled = *res * 10 / 3;
		break;
	case 'u':
		scaled = *res;
		break;
	case 'M':
		scaled = *res * 6 / 25;
		break;
	default:
		/* Not a scaling unit: do not consume this character. */
		scaled = *res;
		p--;
		break;
	}
	if (flags & ROFFNUM_SCALE)
		*res = scaled;

	*pos = p + 1;
	return 1;
}
2411 
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] is the delimiter.
 * Succeed if the string between its first and second occurrence
 * equals the string between its second and third occurrence.
 * Advance the cursor behind the third occurrence
 * or, lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*left, *right;
	char		 delim;
	int		 match;

	delim = v[*pos];
	left = v + *pos + 1;		/* scans the first string */
	right = strchr(left, delim);	/* scans the second string */
	match = 0;

	if (right != NULL) {
		for (right++; *right != '\0'; left++, right++) {
			if (*left != *right) {
				/* Mismatch: skip to the final delimiter. */
				right = strchr(right, delim);
				break;
			}
			if (*right == delim) {
				/* Both strings ended simultaneously. */
				match = 1;
				break;
			}
		}
	}

	if (right == NULL)
		right = strchr(left, '\0');
	else if (*right != '\0')
		right++;
	*pos = right - v;
	return match;
}
2454 
2455 /*
2456  * Evaluate an optionally negated single character, numerical,
2457  * or string condition.
2458  */
2459 static int
roff_evalcond(struct roff * r,int ln,char * v,int * pos)2460 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2461 {
2462           const char          *start, *end;
2463           char                *cp, *name;
2464           size_t               sz;
2465           int                  deftype, len, number, savepos, istrue, wanttrue;
2466 
2467           if ('!' == v[*pos]) {
2468                     wanttrue = 0;
2469                     (*pos)++;
2470           } else
2471                     wanttrue = 1;
2472 
2473           switch (v[*pos]) {
2474           case '\0':
2475                     return 0;
2476           case 'n':
2477           case 'o':
2478                     (*pos)++;
2479                     return wanttrue;
2480           case 'e':
2481           case 't':
2482           case 'v':
2483                     (*pos)++;
2484                     return !wanttrue;
2485           case 'c':
2486                     do {
2487                               (*pos)++;
2488                     } while (v[*pos] == ' ');
2489 
2490                     /*
2491                      * Quirk for groff compatibility:
2492                      * The horizontal tab is neither available nor unavailable.
2493                      */
2494 
2495                     if (v[*pos] == '\t') {
2496                               (*pos)++;
2497                               return 0;
2498                     }
2499 
2500                     /* Printable ASCII characters are available. */
2501 
2502                     if (v[*pos] != '\\') {
2503                               (*pos)++;
2504                               return wanttrue;
2505                     }
2506 
2507                     end = v + ++*pos;
2508                     switch (mandoc_escape(&end, &start, &len)) {
2509                     case ESCAPE_SPECIAL:
2510                               istrue = mchars_spec2cp(start, len) != -1;
2511                               break;
2512                     case ESCAPE_UNICODE:
2513                               istrue = 1;
2514                               break;
2515                     case ESCAPE_NUMBERED:
2516                               istrue = mchars_num2char(start, len) != -1;
2517                               break;
2518                     default:
2519                               istrue = !wanttrue;
2520                               break;
2521                     }
2522                     *pos = end - v;
2523                     return istrue == wanttrue;
2524           case 'd':
2525           case 'r':
2526                     cp = v + *pos + 1;
2527                     while (*cp == ' ')
2528                               cp++;
2529                     name = cp;
2530                     sz = roff_getname(r, &cp, ln, cp - v);
2531                     if (sz == 0)
2532                               istrue = 0;
2533                     else if (v[*pos] == 'r')
2534                               istrue = roff_hasregn(r, name, sz);
2535                     else {
2536                               deftype = ROFFDEF_ANY;
2537                             roff_getstrn(r, name, sz, &deftype);
2538                               istrue = !!deftype;
2539                     }
2540                     *pos = (name + sz) - v;
2541                     return istrue == wanttrue;
2542           default:
2543                     break;
2544           }
2545 
2546           savepos = *pos;
2547           if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2548                     return (number > 0) == wanttrue;
2549           else if (*pos == savepos)
2550                     return roff_evalstrcond(v, pos) == wanttrue;
2551           else
2552                     return 0;
2553 }
2554 
2555 static int
roff_line_ignore(ROFF_ARGS)2556 roff_line_ignore(ROFF_ARGS)
2557 {
2558 
2559           return ROFF_IGN;
2560 }
2561 
2562 static int
roff_insec(ROFF_ARGS)2563 roff_insec(ROFF_ARGS)
2564 {
2565 
2566           mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2567           return ROFF_IGN;
2568 }
2569 
2570 static int
roff_unsupp(ROFF_ARGS)2571 roff_unsupp(ROFF_ARGS)
2572 {
2573 
2574           mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2575           return ROFF_IGN;
2576 }
2577 
2578 static int
roff_cond(ROFF_ARGS)2579 roff_cond(ROFF_ARGS)
2580 {
2581           int        irc;
2582 
2583           roffnode_push(r, tok, NULL, ln, ppos);
2584 
2585           /*
2586            * An `.el' has no conditional body: it will consume the value
2587            * of the current rstack entry set in prior `ie' calls or
2588            * defaults to DENY.
2589            *
2590            * If we're not an `el', however, then evaluate the conditional.
2591            */
2592 
2593           r->last->rule = tok == ROFF_el ?
2594               (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2595               roff_evalcond(r, ln, buf->buf, &pos);
2596 
2597           /*
2598            * An if-else will put the NEGATION of the current evaluated
2599            * conditional into the stack of rules.
2600            */
2601 
2602           if (tok == ROFF_ie) {
2603                     if (r->rstackpos + 1 == r->rstacksz) {
2604                               r->rstacksz += 16;
2605                               r->rstack = mandoc_reallocarray(r->rstack,
2606                                   r->rstacksz, sizeof(int));
2607                     }
2608                     r->rstack[++r->rstackpos] = !r->last->rule;
2609           }
2610 
2611           /* If the parent has false as its rule, then so do we. */
2612 
2613           if (r->last->parent && !r->last->parent->rule)
2614                     r->last->rule = 0;
2615 
2616           /*
2617            * Determine scope.
2618            * If there is nothing on the line after the conditional,
2619            * not even whitespace, use next-line scope.
2620            * Except that .while does not support next-line scope.
2621            */
2622 
2623           if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2624                     r->last->endspan = 2;
2625                     goto out;
2626           }
2627 
2628           while (buf->buf[pos] == ' ')
2629                     pos++;
2630 
2631           /* An opening brace requests multiline scope. */
2632 
2633           if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2634                     r->last->endspan = -1;
2635                     pos += 2;
2636                     while (buf->buf[pos] == ' ')
2637                               pos++;
2638                     goto out;
2639           }
2640 
2641           /*
2642            * Anything else following the conditional causes
2643            * single-line scope.  Warn if the scope contains
2644            * nothing but trailing whitespace.
2645            */
2646 
2647           if (buf->buf[pos] == '\0')
2648                     mandoc_msg(MANDOCERR_COND_EMPTY,
2649                         ln, ppos, "%s", roff_name[tok]);
2650 
2651           r->last->endspan = 1;
2652 
2653 out:
2654           *offs = pos;
2655           irc = ROFF_RERUN;
2656           if (tok == ROFF_while)
2657                     irc |= ROFF_WHILE;
2658           return irc;
2659 }
2660 
2661 static int
roff_ds(ROFF_ARGS)2662 roff_ds(ROFF_ARGS)
2663 {
2664           char                *string;
2665           const char          *name;
2666           size_t               namesz;
2667 
2668           /* Ignore groff compatibility mode for now. */
2669 
2670           if (tok == ROFF_ds1)
2671                     tok = ROFF_ds;
2672           else if (tok == ROFF_as1)
2673                     tok = ROFF_as;
2674 
2675           /*
2676            * The first word is the name of the string.
2677            * If it is empty or terminated by an escape sequence,
2678            * abort the `ds' request without defining anything.
2679            */
2680 
2681           name = string = buf->buf + pos;
2682           if (*name == '\0')
2683                     return ROFF_IGN;
2684 
2685           namesz = roff_getname(r, &string, ln, pos);
2686           switch (name[namesz]) {
2687           case '\\':
2688                     return ROFF_IGN;
2689           case '\t':
2690                     string = buf->buf + pos + namesz;
2691                     break;
2692           default:
2693                     break;
2694           }
2695 
2696           /* Read past the initial double-quote, if any. */
2697           if (*string == '"')
2698                     string++;
2699 
2700           /* The rest is the value. */
2701           roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2702               ROFF_as == tok);
2703           roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2704           return ROFF_IGN;
2705 }
2706 
/*
 * Parse a single operator, one or two characters long,
 * starting at v[*pos].
 * If the operator is recognized, store its code in *res, advance
 * the parse point behind it, and return the code, which is non-zero.
 * Otherwise, return 0 and leave the parse point unchanged.
 * Single-character operators and `==' are encoded as their first
 * character, the other two-character operators as follows:
 * `<=' as 'l', `>=' as 'g', `<>' as '!', `<?' as 'i', `>?' as 'a'.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 oplen;

	cp = v + *pos;
	oplen = 1;
	*res = *cp;

	/* Only look at the second character where it can matter. */

	switch (*cp) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		if (cp[1] == '=') {
			*res = 'l';
			oplen = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			oplen = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			oplen = 2;
		}
		break;
	case '>':
		if (cp[1] == '=') {
			*res = 'g';
			oplen = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			oplen = 2;
		}
		break;
	case '=':
		if (cp[1] == '=')
			oplen = 2;
		break;
	default:
		return 0;
	}

	*pos += oplen;
	return *res;
}
2770 
2771 /*
2772  * Evaluate either a parenthesized numeric expression
2773  * or a single signed integer number.
2774  */
2775 static int
roff_evalpar(struct roff * r,int ln,const char * v,int * pos,int * res,int flags)2776 roff_evalpar(struct roff *r, int ln,
2777           const char *v, int *pos, int *res, int flags)
2778 {
2779 
2780           if ('(' != v[*pos])
2781                     return roff_getnum(v, pos, res, flags);
2782 
2783           (*pos)++;
2784           if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2785                     return 0;
2786 
2787           /*
2788            * Omission of the closing parenthesis
2789            * is an error in validation mode,
2790            * but ignored in evaluation mode.
2791            */
2792 
2793           if (')' == v[*pos])
2794                     (*pos)++;
2795           else if (NULL == res)
2796                     return 0;
2797 
2798           return 1;
2799 }
2800 
2801 /*
2802  * Evaluate a complete numeric expression.
2803  * Proceed left to right, there is no concept of precedence.
2804  */
2805 static int
roff_evalnum(struct roff * r,int ln,const char * v,int * pos,int * res,int flags)2806 roff_evalnum(struct roff *r, int ln, const char *v,
2807           int *pos, int *res, int flags)
2808 {
2809           int                  mypos, operand2;
2810           char                 operator;
2811 
2812           if (NULL == pos) {
2813                     mypos = 0;
2814                     pos = &mypos;
2815           }
2816 
2817           if (flags & ROFFNUM_WHITE)
2818                     while (isspace((unsigned char)v[*pos]))
2819                               (*pos)++;
2820 
2821           if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2822                     return 0;
2823 
2824           while (1) {
2825                     if (flags & ROFFNUM_WHITE)
2826                               while (isspace((unsigned char)v[*pos]))
2827                                         (*pos)++;
2828 
2829                     if ( ! roff_getop(v, pos, &operator))
2830                               break;
2831 
2832                     if (flags & ROFFNUM_WHITE)
2833                               while (isspace((unsigned char)v[*pos]))
2834                                         (*pos)++;
2835 
2836                     if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2837                               return 0;
2838 
2839                     if (flags & ROFFNUM_WHITE)
2840                               while (isspace((unsigned char)v[*pos]))
2841                                         (*pos)++;
2842 
2843                     if (NULL == res)
2844                               continue;
2845 
2846                     switch (operator) {
2847                     case '+':
2848                               *res += operand2;
2849                               break;
2850                     case '-':
2851                               *res -= operand2;
2852                               break;
2853                     case '*':
2854                               *res *= operand2;
2855                               break;
2856                     case '/':
2857                               if (operand2 == 0) {
2858                                         mandoc_msg(MANDOCERR_DIVZERO,
2859                                                   ln, *pos, "%s", v);
2860                                         *res = 0;
2861                                         break;
2862                               }
2863                               *res /= operand2;
2864                               break;
2865                     case '%':
2866                               if (operand2 == 0) {
2867                                         mandoc_msg(MANDOCERR_DIVZERO,
2868                                                   ln, *pos, "%s", v);
2869                                         *res = 0;
2870                                         break;
2871                               }
2872                               *res %= operand2;
2873                               break;
2874                     case '<':
2875                               *res = *res < operand2;
2876                               break;
2877                     case '>':
2878                               *res = *res > operand2;
2879                               break;
2880                     case 'l':
2881                               *res = *res <= operand2;
2882                               break;
2883                     case 'g':
2884                               *res = *res >= operand2;
2885                               break;
2886                     case '=':
2887                               *res = *res == operand2;
2888                               break;
2889                     case '!':
2890                               *res = *res != operand2;
2891                               break;
2892                     case '&':
2893                               *res = *res && operand2;
2894                               break;
2895                     case ':':
2896                               *res = *res || operand2;
2897                               break;
2898                     case 'i':
2899                               if (operand2 < *res)
2900                                         *res = operand2;
2901                               break;
2902                     case 'a':
2903                               if (operand2 > *res)
2904                                         *res = operand2;
2905                               break;
2906                     default:
2907                               abort();
2908                     }
2909           }
2910           return 1;
2911 }
2912 
2913 /* --- register management ------------------------------------------------ */
2914 
/*
 * Set, increment, or decrement the number register with the
 * NUL-terminated name, depending on sign as in roff_setregn().
 * Passing INT_MIN as the step leaves the step of the register alone.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
2920 
2921 static void
roff_setregn(struct roff * r,const char * name,size_t len,int val,char sign,int step)2922 roff_setregn(struct roff *r, const char *name, size_t len,
2923     int val, char sign, int step)
2924 {
2925           struct roffreg      *reg;
2926 
2927           /* Search for an existing register with the same name. */
2928           reg = r->regtab;
2929 
2930           while (reg != NULL && (reg->key.sz != len ||
2931               strncmp(reg->key.p, name, len) != 0))
2932                     reg = reg->next;
2933 
2934           if (NULL == reg) {
2935                     /* Create a new register. */
2936                     reg = mandoc_malloc(sizeof(struct roffreg));
2937                     reg->key.p = mandoc_strndup(name, len);
2938                     reg->key.sz = len;
2939                     reg->val = 0;
2940                     reg->step = 0;
2941                     reg->next = r->regtab;
2942                     r->regtab = reg;
2943           }
2944 
2945           if ('+' == sign)
2946                     reg->val += val;
2947           else if ('-' == sign)
2948                     reg->val -= val;
2949           else
2950                     reg->val = val;
2951           if (step != INT_MIN)
2952                     reg->step = step;
2953 }
2954 
2955 /*
2956  * Handle some predefined read-only number registers.
2957  * For now, return -1 if the requested register is not predefined;
2958  * in case a predefined read-only register having the value -1
2959  * were to turn up, another special value would have to be chosen.
2960  */
2961 static int
roff_getregro(const struct roff * r,const char * name)2962 roff_getregro(const struct roff *r, const char *name)
2963 {
2964 
2965           switch (*name) {
2966           case '$':  /* Number of arguments of the last macro evaluated. */
2967                     return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
2968           case 'A':  /* ASCII approximation mode is always off. */
2969                     return 0;
2970           case 'g':  /* Groff compatibility mode is always on. */
2971                     return 1;
2972           case 'H':  /* Fixed horizontal resolution. */
2973                     return 24;
2974           case 'j':  /* Always adjust left margin only. */
2975                     return 0;
2976           case 'T':  /* Some output device is always defined. */
2977                     return 1;
2978           case 'V':  /* Fixed vertical resolution. */
2979                     return 40;
2980           default:
2981                     return -1;
2982           }
2983 }
2984 
/*
 * Return the value of the number register with the NUL-terminated
 * name, without applying any auto-increment; see roff_getregn().
 */
int
roff_getreg(struct roff *r, const char *name)
{
	const size_t	 namesz = strlen(name);

	return roff_getregn(r, name, namesz, '\0');
}
2990 
2991 static int
roff_getregn(struct roff * r,const char * name,size_t len,char sign)2992 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
2993 {
2994           struct roffreg      *reg;
2995           int                  val;
2996 
2997           if ('.' == name[0] && 2 == len) {
2998                     val = roff_getregro(r, name + 1);
2999                     if (-1 != val)
3000                               return val;
3001           }
3002 
3003           for (reg = r->regtab; reg; reg = reg->next) {
3004                     if (len == reg->key.sz &&
3005                         0 == strncmp(name, reg->key.p, len)) {
3006                               switch (sign) {
3007                               case '+':
3008                                         reg->val += reg->step;
3009                                         break;
3010                               case '-':
3011                                         reg->val -= reg->step;
3012                                         break;
3013                               default:
3014                                         break;
3015                               }
3016                               return reg->val;
3017                     }
3018           }
3019 
3020           roff_setregn(r, name, len, 0, '\0', INT_MIN);
3021           return 0;
3022 }
3023 
3024 static int
roff_hasregn(const struct roff * r,const char * name,size_t len)3025 roff_hasregn(const struct roff *r, const char *name, size_t len)
3026 {
3027           struct roffreg      *reg;
3028           int                  val;
3029 
3030           if ('.' == name[0] && 2 == len) {
3031                     val = roff_getregro(r, name + 1);
3032                     if (-1 != val)
3033                               return 1;
3034           }
3035 
3036           for (reg = r->regtab; reg; reg = reg->next)
3037                     if (len == reg->key.sz &&
3038                         0 == strncmp(name, reg->key.p, len))
3039                               return 1;
3040 
3041           return 0;
3042 }
3043 
3044 static void
roff_freereg(struct roffreg * reg)3045 roff_freereg(struct roffreg *reg)
3046 {
3047           struct roffreg      *old_reg;
3048 
3049           while (NULL != reg) {
3050                     free(reg->key.p);
3051                     old_reg = reg;
3052                     reg = reg->next;
3053                     free(old_reg);
3054           }
3055 }
3056 
3057 static int
roff_nr(ROFF_ARGS)3058 roff_nr(ROFF_ARGS)
3059 {
3060           char                *key, *val, *step;
3061           size_t               keysz;
3062           int                  iv, is, len;
3063           char                 sign;
3064 
3065           key = val = buf->buf + pos;
3066           if (*key == '\0')
3067                     return ROFF_IGN;
3068 
3069           keysz = roff_getname(r, &val, ln, pos);
3070           if (key[keysz] == '\\' || key[keysz] == '\t')
3071                     return ROFF_IGN;
3072 
3073           sign = *val;
3074           if (sign == '+' || sign == '-')
3075                     val++;
3076 
3077           len = 0;
3078           if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3079                     return ROFF_IGN;
3080 
3081           step = val + len;
3082           while (isspace((unsigned char)*step))
3083                     step++;
3084           if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3085                     is = INT_MIN;
3086 
3087           roff_setregn(r, key, keysz, iv, sign, is);
3088           return ROFF_IGN;
3089 }
3090 
3091 static int
roff_rr(ROFF_ARGS)3092 roff_rr(ROFF_ARGS)
3093 {
3094           struct roffreg      *reg, **prev;
3095           char                *name, *cp;
3096           size_t               namesz;
3097 
3098           name = cp = buf->buf + pos;
3099           if (*name == '\0')
3100                     return ROFF_IGN;
3101           namesz = roff_getname(r, &cp, ln, pos);
3102           name[namesz] = '\0';
3103 
3104           prev = &r->regtab;
3105           while (1) {
3106                     reg = *prev;
3107                     if (reg == NULL || !strcmp(name, reg->key.p))
3108                               break;
3109                     prev = &reg->next;
3110           }
3111           if (reg != NULL) {
3112                     *prev = reg->next;
3113                     free(reg->key.p);
3114                     free(reg);
3115           }
3116           return ROFF_IGN;
3117 }
3118 
3119 /* --- handler functions for roff requests -------------------------------- */
3120 
3121 static int
roff_rm(ROFF_ARGS)3122 roff_rm(ROFF_ARGS)
3123 {
3124           const char           *name;
3125           char                 *cp;
3126           size_t                namesz;
3127 
3128           cp = buf->buf + pos;
3129           while (*cp != '\0') {
3130                     name = cp;
3131                     namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3132                     roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3133                     roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3134                     if (name[namesz] == '\\' || name[namesz] == '\t')
3135                               break;
3136           }
3137           return ROFF_IGN;
3138 }
3139 
3140 static int
roff_it(ROFF_ARGS)3141 roff_it(ROFF_ARGS)
3142 {
3143           int                  iv;
3144 
3145           /* Parse the number of lines. */
3146 
3147           if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3148                     mandoc_msg(MANDOCERR_IT_NONUM,
3149                         ln, ppos, "%s", buf->buf + 1);
3150                     return ROFF_IGN;
3151           }
3152 
3153           while (isspace((unsigned char)buf->buf[pos]))
3154                     pos++;
3155 
3156           /*
3157            * Arm the input line trap.
3158            * Special-casing "an-trap" is an ugly workaround to cope
3159            * with DocBook stupidly fiddling with man(7) internals.
3160            */
3161 
3162           roffit_lines = iv;
3163           roffit_macro = mandoc_strdup(iv != 1 ||
3164               strcmp(buf->buf + pos, "an-trap") ?
3165               buf->buf + pos : "br");
3166           return ROFF_IGN;
3167 }
3168 
3169 static int
roff_Dd(ROFF_ARGS)3170 roff_Dd(ROFF_ARGS)
3171 {
3172           int                  mask;
3173           enum roff_tok        t, te;
3174 
3175           switch (tok) {
3176           case ROFF_Dd:
3177                     tok = MDOC_Dd;
3178                     te = MDOC_MAX;
3179                     if (r->format == 0)
3180                               r->format = MPARSE_MDOC;
3181                     mask = MPARSE_MDOC | MPARSE_QUICK;
3182                     break;
3183           case ROFF_TH:
3184                     tok = MAN_TH;
3185                     te = MAN_MAX;
3186                     if (r->format == 0)
3187                               r->format = MPARSE_MAN;
3188                     mask = MPARSE_QUICK;
3189                     break;
3190           default:
3191                     abort();
3192           }
3193           if ((r->options & mask) == 0)
3194                     for (t = tok; t < te; t++)
3195                               roff_setstr(r, roff_name[t], NULL, 0);
3196           return ROFF_CONT;
3197 }
3198 
3199 static int
roff_TE(ROFF_ARGS)3200 roff_TE(ROFF_ARGS)
3201 {
3202           r->man->flags &= ~ROFF_NONOFILL;
3203           if (r->tbl == NULL) {
3204                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3205                     return ROFF_IGN;
3206           }
3207           if (tbl_end(r->tbl, 0) == 0) {
3208                     r->tbl = NULL;
3209                     free(buf->buf);
3210                     buf->buf = mandoc_strdup(".sp");
3211                     buf->sz = 4;
3212                     *offs = 0;
3213                     return ROFF_REPARSE;
3214           }
3215           r->tbl = NULL;
3216           return ROFF_IGN;
3217 }
3218 
3219 static int
roff_T_(ROFF_ARGS)3220 roff_T_(ROFF_ARGS)
3221 {
3222 
3223           if (NULL == r->tbl)
3224                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3225           else
3226                     tbl_restart(ln, ppos, r->tbl);
3227 
3228           return ROFF_IGN;
3229 }
3230 
3231 /*
3232  * Handle in-line equation delimiters.
3233  */
3234 static int
roff_eqndelim(struct roff * r,struct buf * buf,int pos)3235 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3236 {
3237           char                *cp1, *cp2;
3238           const char          *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3239 
3240           /*
3241            * Outside equations, look for an opening delimiter.
3242            * If we are inside an equation, we already know it is
3243            * in-line, or this function wouldn't have been called;
3244            * so look for a closing delimiter.
3245            */
3246 
3247           cp1 = buf->buf + pos;
3248           cp2 = strchr(cp1, r->eqn == NULL ?
3249               r->last_eqn->odelim : r->last_eqn->cdelim);
3250           if (cp2 == NULL)
3251                     return ROFF_CONT;
3252 
3253           *cp2++ = '\0';
3254           bef_pr = bef_nl = aft_nl = aft_pr = "";
3255 
3256           /* Handle preceding text, protecting whitespace. */
3257 
3258           if (*buf->buf != '\0') {
3259                     if (r->eqn == NULL)
3260                               bef_pr = "\\&";
3261                     bef_nl = "\n";
3262           }
3263 
3264           /*
3265            * Prepare replacing the delimiter with an equation macro
3266            * and drop leading white space from the equation.
3267            */
3268 
3269           if (r->eqn == NULL) {
3270                     while (*cp2 == ' ')
3271                               cp2++;
3272                     mac = ".EQ";
3273           } else
3274                     mac = ".EN";
3275 
3276           /* Handle following text, protecting whitespace. */
3277 
3278           if (*cp2 != '\0') {
3279                     aft_nl = "\n";
3280                     if (r->eqn != NULL)
3281                               aft_pr = "\\&";
3282           }
3283 
3284           /* Do the actual replacement. */
3285 
3286           buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3287               bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3288           free(buf->buf);
3289           buf->buf = cp1;
3290 
3291           /* Toggle the in-line state of the eqn subsystem. */
3292 
3293           r->eqn_inline = r->eqn == NULL;
3294           return ROFF_REPARSE;
3295 }
3296 
3297 static int
roff_EQ(ROFF_ARGS)3298 roff_EQ(ROFF_ARGS)
3299 {
3300           struct roff_node    *n;
3301 
3302           if (r->man->meta.macroset == MACROSET_MAN)
3303                     man_breakscope(r->man, ROFF_EQ);
3304           n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3305           if (ln > r->man->last->line)
3306                     n->flags |= NODE_LINE;
3307           n->eqn = eqn_box_new();
3308           roff_node_append(r->man, n);
3309           r->man->next = ROFF_NEXT_SIBLING;
3310 
3311           assert(r->eqn == NULL);
3312           if (r->last_eqn == NULL)
3313                     r->last_eqn = eqn_alloc();
3314           else
3315                     eqn_reset(r->last_eqn);
3316           r->eqn = r->last_eqn;
3317           r->eqn->node = n;
3318 
3319           if (buf->buf[pos] != '\0')
3320                     mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3321                         ".EQ %s", buf->buf + pos);
3322 
3323           return ROFF_IGN;
3324 }
3325 
3326 static int
roff_EN(ROFF_ARGS)3327 roff_EN(ROFF_ARGS)
3328 {
3329           if (r->eqn != NULL) {
3330                     eqn_parse(r->eqn);
3331                     r->eqn = NULL;
3332           } else
3333                     mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3334           if (buf->buf[pos] != '\0')
3335                     mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3336                         "EN %s", buf->buf + pos);
3337           return ROFF_IGN;
3338 }
3339 
3340 static int
roff_TS(ROFF_ARGS)3341 roff_TS(ROFF_ARGS)
3342 {
3343           if (r->tbl != NULL) {
3344                     mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3345                     tbl_end(r->tbl, 0);
3346           }
3347           r->man->flags |= ROFF_NONOFILL;
3348           r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3349           if (r->last_tbl == NULL)
3350                     r->first_tbl = r->tbl;
3351           r->last_tbl = r->tbl;
3352           return ROFF_IGN;
3353 }
3354 
3355 static int
roff_noarg(ROFF_ARGS)3356 roff_noarg(ROFF_ARGS)
3357 {
3358           if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3359                     man_breakscope(r->man, tok);
3360           if (tok == ROFF_brp)
3361                     tok = ROFF_br;
3362           roff_elem_alloc(r->man, ln, ppos, tok);
3363           if (buf->buf[pos] != '\0')
3364                     mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3365                        "%s %s", roff_name[tok], buf->buf + pos);
3366           if (tok == ROFF_nf)
3367                     r->man->flags |= ROFF_NOFILL;
3368           else if (tok == ROFF_fi)
3369                     r->man->flags &= ~ROFF_NOFILL;
3370           r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3371           r->man->next = ROFF_NEXT_SIBLING;
3372           return ROFF_IGN;
3373 }
3374 
/*
 * Handle requests taking at most one argument: allocate an element
 * node for the request and, if an argument is present, a word node
 * for it.  Additional arguments are reported with
 * MANDOCERR_ARG_EXCESS and dropped.
 * For "ce" and "rj", the argument is evaluated as a line count
 * and stored in roffce_lines, and roffce_node is set up accordingly.
 * Always returns ROFF_IGN.
 */
static int
roff_onearg(ROFF_ARGS)
{
	struct roff_node	*n;
	char			*cp;
	int			 npos;

	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
	     tok == ROFF_ti))
		man_breakscope(r->man, tok);

	/*
	 * A new "ce" or "rj" while an earlier one is still recorded
	 * in roffce_node: continue the tree right after that node.
	 */

	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
	}

	roff_elem_alloc(r->man, ln, ppos, tok);
	n = r->man->last;

	/*
	 * NUL-terminate the first argument in place, overwriting
	 * the blanks following it, and complain about anything
	 * that comes after those blanks.
	 */

	cp = buf->buf + pos;
	if (*cp != '\0') {
		while (*cp != '\0' && *cp != ' ')
			cp++;
		while (*cp == ' ')
			*cp++ = '\0';
		if (*cp != '\0')
			mandoc_msg(MANDOCERR_ARG_EXCESS,
			    ln, (int)(cp - buf->buf),
			    "%s ... %s", roff_name[tok], cp);
		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
	}

	if (tok == ROFF_ce || tok == ROFF_rj) {
		/*
		 * If the last node is still the element itself,
		 * no argument word was added above;
		 * supply a default of "1", marked as NODE_NOSRC.
		 */
		if (r->man->last->type == ROFFT_ELEM) {
			roff_word_alloc(r->man, ln, pos, "1");
			r->man->last->flags |= NODE_NOSRC;
		}
		npos = 0;
		/*
		 * NOTE(review): the message below says "ce"
		 * even when the request is "rj".
		 */
		if (roff_evalnum(r, ln, r->man->last->string, &npos,
		    &roffce_lines, 0) == 0) {
			mandoc_msg(MANDOCERR_CE_NONUM,
			    ln, pos, "ce %s", buf->buf + pos);
			roffce_lines = 1;
		}
		/*
		 * A count below one clears roffce_node and roffce_lines;
		 * otherwise, remember the parent of the argument node
		 * in roffce_node.
		 */
		if (roffce_lines < 1) {
			r->man->last = r->man->last->parent;
			roffce_node = NULL;
			roffce_lines = 0;
		} else
			roffce_node = r->man->last->parent;
	} else {
		/* All other requests are complete at this point. */
		n->flags |= NODE_VALID | NODE_ENDED;
		r->man->last = n;
	}
	n->flags |= NODE_LINE;
	r->man->next = ROFF_NEXT_SIBLING;
	return ROFF_IGN;
}
3434 
3435 static int
roff_manyarg(ROFF_ARGS)3436 roff_manyarg(ROFF_ARGS)
3437 {
3438           struct roff_node    *n;
3439           char                          *sp, *ep;
3440 
3441           roff_elem_alloc(r->man, ln, ppos, tok);
3442           n = r->man->last;
3443 
3444           for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3445                     while (*ep != '\0' && *ep != ' ')
3446                               ep++;
3447                     while (*ep == ' ')
3448                               *ep++ = '\0';
3449                     roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3450           }
3451 
3452           n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3453           r->man->last = n;
3454           r->man->next = ROFF_NEXT_SIBLING;
3455           return ROFF_IGN;
3456 }
3457 
3458 static int
roff_als(ROFF_ARGS)3459 roff_als(ROFF_ARGS)
3460 {
3461           char                *oldn, *newn, *end, *value;
3462           size_t               oldsz, newsz, valsz;
3463 
3464           newn = oldn = buf->buf + pos;
3465           if (*newn == '\0')
3466                     return ROFF_IGN;
3467 
3468           newsz = roff_getname(r, &oldn, ln, pos);
3469           if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3470                     return ROFF_IGN;
3471 
3472           end = oldn;
3473           oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3474           if (oldsz == 0)
3475                     return ROFF_IGN;
3476 
3477           valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3478               (int)oldsz, oldn);
3479           roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3480           roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3481           free(value);
3482           return ROFF_IGN;
3483 }
3484 
3485 static int
roff_cc(ROFF_ARGS)3486 roff_cc(ROFF_ARGS)
3487 {
3488           const char          *p;
3489 
3490           p = buf->buf + pos;
3491 
3492           if (*p == '\0' || (r->control = *p++) == '.')
3493                     r->control = '\0';
3494 
3495           if (*p != '\0')
3496                     mandoc_msg(MANDOCERR_ARG_EXCESS,
3497                         ln, p - buf->buf, "cc ... %s", p);
3498 
3499           return ROFF_IGN;
3500 }
3501 
3502 static int
roff_char(ROFF_ARGS)3503 roff_char(ROFF_ARGS)
3504 {
3505           const char          *p, *kp, *vp;
3506           size_t               ksz, vsz;
3507           int                  font;
3508 
3509           /* Parse the character to be replaced. */
3510 
3511           kp = buf->buf + pos;
3512           p = kp + 1;
3513           if (*kp == '\0' || (*kp == '\\' &&
3514                mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3515               (*p != ' ' && *p != '\0')) {
3516                     mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3517                     return ROFF_IGN;
3518           }
3519           ksz = p - kp;
3520           while (*p == ' ')
3521                     p++;
3522 
3523           /*
3524            * If the replacement string contains a font escape sequence,
3525            * we have to restore the font at the end.
3526            */
3527 
3528           vp = p;
3529           vsz = strlen(p);
3530           font = 0;
3531           while (*p != '\0') {
3532                     if (*p++ != '\\')
3533                               continue;
3534                     switch (mandoc_escape(&p, NULL, NULL)) {
3535                     case ESCAPE_FONT:
3536                     case ESCAPE_FONTROMAN:
3537                     case ESCAPE_FONTITALIC:
3538                     case ESCAPE_FONTBOLD:
3539                     case ESCAPE_FONTBI:
3540                     case ESCAPE_FONTCW:
3541                     case ESCAPE_FONTPREV:
3542                               font++;
3543                               break;
3544                     default:
3545                               break;
3546                     }
3547           }
3548           if (font > 1)
3549                     mandoc_msg(MANDOCERR_CHAR_FONT,
3550                         ln, (int)(vp - buf->buf), "%s", vp);
3551 
3552           /*
3553            * Approximate the effect of .char using the .tr tables.
3554            * XXX In groff, .char and .tr interact differently.
3555            */
3556 
3557           if (ksz == 1) {
3558                     if (r->xtab == NULL)
3559                               r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3560                     assert((unsigned int)*kp < 128);
3561                     free(r->xtab[(int)*kp].p);
3562                     r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3563                         "%s%s", vp, font ? "\fP" : "");
3564           } else {
3565                     roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3566                     if (font)
3567                               roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3568           }
3569           return ROFF_IGN;
3570 }
3571 
3572 static int
roff_ec(ROFF_ARGS)3573 roff_ec(ROFF_ARGS)
3574 {
3575           const char          *p;
3576 
3577           p = buf->buf + pos;
3578           if (*p == '\0')
3579                     r->escape = '\\';
3580           else {
3581                     r->escape = *p;
3582                     if (*++p != '\0')
3583                               mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3584                                   (int)(p - buf->buf), "ec ... %s", p);
3585           }
3586           return ROFF_IGN;
3587 }
3588 
3589 static int
roff_eo(ROFF_ARGS)3590 roff_eo(ROFF_ARGS)
3591 {
3592           r->escape = '\0';
3593           if (buf->buf[pos] != '\0')
3594                     mandoc_msg(MANDOCERR_ARG_SKIP,
3595                         ln, pos, "eo %s", buf->buf + pos);
3596           return ROFF_IGN;
3597 }
3598 
3599 static int
roff_nop(ROFF_ARGS)3600 roff_nop(ROFF_ARGS)
3601 {
3602           while (buf->buf[pos] == ' ')
3603                     pos++;
3604           *offs = pos;
3605           return ROFF_RERUN;
3606 }
3607 
/*
 * Handle the "tr" request: the argument is a sequence of pairs,
 * each consisting of a character or escape sequence to be replaced
 * and its replacement.  Single-byte source characters are stored
 * in the r->xtab array, source escape sequences in r->xmbtab.
 * An empty argument is reported with MANDOCERR_REQ_EMPTY and
 * an invalid escape sequence with MANDOCERR_ESC_BAD; the latter
 * ends processing of the request.  Always returns ROFF_IGN.
 */
static int
roff_tr(ROFF_ARGS)
{
	const char	*p, *first, *second;
	size_t		 fsz, ssz;
	enum mandoc_esc	 esc;

	p = buf->buf + pos;

	if (*p == '\0') {
		mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
		return ROFF_IGN;
	}

	while (*p != '\0') {
		/* Unless escape sequences are found, both sizes are 1. */
		fsz = ssz = 1;

		/* Parse the character to be replaced. */
		first = p++;
		if (*first == '\\') {
			esc = mandoc_escape(&p, NULL, NULL);
			if (esc == ESCAPE_ERROR) {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(p - buf->buf), "%s", first);
				return ROFF_IGN;
			}
			fsz = (size_t)(p - first);
		}

		/* Parse the replacement. */
		second = p++;
		if (*second == '\\') {
			esc = mandoc_escape(&p, NULL, NULL);
			if (esc == ESCAPE_ERROR) {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(p - buf->buf), "%s", second);
				return ROFF_IGN;
			}
			ssz = (size_t)(p - second);
		} else if (*second == '\0') {
			/*
			 * The last pair lacks its second half:
			 * replace with a blank, and step back onto
			 * the terminating NUL to end the loop.
			 */
			mandoc_msg(MANDOCERR_TR_ODD, ln,
			    (int)(first - buf->buf), "tr %s", first);
			second = " ";
			p--;
		}

		/* Escape sequences go to the multi-byte table. */
		if (fsz > 1) {
			roff_setstrn(&r->xmbtab, first, fsz,
			    second, ssz, 0);
			continue;
		}

		/* The single-byte table is allocated on first use. */
		if (r->xtab == NULL)
			r->xtab = mandoc_calloc(128,
			    sizeof(struct roffstr));

		/*
		 * NOTE(review): the index is not range-checked here;
		 * presumably input bytes are below 128 - confirm.
		 */
		free(r->xtab[(int)*first].p);
		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
		r->xtab[(int)*first].sz = ssz;
	}

	return ROFF_IGN;
}
3669 
3670 /*
3671  * Implementation of the .return request.
3672  * There is no need to call roff_userret() from here.
3673  * The read module will call that after rewinding the reader stack
3674  * to the place from where the current macro was called.
3675  */
3676 static int
roff_return(ROFF_ARGS)3677 roff_return(ROFF_ARGS)
3678 {
3679           if (r->mstackpos >= 0)
3680                     return ROFF_IGN | ROFF_USERRET;
3681 
3682           mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3683           return ROFF_IGN;
3684 }
3685 
/*
 * Handle the "rn" request: rename the string or macro named by
 * the first argument to the name given as the second argument.
 * The request is ignored if either name is missing or if the
 * first name is followed by a backslash or a tab.
 * Which of the tables r->strtab and r->rentab are updated depends
 * on the definition type reported by roff_getstrn().
 * Always returns ROFF_IGN.
 */
static int
roff_rn(ROFF_ARGS)
{
	const char	*value;
	char		*oldn, *newn, *end;
	size_t		 oldsz, newsz;
	int		 deftype;

	oldn = newn = buf->buf + pos;
	if (*oldn == '\0')
		return ROFF_IGN;

	oldsz = roff_getname(r, &newn, ln, pos);
	if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
		return ROFF_IGN;

	end = newn;
	newsz = roff_getname(r, &end, ln, newn - buf->buf);
	if (newsz == 0)
		return ROFF_IGN;

	/* Look up the old name; deftype reports what kind was found. */

	deftype = ROFFDEF_ANY;
	value = roff_getstrn(r, oldn, oldsz, &deftype);
	switch (deftype) {
	case ROFFDEF_USER:
		/*
		 * The new entry is stored before the old one is
		 * cleared; presumably value points into the old
		 * entry - confirm before reordering.
		 */
		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
		break;
	case ROFFDEF_PRE:
		/* Here, no entry for the old name is cleared. */
		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
		break;
	case ROFFDEF_REN:
		/* Move the entry within the rename table. */
		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
		break;
	case ROFFDEF_STD:
		/* Map the new name to the old name in the rename table. */
		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
		break;
	default:
		/* Any other result: clear the new name in both tables. */
		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
		break;
	}
	return ROFF_IGN;
}
3735 
3736 static int
roff_shift(ROFF_ARGS)3737 roff_shift(ROFF_ARGS)
3738 {
3739           struct mctx         *ctx;
3740           int                  levels, i;
3741 
3742           levels = 1;
3743           if (buf->buf[pos] != '\0' &&
3744               roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3745                     mandoc_msg(MANDOCERR_CE_NONUM,
3746                         ln, pos, "shift %s", buf->buf + pos);
3747                     levels = 1;
3748           }
3749           if (r->mstackpos < 0) {
3750                     mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3751                     return ROFF_IGN;
3752           }
3753           ctx = r->mstack + r->mstackpos;
3754           if (levels > ctx->argc) {
3755                     mandoc_msg(MANDOCERR_SHIFT,
3756                         ln, pos, "%d, but max is %d", levels, ctx->argc);
3757                     levels = ctx->argc;
3758           }
3759           if (levels == 0)
3760                     return ROFF_IGN;
3761           for (i = 0; i < levels; i++)
3762                     free(ctx->argv[i]);
3763           ctx->argc -= levels;
3764           for (i = 0; i < ctx->argc; i++)
3765                     ctx->argv[i] = ctx->argv[i + levels];
3766           return ROFF_IGN;
3767 }
3768 
3769 static int
roff_so(ROFF_ARGS)3770 roff_so(ROFF_ARGS)
3771 {
3772           char *name, *cp;
3773 
3774           name = buf->buf + pos;
3775           mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3776 
3777           /*
3778            * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3779            * opening anything that's not in our cwd or anything beneath
3780            * it.  Thus, explicitly disallow traversing up the file-system
3781            * or using absolute paths.
3782            */
3783 
3784           if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3785                     mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3786                     buf->sz = mandoc_asprintf(&cp,
3787                         ".sp\nSee the file %s.\n.sp", name) + 1;
3788                     free(buf->buf);
3789                     buf->buf = cp;
3790                     *offs = 0;
3791                     return ROFF_REPARSE;
3792           }
3793 
3794           *offs = pos;
3795           return ROFF_SO;
3796 }
3797 
3798 /* --- user defined strings and macros ------------------------------------ */
3799 
3800 static int
roff_userdef(ROFF_ARGS)3801 roff_userdef(ROFF_ARGS)
3802 {
3803           struct mctx          *ctx;
3804           char                 *arg, *ap, *dst, *src;
3805           size_t                sz;
3806 
3807           /* Initialize a new macro stack context. */
3808 
3809           if (++r->mstackpos == r->mstacksz) {
3810                     r->mstack = mandoc_recallocarray(r->mstack,
3811                         r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3812                     r->mstacksz += 8;
3813           }
3814           ctx = r->mstack + r->mstackpos;
3815           ctx->argsz = 0;
3816           ctx->argc = 0;
3817           ctx->argv = NULL;
3818 
3819           /*
3820            * Collect pointers to macro argument strings,
3821            * NUL-terminating them and escaping quotes.
3822            */
3823 
3824           src = buf->buf + pos;
3825           while (*src != '\0') {
3826                     if (ctx->argc == ctx->argsz) {
3827                               ctx->argsz += 8;
3828                               ctx->argv = mandoc_reallocarray(ctx->argv,
3829                                   ctx->argsz, sizeof(*ctx->argv));
3830                     }
3831                     arg = roff_getarg(r, &src, ln, &pos);
3832                     sz = 1;  /* For the terminating NUL. */
3833                     for (ap = arg; *ap != '\0'; ap++)
3834                               sz += *ap == '"' ? 4 : 1;
3835                     ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3836                     for (ap = arg; *ap != '\0'; ap++) {
3837                               if (*ap == '"') {
3838                                         memcpy(dst, "\\(dq", 4);
3839                                         dst += 4;
3840                               } else
3841                                         *dst++ = *ap;
3842                     }
3843                     *dst = '\0';
3844                     free(arg);
3845           }
3846 
3847           /* Replace the macro invocation by the macro definition. */
3848 
3849           free(buf->buf);
3850           buf->buf = mandoc_strdup(r->current_string);
3851           buf->sz = strlen(buf->buf) + 1;
3852           *offs = 0;
3853 
3854           return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3855               ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3856 }
3857 
3858 /*
3859  * Calling a high-level macro that was renamed with .rn.
3860  * r->current_string has already been set up by roff_parse().
3861  */
3862 static int
roff_renamed(ROFF_ARGS)3863 roff_renamed(ROFF_ARGS)
3864 {
3865           char      *nbuf;
3866 
3867           buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3868               buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3869           free(buf->buf);
3870           buf->buf = nbuf;
3871           *offs = 0;
3872           return ROFF_CONT;
3873 }
3874 
3875 /*
3876  * Measure the length in bytes of the roff identifier at *cpp
3877  * and advance the pointer to the next word.
3878  */
3879 static size_t
roff_getname(struct roff * r,char ** cpp,int ln,int pos)3880 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3881 {
3882           char       *name, *cp;
3883           size_t      namesz;
3884 
3885           name = *cpp;
3886           if (*name == '\0')
3887                     return 0;
3888 
3889           /* Advance cp to the byte after the end of the name. */
3890 
3891           for (cp = name; 1; cp++) {
3892                     namesz = cp - name;
3893                     if (*cp == '\0')
3894                               break;
3895                     if (*cp == ' ' || *cp == '\t') {
3896                               cp++;
3897                               break;
3898                     }
3899                     if (*cp != '\\')
3900                               continue;
3901                     if (cp[1] == '{' || cp[1] == '}')
3902                               break;
3903                     if (*++cp == '\\')
3904                               continue;
3905                     mandoc_msg(MANDOCERR_NAMESC, ln, pos,
3906                         "%.*s", (int)(cp - name + 1), name);
3907                     mandoc_escape((const char **)(void *)&cp, NULL, NULL);
3908                     break;
3909           }
3910 
3911           /* Read past spaces. */
3912 
3913           while (*cp == ' ')
3914                     cp++;
3915 
3916           *cpp = cp;
3917           return namesz;
3918 }
3919 
3920 /*
3921  * Store *string into the user-defined string called *name.
3922  * To clear an existing entry, call with (*r, *name, NULL, 0).
3923  * append == 0: replace mode
3924  * append == 1: single-line append mode
3925  * append == 2: multiline append mode, append '\n' after each call
3926  */
3927 static void
roff_setstr(struct roff * r,const char * name,const char * string,int append)3928 roff_setstr(struct roff *r, const char *name, const char *string,
3929           int append)
3930 {
3931           size_t     namesz;
3932 
3933           namesz = strlen(name);
3934           roff_setstrn(&r->strtab, name, namesz, string,
3935               string ? strlen(string) : 0, append);
3936           roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3937 }
3938 
3939 static void
roff_setstrn(struct roffkv ** r,const char * name,size_t namesz,const char * string,size_t stringsz,int append)3940 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3941                     const char *string, size_t stringsz, int append)
3942 {
3943           struct roffkv       *n;
3944           char                *c;
3945           int                  i;
3946           size_t               oldch, newch;
3947 
3948           /* Search for an existing string with the same name. */
3949           n = *r;
3950 
3951           while (n && (namesz != n->key.sz ||
3952                               strncmp(n->key.p, name, namesz)))
3953                     n = n->next;
3954 
3955           if (NULL == n) {
3956                     /* Create a new string table entry. */
3957                     n = mandoc_malloc(sizeof(struct roffkv));
3958                     n->key.p = mandoc_strndup(name, namesz);
3959                     n->key.sz = namesz;
3960                     n->val.p = NULL;
3961                     n->val.sz = 0;
3962                     n->next = *r;
3963                     *r = n;
3964           } else if (0 == append) {
3965                     free(n->val.p);
3966                     n->val.p = NULL;
3967                     n->val.sz = 0;
3968           }
3969 
3970           if (NULL == string)
3971                     return;
3972 
3973           /*
3974            * One additional byte for the '\n' in multiline mode,
3975            * and one for the terminating '\0'.
3976            */
3977           newch = stringsz + (1 < append ? 2u : 1u);
3978 
3979           if (NULL == n->val.p) {
3980                     n->val.p = mandoc_malloc(newch);
3981                     *n->val.p = '\0';
3982                     oldch = 0;
3983           } else {
3984                     oldch = n->val.sz;
3985                     n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3986           }
3987 
3988           /* Skip existing content in the destination buffer. */
3989           c = n->val.p + (int)oldch;
3990 
3991           /* Append new content to the destination buffer. */
3992           i = 0;
3993           while (i < (int)stringsz) {
3994                     /*
3995                      * Rudimentary roff copy mode:
3996                      * Handle escaped backslashes.
3997                      */
3998                     if ('\\' == string[i] && '\\' == string[i + 1])
3999                               i++;
4000                     *c++ = string[i++];
4001           }
4002 
4003           /* Append terminating bytes. */
4004           if (1 < append)
4005                     *c++ = '\n';
4006 
4007           *c = '\0';
4008           n->val.sz = (int)(c - n->val.p);
4009 }
4010 
4011 static const char *
roff_getstrn(struct roff * r,const char * name,size_t len,int * deftype)4012 roff_getstrn(struct roff *r, const char *name, size_t len,
4013     int *deftype)
4014 {
4015           const struct roffkv *n;
4016           int                            found, i;
4017           enum roff_tok                  tok;
4018 
4019           found = 0;
4020           for (n = r->strtab; n != NULL; n = n->next) {
4021                     if (strncmp(name, n->key.p, len) != 0 ||
4022                         n->key.p[len] != '\0' || n->val.p == NULL)
4023                               continue;
4024                     if (*deftype & ROFFDEF_USER) {
4025                               *deftype = ROFFDEF_USER;
4026                               return n->val.p;
4027                     } else {
4028                               found = 1;
4029                               break;
4030                     }
4031           }
4032           for (n = r->rentab; n != NULL; n = n->next) {
4033                     if (strncmp(name, n->key.p, len) != 0 ||
4034                         n->key.p[len] != '\0' || n->val.p == NULL)
4035                               continue;
4036                     if (*deftype & ROFFDEF_REN) {
4037                               *deftype = ROFFDEF_REN;
4038                               return n->val.p;
4039                     } else {
4040                               found = 1;
4041                               break;
4042                     }
4043           }
4044           for (i = 0; i < PREDEFS_MAX; i++) {
4045                     if (strncmp(name, predefs[i].name, len) != 0 ||
4046                         predefs[i].name[len] != '\0')
4047                               continue;
4048                     if (*deftype & ROFFDEF_PRE) {
4049                               *deftype = ROFFDEF_PRE;
4050                               return predefs[i].str;
4051                     } else {
4052                               found = 1;
4053                               break;
4054                     }
4055           }
4056           if (r->man->meta.macroset != MACROSET_MAN) {
4057                     for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4058                               if (strncmp(name, roff_name[tok], len) != 0 ||
4059                                   roff_name[tok][len] != '\0')
4060                                         continue;
4061                               if (*deftype & ROFFDEF_STD) {
4062                                         *deftype = ROFFDEF_STD;
4063                                         return NULL;
4064                               } else {
4065                                         found = 1;
4066                                         break;
4067                               }
4068                     }
4069           }
4070           if (r->man->meta.macroset != MACROSET_MDOC) {
4071                     for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4072                               if (strncmp(name, roff_name[tok], len) != 0 ||
4073                                   roff_name[tok][len] != '\0')
4074                                         continue;
4075                               if (*deftype & ROFFDEF_STD) {
4076                                         *deftype = ROFFDEF_STD;
4077                                         return NULL;
4078                               } else {
4079                                         found = 1;
4080                                         break;
4081                               }
4082                     }
4083           }
4084 
4085           if (found == 0 && *deftype != ROFFDEF_ANY) {
4086                     if (*deftype & ROFFDEF_REN) {
4087                               /*
4088                                * This might still be a request,
4089                                * so do not treat it as undefined yet.
4090                                */
4091                               *deftype = ROFFDEF_UNDEF;
4092                               return NULL;
4093                     }
4094 
4095                     /* Using an undefined string defines it to be empty. */
4096 
4097                     roff_setstrn(&r->strtab, name, len, "", 0, 0);
4098                     roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4099           }
4100 
4101           *deftype = 0;
4102           return NULL;
4103 }
4104 
4105 static void
roff_freestr(struct roffkv * r)4106 roff_freestr(struct roffkv *r)
4107 {
4108           struct roffkv        *n, *nn;
4109 
4110           for (n = r; n; n = nn) {
4111                     free(n->key.p);
4112                     free(n->val.p);
4113                     nn = n->next;
4114                     free(n);
4115           }
4116 }
4117 
4118 /* --- accessors and utility functions ------------------------------------ */
4119 
4120 /*
4121  * Duplicate an input string, making the appropriate character
4122  * conversations (as stipulated by `tr') along the way.
4123  * Returns a heap-allocated string with all the replacements made.
4124  */
4125 char *
roff_strdup(const struct roff * r,const char * p)4126 roff_strdup(const struct roff *r, const char *p)
4127 {
4128           const struct roffkv *cp;
4129           char                *res;
4130           const char          *pp;
4131           size_t               ssz, sz;
4132           enum mandoc_esc      esc;
4133 
4134           if (NULL == r->xmbtab && NULL == r->xtab)
4135                     return mandoc_strdup(p);
4136           else if ('\0' == *p)
4137                     return mandoc_strdup("");
4138 
4139           /*
4140            * Step through each character looking for term matches
4141            * (remember that a `tr' can be invoked with an escape, which is
4142            * a glyph but the escape is multi-character).
4143            * We only do this if the character hash has been initialised
4144            * and the string is >0 length.
4145            */
4146 
4147           res = NULL;
4148           ssz = 0;
4149 
4150           while ('\0' != *p) {
4151                     assert((unsigned int)*p < 128);
4152                     if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4153                               sz = r->xtab[(int)*p].sz;
4154                               res = mandoc_realloc(res, ssz + sz + 1);
4155                               memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4156                               ssz += sz;
4157                               p++;
4158                               continue;
4159                     } else if ('\\' != *p) {
4160                               res = mandoc_realloc(res, ssz + 2);
4161                               res[ssz++] = *p++;
4162                               continue;
4163                     }
4164 
4165                     /* Search for term matches. */
4166                     for (cp = r->xmbtab; cp; cp = cp->next)
4167                               if (0 == strncmp(p, cp->key.p, cp->key.sz))
4168                                         break;
4169 
4170                     if (NULL != cp) {
4171                               /*
4172                                * A match has been found.
4173                                * Append the match to the array and move
4174                                * forward by its keysize.
4175                                */
4176                               res = mandoc_realloc(res,
4177                                   ssz + cp->val.sz + 1);
4178                               memcpy(res + ssz, cp->val.p, cp->val.sz);
4179                               ssz += cp->val.sz;
4180                               p += (int)cp->key.sz;
4181                               continue;
4182                     }
4183 
4184                     /*
4185                      * Handle escapes carefully: we need to copy
4186                      * over just the escape itself, or else we might
4187                      * do replacements within the escape itself.
4188                      * Make sure to pass along the bogus string.
4189                      */
4190                     pp = p++;
4191                     esc = mandoc_escape(&p, NULL, NULL);
4192                     if (ESCAPE_ERROR == esc) {
4193                               sz = strlen(pp);
4194                               res = mandoc_realloc(res, ssz + sz + 1);
4195                               memcpy(res + ssz, pp, sz);
4196                               break;
4197                     }
4198                     /*
4199                      * We bail out on bad escapes.
4200                      * No need to warn: we already did so when
4201                      * roff_expand() was called.
4202                      */
4203                     sz = (int)(p - pp);
4204                     res = mandoc_realloc(res, ssz + sz + 1);
4205                     memcpy(res + ssz, pp, sz);
4206                     ssz += sz;
4207           }
4208 
4209           res[(int)ssz] = '\0';
4210           return res;
4211 }
4212 
4213 int
roff_getformat(const struct roff * r)4214 roff_getformat(const struct roff *r)
4215 {
4216 
4217           return r->format;
4218 }
4219 
4220 /*
4221  * Find out whether a line is a macro line or not.
4222  * If it is, adjust the current position and return one; if it isn't,
4223  * return zero and don't change the current position.
4224  * If the control character has been set with `.cc', then let that grain
4225  * precedence.
4226  * This is slighly contrary to groff, where using the non-breaking
4227  * control character when `cc' has been invoked will cause the
4228  * non-breaking macro contents to be printed verbatim.
4229  */
4230 int
roff_getcontrol(const struct roff * r,const char * cp,int * ppos)4231 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4232 {
4233           int                 pos;
4234 
4235           pos = *ppos;
4236 
4237           if (r->control != '\0' && cp[pos] == r->control)
4238                     pos++;
4239           else if (r->control != '\0')
4240                     return 0;
4241           else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4242                     pos += 2;
4243           else if ('.' == cp[pos] || '\'' == cp[pos])
4244                     pos++;
4245           else
4246                     return 0;
4247 
4248           while (' ' == cp[pos] || '\t' == cp[pos])
4249                     pos++;
4250 
4251           *ppos = pos;
4252           return 1;
4253 }
4254