1 /*-
2 * Copyright (c) 2006, Maxime Henrion <mux@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29 #include <sys/types.h>
30
31 #include <assert.h>
32 #include <regex.h>
33 #include <stdlib.h>
34
35 #include "fnmatch.h"
36 #include "globtree.h"
37 #include "misc.h"
38
39 /*
40 * The "GlobTree" interface allows one to construct arbitrarily complex
41 * boolean expressions for evaluating whether to accept or reject a
42 * filename. The globtree_test() function returns true or false
43 * according to whether the name is accepted or rejected by the
44 * expression.
45 *
46 * Expressions are trees constructed from nodes representing either
47 * primitive matching operations (primaries) or operators that are
48 * applied to their subexpressions. The simplest primitives are
49 * globtree_false(), which matches nothing, and globtree_true(), which
50 * matches everything.
51 *
52 * A more useful primitive is the matching operation, constructed with
53 * globtree_match(). It will call fnmatch() with the suppliedi
54 * shell-style pattern to determine if the filename matches.
55 *
56 * Expressions can be combined with the boolean operators AND, OR, and
57 * NOT, to form more complex expressions.
58 */
59
60 /* Node types. */
61 #define GLOBTREE_NOT 0
62 #define GLOBTREE_AND 1
63 #define GLOBTREE_OR 2
64 #define GLOBTREE_MATCH 3
65 #define GLOBTREE_REGEX 4
66 #define GLOBTREE_TRUE 5
67 #define GLOBTREE_FALSE 6
68
69 /* A node. */
70 struct globtree {
71 int type;
72 struct globtree *left;
73 struct globtree *right;
74
75 /* The "data" field points to the text pattern for GLOBTREE_MATCH
76 nodes, and to the regex_t for GLOBTREE_REGEX nodes. For any
77 other node, it is set to NULL. */
78 void *data;
79 /* The "flags" field contains the flags to pass to fnmatch() for
80 GLOBTREE_MATCH nodes. */
81 int flags;
82 };
83
84 static struct globtree *globtree_new(int);
85 static int globtree_eval(struct globtree *, const char *);
86
87 static struct globtree *
globtree_new(int type)88 globtree_new(int type)
89 {
90 struct globtree *gt;
91
92 gt = xmalloc(sizeof(struct globtree));
93 gt->type = type;
94 gt->data = NULL;
95 gt->flags = 0;
96 gt->left = NULL;
97 gt->right = NULL;
98 return (gt);
99 }
100
101 struct globtree *
globtree_true(void)102 globtree_true(void)
103 {
104 struct globtree *gt;
105
106 gt = globtree_new(GLOBTREE_TRUE);
107 return (gt);
108 }
109
110 struct globtree *
globtree_false(void)111 globtree_false(void)
112 {
113 struct globtree *gt;
114
115 gt = globtree_new(GLOBTREE_FALSE);
116 return (gt);
117 }
118
119 struct globtree *
globtree_match(const char * pattern,int flags)120 globtree_match(const char *pattern, int flags)
121 {
122 struct globtree *gt;
123
124 gt = globtree_new(GLOBTREE_MATCH);
125 gt->data = xstrdup(pattern);
126 gt->flags = flags;
127 return (gt);
128 }
129
130 struct globtree *
globtree_regex(const char * pattern)131 globtree_regex(const char *pattern)
132 {
133 struct globtree *gt;
134 int error;
135
136 gt = globtree_new(GLOBTREE_REGEX);
137 gt->data = xmalloc(sizeof(regex_t));
138 error = regcomp(gt->data, pattern, REG_NOSUB);
139 assert(!error);
140 return (gt);
141 }
142
143 struct globtree *
globtree_and(struct globtree * left,struct globtree * right)144 globtree_and(struct globtree *left, struct globtree *right)
145 {
146 struct globtree *gt;
147
148 if (left->type == GLOBTREE_FALSE || right->type == GLOBTREE_FALSE) {
149 globtree_free(left);
150 globtree_free(right);
151 gt = globtree_false();
152 return (gt);
153 }
154 if (left->type == GLOBTREE_TRUE) {
155 globtree_free(left);
156 return (right);
157 }
158 if (right->type == GLOBTREE_TRUE) {
159 globtree_free(right);
160 return (left);
161 }
162 gt = globtree_new(GLOBTREE_AND);
163 gt->left = left;
164 gt->right = right;
165 return (gt);
166 }
167
168 struct globtree *
globtree_or(struct globtree * left,struct globtree * right)169 globtree_or(struct globtree *left, struct globtree *right)
170 {
171 struct globtree *gt;
172
173 if (left->type == GLOBTREE_TRUE || right->type == GLOBTREE_TRUE) {
174 globtree_free(left);
175 globtree_free(right);
176 gt = globtree_true();
177 return (gt);
178 }
179 if (left->type == GLOBTREE_FALSE) {
180 globtree_free(left);
181 return (right);
182 }
183 if (right->type == GLOBTREE_FALSE) {
184 globtree_free(right);
185 return (left);
186 }
187 gt = globtree_new(GLOBTREE_OR);
188 gt->left = left;
189 gt->right = right;
190 return (gt);
191 }
192
193 struct globtree *
globtree_not(struct globtree * child)194 globtree_not(struct globtree *child)
195 {
196 struct globtree *gt;
197
198 if (child->type == GLOBTREE_TRUE) {
199 globtree_free(child);
200 gt = globtree_new(GLOBTREE_FALSE);
201 return (gt);
202 }
203 if (child->type == GLOBTREE_FALSE) {
204 globtree_free(child);
205 gt = globtree_new(GLOBTREE_TRUE);
206 return (gt);
207 }
208 gt = globtree_new(GLOBTREE_NOT);
209 gt->left = child;
210 return (gt);
211 }
212
213 /* Evaluate one node (must be a leaf node). */
214 static int
globtree_eval(struct globtree * gt,const char * path)215 globtree_eval(struct globtree *gt, const char *path)
216 {
217 int rv;
218
219 switch (gt->type) {
220 case GLOBTREE_TRUE:
221 return (1);
222 case GLOBTREE_FALSE:
223 return (0);
224 case GLOBTREE_MATCH:
225 assert(gt->data != NULL);
226 rv = fnmatch(gt->data, path, gt->flags);
227 if (rv == 0)
228 return (1);
229 assert(rv == FNM_NOMATCH);
230 return (0);
231 case GLOBTREE_REGEX:
232 assert(gt->data != NULL);
233 rv = regexec(gt->data, path, 0, NULL, 0);
234 if (rv == 0)
235 return (1);
236 assert(rv == REG_NOMATCH);
237 return (0);
238 }
239
240 assert(0);
241 return (-1);
242 }
243
244 /* Small stack API to walk the tree iteratively. */
245 typedef enum {
246 STATE_DOINGLEFT,
247 STATE_DOINGRIGHT
248 } walkstate_t;
249
250 struct stack {
251 struct stackelem *stack;
252 size_t size;
253 size_t in;
254 };
255
256 struct stackelem {
257 struct globtree *node;
258 walkstate_t state;
259 };
260
261 static void
stack_init(struct stack * stack)262 stack_init(struct stack *stack)
263 {
264
265 stack->in = 0;
266 stack->size = 8; /* Initial size. */
267 stack->stack = xmalloc(sizeof(struct stackelem) * stack->size);
268 }
269
270 static size_t
stack_size(struct stack * stack)271 stack_size(struct stack *stack)
272 {
273
274 return (stack->in);
275 }
276
277 static void
stack_push(struct stack * stack,struct globtree * node,walkstate_t state)278 stack_push(struct stack *stack, struct globtree *node, walkstate_t state)
279 {
280 struct stackelem *e;
281
282 if (stack->in == stack->size) {
283 stack->size *= 2;
284 stack->stack = xrealloc(stack->stack,
285 sizeof(struct stackelem) * stack->size);
286 }
287 e = stack->stack + stack->in++;
288 e->node = node;
289 e->state = state;
290 }
291
292 static void
stack_pop(struct stack * stack,struct globtree ** node,walkstate_t * state)293 stack_pop(struct stack *stack, struct globtree **node, walkstate_t *state)
294 {
295 struct stackelem *e;
296
297 assert(stack->in > 0);
298 e = stack->stack + --stack->in;
299 *node = e->node;
300 *state = e->state;
301 }
302
303 static void
stack_free(struct stack * s)304 stack_free(struct stack *s)
305 {
306
307 free(s->stack);
308 }
309
310 /* Tests if the supplied filename matches. */
311 int
globtree_test(struct globtree * gt,const char * path)312 globtree_test(struct globtree *gt, const char *path)
313 {
314 struct stack stack;
315 walkstate_t state;
316 int val;
317
318 stack_init(&stack);
319 for (;;) {
320 doleft:
321 /* Descend to the left until we hit bottom. */
322 while (gt->left != NULL) {
323 stack_push(&stack, gt, STATE_DOINGLEFT);
324 gt = gt->left;
325 }
326
327 /* Now we're at a leaf node. Evaluate it. */
328 val = globtree_eval(gt, path);
329 /* Ascend, propagating the value through operator nodes. */
330 for (;;) {
331 if (stack_size(&stack) == 0) {
332 stack_free(&stack);
333 return (val);
334 }
335 stack_pop(&stack, >, &state);
336 switch (gt->type) {
337 case GLOBTREE_NOT:
338 val = !val;
339 break;
340 case GLOBTREE_AND:
341 /* If we haven't yet evaluated the right subtree
342 and the partial result is true, descend to
343 the right. Otherwise the result is already
344 determined to be val. */
345 if (state == STATE_DOINGLEFT && val) {
346 stack_push(&stack, gt,
347 STATE_DOINGRIGHT);
348 gt = gt->right;
349 goto doleft;
350 }
351 break;
352 case GLOBTREE_OR:
353 /* If we haven't yet evaluated the right subtree
354 and the partial result is false, descend to
355 the right. Otherwise the result is already
356 determined to be val. */
357 if (state == STATE_DOINGLEFT && !val) {
358 stack_push(&stack, gt,
359 STATE_DOINGRIGHT);
360 gt = gt->right;
361 goto doleft;
362 }
363 break;
364 default:
365 /* We only push nodes that have children. */
366 assert(0);
367 return (-1);
368 }
369 }
370 }
371 }
372
373 /*
374 * We could de-recursify this function using a stack, but it would be
375 * overkill since it is never called from a thread context with a
376 * limited stack size nor used in a critical path, so I think we can
377 * afford keeping it recursive.
378 */
379 void
globtree_free(struct globtree * gt)380 globtree_free(struct globtree *gt)
381 {
382
383 if (gt->data != NULL) {
384 if (gt->type == GLOBTREE_REGEX)
385 regfree(gt->data);
386 free(gt->data);
387 }
388 if (gt->left != NULL)
389 globtree_free(gt->left);
390 if (gt->right != NULL)
391 globtree_free(gt->right);
392 free(gt);
393 }
394