xref: /dragonfly/contrib/lvm2/dist/libdm/regex/parse_rx.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
1 /*        $NetBSD: parse_rx.c,v 1.1.1.1 2008/12/22 00:18:37 haad Exp $          */
2 
3 /*
4  * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
5  * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6  *
7  * This file is part of the device-mapper userspace tools.
8  *
9  * This copyrighted material is made available to anyone wishing to use,
10  * modify, copy, or redistribute it subject to the terms and conditions
11  * of the GNU Lesser General Public License v.2.1.
12  *
13  * You should have received a copy of the GNU Lesser General Public License
14  * along with this program; if not, write to the Free Software Foundation,
15  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16  */
17 
18 #include "dmlib.h"
19 #include "parse_rx.h"
20 
21 struct parse_sp {             /* scratch pad for the parsing process */
22           struct dm_pool *mem;
23           int type;           /* token type, 0 indicates a charset */
24           dm_bitset_t charset;          /* The current charset */
25           const char *cursor; /* where we are in the regex */
26           const char *rx_end; /* 1pte for the expression being parsed */
27 };
28 
29 static struct rx_node *_or_term(struct parse_sp *ps);
30 
_single_char(struct parse_sp * ps,unsigned int c,const char * ptr)31 static void _single_char(struct parse_sp *ps, unsigned int c, const char *ptr)
32 {
33           ps->type = 0;
34           ps->cursor = ptr + 1;
35           dm_bit_clear_all(ps->charset);
36           dm_bit_set(ps->charset, c);
37 }
38 
39 /*
40  * Get the next token from the regular expression.
41  * Returns: 1 success, 0 end of input, -1 error.
42  */
_rx_get_token(struct parse_sp * ps)43 static int _rx_get_token(struct parse_sp *ps)
44 {
45           int neg = 0, range = 0;
46           char c, lc = 0;
47           const char *ptr = ps->cursor;
48           if (ptr == ps->rx_end) {      /* end of input ? */
49                     ps->type = -1;
50                     return 0;
51           }
52 
53           switch (*ptr) {
54                     /* charsets and ncharsets */
55           case '[':
56                     ptr++;
57                     if (*ptr == '^') {
58                               dm_bit_set_all(ps->charset);
59 
60                               /* never transition on zero */
61                               dm_bit_clear(ps->charset, 0);
62                               neg = 1;
63                               ptr++;
64 
65                     } else
66                               dm_bit_clear_all(ps->charset);
67 
68                     while ((ptr < ps->rx_end) && (*ptr != ']')) {
69                               if (*ptr == '\\') {
70                                         /* an escaped character */
71                                         ptr++;
72                                         switch (*ptr) {
73                                         case 'n':
74                                                   c = '\n';
75                                                   break;
76                                         case 'r':
77                                                   c = '\r';
78                                                   break;
79                                         case 't':
80                                                   c = '\t';
81                                                   break;
82                                         default:
83                                                   c = *ptr;
84                                         }
85                               } else if (*ptr == '-' && lc) {
86                                         /* we've got a range on our hands */
87                                         range = 1;
88                                         ptr++;
89                                         if (ptr == ps->rx_end) {
90                                                   log_error("Incomplete range"
91                                                               "specification");
92                                                   return -1;
93                                         }
94                                         c = *ptr;
95                               } else
96                                         c = *ptr;
97 
98                               if (range) {
99                                         /* add lc - c into the bitset */
100                                         if (lc > c) {
101                                                   char tmp = c;
102                                                   c = lc;
103                                                   lc = tmp;
104                                         }
105 
106                                         for (; lc <= c; lc++) {
107                                                   if (neg)
108                                                             dm_bit_clear(ps->charset, lc);
109                                                   else
110                                                             dm_bit_set(ps->charset, lc);
111                                         }
112                                         range = 0;
113                               } else {
114                                         /* add c into the bitset */
115                                         if (neg)
116                                                   dm_bit_clear(ps->charset, c);
117                                         else
118                                                   dm_bit_set(ps->charset, c);
119                               }
120                               ptr++;
121                               lc = c;
122                     }
123 
124                     if (ptr >= ps->rx_end) {
125                               ps->type = -1;
126                               return -1;
127                     }
128 
129                     ps->type = 0;
130                     ps->cursor = ptr + 1;
131                     break;
132 
133                     /* These characters are special, we just return their ASCII
134                        codes as the type.  Sorted into ascending order to help the
135                        compiler */
136           case '(':
137           case ')':
138           case '*':
139           case '+':
140           case '?':
141           case '|':
142                     ps->type = (int) *ptr;
143                     ps->cursor = ptr + 1;
144                     break;
145 
146           case '^':
147                     _single_char(ps, HAT_CHAR, ptr);
148                     break;
149 
150           case '$':
151                     _single_char(ps, DOLLAR_CHAR, ptr);
152                     break;
153 
154           case '.':
155                     /* The 'all but newline' character set */
156                     ps->type = 0;
157                     ps->cursor = ptr + 1;
158                     dm_bit_set_all(ps->charset);
159                     dm_bit_clear(ps->charset, (int) '\n');
160                     dm_bit_clear(ps->charset, (int) '\r');
161                     dm_bit_clear(ps->charset, 0);
162                     break;
163 
164           case '\\':
165                     /* escaped character */
166                     ptr++;
167                     if (ptr >= ps->rx_end) {
168                               log_error("Badly quoted character at end "
169                                           "of expression");
170                               ps->type = -1;
171                               return -1;
172                     }
173 
174                     ps->type = 0;
175                     ps->cursor = ptr + 1;
176                     dm_bit_clear_all(ps->charset);
177                     switch (*ptr) {
178                     case 'n':
179                               dm_bit_set(ps->charset, (int) '\n');
180                               break;
181                     case 'r':
182                               dm_bit_set(ps->charset, (int) '\r');
183                               break;
184                     case 't':
185                               dm_bit_set(ps->charset, (int) '\t');
186                               break;
187                     default:
188                               dm_bit_set(ps->charset, (int) *ptr);
189                     }
190                     break;
191 
192           default:
193                     /* add a single character to the bitset */
194                     ps->type = 0;
195                     ps->cursor = ptr + 1;
196                     dm_bit_clear_all(ps->charset);
197                     dm_bit_set(ps->charset, (int) *ptr);
198                     break;
199           }
200 
201           return 1;
202 }
203 
_node(struct dm_pool * mem,int type,struct rx_node * l,struct rx_node * r)204 static struct rx_node *_node(struct dm_pool *mem, int type,
205                                    struct rx_node *l, struct rx_node *r)
206 {
207           struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n));
208 
209           if (n) {
210                     if (!(n->charset = dm_bitset_create(mem, 256))) {
211                               dm_pool_free(mem, n);
212                               return NULL;
213                     }
214 
215                     n->type = type;
216                     n->left = l;
217                     n->right = r;
218           }
219 
220           return n;
221 }
222 
_term(struct parse_sp * ps)223 static struct rx_node *_term(struct parse_sp *ps)
224 {
225           struct rx_node *n;
226 
227           switch (ps->type) {
228           case 0:
229                     if (!(n = _node(ps->mem, CHARSET, NULL, NULL))) {
230                               stack;
231                               return NULL;
232                     }
233 
234                     dm_bit_copy(n->charset, ps->charset);
235                     _rx_get_token(ps);  /* match charset */
236                     break;
237 
238           case '(':
239                     _rx_get_token(ps);  /* match '(' */
240                     n = _or_term(ps);
241                     if (ps->type != ')') {
242                               log_error("missing ')' in regular expression");
243                               return 0;
244                     }
245                     _rx_get_token(ps);  /* match ')' */
246                     break;
247 
248           default:
249                     n = 0;
250           }
251 
252           return n;
253 }
254 
_closure_term(struct parse_sp * ps)255 static struct rx_node *_closure_term(struct parse_sp *ps)
256 {
257           struct rx_node *l, *n;
258 
259           if (!(l = _term(ps)))
260                     return NULL;
261 
262           for (;;) {
263                     switch (ps->type) {
264                     case '*':
265                               n = _node(ps->mem, STAR, l, NULL);
266                               break;
267 
268                     case '+':
269                               n = _node(ps->mem, PLUS, l, NULL);
270                               break;
271 
272                     case '?':
273                               n = _node(ps->mem, QUEST, l, NULL);
274                               break;
275 
276                     default:
277                               return l;
278                     }
279 
280                     if (!n) {
281                               stack;
282                               return NULL;
283                     }
284 
285                     _rx_get_token(ps);
286                     l = n;
287           }
288 
289           return n;
290 }
291 
_cat_term(struct parse_sp * ps)292 static struct rx_node *_cat_term(struct parse_sp *ps)
293 {
294           struct rx_node *l, *r, *n;
295 
296           if (!(l = _closure_term(ps)))
297                     return NULL;
298 
299           if (ps->type == '|')
300                     return l;
301 
302           if (!(r = _cat_term(ps)))
303                     return l;
304 
305           if (!(n = _node(ps->mem, CAT, l, r)))
306                     stack;
307 
308           return n;
309 }
310 
_or_term(struct parse_sp * ps)311 static struct rx_node *_or_term(struct parse_sp *ps)
312 {
313           struct rx_node *l, *r, *n;
314 
315           if (!(l = _cat_term(ps)))
316                     return NULL;
317 
318           if (ps->type != '|')
319                     return l;
320 
321           _rx_get_token(ps);            /* match '|' */
322 
323           if (!(r = _or_term(ps))) {
324                     log_error("Badly formed 'or' expression");
325                     return NULL;
326           }
327 
328           if (!(n = _node(ps->mem, OR, l, r)))
329                     stack;
330 
331           return n;
332 }
333 
rx_parse_tok(struct dm_pool * mem,const char * begin,const char * end)334 struct rx_node *rx_parse_tok(struct dm_pool *mem,
335                                    const char *begin, const char *end)
336 {
337           struct rx_node *r;
338           struct parse_sp *ps = dm_pool_zalloc(mem, sizeof(*ps));
339 
340           if (!ps) {
341                     stack;
342                     return NULL;
343           }
344 
345           ps->mem = mem;
346           ps->charset = dm_bitset_create(mem, 256);
347           ps->cursor = begin;
348           ps->rx_end = end;
349           _rx_get_token(ps);            /* load the first token */
350 
351           if (!(r = _or_term(ps))) {
352                     log_error("Parse error in regex");
353                     dm_pool_free(mem, ps);
354           }
355 
356           return r;
357 }
358 
rx_parse_str(struct dm_pool * mem,const char * str)359 struct rx_node *rx_parse_str(struct dm_pool *mem, const char *str)
360 {
361           return rx_parse_tok(mem, str, str + strlen(str));
362 }
363