1 /* Copyright (c) 2013, Vsevolod Stakhov
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
11 *
12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22 */
23
24 #include "ucl.h"
25 #include "ucl_internal.h"
26 #include "ucl_chartable.h"
27
/**
 * @file ucl_parser.c
 * The implementation of the UCL parser
 */
32
/**
 * A saved parsing position: line and column counters, the number of bytes
 * left, and a read pointer.
 * NOTE(review): the visible part of this file does not use this struct; the
 * fields presumably mirror the same-named members of struct ucl_chunk -
 * confirm against the code that saves/restores the state.
 */
struct ucl_parser_saved_state {
	unsigned int line;			/* presumably the line counter at the saved point */
	unsigned int column;		/* presumably the column counter at the saved point */
	size_t remain;				/* presumably the number of unread bytes */
	const unsigned char *pos;	/* presumably the read position inside the chunk */
};
39
/**
 * Consume exactly one character of a chunk, keeping the position counters
 * in sync.
 * A newline increments the line counter and resets the column to zero, any
 * other character increments the column. Both `p` and `chunk->pos` move one
 * byte forward and `chunk->remain` is decremented.
 * No bounds checking is done here: the caller must make sure that `p` points
 * to a readable byte and that `chunk->remain` is not zero.
 * @param chunk pointer to the chunk being parsed
 * @param p modifiable cursor inside the chunk; it is evaluated more than
 * once, so it must not have side effects
 */
#define ucl_chunk_skipc(chunk, p) do { \
	if (*(p) == '\n') { \
		(chunk)->line ++; \
		(chunk)->column = 0; \
	} \
	else { \
		(chunk)->column ++; \
	} \
	(p) ++; \
	(chunk)->pos ++; \
	(chunk)->remain --; \
} while (0)
57
58 static inline void
ucl_set_err(struct ucl_chunk * chunk,int code,const char * str,UT_string ** err)59 ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err)
60 {
61 if (chunk->pos < chunk->end) {
62 if (isgraph (*chunk->pos)) {
63 ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'",
64 chunk->line, chunk->column, str, *chunk->pos);
65 }
66 else {
67 ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'",
68 chunk->line, chunk->column, str, (int)*chunk->pos);
69 }
70 }
71 else {
72 ucl_create_err (err, "error at the end of chunk: %s", str);
73 }
74 }
75
76 /**
77 * Skip all comments from the current pos resolving nested and multiline comments
78 * @param parser
79 * @return
80 */
81 static bool
ucl_skip_comments(struct ucl_parser * parser)82 ucl_skip_comments (struct ucl_parser *parser)
83 {
84 struct ucl_chunk *chunk = parser->chunks;
85 const unsigned char *p;
86 int comments_nested = 0;
87
88 p = chunk->pos;
89
90 start:
91 if (*p == '#') {
92 if (parser->state != UCL_STATE_SCOMMENT &&
93 parser->state != UCL_STATE_MCOMMENT) {
94 while (p < chunk->end) {
95 if (*p == '\n') {
96 ucl_chunk_skipc (chunk, p);
97 goto start;
98 }
99 ucl_chunk_skipc (chunk, p);
100 }
101 }
102 }
103 else if (*p == '/' && chunk->remain >= 2) {
104 if (p[1] == '*') {
105 ucl_chunk_skipc (chunk, p);
106 comments_nested ++;
107 ucl_chunk_skipc (chunk, p);
108
109 while (p < chunk->end) {
110 if (*p == '*') {
111 ucl_chunk_skipc (chunk, p);
112 if (*p == '/') {
113 comments_nested --;
114 if (comments_nested == 0) {
115 ucl_chunk_skipc (chunk, p);
116 goto start;
117 }
118 }
119 ucl_chunk_skipc (chunk, p);
120 }
121 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
122 comments_nested ++;
123 ucl_chunk_skipc (chunk, p);
124 ucl_chunk_skipc (chunk, p);
125 continue;
126 }
127 ucl_chunk_skipc (chunk, p);
128 }
129 if (comments_nested != 0) {
130 ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err);
131 return false;
132 }
133 }
134 }
135
136 return true;
137 }
138
/**
 * Return multiplier for a numeric suffix character
 * @param c multiplier character ('k', 'm' or 'g', case insensitive)
 * @param is_bytes if true use powers of 1024, otherwise powers of 1000
 * @return multiplier, or 1 if the character is not a known suffix
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
	/* Built once; the element type matches the function return type */
	static const struct {
		char c;
		unsigned long mult_normal;
		unsigned long mult_bytes;
	} multipliers[] = {
			{'m', 1000UL * 1000UL, 1024UL * 1024UL},
			{'k', 1000UL, 1024UL},
			{'g', 1000UL * 1000UL * 1000UL, 1024UL * 1024UL * 1024UL}
	};
	const int lc = tolower (c);
	size_t i;

	/* The bound follows the table, so adding a suffix needs no other change */
	for (i = 0; i < sizeof (multipliers) / sizeof (multipliers[0]); i ++) {
		if (lc == multipliers[i].c) {
			return is_bytes ? multipliers[i].mult_bytes : multipliers[i].mult_normal;
		}
	}

	return 1;
}
169
170
/**
 * Return multiplier (in seconds) for a time suffix character
 * @param c suffix character: 'm' (minute), 'h' (hour), 'd' (day),
 * 'w' (week) or 'y' (year of 365 days), case insensitive
 * @return number of seconds in the unit, or 1 for an unknown character
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c) {
	static const struct {
		char c;
		double mult;
	} multipliers[] = {
			{'m', 60},
			{'h', 60 * 60},
			{'d', 60 * 60 * 24},
			{'w', 60 * 60 * 24 * 7},
			/* A year is 365 days, not 365 weeks */
			{'y', 60 * 60 * 24 * 365}
	};
	const int lc = tolower (c);
	size_t i;

	for (i = 0; i < sizeof (multipliers) / sizeof (multipliers[0]); i ++) {
		if (lc == multipliers[i].c) {
			return multipliers[i].mult;
		}
	}

	return 1;
}
198
199 /**
200 * Return true if a character is a end of an atom
201 * @param c
202 * @return
203 */
204 static inline bool
ucl_lex_is_atom_end(const unsigned char c)205 ucl_lex_is_atom_end (const unsigned char c)
206 {
207 return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
208 }
209
/**
 * Tell whether two consecutive characters open a comment
 * @param c1 first character
 * @param c2 character following c1
 * @return true for '#' (whatever follows) and for the pair "/" "*"
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	switch (c1) {
	case '#':
		/* Single line comment, the second character does not matter */
		return true;
	case '/':
		/* Multiline comment needs the asterisk right after the slash */
		return c2 == '*';
	default:
		return false;
	}
}
223
224 /**
225 * Check variable found
226 * @param parser
227 * @param ptr
228 * @param remain
229 * @param out_len
230 * @param strict
231 * @param found
232 * @return
233 */
234 static inline const char *
ucl_check_variable_safe(struct ucl_parser * parser,const char * ptr,size_t remain,size_t * out_len,bool strict,bool * found)235 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
236 size_t *out_len, bool strict, bool *found)
237 {
238 struct ucl_variable *var;
239 unsigned char *dst;
240 size_t dstlen;
241 bool need_free = false;
242
243 LL_FOREACH (parser->variables, var) {
244 if (strict) {
245 if (remain == var->var_len) {
246 if (memcmp (ptr, var->var, var->var_len) == 0) {
247 *out_len += var->value_len;
248 *found = true;
249 return (ptr + var->var_len);
250 }
251 }
252 }
253 else {
254 if (remain >= var->var_len) {
255 if (memcmp (ptr, var->var, var->var_len) == 0) {
256 *out_len += var->value_len;
257 *found = true;
258 return (ptr + var->var_len);
259 }
260 }
261 }
262 }
263
264 /* XXX: can only handle ${VAR} */
265 if (!(*found) && parser->var_handler != NULL && strict) {
266 /* Call generic handler */
267 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
268 parser->var_data)) {
269 *found = true;
270 if (need_free) {
271 free (dst);
272 }
273 return (ptr + remain);
274 }
275 }
276
277 return ptr;
278 }
279
/**
 * Check for a variable reference right after a '$' sign and account for the
 * number of output bytes it produces
 * @param parser parser holding the registered variables
 * @param ptr position of the character following the '$'
 * @param remain number of bytes available at ptr
 * @param out_len running output length, updated for the '$' and, when a
 * variable is found, for its value
 * @param vars_found set to true when a variable is found, untouched otherwise
 * @return position from which the caller has to continue scanning
 */
static const char *
ucl_check_variable (struct ucl_parser *parser, const char *ptr,
		size_t remain, size_t *out_len, bool *vars_found)
{
	const char *p, *end, *ret = ptr;
	bool found = false;
	bool closed = false;

	if (remain == 0) {
		/* '$' is the last character of the input: it is kept as is */
		(*out_len) ++;
		return ptr;
	}

	if (*ptr == '{') {
		/* We need to match the variable enclosed in braces */
		p = ptr + 1;
		end = ptr + remain;
		while (p < end) {
			if (*p == '}') {
				closed = true;
				ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1,
						out_len, true, &found);
				if (found) {
					/* {} must be excluded actually */
					ret ++;
					if (!*vars_found) {
						*vars_found = true;
					}
				}
				else {
					/* Unknown variable: "${" goes to the output verbatim */
					*out_len += 2;
				}
				break;
			}
			p ++;
		}
		if (!closed) {
			/*
			 * No closing brace: the caller resumes at '{' and counts it as
			 * an ordinary character, so only the '$' is accounted for here.
			 */
			(*out_len) ++;
		}
	}
	else if (*ptr != '$') {
		/* Not count escaped dollar sign */
		ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
		if (found && !*vars_found) {
			*vars_found = true;
		}
		if (!found) {
			/* Only the '$' itself, the name is counted by the caller */
			(*out_len) ++;
		}
	}
	else {
		/* "$$" produces a single '$' */
		ret ++;
		(*out_len) ++;
	}

	return ret;
}
336
337 /**
338 * Expand a single variable
339 * @param parser
340 * @param ptr
341 * @param remain
342 * @param dest
343 * @return
344 */
345 static const char *
ucl_expand_single_variable(struct ucl_parser * parser,const char * ptr,size_t remain,unsigned char ** dest)346 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
347 size_t remain, unsigned char **dest)
348 {
349 unsigned char *d = *dest, *dst;
350 const char *p = ptr + 1, *ret;
351 struct ucl_variable *var;
352 size_t dstlen;
353 bool need_free = false;
354 bool found = false;
355 bool strict = false;
356
357 ret = ptr + 1;
358 remain --;
359
360 if (*p == '$') {
361 *d++ = *p++;
362 *dest = d;
363 return p;
364 }
365 else if (*p == '{') {
366 p ++;
367 strict = true;
368 ret += 2;
369 remain -= 2;
370 }
371
372 LL_FOREACH (parser->variables, var) {
373 if (remain >= var->var_len) {
374 if (memcmp (p, var->var, var->var_len) == 0) {
375 memcpy (d, var->value, var->value_len);
376 ret += var->var_len;
377 d += var->value_len;
378 found = true;
379 break;
380 }
381 }
382 }
383 if (!found) {
384 if (strict && parser->var_handler != NULL) {
385 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
386 parser->var_data)) {
387 memcpy (d, dst, dstlen);
388 ret += dstlen;
389 d += remain;
390 found = true;
391 }
392 }
393
394 /* Leave variable as is */
395 if (!found) {
396 memcpy (d, ptr, 2);
397 d += 2;
398 ret --;
399 }
400 }
401
402 *dest = d;
403 return ret;
404 }
405
/**
 * Expand variables in string
 *
 * The input is scanned twice: the first pass computes the size of the
 * expanded string and finds out whether there is anything to expand, the
 * second pass writes the result into a newly allocated buffer.
 * @param parser parser holding the registered variables
 * @param dst receives the allocated, NUL-terminated expanded string, or NULL
 * when nothing was expanded or the allocation failed
 * @param src input string
 * @param in_len length of the input string
 * @return length of the expanded string (excluding the trailing NUL), or
 * in_len when *dst is set to NULL
 */
static ssize_t
ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
		const char *src, size_t in_len)
{
	const char *p, *end = src + in_len;
	unsigned char *d;
	size_t out_len = 0;
	bool vars_found = false;

	/*
	 * '<' rather than '!=': a helper that steps past the end must stop the
	 * scan instead of sending it off the buffer.
	 */
	p = src;
	while (p < end) {
		if (*p == '$') {
			p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
		}
		else {
			p ++;
			out_len ++;
		}
	}

	if (!vars_found) {
		/* Trivial case */
		*dst = NULL;
		return in_len;
	}

	*dst = UCL_ALLOC (out_len + 1);
	if (*dst == NULL) {
		return in_len;
	}

	d = *dst;
	p = src;
	while (p < end) {
		if (*p == '$') {
			p = ucl_expand_single_variable (parser, p, end - p, &d);
		}
		else {
			*d++ = *p++;
		}
	}

	*d = '\0';

	/* Report what has really been written, not the estimate */
	return d - *dst;
}
460
461 /**
462 * Store or copy pointer to the trash stack
463 * @param parser parser object
464 * @param src src string
465 * @param dst destination buffer (trash stack pointer)
466 * @param dst_const const destination pointer (e.g. value of object)
467 * @param in_len input length
468 * @param need_unescape need to unescape source (and copy it)
469 * @param need_lowercase need to lowercase value (and copy)
470 * @param need_expand need to expand variables (and copy as well)
471 * @return output length (excluding \0 symbol)
472 */
473 static inline ssize_t
ucl_copy_or_store_ptr(struct ucl_parser * parser,const unsigned char * src,unsigned char ** dst,const char ** dst_const,size_t in_len,bool need_unescape,bool need_lowercase,bool need_expand)474 ucl_copy_or_store_ptr (struct ucl_parser *parser,
475 const unsigned char *src, unsigned char **dst,
476 const char **dst_const, size_t in_len,
477 bool need_unescape, bool need_lowercase, bool need_expand)
478 {
479 ssize_t ret = -1, tret;
480 unsigned char *tmp;
481
482 if (need_unescape || need_lowercase ||
483 (need_expand && parser->variables != NULL) ||
484 !(parser->flags & UCL_PARSER_ZEROCOPY)) {
485 /* Copy string */
486 *dst = UCL_ALLOC (in_len + 1);
487 if (*dst == NULL) {
488 ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err);
489 return false;
490 }
491 if (need_lowercase) {
492 ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
493 }
494 else {
495 ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
496 }
497
498 if (need_unescape) {
499 ret = ucl_unescape_json_string (*dst, ret);
500 }
501 if (need_expand) {
502 tmp = *dst;
503 tret = ret;
504 ret = ucl_expand_variable (parser, dst, tmp, ret);
505 if (*dst == NULL) {
506 /* Nothing to expand */
507 *dst = tmp;
508 ret = tret;
509 }
510 }
511 *dst_const = *dst;
512 }
513 else {
514 *dst_const = src;
515 ret = in_len;
516 }
517
518 return ret;
519 }
520
521 /**
522 * Create and append an object at the specified level
523 * @param parser
524 * @param is_array
525 * @param level
526 * @return
527 */
528 static inline ucl_object_t *
ucl_add_parser_stack(ucl_object_t * obj,struct ucl_parser * parser,bool is_array,int level)529 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
530 {
531 struct ucl_stack *st;
532
533 if (!is_array) {
534 if (obj == NULL) {
535 obj = ucl_object_typed_new (UCL_OBJECT);
536 }
537 else {
538 obj->type = UCL_OBJECT;
539 }
540 obj->value.ov = ucl_hash_create ();
541 parser->state = UCL_STATE_KEY;
542 }
543 else {
544 if (obj == NULL) {
545 obj = ucl_object_typed_new (UCL_ARRAY);
546 }
547 else {
548 obj->type = UCL_ARRAY;
549 }
550 parser->state = UCL_STATE_VALUE;
551 }
552
553 st = UCL_ALLOC (sizeof (struct ucl_stack));
554 if (st == NULL) {
555 ucl_set_err (parser->chunks, 0, "cannot allocate memory for an object", &parser->err);
556 return NULL;
557 }
558 st->obj = obj;
559 st->level = level;
560 LL_PREPEND (parser->stack, st);
561 parser->cur_obj = obj;
562
563 return obj;
564 }
565
566 int
ucl_maybe_parse_number(ucl_object_t * obj,const char * start,const char * end,const char ** pos,bool allow_double,bool number_bytes,bool allow_time)567 ucl_maybe_parse_number (ucl_object_t *obj,
568 const char *start, const char *end, const char **pos,
569 bool allow_double, bool number_bytes, bool allow_time)
570 {
571 const char *p = start, *c = start;
572 char *endptr;
573 bool got_dot = false, got_exp = false, need_double = false,
574 is_time = false, valid_start = false, is_hex = false,
575 is_neg = false;
576 double dv = 0;
577 int64_t lv = 0;
578
579 if (*p == '-') {
580 is_neg = true;
581 c ++;
582 p ++;
583 }
584 while (p < end) {
585 if (is_hex && isxdigit (*p)) {
586 p ++;
587 }
588 else if (isdigit (*p)) {
589 valid_start = true;
590 p ++;
591 }
592 else if (!is_hex && (*p == 'x' || *p == 'X')) {
593 is_hex = true;
594 allow_double = false;
595 c = p + 1;
596 }
597 else if (allow_double) {
598 if (p == c) {
599 /* Empty digits sequence, not a number */
600 *pos = start;
601 return EINVAL;
602 }
603 else if (*p == '.') {
604 if (got_dot) {
605 /* Double dots, not a number */
606 *pos = start;
607 return EINVAL;
608 }
609 else {
610 got_dot = true;
611 need_double = true;
612 p ++;
613 }
614 }
615 else if (*p == 'e' || *p == 'E') {
616 if (got_exp) {
617 /* Double exp, not a number */
618 *pos = start;
619 return EINVAL;
620 }
621 else {
622 got_exp = true;
623 need_double = true;
624 p ++;
625 if (p >= end) {
626 *pos = start;
627 return EINVAL;
628 }
629 if (!isdigit (*p) && *p != '+' && *p != '-') {
630 /* Wrong exponent sign */
631 *pos = start;
632 return EINVAL;
633 }
634 else {
635 p ++;
636 }
637 }
638 }
639 else {
640 /* Got the end of the number, need to check */
641 break;
642 }
643 }
644 else {
645 break;
646 }
647 }
648
649 if (!valid_start) {
650 *pos = start;
651 return EINVAL;
652 }
653
654 errno = 0;
655 if (need_double) {
656 dv = strtod (c, &endptr);
657 }
658 else {
659 if (is_hex) {
660 lv = strtoimax (c, &endptr, 16);
661 }
662 else {
663 lv = strtoimax (c, &endptr, 10);
664 }
665 }
666 if (errno == ERANGE) {
667 *pos = start;
668 return ERANGE;
669 }
670
671 /* Now check endptr */
672 if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0' ||
673 ucl_test_character (*endptr, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
674 p = endptr;
675 goto set_obj;
676 }
677
678 if (endptr < end && endptr != start) {
679 p = endptr;
680 switch (*p) {
681 case 'm':
682 case 'M':
683 case 'g':
684 case 'G':
685 case 'k':
686 case 'K':
687 if (end - p >= 2) {
688 if (p[1] == 's' || p[1] == 'S') {
689 /* Milliseconds */
690 if (!need_double) {
691 need_double = true;
692 dv = lv;
693 }
694 is_time = true;
695 if (p[0] == 'm' || p[0] == 'M') {
696 dv /= 1000.;
697 }
698 else {
699 dv *= ucl_lex_num_multiplier (*p, false);
700 }
701 p += 2;
702 goto set_obj;
703 }
704 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
705 /* Bytes */
706 if (need_double) {
707 need_double = false;
708 lv = dv;
709 }
710 lv *= ucl_lex_num_multiplier (*p, true);
711 p += 2;
712 goto set_obj;
713 }
714 else if (ucl_lex_is_atom_end (p[1])) {
715 if (need_double) {
716 dv *= ucl_lex_num_multiplier (*p, false);
717 }
718 else {
719 lv *= ucl_lex_num_multiplier (*p, number_bytes);
720 }
721 p ++;
722 goto set_obj;
723 }
724 else if (allow_time && end - p >= 3) {
725 if (tolower (p[0]) == 'm' &&
726 tolower (p[1]) == 'i' &&
727 tolower (p[2]) == 'n') {
728 /* Minutes */
729 if (!need_double) {
730 need_double = true;
731 dv = lv;
732 }
733 is_time = true;
734 dv *= 60.;
735 p += 3;
736 goto set_obj;
737 }
738 }
739 }
740 else {
741 if (need_double) {
742 dv *= ucl_lex_num_multiplier (*p, false);
743 }
744 else {
745 lv *= ucl_lex_num_multiplier (*p, number_bytes);
746 }
747 p ++;
748 goto set_obj;
749 }
750 break;
751 case 'S':
752 case 's':
753 if (allow_time &&
754 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
755 if (!need_double) {
756 need_double = true;
757 dv = lv;
758 }
759 p ++;
760 is_time = true;
761 goto set_obj;
762 }
763 break;
764 case 'h':
765 case 'H':
766 case 'd':
767 case 'D':
768 case 'w':
769 case 'W':
770 case 'Y':
771 case 'y':
772 if (allow_time &&
773 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
774 if (!need_double) {
775 need_double = true;
776 dv = lv;
777 }
778 is_time = true;
779 dv *= ucl_lex_time_multiplier (*p);
780 p ++;
781 goto set_obj;
782 }
783 break;
784 }
785 }
786
787 *pos = c;
788 return EINVAL;
789
790 set_obj:
791 if (allow_double && (need_double || is_time)) {
792 if (!is_time) {
793 obj->type = UCL_FLOAT;
794 }
795 else {
796 obj->type = UCL_TIME;
797 }
798 obj->value.dv = is_neg ? (-dv) : dv;
799 }
800 else {
801 obj->type = UCL_INT;
802 obj->value.iv = is_neg ? (-lv) : lv;
803 }
804 *pos = p;
805 return 0;
806 }
807
808 /**
809 * Parse possible number
810 * @param parser
811 * @param chunk
812 * @return true if a number has been parsed
813 */
814 static bool
ucl_lex_number(struct ucl_parser * parser,struct ucl_chunk * chunk,ucl_object_t * obj)815 ucl_lex_number (struct ucl_parser *parser,
816 struct ucl_chunk *chunk, ucl_object_t *obj)
817 {
818 const unsigned char *pos;
819 int ret;
820
821 ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
822 true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
823
824 if (ret == 0) {
825 chunk->remain -= pos - chunk->pos;
826 chunk->column += pos - chunk->pos;
827 chunk->pos = pos;
828 return true;
829 }
830 else if (ret == ERANGE) {
831 ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err);
832 }
833
834 return false;
835 }
836
837 /**
838 * Parse quoted string with possible escapes
839 * @param parser
840 * @param chunk
841 * @return true if a string has been parsed
842 */
843 static bool
ucl_lex_json_string(struct ucl_parser * parser,struct ucl_chunk * chunk,bool * need_unescape,bool * ucl_escape,bool * var_expand)844 ucl_lex_json_string (struct ucl_parser *parser,
845 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
846 {
847 const unsigned char *p = chunk->pos;
848 unsigned char c;
849 int i;
850
851 while (p < chunk->end) {
852 c = *p;
853 if (c < 0x1F) {
854 /* Unmasked control character */
855 if (c == '\n') {
856 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err);
857 }
858 else {
859 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err);
860 }
861 return false;
862 }
863 else if (c == '\\') {
864 ucl_chunk_skipc (chunk, p);
865 c = *p;
866 if (p >= chunk->end) {
867 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
868 return false;
869 }
870 else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
871 if (c == 'u') {
872 ucl_chunk_skipc (chunk, p);
873 for (i = 0; i < 4 && p < chunk->end; i ++) {
874 if (!isxdigit (*p)) {
875 ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err);
876 return false;
877 }
878 ucl_chunk_skipc (chunk, p);
879 }
880 if (p >= chunk->end) {
881 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
882 return false;
883 }
884 }
885 else {
886 ucl_chunk_skipc (chunk, p);
887 }
888 }
889 *need_unescape = true;
890 *ucl_escape = true;
891 continue;
892 }
893 else if (c == '"') {
894 ucl_chunk_skipc (chunk, p);
895 return true;
896 }
897 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
898 *ucl_escape = true;
899 }
900 else if (c == '$') {
901 *var_expand = true;
902 }
903 ucl_chunk_skipc (chunk, p);
904 }
905
906 ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err);
907 return false;
908 }
909
/**
 * Parse a key in an object and attach a new object with that key to the
 * container on top of the parser stack
 * @param parser parser state; parser->stack must refer to the enclosing
 * object, parser->cur_obj is set to the newly created object
 * @param chunk chunk to read from; advanced past the key and the optional
 * '=' or ':' separator
 * @param next_key written only when no separator follows the key: true if
 * the text after the key reaches '{' or '[' before any of ',', ';', '\n'
 * or '\r' (and does not start with a brace), false otherwise
 * @param end_of_object set to true when '}' is met instead of a key and to
 * false once the body of a key has been read
 * @return true if a key has been parsed, and also when a macro start ('.'),
 * the end of the object or nothing but whitespace/comments was found;
 * false on error (parser->err is set by the failing check)
 */
static bool
ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
{
	const unsigned char *p, *c = NULL, *end, *t;
	const char *key = NULL;
	bool got_quote = false, got_eq = false, got_semicolon = false,
			need_unescape = false, ucl_escape = false, var_expand = false,
			got_content = false, got_sep = false;
	ucl_object_t *nobj, *tobj;
	ucl_hash_t *container;
	ssize_t keylen;

	p = chunk->pos;

	if (*p == '.') {
		/* It is macro actually */
		ucl_chunk_skipc (chunk, p);
		parser->prev_state = parser->state;
		parser->state = UCL_STATE_MACRO_NAME;
		return true;
	}
	/* c stays NULL until the first character of the key has been found */
	while (p < chunk->end) {
		/*
		 * A key must start with alpha, number, '/' or '_' and end with space character
		 */
		if (c == NULL) {
			if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
				if (!ucl_skip_comments (parser)) {
					return false;
				}
				/* The comment skipper moves chunk->pos only */
				p = chunk->pos;
			}
			else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
				ucl_chunk_skipc (chunk, p);
			}
			else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
				/* The first symbol */
				c = p;
				ucl_chunk_skipc (chunk, p);
				got_content = true;
			}
			else if (*p == '"') {
				/* JSON style key */
				c = p + 1;
				got_quote = true;
				got_content = true;
				ucl_chunk_skipc (chunk, p);
			}
			else if (*p == '}') {
				/* We have actually end of an object */
				*end_of_object = true;
				return true;
			}
			else if (*p == '.') {
				/* Macro met after leading whitespace or comments */
				ucl_chunk_skipc (chunk, p);
				parser->prev_state = parser->state;
				parser->state = UCL_STATE_MACRO_NAME;
				return true;
			}
			else {
				/* Invalid identifier */
				ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err);
				return false;
			}
		}
		else {
			/* Parse the body of a key */
			if (!got_quote) {
				if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
					got_content = true;
					ucl_chunk_skipc (chunk, p);
				}
				else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
					/* The key occupies [c, end) */
					end = p;
					break;
				}
				else {
					ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err);
					return false;
				}
			}
			else {
				/* We need to parse json like quoted string */
				if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
					return false;
				}
				/* Always escape keys obtained via json */
				/* chunk->pos is past the closing quote, the key ends before it */
				end = chunk->pos - 1;
				p = chunk->pos;
				break;
			}
		}
	}

	if (p >= chunk->end && got_content) {
		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
		return false;
	}
	else if (!got_content) {
		/* Only whitespace and comments were left in the chunk */
		return true;
	}
	*end_of_object = false;
	/* We are now at the end of the key, need to parse the rest */
	while (p < chunk->end) {
		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
			ucl_chunk_skipc (chunk, p);
		}
		else if (*p == '=') {
			/* At most one separator, either '=' or ':', is accepted */
			if (!got_eq && !got_semicolon) {
				ucl_chunk_skipc (chunk, p);
				got_eq = true;
			}
			else {
				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err);
				return false;
			}
		}
		else if (*p == ':') {
			/* got_semicolon records a ':' separator, despite its name */
			if (!got_eq && !got_semicolon) {
				ucl_chunk_skipc (chunk, p);
				got_semicolon = true;
			}
			else {
				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err);
				return false;
			}
		}
		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
			/* Check for comment */
			if (!ucl_skip_comments (parser)) {
				return false;
			}
			p = chunk->pos;
		}
		else {
			/* Start value */
			break;
		}
	}

	if (p >= chunk->end && got_content) {
		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
		return false;
	}

	got_sep = got_semicolon || got_eq;

	if (!got_sep) {
		/*
		 * Maybe we have more keys nested, so search for termination character.
		 * Possible choices:
		 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
		 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
		 * 3) key1 value[;,\n] <- we treat that as linear object
		 */
		/* t only looks ahead, the chunk position is not moved here */
		t = p;
		*next_key = false;
		while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
			t ++;
		}
		/* Check first non-space character after a key */
		if (*t != '{' && *t != '[') {
			while (t < chunk->end) {
				if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
					break;
				}
				else if (*t == '{' || *t == '[') {
					*next_key = true;
					break;
				}
				t ++;
			}
		}
	}

	/* Create a new object */
	/* NOTE(review): the result of ucl_object_new is used without a NULL check */
	nobj = ucl_object_new ();
	keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
			&key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
	if (keylen == -1) {
		ucl_object_unref (nobj);
		return false;
	}
	else if (keylen == 0) {
		ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
		ucl_object_unref (nobj);
		return false;
	}

	container = parser->stack->obj->value.ov;
	nobj->key = key;
	nobj->keylen = keylen;
	tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj));
	if (tobj == NULL) {
		/* First object with this key: it goes into the hash itself */
		container = ucl_hash_insert_object (container, nobj);
		nobj->prev = nobj;
		nobj->next = NULL;
		parser->stack->obj->len ++;
	}
	else {
		/* The key is already present: chain the new object to the old one */
		DL_APPEND (tobj, nobj);
	}

	if (ucl_escape) {
		nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
	}
	/* The insertion may have returned a different hash pointer */
	parser->stack->obj->value.ov = container;

	parser->cur_obj = nobj;

	return true;
}
1128
1129 /**
1130 * Parse a cl string
1131 * @param parser
1132 * @param chunk
1133 * @return true if a key has been parsed
1134 */
1135 static bool
ucl_parse_string_value(struct ucl_parser * parser,struct ucl_chunk * chunk,bool * var_expand,bool * need_unescape)1136 ucl_parse_string_value (struct ucl_parser *parser,
1137 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1138 {
1139 const unsigned char *p;
1140 enum {
1141 UCL_BRACE_ROUND = 0,
1142 UCL_BRACE_SQUARE,
1143 UCL_BRACE_FIGURE
1144 };
1145 int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1146
1147 p = chunk->pos;
1148
1149 while (p < chunk->end) {
1150
1151 /* Skip pairs of figure braces */
1152 if (*p == '{') {
1153 braces[UCL_BRACE_FIGURE][0] ++;
1154 }
1155 else if (*p == '}') {
1156 braces[UCL_BRACE_FIGURE][1] ++;
1157 if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1158 /* This is not a termination symbol, continue */
1159 ucl_chunk_skipc (chunk, p);
1160 continue;
1161 }
1162 }
1163 /* Skip pairs of square braces */
1164 else if (*p == '[') {
1165 braces[UCL_BRACE_SQUARE][0] ++;
1166 }
1167 else if (*p == ']') {
1168 braces[UCL_BRACE_SQUARE][1] ++;
1169 if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1170 /* This is not a termination symbol, continue */
1171 ucl_chunk_skipc (chunk, p);
1172 continue;
1173 }
1174 }
1175 else if (*p == '$') {
1176 *var_expand = true;
1177 }
1178 else if (*p == '\\') {
1179 *need_unescape = true;
1180 ucl_chunk_skipc (chunk, p);
1181 if (p < chunk->end) {
1182 ucl_chunk_skipc (chunk, p);
1183 }
1184 continue;
1185 }
1186
1187 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1188 break;
1189 }
1190 ucl_chunk_skipc (chunk, p);
1191 }
1192
1193 if (p >= chunk->end) {
1194 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err);
1195 return false;
1196 }
1197
1198 return true;
1199 }
1200
1201 /**
1202 * Parse multiline string ending with \n{term}\n
1203 * @param parser
1204 * @param chunk
1205 * @param term
1206 * @param term_len
1207 * @return size of multiline string or 0 in case of error
1208 */
1209 static int
ucl_parse_multiline_string(struct ucl_parser * parser,struct ucl_chunk * chunk,const unsigned char * term,int term_len,unsigned char const ** beg,bool * var_expand)1210 ucl_parse_multiline_string (struct ucl_parser *parser,
1211 struct ucl_chunk *chunk, const unsigned char *term,
1212 int term_len, unsigned char const **beg,
1213 bool *var_expand)
1214 {
1215 const unsigned char *p, *c;
1216 bool newline = false;
1217 int len = 0;
1218
1219 p = chunk->pos;
1220
1221 c = p;
1222
1223 while (p < chunk->end) {
1224 if (newline) {
1225 if (chunk->end - p < term_len) {
1226 return 0;
1227 }
1228 else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) {
1229 len = p - c;
1230 chunk->remain -= term_len;
1231 chunk->pos = p + term_len;
1232 chunk->column = term_len;
1233 *beg = c;
1234 break;
1235 }
1236 }
1237 if (*p == '\n') {
1238 newline = true;
1239 }
1240 else {
1241 if (*p == '$') {
1242 *var_expand = true;
1243 }
1244 newline = false;
1245 }
1246 ucl_chunk_skipc (chunk, p);
1247 }
1248
1249 return len;
1250 }
1251
1252 static ucl_object_t*
ucl_get_value_object(struct ucl_parser * parser)1253 ucl_get_value_object (struct ucl_parser *parser)
1254 {
1255 ucl_object_t *t, *obj = NULL;
1256
1257 if (parser->stack->obj->type == UCL_ARRAY) {
1258 /* Object must be allocated */
1259 obj = ucl_object_new ();
1260 t = parser->stack->obj->value.av;
1261 DL_APPEND (t, obj);
1262 parser->cur_obj = obj;
1263 parser->stack->obj->value.av = t;
1264 parser->stack->obj->len ++;
1265 }
1266 else {
1267 /* Object has been already allocated */
1268 obj = parser->cur_obj;
1269 }
1270
1271 return obj;
1272 }
1273
1274 /**
1275 * Handle value data
1276 * @param parser
1277 * @param chunk
1278 * @return
1279 */
1280 static bool
ucl_parse_value(struct ucl_parser * parser,struct ucl_chunk * chunk)1281 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1282 {
1283 const unsigned char *p, *c;
1284 ucl_object_t *obj = NULL;
1285 unsigned int stripped_spaces;
1286 int str_len;
1287 bool need_unescape = false, ucl_escape = false, var_expand = false;
1288
1289 p = chunk->pos;
1290
1291 /* Skip any spaces and comments */
1292 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1293 (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1294 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1295 ucl_chunk_skipc (chunk, p);
1296 }
1297 if (!ucl_skip_comments (parser)) {
1298 return false;
1299 }
1300 p = chunk->pos;
1301 }
1302
1303 while (p < chunk->end) {
1304 c = p;
1305 switch (*p) {
1306 case '"':
1307 obj = ucl_get_value_object (parser);
1308 ucl_chunk_skipc (chunk, p);
1309 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1310 return false;
1311 }
1312 str_len = chunk->pos - c - 2;
1313 obj->type = UCL_STRING;
1314 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1315 &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1316 return false;
1317 }
1318 obj->len = str_len;
1319 parser->state = UCL_STATE_AFTER_VALUE;
1320 p = chunk->pos;
1321 return true;
1322 break;
1323 case '{':
1324 obj = ucl_get_value_object (parser);
1325 /* We have a new object */
1326 obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1327 if (obj == NULL) {
1328 return false;
1329 }
1330
1331 ucl_chunk_skipc (chunk, p);
1332 return true;
1333 break;
1334 case '[':
1335 obj = ucl_get_value_object (parser);
1336 /* We have a new array */
1337 obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1338 if (obj == NULL) {
1339 return false;
1340 }
1341
1342 ucl_chunk_skipc (chunk, p);
1343 return true;
1344 break;
1345 case ']':
1346 /* We have the array ending */
1347 if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1348 parser->state = UCL_STATE_AFTER_VALUE;
1349 return true;
1350 }
1351 else {
1352 goto parse_string;
1353 }
1354 break;
1355 case '<':
1356 obj = ucl_get_value_object (parser);
1357 /* We have something like multiline value, which must be <<[A-Z]+\n */
1358 if (chunk->end - p > 3) {
1359 if (memcmp (p, "<<", 2) == 0) {
1360 p += 2;
1361 /* We allow only uppercase characters in multiline definitions */
1362 while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1363 p ++;
1364 }
1365 if (*p =='\n') {
1366 /* Set chunk positions and start multiline parsing */
1367 c += 2;
1368 chunk->remain -= p - c;
1369 chunk->pos = p + 1;
1370 chunk->column = 0;
1371 chunk->line ++;
1372 if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1373 p - c, &c, &var_expand)) == 0) {
1374 ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err);
1375 return false;
1376 }
1377 obj->type = UCL_STRING;
1378 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1379 &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1380 return false;
1381 }
1382 obj->len = str_len;
1383 parser->state = UCL_STATE_AFTER_VALUE;
1384 return true;
1385 }
1386 }
1387 }
1388 /* Fallback to ordinary strings */
1389 default:
1390 parse_string:
1391 if (obj == NULL) {
1392 obj = ucl_get_value_object (parser);
1393 }
1394 /* Parse atom */
1395 if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1396 if (!ucl_lex_number (parser, chunk, obj)) {
1397 if (parser->state == UCL_STATE_ERROR) {
1398 return false;
1399 }
1400 }
1401 else {
1402 parser->state = UCL_STATE_AFTER_VALUE;
1403 return true;
1404 }
1405 /* Fallback to normal string */
1406 }
1407
1408 if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1409 return false;
1410 }
1411 /* Cut trailing spaces */
1412 stripped_spaces = 0;
1413 while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1414 UCL_CHARACTER_WHITESPACE)) {
1415 stripped_spaces ++;
1416 }
1417 str_len = chunk->pos - c - stripped_spaces;
1418 if (str_len <= 0) {
1419 ucl_set_err (chunk, 0, "string value must not be empty", &parser->err);
1420 return false;
1421 }
1422 else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1423 obj->len = 0;
1424 obj->type = UCL_NULL;
1425 }
1426 else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1427 obj->type = UCL_STRING;
1428 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1429 &obj->value.sv, str_len, need_unescape,
1430 false, var_expand)) == -1) {
1431 return false;
1432 }
1433 obj->len = str_len;
1434 }
1435 parser->state = UCL_STATE_AFTER_VALUE;
1436 p = chunk->pos;
1437
1438 return true;
1439 break;
1440 }
1441 }
1442
1443 return true;
1444 }
1445
1446 /**
1447 * Handle after value data
1448 * @param parser
1449 * @param chunk
1450 * @return
1451 */
1452 static bool
ucl_parse_after_value(struct ucl_parser * parser,struct ucl_chunk * chunk)1453 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1454 {
1455 const unsigned char *p;
1456 bool got_sep = false;
1457 struct ucl_stack *st;
1458
1459 p = chunk->pos;
1460
1461 while (p < chunk->end) {
1462 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1463 /* Skip whitespaces */
1464 ucl_chunk_skipc (chunk, p);
1465 }
1466 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1467 /* Skip comment */
1468 if (!ucl_skip_comments (parser)) {
1469 return false;
1470 }
1471 /* Treat comment as a separator */
1472 got_sep = true;
1473 p = chunk->pos;
1474 }
1475 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1476 if (*p == '}' || *p == ']') {
1477 if (parser->stack == NULL) {
1478 ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err);
1479 return false;
1480 }
1481 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1482 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1483
1484 /* Pop all nested objects from a stack */
1485 st = parser->stack;
1486 parser->stack = st->next;
1487 UCL_FREE (sizeof (struct ucl_stack), st);
1488
1489 while (parser->stack != NULL) {
1490 st = parser->stack;
1491 if (st->next == NULL || st->next->level == st->level) {
1492 break;
1493 }
1494 parser->stack = st->next;
1495 UCL_FREE (sizeof (struct ucl_stack), st);
1496 }
1497 }
1498 else {
1499 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err);
1500 return false;
1501 }
1502
1503 if (parser->stack == NULL) {
1504 /* Ignore everything after a top object */
1505 return true;
1506 }
1507 else {
1508 ucl_chunk_skipc (chunk, p);
1509 }
1510 got_sep = true;
1511 }
1512 else {
1513 /* Got a separator */
1514 got_sep = true;
1515 ucl_chunk_skipc (chunk, p);
1516 }
1517 }
1518 else {
1519 /* Anything else */
1520 if (!got_sep) {
1521 ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err);
1522 return false;
1523 }
1524 return true;
1525 }
1526 }
1527
1528 return true;
1529 }
1530
1531 /**
1532 * Handle macro data
1533 * @param parser
1534 * @param chunk
1535 * @return
1536 */
1537 static bool
ucl_parse_macro_value(struct ucl_parser * parser,struct ucl_chunk * chunk,struct ucl_macro * macro,unsigned char const ** macro_start,size_t * macro_len)1538 ucl_parse_macro_value (struct ucl_parser *parser,
1539 struct ucl_chunk *chunk, struct ucl_macro *macro,
1540 unsigned char const **macro_start, size_t *macro_len)
1541 {
1542 const unsigned char *p, *c;
1543 bool need_unescape = false, ucl_escape = false, var_expand = false;
1544
1545 p = chunk->pos;
1546
1547 switch (*p) {
1548 case '"':
1549 /* We have macro value encoded in quotes */
1550 c = p;
1551 ucl_chunk_skipc (chunk, p);
1552 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1553 return false;
1554 }
1555
1556 *macro_start = c + 1;
1557 *macro_len = chunk->pos - c - 2;
1558 p = chunk->pos;
1559 break;
1560 case '{':
1561 /* We got a multiline macro body */
1562 ucl_chunk_skipc (chunk, p);
1563 /* Skip spaces at the beginning */
1564 while (p < chunk->end) {
1565 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1566 ucl_chunk_skipc (chunk, p);
1567 }
1568 else {
1569 break;
1570 }
1571 }
1572 c = p;
1573 while (p < chunk->end) {
1574 if (*p == '}') {
1575 break;
1576 }
1577 ucl_chunk_skipc (chunk, p);
1578 }
1579 *macro_start = c;
1580 *macro_len = p - c;
1581 ucl_chunk_skipc (chunk, p);
1582 break;
1583 default:
1584 /* Macro is not enclosed in quotes or braces */
1585 c = p;
1586 while (p < chunk->end) {
1587 if (ucl_lex_is_atom_end (*p)) {
1588 break;
1589 }
1590 ucl_chunk_skipc (chunk, p);
1591 }
1592 *macro_start = c;
1593 *macro_len = p - c;
1594 break;
1595 }
1596
1597 /* We are at the end of a macro */
1598 /* Skip ';' and space characters and return to previous state */
1599 while (p < chunk->end) {
1600 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1601 break;
1602 }
1603 ucl_chunk_skipc (chunk, p);
1604 }
1605 return true;
1606 }
1607
1608 /**
1609 * Handle the main states of rcl parser
1610 * @param parser parser structure
1611 * @param data the pointer to the beginning of a chunk
1612 * @param len the length of a chunk
1613 * @return true if chunk has been parsed and false in case of error
1614 */
1615 static bool
ucl_state_machine(struct ucl_parser * parser)1616 ucl_state_machine (struct ucl_parser *parser)
1617 {
1618 ucl_object_t *obj;
1619 struct ucl_chunk *chunk = parser->chunks;
1620 const unsigned char *p, *c = NULL, *macro_start = NULL;
1621 unsigned char *macro_escaped;
1622 size_t macro_len = 0;
1623 struct ucl_macro *macro = NULL;
1624 bool next_key = false, end_of_object = false;
1625
1626 if (parser->top_obj == NULL) {
1627 if (*chunk->pos == '[') {
1628 obj = ucl_add_parser_stack (NULL, parser, true, 0);
1629 }
1630 else {
1631 obj = ucl_add_parser_stack (NULL, parser, false, 0);
1632 }
1633 if (obj == NULL) {
1634 return false;
1635 }
1636 parser->top_obj = obj;
1637 parser->cur_obj = obj;
1638 parser->state = UCL_STATE_INIT;
1639 }
1640
1641 p = chunk->pos;
1642 while (chunk->pos < chunk->end) {
1643 switch (parser->state) {
1644 case UCL_STATE_INIT:
1645 /*
1646 * At the init state we can either go to the parse array or object
1647 * if we got [ or { correspondingly or can just treat new data as
1648 * a key of newly created object
1649 */
1650 obj = parser->cur_obj;
1651 if (!ucl_skip_comments (parser)) {
1652 parser->prev_state = parser->state;
1653 parser->state = UCL_STATE_ERROR;
1654 return false;
1655 }
1656 else {
1657 p = chunk->pos;
1658 if (*p == '[') {
1659 parser->state = UCL_STATE_VALUE;
1660 ucl_chunk_skipc (chunk, p);
1661 }
1662 else {
1663 parser->state = UCL_STATE_KEY;
1664 if (*p == '{') {
1665 ucl_chunk_skipc (chunk, p);
1666 }
1667 }
1668 }
1669 break;
1670 case UCL_STATE_KEY:
1671 /* Skip any spaces */
1672 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1673 ucl_chunk_skipc (chunk, p);
1674 }
1675 if (*p == '}') {
1676 /* We have the end of an object */
1677 parser->state = UCL_STATE_AFTER_VALUE;
1678 continue;
1679 }
1680 if (parser->stack == NULL) {
1681 /* No objects are on stack, but we want to parse a key */
1682 ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser "
1683 "expects a key", &parser->err);
1684 parser->prev_state = parser->state;
1685 parser->state = UCL_STATE_ERROR;
1686 return false;
1687 }
1688 if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1689 parser->prev_state = parser->state;
1690 parser->state = UCL_STATE_ERROR;
1691 return false;
1692 }
1693 if (end_of_object) {
1694 p = chunk->pos;
1695 parser->state = UCL_STATE_AFTER_VALUE;
1696 continue;
1697 }
1698 else if (parser->state != UCL_STATE_MACRO_NAME) {
1699 if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1700 /* Parse more keys and nest objects accordingly */
1701 obj = ucl_add_parser_stack (parser->cur_obj, parser, false,
1702 parser->stack->level + 1);
1703 if (obj == NULL) {
1704 return false;
1705 }
1706 }
1707 else {
1708 parser->state = UCL_STATE_VALUE;
1709 }
1710 }
1711 else {
1712 c = chunk->pos;
1713 }
1714 p = chunk->pos;
1715 break;
1716 case UCL_STATE_VALUE:
1717 /* We need to check what we do have */
1718 if (!ucl_parse_value (parser, chunk)) {
1719 parser->prev_state = parser->state;
1720 parser->state = UCL_STATE_ERROR;
1721 return false;
1722 }
1723 /* State is set in ucl_parse_value call */
1724 p = chunk->pos;
1725 break;
1726 case UCL_STATE_AFTER_VALUE:
1727 if (!ucl_parse_after_value (parser, chunk)) {
1728 parser->prev_state = parser->state;
1729 parser->state = UCL_STATE_ERROR;
1730 return false;
1731 }
1732 if (parser->stack != NULL) {
1733 if (parser->stack->obj->type == UCL_OBJECT) {
1734 parser->state = UCL_STATE_KEY;
1735 }
1736 else {
1737 /* Array */
1738 parser->state = UCL_STATE_VALUE;
1739 }
1740 }
1741 else {
1742 /* Skip everything at the end */
1743 return true;
1744 }
1745 p = chunk->pos;
1746 break;
1747 case UCL_STATE_MACRO_NAME:
1748 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1749 ucl_chunk_skipc (chunk, p);
1750 }
1751 else if (p - c > 0) {
1752 /* We got macro name */
1753 macro_len = (size_t)(p - c);
1754 HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1755 if (macro == NULL) {
1756 ucl_create_err (&parser->err, "error on line %d at column %d: "
1757 "unknown macro: '%.*s', character: '%c'",
1758 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1759 parser->state = UCL_STATE_ERROR;
1760 return false;
1761 }
1762 /* Now we need to skip all spaces */
1763 while (p < chunk->end) {
1764 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1765 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1766 /* Skip comment */
1767 if (!ucl_skip_comments (parser)) {
1768 return false;
1769 }
1770 p = chunk->pos;
1771 }
1772 break;
1773 }
1774 ucl_chunk_skipc (chunk, p);
1775 }
1776 parser->state = UCL_STATE_MACRO;
1777 }
1778 break;
1779 case UCL_STATE_MACRO:
1780 if (!ucl_parse_macro_value (parser, chunk, macro,
1781 ¯o_start, ¯o_len)) {
1782 parser->prev_state = parser->state;
1783 parser->state = UCL_STATE_ERROR;
1784 return false;
1785 }
1786 macro_len = ucl_expand_variable (parser, ¯o_escaped, macro_start, macro_len);
1787 parser->state = parser->prev_state;
1788 if (macro_escaped == NULL) {
1789 if (!macro->handler (macro_start, macro_len, macro->ud)) {
1790 return false;
1791 }
1792 }
1793 else {
1794 if (!macro->handler (macro_escaped, macro_len, macro->ud)) {
1795 UCL_FREE (macro_len + 1, macro_escaped);
1796 return false;
1797 }
1798 UCL_FREE (macro_len + 1, macro_escaped);
1799 }
1800 p = chunk->pos;
1801 break;
1802 default:
1803 /* TODO: add all states */
1804 ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err);
1805 parser->state = UCL_STATE_ERROR;
1806 return false;
1807 }
1808 }
1809
1810 return true;
1811 }
1812
1813 struct ucl_parser*
ucl_parser_new(int flags)1814 ucl_parser_new (int flags)
1815 {
1816 struct ucl_parser *new;
1817
1818 new = UCL_ALLOC (sizeof (struct ucl_parser));
1819 if (new == NULL) {
1820 return NULL;
1821 }
1822 memset (new, 0, sizeof (struct ucl_parser));
1823
1824 ucl_parser_register_macro (new, "include", ucl_include_handler, new);
1825 ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
1826 ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
1827
1828 new->flags = flags;
1829
1830 /* Initial assumption about filevars */
1831 ucl_parser_set_filevars (new, NULL, false);
1832
1833 return new;
1834 }
1835
1836
1837 void
ucl_parser_register_macro(struct ucl_parser * parser,const char * macro,ucl_macro_handler handler,void * ud)1838 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
1839 ucl_macro_handler handler, void* ud)
1840 {
1841 struct ucl_macro *new;
1842
1843 if (macro == NULL || handler == NULL) {
1844 return;
1845 }
1846 new = UCL_ALLOC (sizeof (struct ucl_macro));
1847 if (new == NULL) {
1848 return;
1849 }
1850 memset (new, 0, sizeof (struct ucl_macro));
1851 new->handler = handler;
1852 new->name = strdup (macro);
1853 new->ud = ud;
1854 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
1855 }
1856
1857 void
ucl_parser_register_variable(struct ucl_parser * parser,const char * var,const char * value)1858 ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
1859 const char *value)
1860 {
1861 struct ucl_variable *new = NULL, *cur;
1862
1863 if (var == NULL) {
1864 return;
1865 }
1866
1867 /* Find whether a variable already exists */
1868 LL_FOREACH (parser->variables, cur) {
1869 if (strcmp (cur->var, var) == 0) {
1870 new = cur;
1871 break;
1872 }
1873 }
1874
1875 if (value == NULL) {
1876
1877 if (new != NULL) {
1878 /* Remove variable */
1879 LL_DELETE (parser->variables, new);
1880 free (new->var);
1881 free (new->value);
1882 UCL_FREE (sizeof (struct ucl_variable), new);
1883 }
1884 else {
1885 /* Do nothing */
1886 return;
1887 }
1888 }
1889 else {
1890 if (new == NULL) {
1891 new = UCL_ALLOC (sizeof (struct ucl_variable));
1892 if (new == NULL) {
1893 return;
1894 }
1895 memset (new, 0, sizeof (struct ucl_variable));
1896 new->var = strdup (var);
1897 new->var_len = strlen (var);
1898 new->value = strdup (value);
1899 new->value_len = strlen (value);
1900
1901 LL_PREPEND (parser->variables, new);
1902 }
1903 else {
1904 free (new->value);
1905 new->value = strdup (value);
1906 new->value_len = strlen (value);
1907 }
1908 }
1909 }
1910
1911 void
ucl_parser_set_variables_handler(struct ucl_parser * parser,ucl_variable_handler handler,void * ud)1912 ucl_parser_set_variables_handler (struct ucl_parser *parser,
1913 ucl_variable_handler handler, void *ud)
1914 {
1915 parser->var_handler = handler;
1916 parser->var_data = ud;
1917 }
1918
1919 bool
ucl_parser_add_chunk(struct ucl_parser * parser,const unsigned char * data,size_t len)1920 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
1921 size_t len)
1922 {
1923 struct ucl_chunk *chunk;
1924
1925 if (data == NULL || len == 0) {
1926 ucl_create_err (&parser->err, "invalid chunk added");
1927 return false;
1928 }
1929 if (parser->state != UCL_STATE_ERROR) {
1930 chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
1931 if (chunk == NULL) {
1932 ucl_create_err (&parser->err, "cannot allocate chunk structure");
1933 return false;
1934 }
1935 chunk->begin = data;
1936 chunk->remain = len;
1937 chunk->pos = chunk->begin;
1938 chunk->end = chunk->begin + len;
1939 chunk->line = 1;
1940 chunk->column = 0;
1941 LL_PREPEND (parser->chunks, chunk);
1942 parser->recursion ++;
1943 if (parser->recursion > UCL_MAX_RECURSION) {
1944 ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
1945 parser->recursion);
1946 return false;
1947 }
1948 return ucl_state_machine (parser);
1949 }
1950
1951 ucl_create_err (&parser->err, "a parser is in an invalid state");
1952
1953 return false;
1954 }
1955
1956 bool
ucl_parser_add_string(struct ucl_parser * parser,const char * data,size_t len)1957 ucl_parser_add_string (struct ucl_parser *parser, const char *data,
1958 size_t len)
1959 {
1960 if (data == NULL) {
1961 ucl_create_err (&parser->err, "invalid string added");
1962 return false;
1963 }
1964 if (len == 0) {
1965 len = strlen (data);
1966 }
1967
1968 return ucl_parser_add_chunk (parser, (const unsigned char *)data, len);
1969 }
1970