1 /*
2 * paths.c: a path manipulation library using svn_stringbuf_t
3 *
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
20 * under the License.
21 * ====================================================================
22 */
23
24
25
26 #include <string.h>
27 #include <assert.h>
28
29 #include <apr_file_info.h>
30 #include <apr_lib.h>
31 #include <apr_uri.h>
32
33 #include "svn_string.h"
34 #include "svn_dirent_uri.h"
35 #include "svn_path.h"
36 #include "svn_private_config.h" /* for SVN_PATH_LOCAL_SEPARATOR */
37 #include "svn_utf.h"
38 #include "svn_io.h" /* for svn_io_stat() */
39 #include "svn_ctype.h"
40
41 #include "dirent_uri.h"
42
43
/* The canonical empty path, as used by the helpers in this file.  Can this
   be changed?  Well, change the empty test below and the path library will
   work, not so sure about the fs/wc libraries. */
#define SVN_EMPTY_PATH ""

/* TRUE if S is the canonical empty path, FALSE otherwise.  Only the first
   byte of S is examined, so S must point to at least one readable byte. */
#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0')

/* TRUE if S,N (a string S of N bytes) is the platform's empty path ("."),
   FALSE otherwise.  Can this be changed?  Well, the path library will
   work, not so sure about the OS! */
#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.')
56
57
58
59
60 #ifndef NDEBUG
61 /* This function is an approximation of svn_path_is_canonical.
62 * It is supposed to be used in functions that do not have access
63 * to a pool, but still want to assert that a path is canonical.
64 *
65 * PATH with length LEN is assumed to be canonical if it isn't
66 * the platform's empty path (see definition of SVN_PATH_IS_PLATFORM_EMPTY),
67 * and does not contain "/./", and any one of the following
68 * conditions is also met:
69 *
70 * 1. PATH has zero length
71 * 2. PATH is the root directory (what exactly a root directory is
72 * depends on the platform)
73 * 3. PATH is not a root directory and does not end with '/'
74 *
75 * If possible, please use svn_path_is_canonical instead.
76 */
77 static svn_boolean_t
is_canonical(const char * path,apr_size_t len)78 is_canonical(const char *path,
79 apr_size_t len)
80 {
81 return (! SVN_PATH_IS_PLATFORM_EMPTY(path, len)
82 && strstr(path, "/./") == NULL
83 && (len == 0
84 || (len == 1 && path[0] == '/')
85 || (path[len-1] != '/')
86 #if defined(WIN32) || defined(__CYGWIN__)
87 || svn_dirent_is_root(path, len)
88 #endif
89 ));
90 }
91 #endif
92
93
94 /* functionality of svn_path_is_canonical but without the deprecation */
95 static svn_boolean_t
svn_path_is_canonical_internal(const char * path,apr_pool_t * pool)96 svn_path_is_canonical_internal(const char *path, apr_pool_t *pool)
97 {
98 return svn_uri_is_canonical(path, pool) ||
99 svn_dirent_is_canonical(path, pool) ||
100 svn_relpath_is_canonical(path);
101 }
102
103 svn_boolean_t
svn_path_is_canonical(const char * path,apr_pool_t * pool)104 svn_path_is_canonical(const char *path, apr_pool_t *pool)
105 {
106 return svn_path_is_canonical_internal(path, pool);
107 }
108
109 /* functionality of svn_path_join but without the deprecation */
110 static char *
svn_path_join_internal(const char * base,const char * component,apr_pool_t * pool)111 svn_path_join_internal(const char *base,
112 const char *component,
113 apr_pool_t *pool)
114 {
115 apr_size_t blen = strlen(base);
116 apr_size_t clen = strlen(component);
117 char *path;
118
119 assert(svn_path_is_canonical_internal(base, pool));
120 assert(svn_path_is_canonical_internal(component, pool));
121
122 /* If the component is absolute, then return it. */
123 if (*component == '/')
124 return apr_pmemdup(pool, component, clen + 1);
125
126 /* If either is empty return the other */
127 if (SVN_PATH_IS_EMPTY(base))
128 return apr_pmemdup(pool, component, clen + 1);
129 if (SVN_PATH_IS_EMPTY(component))
130 return apr_pmemdup(pool, base, blen + 1);
131
132 if (blen == 1 && base[0] == '/')
133 blen = 0; /* Ignore base, just return separator + component */
134
135 /* Construct the new, combined path. */
136 path = apr_palloc(pool, blen + 1 + clen + 1);
137 memcpy(path, base, blen);
138 path[blen] = '/';
139 memcpy(path + blen + 1, component, clen + 1);
140
141 return path;
142 }
143
/* Public entry point: join BASE and COMPONENT into a new path allocated in
   POOL.  All work is done by svn_path_join_internal. */
char *svn_path_join(const char *base,
                    const char *component,
                    apr_pool_t *pool)
{
  char *joined = svn_path_join_internal(base, component, pool);

  return joined;
}
150
/* Join BASE with a NULL-terminated list of `const char *' components and
   return the result as a new NUL-terminated string allocated in POOL.

   BASE and every component are asserted to be canonical.  Empty
   components are skipped.  An absolute component (leading '/') discards
   BASE and every component that came before it.  If BASE is "/" and no
   non-empty component follows, "/" is returned.

   The function makes two passes over the argument list: the first
   computes the total length, the second copies the bytes.  Lengths of
   BASE and of the first few non-empty components are cached in
   SAVED_LENGTHS so the second pass can avoid calling strlen again;
   components beyond the cache are simply measured a second time. */
char *svn_path_join_many(apr_pool_t *pool, const char *base, ...)
{
#define MAX_SAVED_LENGTHS 10
  apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
  apr_size_t total_len;
  int nargs;
  va_list va;
  const char *s;
  apr_size_t len;
  char *path;
  char *p;
  svn_boolean_t base_is_empty = FALSE, base_is_root = FALSE;
  int base_arg = 0;

  total_len = strlen(base);

  assert(svn_path_is_canonical_internal(base, pool));

  if (total_len == 1 && *base == '/')
    base_is_root = TRUE;
  else if (SVN_PATH_IS_EMPTY(base))
    {
      total_len = sizeof(SVN_EMPTY_PATH) - 1;
      base_is_empty = TRUE;
    }

  /* Slot 0 holds the base; slot K (K >= 1) holds the K-th non-empty
     component. */
  saved_lengths[0] = total_len;

  /* Compute the length of the resulting string. */

  nargs = 0;
  va_start(va, base);
  while ((s = va_arg(va, const char *)) != NULL)
    {
      len = strlen(s);

      assert(svn_path_is_canonical_internal(s, pool));

      if (SVN_PATH_IS_EMPTY(s))
        continue;

      /* Advance the component index first and only then bounds-check it:
         the index that is written must be the one that is tested.
         Testing the pre-increment value would let the tenth component
         write saved_lengths[MAX_SAVED_LENGTHS], one element past the end
         of the array.  This also matches the test used when the cache is
         read back in the copy pass below. */
      ++nargs;
      if (nargs < MAX_SAVED_LENGTHS)
        saved_lengths[nargs] = len;

      if (*s == '/')
        {
          /* an absolute path. skip all components to this point and reset
             the total length. */
          total_len = len;
          base_arg = nargs;
          base_is_root = len == 1;
          base_is_empty = FALSE;
        }
      else if (nargs == base_arg
               || (nargs == base_arg + 1 && base_is_root)
               || base_is_empty)
        {
          /* if we have skipped everything up to this arg, then the base
             and all prior components are empty. just set the length to
             this component; do not add a separator.  If the base is empty
             we can now ignore it. */
          if (base_is_empty)
            {
              base_is_empty = FALSE;
              total_len = 0;
            }
          total_len += len;
        }
      else
        {
          /* separator plus component */
          total_len += 1 + len;
        }
    }
  va_end(va);

  /* base == "/" and no further components. just return that. */
  if (base_is_root && total_len == 1)
    return apr_pmemdup(pool, "/", 2);

  /* we got the total size. allocate it, with room for a NUL character. */
  path = p = apr_palloc(pool, total_len + 1);

  /* if we aren't supposed to skip forward to an absolute component, and if
     this is not an empty base that we are skipping, then copy the base
     into the output. */
  if (base_arg == 0 && ! (SVN_PATH_IS_EMPTY(base) && ! base_is_empty))
    {
      if (SVN_PATH_IS_EMPTY(base))
        memcpy(p, SVN_EMPTY_PATH, len = saved_lengths[0]);
      else
        memcpy(p, base, len = saved_lengths[0]);
      p += len;
    }

  nargs = 0;
  va_start(va, base);
  while ((s = va_arg(va, const char *)) != NULL)
    {
      if (SVN_PATH_IS_EMPTY(s))
        continue;

      /* Components before the last absolute one were discarded. */
      if (++nargs < base_arg)
        continue;

      if (nargs < MAX_SAVED_LENGTHS)
        len = saved_lengths[nargs];
      else
        len = strlen(s);

      /* insert a separator if we aren't copying in the first component
         (which can happen when base_arg is set). also, don't put in a slash
         if the prior character is a slash (occurs when prior component
         is "/"). */
      if (p != path && p[-1] != '/')
        *p++ = '/';

      /* copy the new component and advance the pointer */
      memcpy(p, s, len);
      p += len;
    }
  va_end(va);

  *p = '\0';
  assert((apr_size_t)(p - path) == total_len);

  return path;
}
278
279
280
281 apr_size_t
svn_path_component_count(const char * path)282 svn_path_component_count(const char *path)
283 {
284 apr_size_t count = 0;
285
286 assert(is_canonical(path, strlen(path)));
287
288 while (*path)
289 {
290 const char *start;
291
292 while (*path == '/')
293 ++path;
294
295 start = path;
296
297 while (*path && *path != '/')
298 ++path;
299
300 if (path != start)
301 ++count;
302 }
303
304 return count;
305 }
306
307
308 /* Return the length of substring necessary to encompass the entire
309 * previous path segment in PATH, which should be a LEN byte string.
310 *
311 * A trailing slash will not be included in the returned length except
312 * in the case in which PATH is absolute and there are no more
313 * previous segments.
314 */
315 static apr_size_t
previous_segment(const char * path,apr_size_t len)316 previous_segment(const char *path,
317 apr_size_t len)
318 {
319 if (len == 0)
320 return 0;
321
322 while (len > 0 && path[--len] != '/')
323 ;
324
325 if (len == 0 && path[0] == '/')
326 return 1;
327 else
328 return len;
329 }
330
331
332 void
svn_path_add_component(svn_stringbuf_t * path,const char * component)333 svn_path_add_component(svn_stringbuf_t *path,
334 const char *component)
335 {
336 apr_size_t len = strlen(component);
337
338 assert(is_canonical(path->data, path->len));
339 assert(is_canonical(component, strlen(component)));
340
341 /* Append a dir separator, but only if this path is neither empty
342 nor consists of a single dir separator already. */
343 if ((! SVN_PATH_IS_EMPTY(path->data))
344 && (! ((path->len == 1) && (*(path->data) == '/'))))
345 {
346 char dirsep = '/';
347 svn_stringbuf_appendbytes(path, &dirsep, sizeof(dirsep));
348 }
349
350 svn_stringbuf_appendbytes(path, component, len);
351 }
352
353
354 void
svn_path_remove_component(svn_stringbuf_t * path)355 svn_path_remove_component(svn_stringbuf_t *path)
356 {
357 assert(is_canonical(path->data, path->len));
358
359 path->len = previous_segment(path->data, path->len);
360 path->data[path->len] = '\0';
361 }
362
363
364 void
svn_path_remove_components(svn_stringbuf_t * path,apr_size_t n)365 svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n)
366 {
367 while (n > 0)
368 {
369 svn_path_remove_component(path);
370 n--;
371 }
372 }
373
374
375 char *
svn_path_dirname(const char * path,apr_pool_t * pool)376 svn_path_dirname(const char *path, apr_pool_t *pool)
377 {
378 apr_size_t len = strlen(path);
379
380 assert(svn_path_is_canonical_internal(path, pool));
381
382 return apr_pstrmemdup(pool, path, previous_segment(path, len));
383 }
384
385
386 char *
svn_path_basename(const char * path,apr_pool_t * pool)387 svn_path_basename(const char *path, apr_pool_t *pool)
388 {
389 apr_size_t len = strlen(path);
390 apr_size_t start;
391
392 assert(svn_path_is_canonical_internal(path, pool));
393
394 if (len == 1 && path[0] == '/')
395 start = 0;
396 else
397 {
398 start = len;
399 while (start > 0 && path[start - 1] != '/')
400 --start;
401 }
402
403 return apr_pstrmemdup(pool, path + start, len - start);
404 }
405
/* Return 1 if PATH is the canonical empty path, 0 otherwise.  PATH is
   asserted to be canonical. */
int
svn_path_is_empty(const char *path)
{
  assert(is_canonical(path, strlen(path)));

  return SVN_PATH_IS_EMPTY(path) ? 1 : 0;
}
416
417 int
svn_path_compare_paths(const char * path1,const char * path2)418 svn_path_compare_paths(const char *path1,
419 const char *path2)
420 {
421 apr_size_t path1_len = strlen(path1);
422 apr_size_t path2_len = strlen(path2);
423 apr_size_t min_len = ((path1_len < path2_len) ? path1_len : path2_len);
424 apr_size_t i = 0;
425
426 assert(is_canonical(path1, path1_len));
427 assert(is_canonical(path2, path2_len));
428
429 /* Skip past common prefix. */
430 while (i < min_len && path1[i] == path2[i])
431 ++i;
432
433 /* Are the paths exactly the same? */
434 if ((path1_len == path2_len) && (i >= min_len))
435 return 0;
436
437 /* Children of paths are greater than their parents, but less than
438 greater siblings of their parents. */
439 if ((path1[i] == '/') && (path2[i] == 0))
440 return 1;
441 if ((path2[i] == '/') && (path1[i] == 0))
442 return -1;
443 if (path1[i] == '/')
444 return -1;
445 if (path2[i] == '/')
446 return 1;
447
448 /* Common prefix was skipped above, next character is compared to
449 determine order. We need to use an unsigned comparison, though,
450 so a "next character" of NULL (0x00) sorts numerically
451 smallest. */
452 return (unsigned char)(path1[i]) < (unsigned char)(path2[i]) ? -1 : 1;
453 }
454
455 /* Return the string length of the longest common ancestor of PATH1 and PATH2.
456 *
457 * This function handles everything except the URL-handling logic
458 * of svn_path_get_longest_ancestor, and assumes that PATH1 and
459 * PATH2 are *not* URLs.
460 *
461 * If the two paths do not share a common ancestor, return 0.
462 *
463 * New strings are allocated in POOL.
464 */
465 static apr_size_t
get_path_ancestor_length(const char * path1,const char * path2,apr_pool_t * pool)466 get_path_ancestor_length(const char *path1,
467 const char *path2,
468 apr_pool_t *pool)
469 {
470 apr_size_t path1_len, path2_len;
471 apr_size_t i = 0;
472 apr_size_t last_dirsep = 0;
473
474 path1_len = strlen(path1);
475 path2_len = strlen(path2);
476
477 if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
478 return 0;
479
480 while (path1[i] == path2[i])
481 {
482 /* Keep track of the last directory separator we hit. */
483 if (path1[i] == '/')
484 last_dirsep = i;
485
486 i++;
487
488 /* If we get to the end of either path, break out. */
489 if ((i == path1_len) || (i == path2_len))
490 break;
491 }
492
493 /* two special cases:
494 1. '/' is the longest common ancestor of '/' and '/foo'
495 2. '/' is the longest common ancestor of '/rif' and '/raf' */
496 if (i == 1 && path1[0] == '/' && path2[0] == '/')
497 return 1;
498
499 /* last_dirsep is now the offset of the last directory separator we
500 crossed before reaching a non-matching byte. i is the offset of
501 that non-matching byte. */
502 if (((i == path1_len) && (path2[i] == '/'))
503 || ((i == path2_len) && (path1[i] == '/'))
504 || ((i == path1_len) && (i == path2_len)))
505 return i;
506 else
507 if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
508 return 1;
509 return last_dirsep;
510 }
511
512
513 char *
svn_path_get_longest_ancestor(const char * path1,const char * path2,apr_pool_t * pool)514 svn_path_get_longest_ancestor(const char *path1,
515 const char *path2,
516 apr_pool_t *pool)
517 {
518 svn_boolean_t path1_is_url = svn_path_is_url(path1);
519 svn_boolean_t path2_is_url = svn_path_is_url(path2);
520
521 /* Are we messing with URLs? If we have a mix of URLs and non-URLs,
522 there's nothing common between them. */
523 if (path1_is_url && path2_is_url)
524 {
525 return svn_uri_get_longest_ancestor(path1, path2, pool);
526 }
527 else if ((! path1_is_url) && (! path2_is_url))
528 {
529 return apr_pstrndup(pool, path1,
530 get_path_ancestor_length(path1, path2, pool));
531 }
532 else
533 {
534 /* A URL and a non-URL => no common prefix */
535 return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
536 }
537 }
538
539 const char *
svn_path_is_child(const char * path1,const char * path2,apr_pool_t * pool)540 svn_path_is_child(const char *path1,
541 const char *path2,
542 apr_pool_t *pool)
543 {
544 apr_size_t i;
545
546 /* assert (is_canonical (path1, strlen (path1))); ### Expensive strlen */
547 /* assert (is_canonical (path2, strlen (path2))); ### Expensive strlen */
548
549 /* Allow "" and "foo" to be parent/child */
550 if (SVN_PATH_IS_EMPTY(path1)) /* "" is the parent */
551 {
552 if (SVN_PATH_IS_EMPTY(path2) /* "" not a child */
553 || path2[0] == '/') /* "/foo" not a child */
554 return NULL;
555 else
556 /* everything else is child */
557 return pool ? apr_pstrdup(pool, path2) : path2;
558 }
559
560 /* Reach the end of at least one of the paths. How should we handle
561 things like path1:"foo///bar" and path2:"foo/bar/baz"? It doesn't
562 appear to arise in the current Subversion code, it's not clear to me
563 if they should be parent/child or not. */
564 for (i = 0; path1[i] && path2[i]; i++)
565 if (path1[i] != path2[i])
566 return NULL;
567
568 /* There are two cases that are parent/child
569 ... path1[i] == '\0'
570 .../foo path2[i] == '/'
571 or
572 / path1[i] == '\0'
573 /foo path2[i] != '/'
574 */
575 if (path1[i] == '\0' && path2[i])
576 {
577 if (path2[i] == '/')
578 return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1;
579 else if (i == 1 && path1[0] == '/')
580 return pool ? apr_pstrdup(pool, path2 + 1) : path2 + 1;
581 }
582
583 /* Otherwise, path2 isn't a child. */
584 return NULL;
585 }
586
587
588 svn_boolean_t
svn_path_is_ancestor(const char * path1,const char * path2)589 svn_path_is_ancestor(const char *path1, const char *path2)
590 {
591 apr_size_t path1_len = strlen(path1);
592
593 /* If path1 is empty and path2 is not absoulte, then path1 is an ancestor. */
594 if (SVN_PATH_IS_EMPTY(path1))
595 return *path2 != '/';
596
597 /* If path1 is a prefix of path2, then:
598 - If path1 ends in a path separator,
599 - If the paths are of the same length
600 OR
601 - path2 starts a new path component after the common prefix,
602 then path1 is an ancestor. */
603 if (strncmp(path1, path2, path1_len) == 0)
604 return path1[path1_len - 1] == '/'
605 || (path2[path1_len] == '/' || path2[path1_len] == '\0');
606
607 return FALSE;
608 }
609
610
611 apr_array_header_t *
svn_path_decompose(const char * path,apr_pool_t * pool)612 svn_path_decompose(const char *path,
613 apr_pool_t *pool)
614 {
615 apr_size_t i, oldi;
616
617 apr_array_header_t *components =
618 apr_array_make(pool, 1, sizeof(const char *));
619
620 assert(svn_path_is_canonical_internal(path, pool));
621
622 if (SVN_PATH_IS_EMPTY(path))
623 return components; /* ### Should we return a "" component? */
624
625 /* If PATH is absolute, store the '/' as the first component. */
626 i = oldi = 0;
627 if (path[i] == '/')
628 {
629 char dirsep = '/';
630
631 APR_ARRAY_PUSH(components, const char *)
632 = apr_pstrmemdup(pool, &dirsep, sizeof(dirsep));
633
634 i++;
635 oldi++;
636 if (path[i] == '\0') /* path is a single '/' */
637 return components;
638 }
639
640 do
641 {
642 if ((path[i] == '/') || (path[i] == '\0'))
643 {
644 if (SVN_PATH_IS_PLATFORM_EMPTY(path + oldi, i - oldi))
645 APR_ARRAY_PUSH(components, const char *) = SVN_EMPTY_PATH;
646 else
647 APR_ARRAY_PUSH(components, const char *)
648 = apr_pstrmemdup(pool, path + oldi, i - oldi);
649
650 i++;
651 oldi = i; /* skipping past the dirsep */
652 continue;
653 }
654 i++;
655 }
656 while (path[i-1]);
657
658 return components;
659 }
660
661
662 const char *
svn_path_compose(const apr_array_header_t * components,apr_pool_t * pool)663 svn_path_compose(const apr_array_header_t *components,
664 apr_pool_t *pool)
665 {
666 apr_size_t *lengths = apr_palloc(pool, components->nelts*sizeof(*lengths));
667 apr_size_t max_length = components->nelts;
668 char *path;
669 char *p;
670 int i;
671
672 /* Get the length of each component so a total length can be
673 calculated. */
674 for (i = 0; i < components->nelts; ++i)
675 {
676 apr_size_t l = strlen(APR_ARRAY_IDX(components, i, const char *));
677 lengths[i] = l;
678 max_length += l;
679 }
680
681 path = apr_palloc(pool, max_length + 1);
682 p = path;
683
684 for (i = 0; i < components->nelts; ++i)
685 {
686 /* Append a '/' to the path. Handle the case with an absolute
687 path where a '/' appears in the first component. Only append
688 a '/' if the component is the second component that does not
689 follow a "/" first component; or it is the third or later
690 component. */
691 if (i > 1 ||
692 (i == 1 && strcmp("/", APR_ARRAY_IDX(components,
693 0,
694 const char *)) != 0))
695 {
696 *p++ = '/';
697 }
698
699 memcpy(p, APR_ARRAY_IDX(components, i, const char *), lengths[i]);
700 p += lengths[i];
701 }
702
703 *p = '\0';
704
705 return path;
706 }
707
708
709 svn_boolean_t
svn_path_is_single_path_component(const char * name)710 svn_path_is_single_path_component(const char *name)
711 {
712 assert(is_canonical(name, strlen(name)));
713
714 /* Can't be empty or `..' */
715 if (SVN_PATH_IS_EMPTY(name)
716 || (name[0] == '.' && name[1] == '.' && name[2] == '\0'))
717 return FALSE;
718
719 /* Slashes are bad, m'kay... */
720 if (strchr(name, '/') != NULL)
721 return FALSE;
722
723 /* It is valid. */
724 return TRUE;
725 }
726
727
728 svn_boolean_t
svn_path_is_dotpath_present(const char * path)729 svn_path_is_dotpath_present(const char *path)
730 {
731 size_t len;
732
733 /* The empty string does not have a dotpath */
734 if (path[0] == '\0')
735 return FALSE;
736
737 /* Handle "." or a leading "./" */
738 if (path[0] == '.' && (path[1] == '\0' || path[1] == '/'))
739 return TRUE;
740
741 /* Paths of length 1 (at this point) have no dotpath present. */
742 if (path[1] == '\0')
743 return FALSE;
744
745 /* If any segment is "/./", then a dotpath is present. */
746 if (strstr(path, "/./") != NULL)
747 return TRUE;
748
749 /* Does the path end in "/." ? */
750 len = strlen(path);
751 return path[len - 2] == '/' && path[len - 1] == '.';
752 }
753
754 svn_boolean_t
svn_path_is_backpath_present(const char * path)755 svn_path_is_backpath_present(const char *path)
756 {
757 size_t len;
758
759 /* 0 and 1-length paths do not have a backpath */
760 if (path[0] == '\0' || path[1] == '\0')
761 return FALSE;
762
763 /* Handle ".." or a leading "../" */
764 if (path[0] == '.' && path[1] == '.' && (path[2] == '\0' || path[2] == '/'))
765 return TRUE;
766
767 /* Paths of length 2 (at this point) have no backpath present. */
768 if (path[2] == '\0')
769 return FALSE;
770
771 /* If any segment is "..", then a backpath is present. */
772 if (strstr(path, "/../") != NULL)
773 return TRUE;
774
775 /* Does the path end in "/.." ? */
776 len = strlen(path);
777 return path[len - 3] == '/' && path[len - 2] == '.' && path[len - 1] == '.';
778 }
779
780
781 /*** URI Stuff ***/
782
/* Treat PATH as a candidate URI.  If it begins with a non-empty scheme
   that contains no '/' and is followed by "://", return a pointer to the
   character just after that "://"; otherwise return NULL.  The returned
   pointer is not allocated -- it points into PATH. */
static const char *
skip_uri_scheme(const char *path)
{
  const char *cursor = path;

  /* A scheme is terminated by a ':' and cannot contain any '/'. */
  while (*cursor != '\0' && *cursor != ':')
    {
      if (*cursor == '/')
        return NULL;
      ++cursor;
    }

  /* The scheme must not be empty ... */
  if (cursor == path)
    return NULL;

  /* ... and must be followed by exactly "://". */
  if (strncmp(cursor, "://", 3) != 0)
    return NULL;

  return cursor + 3;
}
802
803
804 svn_boolean_t
svn_path_is_url(const char * path)805 svn_path_is_url(const char *path)
806 {
807 /* ### This function is reaaaaaaaaaaaaaally stupid right now.
808 We're just going to look for:
809
810 (scheme)://(optional_stuff)
811
812 Where (scheme) has no ':' or '/' characters.
813
814 Someday it might be nice to have an actual URI parser here.
815 */
816 return skip_uri_scheme(path) != NULL;
817 }
818
819
820
/* Lookup table, indexed by byte value (0..255), of the characters that
   may appear unescaped in the path portion of a URI: a non-zero entry
   means the byte is left as it is, a zero entry means it has to be
   written as a %XX escape.

   Here is the BNF for path components in a URI. "pchar" is a
   character in a path component.

      pchar       = unreserved | escaped |
                    ":" | "@" | "&" | "=" | "+" | "$" | ","
      unreserved  = alphanum | mark
      mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"

   Note that "escaped" doesn't really apply to what users can put in
   their paths, so that really means the set of characters is:

      alphanum | mark | ":" | "@" | "&" | "=" | "+" | "$" | ","

   In addition to that set, the table marks the path separator "/"
   (entry 47) as valid.  Every control character, the space, DEL and
   every byte from 128 up is marked invalid.
*/
const char svn_uri__char_validity[256] = {
  /* 0 */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 1, 0, 0, 1, 0, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 1, 0, 0,

  /* 64 */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 1,
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 1, 0,

  /* 128 */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,

  /* 192 */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
};
858
859
860 svn_boolean_t
svn_path_is_uri_safe(const char * path)861 svn_path_is_uri_safe(const char *path)
862 {
863 apr_size_t i;
864
865 /* Skip the URI scheme. */
866 path = skip_uri_scheme(path);
867
868 /* No scheme? Get outta here. */
869 if (! path)
870 return FALSE;
871
872 /* Skip to the first slash that's after the URI scheme. */
873 path = strchr(path, '/');
874
875 /* If there's no first slash, then there's only a host portion;
876 therefore there couldn't be any uri-unsafe characters after the
877 host... so return true. */
878 if (path == NULL)
879 return TRUE;
880
881 for (i = 0; path[i]; i++)
882 {
883 /* Allow '%XX' (where each X is a hex digit) */
884 if (path[i] == '%')
885 {
886 if (svn_ctype_isxdigit(path[i + 1]) &&
887 svn_ctype_isxdigit(path[i + 2]))
888 {
889 i += 2;
890 continue;
891 }
892 return FALSE;
893 }
894 else if (! svn_uri__char_validity[((unsigned char)path[i])])
895 {
896 return FALSE;
897 }
898 }
899
900 return TRUE;
901 }
902
903
904 /* URI-encode each character c in PATH for which TABLE[c] is 0.
905 If no encoding was needed, return PATH, else return a new string allocated
906 in POOL. */
907 static const char *
uri_escape(const char * path,const char table[],apr_pool_t * pool)908 uri_escape(const char *path, const char table[], apr_pool_t *pool)
909 {
910 svn_stringbuf_t *retstr;
911 apr_size_t i, copied = 0;
912 int c;
913
914 retstr = svn_stringbuf_create_ensure(strlen(path), pool);
915 for (i = 0; path[i]; i++)
916 {
917 c = (unsigned char)path[i];
918 if (table[c])
919 continue;
920
921 /* If we got here, we're looking at a character that isn't
922 supported by the (or at least, our) URI encoding scheme. We
923 need to escape this character. */
924
925 /* First things first, copy all the good stuff that we haven't
926 yet copied into our output buffer. */
927 if (i - copied)
928 svn_stringbuf_appendbytes(retstr, path + copied,
929 i - copied);
930
931 /* Now, write in our escaped character, consisting of the
932 '%' and two digits. We cast the C to unsigned char here because
933 the 'X' format character will be tempted to treat it as an unsigned
934 int...which causes problem when messing with 0x80-0xFF chars.
935 We also need space for a null as apr_snprintf will write one. */
936 svn_stringbuf_ensure(retstr, retstr->len + 4);
937 apr_snprintf(retstr->data + retstr->len, 4, "%%%02X", (unsigned char)c);
938 retstr->len += 3;
939
940 /* Finally, update our copy counter. */
941 copied = i + 1;
942 }
943
944 /* If we didn't encode anything, we don't need to duplicate the string. */
945 if (retstr->len == 0)
946 return path;
947
948 /* Anything left to copy? */
949 if (i - copied)
950 svn_stringbuf_appendbytes(retstr, path + copied, i - copied);
951
952 /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf
953 functions. */
954
955 return retstr->data;
956 }
957
958
959 const char *
svn_path_uri_encode(const char * path,apr_pool_t * pool)960 svn_path_uri_encode(const char *path, apr_pool_t *pool)
961 {
962 const char *ret;
963
964 ret = uri_escape(path, svn_uri__char_validity, pool);
965
966 /* Our interface guarantees a copy. */
967 if (ret == path)
968 return apr_pstrdup(pool, path);
969 else
970 return ret;
971 }
972
/* Escape table for svn_path_uri_from_iri, in the format uri_escape
   expects: a non-zero entry means the byte is copied unchanged, a zero
   entry means it is written as a %XX escape.  Every byte below 128 is
   copied; every byte from 128 up is escaped. */
static const char iri_escape_chars[256] = {
  /* 0 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

  /* 128 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
993
994 const char *
svn_path_uri_from_iri(const char * iri,apr_pool_t * pool)995 svn_path_uri_from_iri(const char *iri, apr_pool_t *pool)
996 {
997 return uri_escape(iri, iri_escape_chars, pool);
998 }
999
/* Escape table for svn_path_uri_autoescape, in the format uri_escape
   expects: a non-zero entry means the byte is copied unchanged, a zero
   entry means it is written as a %XX escape.  The only bytes escaped are
   the space and the ASCII characters
   '"', '<', '>', '\', '^', '`', '{', '|' and '}';
   everything else, including control characters and bytes from 128 up,
   is copied as it is. */
static const char uri_autoescape_chars[256] = {
  /* 0 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,

  /* 64 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,

  /* 128 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

  /* 192 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
1024
1025 const char *
svn_path_uri_autoescape(const char * uri,apr_pool_t * pool)1026 svn_path_uri_autoescape(const char *uri, apr_pool_t *pool)
1027 {
1028 return uri_escape(uri, uri_autoescape_chars, pool);
1029 }
1030
1031 const char *
svn_path_uri_decode(const char * path,apr_pool_t * pool)1032 svn_path_uri_decode(const char *path, apr_pool_t *pool)
1033 {
1034 svn_stringbuf_t *retstr;
1035 apr_size_t i;
1036 svn_boolean_t query_start = FALSE;
1037
1038 /* avoid repeated realloc */
1039 retstr = svn_stringbuf_create_ensure(strlen(path) + 1, pool);
1040
1041 retstr->len = 0;
1042 for (i = 0; path[i]; i++)
1043 {
1044 char c = path[i];
1045
1046 if (c == '?')
1047 {
1048 /* Mark the start of the query string, if it exists. */
1049 query_start = TRUE;
1050 }
1051 else if (c == '+' && query_start)
1052 {
1053 /* Only do this if we are into the query string.
1054 * RFC 2396, section 3.3 */
1055 c = ' ';
1056 }
1057 else if (c == '%' && svn_ctype_isxdigit(path[i + 1])
1058 && svn_ctype_isxdigit(path[i+2]))
1059 {
1060 char digitz[3];
1061 digitz[0] = path[++i];
1062 digitz[1] = path[++i];
1063 digitz[2] = '\0';
1064 c = (char)(strtol(digitz, NULL, 16));
1065 }
1066
1067 retstr->data[retstr->len++] = c;
1068 }
1069
1070 /* Null-terminate this bad-boy. */
1071 retstr->data[retstr->len] = 0;
1072
1073 return retstr->data;
1074 }
1075
1076
1077 const char *
svn_path_url_add_component2(const char * url,const char * component,apr_pool_t * pool)1078 svn_path_url_add_component2(const char *url,
1079 const char *component,
1080 apr_pool_t *pool)
1081 {
1082 /* = svn_path_uri_encode() but without always copying */
1083 component = uri_escape(component, svn_uri__char_validity, pool);
1084
1085 return svn_path_join_internal(url, component, pool);
1086 }
1087
1088 svn_error_t *
svn_path_get_absolute(const char ** pabsolute,const char * relative,apr_pool_t * pool)1089 svn_path_get_absolute(const char **pabsolute,
1090 const char *relative,
1091 apr_pool_t *pool)
1092 {
1093 if (svn_path_is_url(relative))
1094 {
1095 *pabsolute = apr_pstrdup(pool, relative);
1096 return SVN_NO_ERROR;
1097 }
1098
1099 return svn_dirent_get_absolute(pabsolute, relative, pool);
1100 }
1101
1102
1103 #if !defined(WIN32) && !defined(DARWIN)
1104 /** Get APR's internal path encoding. */
1105 static svn_error_t *
get_path_encoding(svn_boolean_t * path_is_utf8,apr_pool_t * pool)1106 get_path_encoding(svn_boolean_t *path_is_utf8, apr_pool_t *pool)
1107 {
1108 apr_status_t apr_err;
1109 int encoding_style;
1110
1111 apr_err = apr_filepath_encoding(&encoding_style, pool);
1112 if (apr_err)
1113 return svn_error_wrap_apr(apr_err,
1114 _("Can't determine the native path encoding"));
1115
1116 /* ### What to do about APR_FILEPATH_ENCODING_UNKNOWN?
1117 Well, for now we'll just punt to the svn_utf_ functions;
1118 those will at least do the ASCII-subset check. */
1119 *path_is_utf8 = (encoding_style == APR_FILEPATH_ENCODING_UTF8);
1120 return SVN_NO_ERROR;
1121 }
1122 #endif
1123
1124
1125 svn_error_t *
svn_path_cstring_from_utf8(const char ** path_apr,const char * path_utf8,apr_pool_t * pool)1126 svn_path_cstring_from_utf8(const char **path_apr,
1127 const char *path_utf8,
1128 apr_pool_t *pool)
1129 {
1130 #if !defined(WIN32) && !defined(DARWIN)
1131 svn_boolean_t path_is_utf8;
1132 SVN_ERR(get_path_encoding(&path_is_utf8, pool));
1133 if (path_is_utf8)
1134 #endif
1135 {
1136 *path_apr = apr_pstrdup(pool, path_utf8);
1137 return SVN_NO_ERROR;
1138 }
1139 #if !defined(WIN32) && !defined(DARWIN)
1140 else
1141 return svn_utf_cstring_from_utf8(path_apr, path_utf8, pool);
1142 #endif
1143 }
1144
1145
1146 svn_error_t *
svn_path_cstring_to_utf8(const char ** path_utf8,const char * path_apr,apr_pool_t * pool)1147 svn_path_cstring_to_utf8(const char **path_utf8,
1148 const char *path_apr,
1149 apr_pool_t *pool)
1150 {
1151 #if !defined(WIN32) && !defined(DARWIN)
1152 svn_boolean_t path_is_utf8;
1153 SVN_ERR(get_path_encoding(&path_is_utf8, pool));
1154 if (path_is_utf8)
1155 #endif
1156 {
1157 *path_utf8 = apr_pstrdup(pool, path_apr);
1158 return SVN_NO_ERROR;
1159 }
1160 #if !defined(WIN32) && !defined(DARWIN)
1161 else
1162 return svn_utf_cstring_to_utf8(path_utf8, path_apr, pool);
1163 #endif
1164 }
1165
1166
1167 /* Return a copy of PATH, allocated from POOL, for which control
1168 characters have been escaped using the form \NNN (where NNN is the
1169 octal representation of the byte's ordinal value). */
1170 const char *
svn_path_illegal_path_escape(const char * path,apr_pool_t * pool)1171 svn_path_illegal_path_escape(const char *path, apr_pool_t *pool)
1172 {
1173 svn_stringbuf_t *retstr;
1174 apr_size_t i, copied = 0;
1175 int c;
1176
1177 /* At least one control character:
1178 strlen - 1 (control) + \ + N + N + N + null . */
1179 retstr = svn_stringbuf_create_ensure(strlen(path) + 4, pool);
1180 for (i = 0; path[i]; i++)
1181 {
1182 c = (unsigned char)path[i];
1183 if (! svn_ctype_iscntrl(c))
1184 continue;
1185
1186 /* If we got here, we're looking at a character that isn't
1187 supported by the (or at least, our) URI encoding scheme. We
1188 need to escape this character. */
1189
1190 /* First things first, copy all the good stuff that we haven't
1191 yet copied into our output buffer. */
1192 if (i - copied)
1193 svn_stringbuf_appendbytes(retstr, path + copied,
1194 i - copied);
1195
1196 /* Make sure buffer is big enough for '\' 'N' 'N' 'N' (and NUL) */
1197 svn_stringbuf_ensure(retstr, retstr->len + 5);
1198 /*### The backslash separator doesn't work too great with Windows,
1199 but it's what we'll use for consistency with invalid utf8
1200 formatting (until someone has a better idea) */
1201 apr_snprintf(retstr->data + retstr->len, 5, "\\%03o", (unsigned char)c);
1202 retstr->len += 4;
1203
1204 /* Finally, update our copy counter. */
1205 copied = i + 1;
1206 }
1207
1208 /* If we didn't encode anything, we don't need to duplicate the string. */
1209 if (retstr->len == 0)
1210 return path;
1211
1212 /* Anything left to copy? */
1213 if (i - copied)
1214 svn_stringbuf_appendbytes(retstr, path + copied, i - copied);
1215
1216 /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf
1217 functions. */
1218
1219 return retstr->data;
1220 }
1221
1222 svn_error_t *
svn_path_check_valid(const char * path,apr_pool_t * pool)1223 svn_path_check_valid(const char *path, apr_pool_t *pool)
1224 {
1225 const char *c;
1226
1227 for (c = path; *c; c++)
1228 {
1229 if (svn_ctype_iscntrl(*c))
1230 {
1231 return svn_error_createf
1232 (SVN_ERR_FS_PATH_SYNTAX, NULL,
1233 _("Invalid control character '0x%02x' in path '%s'"),
1234 (unsigned char)*c,
1235 svn_path_illegal_path_escape(svn_dirent_local_style(path, pool),
1236 pool));
1237 }
1238 }
1239
1240 return SVN_NO_ERROR;
1241 }
1242
1243 void
svn_path_splitext(const char ** path_root,const char ** path_ext,const char * path,apr_pool_t * pool)1244 svn_path_splitext(const char **path_root,
1245 const char **path_ext,
1246 const char *path,
1247 apr_pool_t *pool)
1248 {
1249 const char *last_dot, *last_slash;
1250
1251 /* Easy out -- why do all the work when there's no way to report it? */
1252 if (! (path_root || path_ext))
1253 return;
1254
1255 /* Do we even have a period in this thing? And if so, is there
1256 anything after it? We look for the "rightmost" period in the
1257 string. */
1258 last_dot = strrchr(path, '.');
1259 if (last_dot && (last_dot + 1 != '\0'))
1260 {
1261 /* If we have a period, we need to make sure it occurs in the
1262 final path component -- that there's no path separator
1263 between the last period and the end of the PATH -- otherwise,
1264 it doesn't count. Also, we want to make sure that our period
1265 isn't the first character of the last component. */
1266 last_slash = strrchr(path, '/');
1267 if ((last_slash && (last_dot > (last_slash + 1)))
1268 || ((! last_slash) && (last_dot > path)))
1269 {
1270 if (path_root)
1271 *path_root = apr_pstrmemdup(pool, path,
1272 (last_dot - path + 1) * sizeof(*path));
1273 if (path_ext)
1274 *path_ext = apr_pstrdup(pool, last_dot + 1);
1275 return;
1276 }
1277 }
1278 /* If we get here, we never found a suitable separator character, so
1279 there's no split. */
1280 if (path_root)
1281 *path_root = apr_pstrdup(pool, path);
1282 if (path_ext)
1283 *path_ext = "";
1284 }
1285
1286
1287 /* Repository relative URLs (^/). */
1288
1289 svn_boolean_t
svn_path_is_repos_relative_url(const char * path)1290 svn_path_is_repos_relative_url(const char *path)
1291 {
1292 return (0 == strncmp("^/", path, 2));
1293 }
1294
1295 svn_error_t *
svn_path_resolve_repos_relative_url(const char ** absolute_url,const char * relative_url,const char * repos_root_url,apr_pool_t * pool)1296 svn_path_resolve_repos_relative_url(const char **absolute_url,
1297 const char *relative_url,
1298 const char *repos_root_url,
1299 apr_pool_t *pool)
1300 {
1301 if (! svn_path_is_repos_relative_url(relative_url))
1302 return svn_error_createf(SVN_ERR_BAD_URL, NULL,
1303 _("Improper relative URL '%s'"),
1304 relative_url);
1305
1306 /* No assumptions are made about the canonicalization of the inut
1307 * arguments, it is presumed that the output will be canonicalized after
1308 * this function, which will remove any duplicate path separator.
1309 */
1310 *absolute_url = apr_pstrcat(pool, repos_root_url, relative_url + 1,
1311 (char *)NULL);
1312
1313 return SVN_NO_ERROR;
1314 }
1315
1316