xref: /freebsd-11-stable/contrib/subversion/subversion/libsvn_subr/path.c (revision 3c9339f7792540596bf97077a8f403e944af7f39)
1 /*
2  * paths.c:   a path manipulation library using svn_stringbuf_t
3  *
4  * ====================================================================
5  *    Licensed to the Apache Software Foundation (ASF) under one
6  *    or more contributor license agreements.  See the NOTICE file
7  *    distributed with this work for additional information
8  *    regarding copyright ownership.  The ASF licenses this file
9  *    to you under the Apache License, Version 2.0 (the
10  *    "License"); you may not use this file except in compliance
11  *    with the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  *    Unless required by applicable law or agreed to in writing,
16  *    software distributed under the License is distributed on an
17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18  *    KIND, either express or implied.  See the License for the
19  *    specific language governing permissions and limitations
20  *    under the License.
21  * ====================================================================
22  */
23 
24 
25 
26 #include <string.h>
27 #include <assert.h>
28 
29 #include <apr_file_info.h>
30 #include <apr_lib.h>
31 #include <apr_uri.h>
32 
33 #include "svn_string.h"
34 #include "svn_dirent_uri.h"
35 #include "svn_path.h"
36 #include "svn_private_config.h"         /* for SVN_PATH_LOCAL_SEPARATOR */
37 #include "svn_utf.h"
38 #include "svn_io.h"                     /* for svn_io_stat() */
39 #include "svn_ctype.h"
40 
41 #include "dirent_uri.h"
42 
43 
/* Subversion's canonical spelling of the empty path.  The routines in
   this file only depend on SVN_PATH_IS_EMPTY() agreeing with this value;
   whether the fs/wc libraries would cope with a different spelling is
   an open question. */
#define SVN_EMPTY_PATH ""

/* Evaluates to true when S is the canonical empty path, false otherwise. */
#define SVN_PATH_IS_EMPTY(s) (*(s) == '\0')

/* Evaluates to true when the N-byte string S is the platform's spelling
   of the empty path ("."), false otherwise.  The path library itself does
   not care what that spelling is, but the operating system might. */
#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && *(s) == '.')
56 
57 
58 
59 
60 #ifndef NDEBUG
61 /* This function is an approximation of svn_path_is_canonical.
62  * It is supposed to be used in functions that do not have access
63  * to a pool, but still want to assert that a path is canonical.
64  *
65  * PATH with length LEN is assumed to be canonical if it isn't
66  * the platform's empty path (see definition of SVN_PATH_IS_PLATFORM_EMPTY),
67  * and does not contain "/./", and any one of the following
68  * conditions is also met:
69  *
70  *  1. PATH has zero length
71  *  2. PATH is the root directory (what exactly a root directory is
72  *                                depends on the platform)
73  *  3. PATH is not a root directory and does not end with '/'
74  *
75  * If possible, please use svn_path_is_canonical instead.
76  */
77 static svn_boolean_t
is_canonical(const char * path,apr_size_t len)78 is_canonical(const char *path,
79              apr_size_t len)
80 {
81   return (! SVN_PATH_IS_PLATFORM_EMPTY(path, len)
82           && strstr(path, "/./") == NULL
83           && (len == 0
84               || (len == 1 && path[0] == '/')
85               || (path[len-1] != '/')
86 #if defined(WIN32) || defined(__CYGWIN__)
87               || svn_dirent_is_root(path, len)
88 #endif
89               ));
90 }
91 #endif
92 
93 
94 /* functionality of svn_path_is_canonical but without the deprecation */
95 static svn_boolean_t
svn_path_is_canonical_internal(const char * path,apr_pool_t * pool)96 svn_path_is_canonical_internal(const char *path, apr_pool_t *pool)
97 {
98   return svn_uri_is_canonical(path, pool) ||
99       svn_dirent_is_canonical(path, pool) ||
100       svn_relpath_is_canonical(path);
101 }
102 
103 svn_boolean_t
svn_path_is_canonical(const char * path,apr_pool_t * pool)104 svn_path_is_canonical(const char *path, apr_pool_t *pool)
105 {
106   return svn_path_is_canonical_internal(path, pool);
107 }
108 
109 /* functionality of svn_path_join but without the deprecation */
110 static char *
svn_path_join_internal(const char * base,const char * component,apr_pool_t * pool)111 svn_path_join_internal(const char *base,
112                        const char *component,
113                        apr_pool_t *pool)
114 {
115   apr_size_t blen = strlen(base);
116   apr_size_t clen = strlen(component);
117   char *path;
118 
119   assert(svn_path_is_canonical_internal(base, pool));
120   assert(svn_path_is_canonical_internal(component, pool));
121 
122   /* If the component is absolute, then return it.  */
123   if (*component == '/')
124     return apr_pmemdup(pool, component, clen + 1);
125 
126   /* If either is empty return the other */
127   if (SVN_PATH_IS_EMPTY(base))
128     return apr_pmemdup(pool, component, clen + 1);
129   if (SVN_PATH_IS_EMPTY(component))
130     return apr_pmemdup(pool, base, blen + 1);
131 
132   if (blen == 1 && base[0] == '/')
133     blen = 0; /* Ignore base, just return separator + component */
134 
135   /* Construct the new, combined path. */
136   path = apr_palloc(pool, blen + 1 + clen + 1);
137   memcpy(path, base, blen);
138   path[blen] = '/';
139   memcpy(path + blen + 1, component, clen + 1);
140 
141   return path;
142 }
143 
/* Public entry point: join BASE and COMPONENT into a path allocated in
   POOL.  All of the work is delegated to svn_path_join_internal(). */
char *svn_path_join(const char *base,
                    const char *component,
                    apr_pool_t *pool)
{
  char *joined = svn_path_join_internal(base, component, pool);

  return joined;
}
150 
/* Join BASE with a NULL-terminated list of further `const char *' path
   components and return the result, allocated in POOL.

   Rules applied, as implemented below:
     - empty components are skipped entirely;
     - an absolute component (one starting with '/') discards BASE and
       every component that preceded it;
     - components are separated by a single '/', and no separator is
       added directly after a "/" root;
     - if the effective base is "/" and nothing follows it, "/" is
       returned.

   BASE and every component are asserted to be canonical.  The argument
   list MUST end with a NULL pointer. */
char *svn_path_join_many(apr_pool_t *pool, const char *base, ...)
{
#define MAX_SAVED_LENGTHS 10
  /* Length cache filled by the sizing pass so the copy pass can avoid
     repeating strlen().  Slot 0 belongs to BASE; slots
     1 .. MAX_SAVED_LENGTHS-1 belong to the first non-empty components.
     Components beyond that are simply measured again. */
  apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
  apr_size_t total_len;
  int nargs;
  va_list va;
  const char *s;
  apr_size_t len;
  char *path;
  char *p;
  svn_boolean_t base_is_empty = FALSE, base_is_root = FALSE;
  int base_arg = 0;   /* index of the component the result starts from;
                         0 means BASE itself */

  total_len = strlen(base);

  assert(svn_path_is_canonical_internal(base, pool));

  if (total_len == 1 && *base == '/')
    base_is_root = TRUE;
  else if (SVN_PATH_IS_EMPTY(base))
    {
      total_len = sizeof(SVN_EMPTY_PATH) - 1;
      base_is_empty = TRUE;
    }

  saved_lengths[0] = total_len;

  /* Pass 1: compute the length of the resulting string. */

  nargs = 0;
  va_start(va, base);
  while ((s = va_arg(va, const char *)) != NULL)
    {
      len = strlen(s);

      assert(svn_path_is_canonical_internal(s, pool));

      if (SVN_PATH_IS_EMPTY(s))
        continue;

      /* Count this component and cache its length if a slot is left.
         The bound has to be tested AFTER the increment: testing before
         it would let the tenth component store into
         saved_lengths[MAX_SAVED_LENGTHS], one element past the end of
         the array.  The copy pass reads the cache with the same
         `nargs < MAX_SAVED_LENGTHS' bound, so no cached value is lost. */
      if (++nargs < MAX_SAVED_LENGTHS)
        saved_lengths[nargs] = len;

      if (*s == '/')
        {
          /* an absolute path. skip all components to this point and reset
             the total length. */
          total_len = len;
          base_arg = nargs;
          base_is_root = len == 1;
          base_is_empty = FALSE;
        }
      else if (nargs == base_arg
               || (nargs == base_arg + 1 && base_is_root)
               || base_is_empty)
        {
          /* if we have skipped everything up to this arg, then the base
             and all prior components are empty. just set the length to
             this component; do not add a separator.  If the base is empty
             we can now ignore it. */
          if (base_is_empty)
            {
              base_is_empty = FALSE;
              total_len = 0;
            }
          total_len += len;
        }
      else
        {
          /* ordinary component: separator plus the component itself. */
          total_len += 1 + len;
        }
    }
  va_end(va);

  /* base == "/" and no further components. just return that. */
  if (base_is_root && total_len == 1)
    return apr_pmemdup(pool, "/", 2);

  /* we got the total size. allocate it, with room for a NUL character. */
  path = p = apr_palloc(pool, total_len + 1);

  /* if we aren't supposed to skip forward to an absolute component, and if
     this is not an empty base that we are skipping, then copy the base
     into the output. */
  if (base_arg == 0 && ! (SVN_PATH_IS_EMPTY(base) && ! base_is_empty))
    {
      if (SVN_PATH_IS_EMPTY(base))
        memcpy(p, SVN_EMPTY_PATH, len = saved_lengths[0]);
      else
        memcpy(p, base, len = saved_lengths[0]);
      p += len;
    }

  /* Pass 2: copy the components, numbering them exactly as pass 1 did. */

  nargs = 0;
  va_start(va, base);
  while ((s = va_arg(va, const char *)) != NULL)
    {
      if (SVN_PATH_IS_EMPTY(s))
        continue;

      /* everything before the last absolute component is dropped. */
      if (++nargs < base_arg)
        continue;

      if (nargs < MAX_SAVED_LENGTHS)
        len = saved_lengths[nargs];
      else
        len = strlen(s);

      /* insert a separator if we aren't copying in the first component
         (which can happen when base_arg is set). also, don't put in a slash
         if the prior character is a slash (occurs when prior component
         is "/"). */
      if (p != path && p[-1] != '/')
        *p++ = '/';

      /* copy the new component and advance the pointer */
      memcpy(p, s, len);
      p += len;
    }
  va_end(va);

  *p = '\0';
  /* the copy pass must have produced exactly what the sizing pass
     predicted. */
  assert((apr_size_t)(p - path) == total_len);

  return path;
}
278 
279 
280 
281 apr_size_t
svn_path_component_count(const char * path)282 svn_path_component_count(const char *path)
283 {
284   apr_size_t count = 0;
285 
286   assert(is_canonical(path, strlen(path)));
287 
288   while (*path)
289     {
290       const char *start;
291 
292       while (*path == '/')
293         ++path;
294 
295       start = path;
296 
297       while (*path && *path != '/')
298         ++path;
299 
300       if (path != start)
301         ++count;
302     }
303 
304   return count;
305 }
306 
307 
308 /* Return the length of substring necessary to encompass the entire
309  * previous path segment in PATH, which should be a LEN byte string.
310  *
311  * A trailing slash will not be included in the returned length except
312  * in the case in which PATH is absolute and there are no more
313  * previous segments.
314  */
315 static apr_size_t
previous_segment(const char * path,apr_size_t len)316 previous_segment(const char *path,
317                  apr_size_t len)
318 {
319   if (len == 0)
320     return 0;
321 
322   while (len > 0 && path[--len] != '/')
323     ;
324 
325   if (len == 0 && path[0] == '/')
326     return 1;
327   else
328     return len;
329 }
330 
331 
332 void
svn_path_add_component(svn_stringbuf_t * path,const char * component)333 svn_path_add_component(svn_stringbuf_t *path,
334                        const char *component)
335 {
336   apr_size_t len = strlen(component);
337 
338   assert(is_canonical(path->data, path->len));
339   assert(is_canonical(component, strlen(component)));
340 
341   /* Append a dir separator, but only if this path is neither empty
342      nor consists of a single dir separator already. */
343   if ((! SVN_PATH_IS_EMPTY(path->data))
344       && (! ((path->len == 1) && (*(path->data) == '/'))))
345     {
346       char dirsep = '/';
347       svn_stringbuf_appendbytes(path, &dirsep, sizeof(dirsep));
348     }
349 
350   svn_stringbuf_appendbytes(path, component, len);
351 }
352 
353 
354 void
svn_path_remove_component(svn_stringbuf_t * path)355 svn_path_remove_component(svn_stringbuf_t *path)
356 {
357   assert(is_canonical(path->data, path->len));
358 
359   path->len = previous_segment(path->data, path->len);
360   path->data[path->len] = '\0';
361 }
362 
363 
364 void
svn_path_remove_components(svn_stringbuf_t * path,apr_size_t n)365 svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n)
366 {
367   while (n > 0)
368     {
369       svn_path_remove_component(path);
370       n--;
371     }
372 }
373 
374 
375 char *
svn_path_dirname(const char * path,apr_pool_t * pool)376 svn_path_dirname(const char *path, apr_pool_t *pool)
377 {
378   apr_size_t len = strlen(path);
379 
380   assert(svn_path_is_canonical_internal(path, pool));
381 
382   return apr_pstrmemdup(pool, path, previous_segment(path, len));
383 }
384 
385 
386 char *
svn_path_basename(const char * path,apr_pool_t * pool)387 svn_path_basename(const char *path, apr_pool_t *pool)
388 {
389   apr_size_t len = strlen(path);
390   apr_size_t start;
391 
392   assert(svn_path_is_canonical_internal(path, pool));
393 
394   if (len == 1 && path[0] == '/')
395     start = 0;
396   else
397     {
398       start = len;
399       while (start > 0 && path[start - 1] != '/')
400         --start;
401     }
402 
403   return apr_pstrmemdup(pool, path + start, len - start);
404 }
405 
/* Return 1 if PATH is the canonical empty path, 0 otherwise.  PATH is
   asserted to pass the is_canonical() approximation. */
int
svn_path_is_empty(const char *path)
{
  assert(is_canonical(path, strlen(path)));

  return SVN_PATH_IS_EMPTY(path) ? 1 : 0;
}
416 
417 int
svn_path_compare_paths(const char * path1,const char * path2)418 svn_path_compare_paths(const char *path1,
419                        const char *path2)
420 {
421   apr_size_t path1_len = strlen(path1);
422   apr_size_t path2_len = strlen(path2);
423   apr_size_t min_len = ((path1_len < path2_len) ? path1_len : path2_len);
424   apr_size_t i = 0;
425 
426   assert(is_canonical(path1, path1_len));
427   assert(is_canonical(path2, path2_len));
428 
429   /* Skip past common prefix. */
430   while (i < min_len && path1[i] == path2[i])
431     ++i;
432 
433   /* Are the paths exactly the same? */
434   if ((path1_len == path2_len) && (i >= min_len))
435     return 0;
436 
437   /* Children of paths are greater than their parents, but less than
438      greater siblings of their parents. */
439   if ((path1[i] == '/') && (path2[i] == 0))
440     return 1;
441   if ((path2[i] == '/') && (path1[i] == 0))
442     return -1;
443   if (path1[i] == '/')
444     return -1;
445   if (path2[i] == '/')
446     return 1;
447 
448   /* Common prefix was skipped above, next character is compared to
449      determine order.  We need to use an unsigned comparison, though,
450      so a "next character" of NULL (0x00) sorts numerically
451      smallest. */
452   return (unsigned char)(path1[i]) < (unsigned char)(path2[i]) ? -1 : 1;
453 }
454 
455 /* Return the string length of the longest common ancestor of PATH1 and PATH2.
456  *
457  * This function handles everything except the URL-handling logic
458  * of svn_path_get_longest_ancestor, and assumes that PATH1 and
459  * PATH2 are *not* URLs.
460  *
461  * If the two paths do not share a common ancestor, return 0.
462  *
463  * New strings are allocated in POOL.
464  */
465 static apr_size_t
get_path_ancestor_length(const char * path1,const char * path2,apr_pool_t * pool)466 get_path_ancestor_length(const char *path1,
467                          const char *path2,
468                          apr_pool_t *pool)
469 {
470   apr_size_t path1_len, path2_len;
471   apr_size_t i = 0;
472   apr_size_t last_dirsep = 0;
473 
474   path1_len = strlen(path1);
475   path2_len = strlen(path2);
476 
477   if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
478     return 0;
479 
480   while (path1[i] == path2[i])
481     {
482       /* Keep track of the last directory separator we hit. */
483       if (path1[i] == '/')
484         last_dirsep = i;
485 
486       i++;
487 
488       /* If we get to the end of either path, break out. */
489       if ((i == path1_len) || (i == path2_len))
490         break;
491     }
492 
493   /* two special cases:
494      1. '/' is the longest common ancestor of '/' and '/foo'
495      2. '/' is the longest common ancestor of '/rif' and '/raf' */
496   if (i == 1 && path1[0] == '/' && path2[0] == '/')
497     return 1;
498 
499   /* last_dirsep is now the offset of the last directory separator we
500      crossed before reaching a non-matching byte.  i is the offset of
501      that non-matching byte. */
502   if (((i == path1_len) && (path2[i] == '/'))
503            || ((i == path2_len) && (path1[i] == '/'))
504            || ((i == path1_len) && (i == path2_len)))
505     return i;
506   else
507     if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
508       return 1;
509   return last_dirsep;
510 }
511 
512 
513 char *
svn_path_get_longest_ancestor(const char * path1,const char * path2,apr_pool_t * pool)514 svn_path_get_longest_ancestor(const char *path1,
515                               const char *path2,
516                               apr_pool_t *pool)
517 {
518   svn_boolean_t path1_is_url = svn_path_is_url(path1);
519   svn_boolean_t path2_is_url = svn_path_is_url(path2);
520 
521   /* Are we messing with URLs?  If we have a mix of URLs and non-URLs,
522      there's nothing common between them.  */
523   if (path1_is_url && path2_is_url)
524     {
525       return svn_uri_get_longest_ancestor(path1, path2, pool);
526     }
527   else if ((! path1_is_url) && (! path2_is_url))
528     {
529       return apr_pstrndup(pool, path1,
530                           get_path_ancestor_length(path1, path2, pool));
531     }
532   else
533     {
534       /* A URL and a non-URL => no common prefix */
535       return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
536     }
537 }
538 
539 const char *
svn_path_is_child(const char * path1,const char * path2,apr_pool_t * pool)540 svn_path_is_child(const char *path1,
541                   const char *path2,
542                   apr_pool_t *pool)
543 {
544   apr_size_t i;
545 
546   /* assert (is_canonical (path1, strlen (path1)));  ### Expensive strlen */
547   /* assert (is_canonical (path2, strlen (path2)));  ### Expensive strlen */
548 
549   /* Allow "" and "foo" to be parent/child */
550   if (SVN_PATH_IS_EMPTY(path1))               /* "" is the parent  */
551     {
552       if (SVN_PATH_IS_EMPTY(path2)            /* "" not a child    */
553           || path2[0] == '/')                  /* "/foo" not a child */
554         return NULL;
555       else
556         /* everything else is child */
557         return pool ? apr_pstrdup(pool, path2) : path2;
558     }
559 
560   /* Reach the end of at least one of the paths.  How should we handle
561      things like path1:"foo///bar" and path2:"foo/bar/baz"?  It doesn't
562      appear to arise in the current Subversion code, it's not clear to me
563      if they should be parent/child or not. */
564   for (i = 0; path1[i] && path2[i]; i++)
565     if (path1[i] != path2[i])
566       return NULL;
567 
568   /* There are two cases that are parent/child
569           ...      path1[i] == '\0'
570           .../foo  path2[i] == '/'
571       or
572           /        path1[i] == '\0'
573           /foo     path2[i] != '/'
574   */
575   if (path1[i] == '\0' && path2[i])
576     {
577       if (path2[i] == '/')
578         return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1;
579       else if (i == 1 && path1[0] == '/')
580         return pool ? apr_pstrdup(pool, path2 + 1) : path2 + 1;
581     }
582 
583   /* Otherwise, path2 isn't a child. */
584   return NULL;
585 }
586 
587 
588 svn_boolean_t
svn_path_is_ancestor(const char * path1,const char * path2)589 svn_path_is_ancestor(const char *path1, const char *path2)
590 {
591   apr_size_t path1_len = strlen(path1);
592 
593   /* If path1 is empty and path2 is not absoulte, then path1 is an ancestor. */
594   if (SVN_PATH_IS_EMPTY(path1))
595     return *path2 != '/';
596 
597   /* If path1 is a prefix of path2, then:
598      - If path1 ends in a path separator,
599      - If the paths are of the same length
600      OR
601      - path2 starts a new path component after the common prefix,
602      then path1 is an ancestor. */
603   if (strncmp(path1, path2, path1_len) == 0)
604     return path1[path1_len - 1] == '/'
605       || (path2[path1_len] == '/' || path2[path1_len] == '\0');
606 
607   return FALSE;
608 }
609 
610 
611 apr_array_header_t *
svn_path_decompose(const char * path,apr_pool_t * pool)612 svn_path_decompose(const char *path,
613                    apr_pool_t *pool)
614 {
615   apr_size_t i, oldi;
616 
617   apr_array_header_t *components =
618     apr_array_make(pool, 1, sizeof(const char *));
619 
620   assert(svn_path_is_canonical_internal(path, pool));
621 
622   if (SVN_PATH_IS_EMPTY(path))
623     return components;  /* ### Should we return a "" component? */
624 
625   /* If PATH is absolute, store the '/' as the first component. */
626   i = oldi = 0;
627   if (path[i] == '/')
628     {
629       char dirsep = '/';
630 
631       APR_ARRAY_PUSH(components, const char *)
632         = apr_pstrmemdup(pool, &dirsep, sizeof(dirsep));
633 
634       i++;
635       oldi++;
636       if (path[i] == '\0') /* path is a single '/' */
637         return components;
638     }
639 
640   do
641     {
642       if ((path[i] == '/') || (path[i] == '\0'))
643         {
644           if (SVN_PATH_IS_PLATFORM_EMPTY(path + oldi, i - oldi))
645             APR_ARRAY_PUSH(components, const char *) = SVN_EMPTY_PATH;
646           else
647             APR_ARRAY_PUSH(components, const char *)
648               = apr_pstrmemdup(pool, path + oldi, i - oldi);
649 
650           i++;
651           oldi = i;  /* skipping past the dirsep */
652           continue;
653         }
654       i++;
655     }
656   while (path[i-1]);
657 
658   return components;
659 }
660 
661 
662 const char *
svn_path_compose(const apr_array_header_t * components,apr_pool_t * pool)663 svn_path_compose(const apr_array_header_t *components,
664                  apr_pool_t *pool)
665 {
666   apr_size_t *lengths = apr_palloc(pool, components->nelts*sizeof(*lengths));
667   apr_size_t max_length = components->nelts;
668   char *path;
669   char *p;
670   int i;
671 
672   /* Get the length of each component so a total length can be
673      calculated. */
674   for (i = 0; i < components->nelts; ++i)
675     {
676       apr_size_t l = strlen(APR_ARRAY_IDX(components, i, const char *));
677       lengths[i] = l;
678       max_length += l;
679     }
680 
681   path = apr_palloc(pool, max_length + 1);
682   p = path;
683 
684   for (i = 0; i < components->nelts; ++i)
685     {
686       /* Append a '/' to the path.  Handle the case with an absolute
687          path where a '/' appears in the first component.  Only append
688          a '/' if the component is the second component that does not
689          follow a "/" first component; or it is the third or later
690          component. */
691       if (i > 1 ||
692           (i == 1 && strcmp("/", APR_ARRAY_IDX(components,
693                                                0,
694                                                const char *)) != 0))
695         {
696           *p++ = '/';
697         }
698 
699       memcpy(p, APR_ARRAY_IDX(components, i, const char *), lengths[i]);
700       p += lengths[i];
701     }
702 
703   *p = '\0';
704 
705   return path;
706 }
707 
708 
709 svn_boolean_t
svn_path_is_single_path_component(const char * name)710 svn_path_is_single_path_component(const char *name)
711 {
712   assert(is_canonical(name, strlen(name)));
713 
714   /* Can't be empty or `..'  */
715   if (SVN_PATH_IS_EMPTY(name)
716       || (name[0] == '.' && name[1] == '.' && name[2] == '\0'))
717     return FALSE;
718 
719   /* Slashes are bad, m'kay... */
720   if (strchr(name, '/') != NULL)
721     return FALSE;
722 
723   /* It is valid.  */
724   return TRUE;
725 }
726 
727 
728 svn_boolean_t
svn_path_is_dotpath_present(const char * path)729 svn_path_is_dotpath_present(const char *path)
730 {
731   size_t len;
732 
733   /* The empty string does not have a dotpath */
734   if (path[0] == '\0')
735     return FALSE;
736 
737   /* Handle "." or a leading "./" */
738   if (path[0] == '.' && (path[1] == '\0' || path[1] == '/'))
739     return TRUE;
740 
741   /* Paths of length 1 (at this point) have no dotpath present. */
742   if (path[1] == '\0')
743     return FALSE;
744 
745   /* If any segment is "/./", then a dotpath is present. */
746   if (strstr(path, "/./") != NULL)
747     return TRUE;
748 
749   /* Does the path end in "/." ? */
750   len = strlen(path);
751   return path[len - 2] == '/' && path[len - 1] == '.';
752 }
753 
754 svn_boolean_t
svn_path_is_backpath_present(const char * path)755 svn_path_is_backpath_present(const char *path)
756 {
757   size_t len;
758 
759   /* 0 and 1-length paths do not have a backpath */
760   if (path[0] == '\0' || path[1] == '\0')
761     return FALSE;
762 
763   /* Handle ".." or a leading "../" */
764   if (path[0] == '.' && path[1] == '.' && (path[2] == '\0' || path[2] == '/'))
765     return TRUE;
766 
767   /* Paths of length 2 (at this point) have no backpath present. */
768   if (path[2] == '\0')
769     return FALSE;
770 
771   /* If any segment is "..", then a backpath is present. */
772   if (strstr(path, "/../") != NULL)
773     return TRUE;
774 
775   /* Does the path end in "/.." ? */
776   len = strlen(path);
777   return path[len - 3] == '/' && path[len - 2] == '.' && path[len - 1] == '.';
778 }
779 
780 
781 /*** URI Stuff ***/
782 
/* Treat PATH as a candidate URI.  If it starts with a non-empty scheme
   followed by "://", return a pointer to the byte right after that
   "://"; otherwise return NULL.  The returned pointer aliases PATH;
   nothing is allocated. */
static const char *
skip_uri_scheme(const char *path)
{
  const char *p = path;

  /* A scheme is terminated by a ':' and cannot contain any '/'. */
  while (*p != '\0' && *p != ':')
    {
      if (*p == '/')
        return NULL;
      ++p;
    }

  /* The scheme must not be empty... */
  if (p == path)
    return NULL;

  /* ...and has to be followed by exactly "://". */
  if (strncmp(p, "://", 3) != 0)
    return NULL;

  return p + 3;
}
802 
803 
804 svn_boolean_t
svn_path_is_url(const char * path)805 svn_path_is_url(const char *path)
806 {
807   /* ### This function is reaaaaaaaaaaaaaally stupid right now.
808      We're just going to look for:
809 
810         (scheme)://(optional_stuff)
811 
812      Where (scheme) has no ':' or '/' characters.
813 
814      Someday it might be nice to have an actual URI parser here.
815   */
816   return skip_uri_scheme(path) != NULL;
817 }
818 
819 
820 
/* Lookup table indexed by byte value: 1 if the byte may appear unescaped
   in the path part of a URI, 0 if it has to be escaped.

   The BNF for path components in a URI, with "pchar" being a character
   in a path component, is:

      pchar       = unreserved | escaped |
                    ":" | "@" | "&" | "=" | "+" | "$" | ","
      unreserved  = alphanum | mark
      mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"

   "escaped" does not really apply to what users can put in their paths,
   so the accepted set is effectively:

      alphanum | mark | ":" | "@" | "&" | "=" | "+" | "$" | ","

   The table additionally accepts the path separator (byte 47).  One row
   below covers 16 consecutive byte values.
*/
const char svn_uri__char_validity[256] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*   0 -  15: controls */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  16 -  31: controls */
  0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  32 -  47: punctuation */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,  /*  48 -  63: digits etc. */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  64 -  79: '@', 'A'-'O' */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /*  80 -  95: 'P'-'Z' etc. */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  96 - 111: '`', 'a'-'o' */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,  /* 112 - 127: 'p'-'z' etc. */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 128 - 143 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 144 - 159 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 160 - 175 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 176 - 191 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 192 - 207 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 208 - 223 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 224 - 239 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   /* 240 - 255 */
};
858 
859 
860 svn_boolean_t
svn_path_is_uri_safe(const char * path)861 svn_path_is_uri_safe(const char *path)
862 {
863   apr_size_t i;
864 
865   /* Skip the URI scheme. */
866   path = skip_uri_scheme(path);
867 
868   /* No scheme?  Get outta here. */
869   if (! path)
870     return FALSE;
871 
872   /* Skip to the first slash that's after the URI scheme. */
873   path = strchr(path, '/');
874 
875   /* If there's no first slash, then there's only a host portion;
876      therefore there couldn't be any uri-unsafe characters after the
877      host... so return true. */
878   if (path == NULL)
879     return TRUE;
880 
881   for (i = 0; path[i]; i++)
882     {
883       /* Allow '%XX' (where each X is a hex digit) */
884       if (path[i] == '%')
885         {
886           if (svn_ctype_isxdigit(path[i + 1]) &&
887               svn_ctype_isxdigit(path[i + 2]))
888             {
889               i += 2;
890               continue;
891             }
892           return FALSE;
893         }
894       else if (! svn_uri__char_validity[((unsigned char)path[i])])
895         {
896           return FALSE;
897         }
898     }
899 
900   return TRUE;
901 }
902 
903 
904 /* URI-encode each character c in PATH for which TABLE[c] is 0.
905    If no encoding was needed, return PATH, else return a new string allocated
906    in POOL. */
907 static const char *
uri_escape(const char * path,const char table[],apr_pool_t * pool)908 uri_escape(const char *path, const char table[], apr_pool_t *pool)
909 {
910   svn_stringbuf_t *retstr;
911   apr_size_t i, copied = 0;
912   int c;
913   apr_size_t len;
914   const char *p;
915 
916   /* To terminate our scanning loop, table[NUL] must report "invalid". */
917   assert(table[0] == 0);
918 
919   /* Quick check: Does any character need escaping? */
920   for (p = path; table[(unsigned char)*p]; ++p)
921     {}
922 
923   /* No char to escape before EOS? */
924   if (*p == '\0')
925     return path;
926 
927   /* We need to escape at least one character. */
928   len = strlen(p) + (p - path);
929   retstr = svn_stringbuf_create_ensure(len, pool);
930   for (i = p - path; i < len; i++)
931     {
932       c = (unsigned char)path[i];
933       if (table[c])
934         continue;
935 
936       /* If we got here, we're looking at a character that isn't
937          supported by the (or at least, our) URI encoding scheme.  We
938          need to escape this character.  */
939 
940       /* First things first, copy all the good stuff that we haven't
941          yet copied into our output buffer. */
942       if (i - copied)
943         svn_stringbuf_appendbytes(retstr, path + copied,
944                                   i - copied);
945 
946       /* Now, write in our escaped character, consisting of the
947          '%' and two digits.  We cast the C to unsigned char here because
948          the 'X' format character will be tempted to treat it as an unsigned
949          int...which causes problem when messing with 0x80-0xFF chars.
950          We also need space for a null as apr_snprintf will write one. */
951       svn_stringbuf_ensure(retstr, retstr->len + 4);
952       apr_snprintf(retstr->data + retstr->len, 4, "%%%02X", (unsigned char)c);
953       retstr->len += 3;
954 
955       /* Finally, update our copy counter. */
956       copied = i + 1;
957     }
958 
959   /* Anything left to copy? */
960   if (i - copied)
961     svn_stringbuf_appendbytes(retstr, path + copied, i - copied);
962 
963   /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf
964      functions. */
965 
966   return retstr->data;
967 }
968 
969 
970 const char *
svn_path_uri_encode(const char * path,apr_pool_t * pool)971 svn_path_uri_encode(const char *path, apr_pool_t *pool)
972 {
973   const char *ret;
974 
975   ret = uri_escape(path, svn_uri__char_validity, pool);
976 
977   /* Our interface guarantees a copy. */
978   if (ret == path)
979     return apr_pstrdup(pool, path);
980   else
981     return ret;
982 }
983 
/* Escape table handed to uri_escape() by svn_path_uri_from_iri(),
   indexed by byte value.  A nonzero entry means the byte is copied
   through unchanged; a zero entry means it is written as "%XX".
   Bytes 0x01-0x7F pass through and bytes 0x80-0xFF are escaped.
   Entry 0 is zero as well: uri_escape()'s initial scan stops on the
   first zero entry, which is how it finds the terminating NUL. */
static const char iri_escape_chars[256] = {
  /*   0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /*  16 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /*  32 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /*  48 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /*  64 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /*  80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /*  96 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 112 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

  /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
1004 
1005 const char *
svn_path_uri_from_iri(const char * iri,apr_pool_t * pool)1006 svn_path_uri_from_iri(const char *iri, apr_pool_t *pool)
1007 {
1008   return uri_escape(iri, iri_escape_chars, pool);
1009 }
1010 
/* Escape table handed to uri_escape() by svn_path_uri_autoescape(),
   indexed by byte value.  A nonzero entry means the byte is copied
   through unchanged; a zero entry means it is written as "%XX".
   The escaped bytes are: space, '"', '<', '>', '\', '^', '`', '{', '|'
   and '}'.  Everything else from 0x01 to 0xFF passes through.
   Entry 0 is zero so that uri_escape()'s initial scan stops at the
   terminating NUL. */
static const char uri_autoescape_chars[256] = {
  /*   0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /*  16 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /*  32 */ 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* ' ' '"' */
  /*  48 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /* '<' '>' */

  /*  64 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /*  80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /* '\' '^' */
  /*  96 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* '`' */
  /* 112 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, /* '{' '|' '}' */

  /* 128 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 144 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 160 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 176 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

  /* 192 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 208 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 224 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 240 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
1035 
1036 const char *
svn_path_uri_autoescape(const char * uri,apr_pool_t * pool)1037 svn_path_uri_autoescape(const char *uri, apr_pool_t *pool)
1038 {
1039   return uri_escape(uri, uri_autoescape_chars, pool);
1040 }
1041 
1042 const char *
svn_path_uri_decode(const char * path,apr_pool_t * pool)1043 svn_path_uri_decode(const char *path, apr_pool_t *pool)
1044 {
1045   svn_stringbuf_t *retstr;
1046   apr_size_t i;
1047   svn_boolean_t query_start = FALSE;
1048 
1049   /* avoid repeated realloc */
1050   retstr = svn_stringbuf_create_ensure(strlen(path) + 1, pool);
1051 
1052   retstr->len = 0;
1053   for (i = 0; path[i]; i++)
1054     {
1055       char c = path[i];
1056 
1057       if (c == '?')
1058         {
1059           /* Mark the start of the query string, if it exists. */
1060           query_start = TRUE;
1061         }
1062       else if (c == '+' && query_start)
1063         {
1064           /* Only do this if we are into the query string.
1065            * RFC 2396, section 3.3  */
1066           c = ' ';
1067         }
1068       else if (c == '%' && svn_ctype_isxdigit(path[i + 1])
1069                && svn_ctype_isxdigit(path[i+2]))
1070         {
1071           char digitz[3];
1072           digitz[0] = path[++i];
1073           digitz[1] = path[++i];
1074           digitz[2] = '\0';
1075           c = (char)(strtol(digitz, NULL, 16));
1076         }
1077 
1078       retstr->data[retstr->len++] = c;
1079     }
1080 
1081   /* Null-terminate this bad-boy. */
1082   retstr->data[retstr->len] = 0;
1083 
1084   return retstr->data;
1085 }
1086 
1087 
1088 const char *
svn_path_url_add_component2(const char * url,const char * component,apr_pool_t * pool)1089 svn_path_url_add_component2(const char *url,
1090                             const char *component,
1091                             apr_pool_t *pool)
1092 {
1093   /* = svn_path_uri_encode() but without always copying */
1094   component = uri_escape(component, svn_uri__char_validity, pool);
1095 
1096   return svn_path_join_internal(url, component, pool);
1097 }
1098 
1099 svn_error_t *
svn_path_get_absolute(const char ** pabsolute,const char * relative,apr_pool_t * pool)1100 svn_path_get_absolute(const char **pabsolute,
1101                       const char *relative,
1102                       apr_pool_t *pool)
1103 {
1104   if (svn_path_is_url(relative))
1105     {
1106       *pabsolute = apr_pstrdup(pool, relative);
1107       return SVN_NO_ERROR;
1108     }
1109 
1110   return svn_dirent_get_absolute(pabsolute, relative, pool);
1111 }
1112 
1113 
1114 #if !defined(WIN32) && !defined(DARWIN)
1115 /** Get APR's internal path encoding. */
1116 static svn_error_t *
get_path_encoding(svn_boolean_t * path_is_utf8,apr_pool_t * pool)1117 get_path_encoding(svn_boolean_t *path_is_utf8, apr_pool_t *pool)
1118 {
1119   apr_status_t apr_err;
1120   int encoding_style;
1121 
1122   apr_err = apr_filepath_encoding(&encoding_style, pool);
1123   if (apr_err)
1124     return svn_error_wrap_apr(apr_err,
1125                               _("Can't determine the native path encoding"));
1126 
1127   /* ### What to do about APR_FILEPATH_ENCODING_UNKNOWN?
1128      Well, for now we'll just punt to the svn_utf_ functions;
1129      those will at least do the ASCII-subset check. */
1130   *path_is_utf8 = (encoding_style == APR_FILEPATH_ENCODING_UTF8);
1131   return SVN_NO_ERROR;
1132 }
1133 #endif
1134 
1135 
1136 svn_error_t *
svn_path_cstring_from_utf8(const char ** path_apr,const char * path_utf8,apr_pool_t * pool)1137 svn_path_cstring_from_utf8(const char **path_apr,
1138                            const char *path_utf8,
1139                            apr_pool_t *pool)
1140 {
1141 #if !defined(WIN32) && !defined(DARWIN)
1142   svn_boolean_t path_is_utf8;
1143   SVN_ERR(get_path_encoding(&path_is_utf8, pool));
1144   if (path_is_utf8)
1145 #endif
1146     {
1147       *path_apr = apr_pstrdup(pool, path_utf8);
1148       return SVN_NO_ERROR;
1149     }
1150 #if !defined(WIN32) && !defined(DARWIN)
1151   else
1152     return svn_utf_cstring_from_utf8(path_apr, path_utf8, pool);
1153 #endif
1154 }
1155 
1156 
1157 svn_error_t *
svn_path_cstring_to_utf8(const char ** path_utf8,const char * path_apr,apr_pool_t * pool)1158 svn_path_cstring_to_utf8(const char **path_utf8,
1159                          const char *path_apr,
1160                          apr_pool_t *pool)
1161 {
1162 #if !defined(WIN32) && !defined(DARWIN)
1163   svn_boolean_t path_is_utf8;
1164   SVN_ERR(get_path_encoding(&path_is_utf8, pool));
1165   if (path_is_utf8)
1166 #endif
1167     {
1168       *path_utf8 = apr_pstrdup(pool, path_apr);
1169       return SVN_NO_ERROR;
1170     }
1171 #if !defined(WIN32) && !defined(DARWIN)
1172   else
1173     return svn_utf_cstring_to_utf8(path_utf8, path_apr, pool);
1174 #endif
1175 }
1176 
1177 
1178 const char *
svn_path_illegal_path_escape(const char * path,apr_pool_t * pool)1179 svn_path_illegal_path_escape(const char *path, apr_pool_t *pool)
1180 {
1181   svn_stringbuf_t *retstr;
1182   apr_size_t i, copied = 0;
1183   int c;
1184 
1185   /* At least one control character:
1186       strlen - 1 (control) + \ + N + N + N + null . */
1187   retstr = svn_stringbuf_create_ensure(strlen(path) + 4, pool);
1188   for (i = 0; path[i]; i++)
1189     {
1190       c = (unsigned char)path[i];
1191       if (! svn_ctype_iscntrl(c))
1192         continue;
1193 
1194       /* If we got here, we're looking at a character that isn't
1195          supported by the (or at least, our) URI encoding scheme.  We
1196          need to escape this character.  */
1197 
1198       /* First things first, copy all the good stuff that we haven't
1199          yet copied into our output buffer. */
1200       if (i - copied)
1201         svn_stringbuf_appendbytes(retstr, path + copied,
1202                                   i - copied);
1203 
1204       /* Make sure buffer is big enough for '\' 'N' 'N' 'N' (and NUL) */
1205       svn_stringbuf_ensure(retstr, retstr->len + 5);
1206       /*### The backslash separator doesn't work too great with Windows,
1207          but it's what we'll use for consistency with invalid utf8
1208          formatting (until someone has a better idea) */
1209       apr_snprintf(retstr->data + retstr->len, 5, "\\%03o", (unsigned char)c);
1210       retstr->len += 4;
1211 
1212       /* Finally, update our copy counter. */
1213       copied = i + 1;
1214     }
1215 
1216   /* If we didn't encode anything, we don't need to duplicate the string. */
1217   if (retstr->len == 0)
1218     return path;
1219 
1220   /* Anything left to copy? */
1221   if (i - copied)
1222     svn_stringbuf_appendbytes(retstr, path + copied, i - copied);
1223 
1224   /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf
1225      functions. */
1226 
1227   return retstr->data;
1228 }
1229 
1230 svn_error_t *
svn_path_check_valid(const char * path,apr_pool_t * pool)1231 svn_path_check_valid(const char *path, apr_pool_t *pool)
1232 {
1233   const char *c;
1234 
1235   for (c = path; *c; c++)
1236     {
1237       if (svn_ctype_iscntrl(*c))
1238         {
1239           return svn_error_createf(SVN_ERR_FS_PATH_SYNTAX, NULL,
1240              _("Invalid control character '0x%02x' in path '%s'"),
1241              (unsigned char)*c,
1242              svn_path_illegal_path_escape(svn_dirent_local_style(path, pool),
1243                                           pool));
1244         }
1245     }
1246 
1247   return SVN_NO_ERROR;
1248 }
1249 
1250 void
svn_path_splitext(const char ** path_root,const char ** path_ext,const char * path,apr_pool_t * pool)1251 svn_path_splitext(const char **path_root,
1252                   const char **path_ext,
1253                   const char *path,
1254                   apr_pool_t *pool)
1255 {
1256   const char *last_dot, *last_slash;
1257 
1258   /* Easy out -- why do all the work when there's no way to report it? */
1259   if (! (path_root || path_ext))
1260     return;
1261 
1262   /* Do we even have a period in this thing?  And if so, is there
1263      anything after it?  We look for the "rightmost" period in the
1264      string. */
1265   last_dot = strrchr(path, '.');
1266   if (last_dot && (*(last_dot + 1) != '\0'))
1267     {
1268       /* If we have a period, we need to make sure it occurs in the
1269          final path component -- that there's no path separator
1270          between the last period and the end of the PATH -- otherwise,
1271          it doesn't count.  Also, we want to make sure that our period
1272          isn't the first character of the last component. */
1273       last_slash = strrchr(path, '/');
1274       if ((last_slash && (last_dot > (last_slash + 1)))
1275           || ((! last_slash) && (last_dot > path)))
1276         {
1277           if (path_root)
1278             *path_root = apr_pstrmemdup(pool, path,
1279                                         (last_dot - path + 1) * sizeof(*path));
1280           if (path_ext)
1281             *path_ext = apr_pstrdup(pool, last_dot + 1);
1282           return;
1283         }
1284     }
1285   /* If we get here, we never found a suitable separator character, so
1286      there's no split. */
1287   if (path_root)
1288     *path_root = apr_pstrdup(pool, path);
1289   if (path_ext)
1290     *path_ext = "";
1291 }
1292 
1293 
1294 /* Repository relative URLs (^/). */
1295 
1296 svn_boolean_t
svn_path_is_repos_relative_url(const char * path)1297 svn_path_is_repos_relative_url(const char *path)
1298 {
1299   return (0 == strncmp("^/", path, 2));
1300 }
1301 
1302 svn_error_t *
svn_path_resolve_repos_relative_url(const char ** absolute_url,const char * relative_url,const char * repos_root_url,apr_pool_t * pool)1303 svn_path_resolve_repos_relative_url(const char **absolute_url,
1304                                     const char *relative_url,
1305                                     const char *repos_root_url,
1306                                     apr_pool_t *pool)
1307 {
1308   if (! svn_path_is_repos_relative_url(relative_url))
1309     return svn_error_createf(SVN_ERR_BAD_URL, NULL,
1310                              _("Improper relative URL '%s'"),
1311                              relative_url);
1312 
1313   /* No assumptions are made about the canonicalization of the input
1314    * arguments, it is presumed that the output will be canonicalized after
1315    * this function, which will remove any duplicate path separator.
1316    */
1317   *absolute_url = apr_pstrcat(pool, repos_root_url, relative_url + 1,
1318                               SVN_VA_NULL);
1319 
1320   return SVN_NO_ERROR;
1321 }
1322 
1323