xref: /trueos/contrib/subversion/subversion/libsvn_subr/dirent_uri.c (revision 99bc25a1685ca34d8638e41c44477ff59a2dc490)
1 /*
2  * dirent_uri.c:   a library to manipulate URIs and directory entries.
3  *
4  * ====================================================================
5  *    Licensed to the Apache Software Foundation (ASF) under one
6  *    or more contributor license agreements.  See the NOTICE file
7  *    distributed with this work for additional information
8  *    regarding copyright ownership.  The ASF licenses this file
9  *    to you under the Apache License, Version 2.0 (the
10  *    "License"); you may not use this file except in compliance
11  *    with the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  *    Unless required by applicable law or agreed to in writing,
16  *    software distributed under the License is distributed on an
17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18  *    KIND, either express or implied.  See the License for the
19  *    specific language governing permissions and limitations
20  *    under the License.
21  * ====================================================================
22  */
23 
24 
25 
26 #include <string.h>
27 #include <assert.h>
28 #include <ctype.h>
29 
30 #include <apr_uri.h>
31 #include <apr_lib.h>
32 
33 #include "svn_private_config.h"
34 #include "svn_string.h"
35 #include "svn_dirent_uri.h"
36 #include "svn_path.h"
37 #include "svn_ctype.h"
38 
39 #include "dirent_uri.h"
40 #include "private/svn_fspath.h"
41 #include "private/svn_cert.h"
42 
/* The canonical empty path.  Can this be changed?  Well, change the empty
   test below and the path library will work, not so sure about the fs/wc
   libraries. */
#define SVN_EMPTY_PATH ""

/* TRUE if s is the canonical empty path, FALSE otherwise.  Only the first
   byte of S is examined, so S must point to at least one readable byte. */
#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0')

/* TRUE if s,n is the platform's empty path ("."), FALSE otherwise. Can
   this be changed?  Well, the path library will work, not so sure about
   the OS!  The test is N == 1 and a first byte of '.'. */
#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.')

/* This check must match the check on top of dirent_uri-tests.c and
   path-tests.c.  SVN_USE_DOS_PATHS enables the drive letter ("X:") and
   UNC ("//server/share") handling guarded by #ifdef throughout this
   file. */
#if defined(WIN32) || defined(__CYGWIN__) || defined(__OS2__)
#define SVN_USE_DOS_PATHS
#endif
61 
/* Path type definition. Used only by internal functions.  It selects
   which rules the shared helpers canonicalize(), is_child() and
   get_longest_ancestor_length() apply to their arguments. */
typedef enum path_type_t {
  type_uri,     /* canonicalize() looks for a "scheme://" prefix */
  type_dirent,  /* may carry a drive letter or UNC root when
                   SVN_USE_DOS_PATHS is defined */
  type_relpath  /* canonicalize() copies no leading separator or drive */
} path_type_t;
68 
69 
70 /**** Forward declarations *****/
71 
72 static svn_boolean_t
73 relpath_is_canonical(const char *relpath);
74 
75 
76 /**** Internal implementation functions *****/
77 
78 /* Return an internal-style new path based on PATH, allocated in POOL.
79  *
80  * "Internal-style" means that separators are all '/'.
81  */
82 static const char *
internal_style(const char * path,apr_pool_t * pool)83 internal_style(const char *path, apr_pool_t *pool)
84 {
85 #if '/' != SVN_PATH_LOCAL_SEPARATOR
86     {
87       char *p = apr_pstrdup(pool, path);
88       path = p;
89 
90       /* Convert all local-style separators to the canonical ones. */
91       for (; *p != '\0'; ++p)
92         if (*p == SVN_PATH_LOCAL_SEPARATOR)
93           *p = '/';
94     }
95 #endif
96 
97   return path;
98 }
99 
/* Locale-independent replacement for tolower(), used while canonicalizing
   parts of dirents and urls: the letters 'A'..'Z' map to 'a'..'z', every
   other character is returned unchanged. */
static char
canonicalize_to_lower(char c)
{
  return (c >= 'A' && c <= 'Z') ? (char)(c - 'A' + 'a') : c;
}
110 
/* Locale-independent replacement for toupper(), used while canonicalizing
   parts of dirents and urls: the letters 'a'..'z' map to 'A'..'Z', every
   other character is returned unchanged. */
static char
canonicalize_to_upper(char c)
{
  return (c >= 'a' && c <= 'z') ? (char)(c - 'a' + 'A') : c;
}
121 
122 /* Calculates the length of the dirent absolute or non absolute root in
123    DIRENT, return 0 if dirent is not rooted  */
124 static apr_size_t
dirent_root_length(const char * dirent,apr_size_t len)125 dirent_root_length(const char *dirent, apr_size_t len)
126 {
127 #ifdef SVN_USE_DOS_PATHS
128   if (len >= 2 && dirent[1] == ':' &&
129       ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
130        (dirent[0] >= 'a' && dirent[0] <= 'z')))
131     {
132       return (len > 2 && dirent[2] == '/') ? 3 : 2;
133     }
134 
135   if (len > 2 && dirent[0] == '/' && dirent[1] == '/')
136     {
137       apr_size_t i = 2;
138 
139       while (i < len && dirent[i] != '/')
140         i++;
141 
142       if (i == len)
143         return len; /* Cygwin drive alias, invalid path on WIN32 */
144 
145       i++; /* Skip '/' */
146 
147       while (i < len && dirent[i] != '/')
148         i++;
149 
150       return i;
151     }
152 #endif /* SVN_USE_DOS_PATHS */
153   if (len >= 1 && dirent[0] == '/')
154     return 1;
155 
156   return 0;
157 }
158 
159 
160 /* Return the length of substring necessary to encompass the entire
161  * previous dirent segment in DIRENT, which should be a LEN byte string.
162  *
163  * A trailing slash will not be included in the returned length except
164  * in the case in which DIRENT is absolute and there are no more
165  * previous segments.
166  */
167 static apr_size_t
dirent_previous_segment(const char * dirent,apr_size_t len)168 dirent_previous_segment(const char *dirent,
169                         apr_size_t len)
170 {
171   if (len == 0)
172     return 0;
173 
174   --len;
175   while (len > 0 && dirent[len] != '/'
176 #ifdef SVN_USE_DOS_PATHS
177                  && (dirent[len] != ':' || len != 1)
178 #endif /* SVN_USE_DOS_PATHS */
179         )
180     --len;
181 
182   /* check if the remaining segment including trailing '/' is a root dirent */
183   if (dirent_root_length(dirent, len+1) == len + 1)
184     return len + 1;
185   else
186     return len;
187 }
188 
189 /* Calculates the length occupied by the schema defined root of URI */
190 static apr_size_t
uri_schema_root_length(const char * uri,apr_size_t len)191 uri_schema_root_length(const char *uri, apr_size_t len)
192 {
193   apr_size_t i;
194 
195   for (i = 0; i < len; i++)
196     {
197       if (uri[i] == '/')
198         {
199           if (i > 0 && uri[i-1] == ':' && i < len-1 && uri[i+1] == '/')
200             {
201               /* We have an absolute uri */
202               if (i == 5 && strncmp("file", uri, 4) == 0)
203                 return 7; /* file:// */
204               else
205                 {
206                   for (i += 2; i < len; i++)
207                     if (uri[i] == '/')
208                       return i;
209 
210                   return len; /* Only a hostname is found */
211                 }
212             }
213           else
214             return 0;
215         }
216     }
217 
218   return 0;
219 }
220 
221 /* Returns TRUE if svn_dirent_is_absolute(dirent) or when dirent has
222    a non absolute root. (E.g. '/' or 'F:' on Windows) */
223 static svn_boolean_t
dirent_is_rooted(const char * dirent)224 dirent_is_rooted(const char *dirent)
225 {
226   if (! dirent)
227     return FALSE;
228 
229   /* Root on all systems */
230   if (dirent[0] == '/')
231     return TRUE;
232 
233   /* On Windows, dirent is also absolute when it starts with 'H:' or 'H:/'
234      where 'H' is any letter. */
235 #ifdef SVN_USE_DOS_PATHS
236   if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
237        (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
238       (dirent[1] == ':'))
239      return TRUE;
240 #endif /* SVN_USE_DOS_PATHS */
241 
242   return FALSE;
243 }
244 
245 /* Return the length of substring necessary to encompass the entire
246  * previous relpath segment in RELPATH, which should be a LEN byte string.
247  *
248  * A trailing slash will not be included in the returned length.
249  */
250 static apr_size_t
relpath_previous_segment(const char * relpath,apr_size_t len)251 relpath_previous_segment(const char *relpath,
252                          apr_size_t len)
253 {
254   if (len == 0)
255     return 0;
256 
257   --len;
258   while (len > 0 && relpath[len] != '/')
259     --len;
260 
261   return len;
262 }
263 
264 /* Return the length of substring necessary to encompass the entire
265  * previous uri segment in URI, which should be a LEN byte string.
266  *
267  * A trailing slash will not be included in the returned length except
268  * in the case in which URI is absolute and there are no more
269  * previous segments.
270  */
271 static apr_size_t
uri_previous_segment(const char * uri,apr_size_t len)272 uri_previous_segment(const char *uri,
273                      apr_size_t len)
274 {
275   apr_size_t root_length;
276   apr_size_t i = len;
277   if (len == 0)
278     return 0;
279 
280   root_length = uri_schema_root_length(uri, len);
281 
282   --i;
283   while (len > root_length && uri[i] != '/')
284     --i;
285 
286   if (i == 0 && len > 1 && *uri == '/')
287     return 1;
288 
289   return i;
290 }
291 
292 /* Return the canonicalized version of PATH, of type TYPE, allocated in
293  * POOL.
294  */
295 static const char *
canonicalize(path_type_t type,const char * path,apr_pool_t * pool)296 canonicalize(path_type_t type, const char *path, apr_pool_t *pool)
297 {
298   char *canon, *dst;
299   const char *src;
300   apr_size_t seglen;
301   apr_size_t schemelen = 0;
302   apr_size_t canon_segments = 0;
303   svn_boolean_t url = FALSE;
304   char *schema_data = NULL;
305 
306   /* "" is already canonical, so just return it; note that later code
307      depends on path not being zero-length.  */
308   if (SVN_PATH_IS_EMPTY(path))
309     {
310       assert(type != type_uri);
311       return "";
312     }
313 
314   dst = canon = apr_pcalloc(pool, strlen(path) + 1);
315 
316   /* If this is supposed to be an URI, it should start with
317      "scheme://".  We'll copy the scheme, host name, etc. to DST and
318      set URL = TRUE. */
319   src = path;
320   if (type == type_uri)
321     {
322       assert(*src != '/');
323 
324       while (*src && (*src != '/') && (*src != ':'))
325         src++;
326 
327       if (*src == ':' && *(src+1) == '/' && *(src+2) == '/')
328         {
329           const char *seg;
330 
331           url = TRUE;
332 
333           /* Found a scheme, convert to lowercase and copy to dst. */
334           src = path;
335           while (*src != ':')
336             {
337               *(dst++) = canonicalize_to_lower((*src++));
338               schemelen++;
339             }
340           *(dst++) = ':';
341           *(dst++) = '/';
342           *(dst++) = '/';
343           src += 3;
344           schemelen += 3;
345 
346           /* This might be the hostname */
347           seg = src;
348           while (*src && (*src != '/') && (*src != '@'))
349             src++;
350 
351           if (*src == '@')
352             {
353               /* Copy the username & password. */
354               seglen = src - seg + 1;
355               memcpy(dst, seg, seglen);
356               dst += seglen;
357               src++;
358             }
359           else
360             src = seg;
361 
362           /* Found a hostname, convert to lowercase and copy to dst. */
363           if (*src == '[')
364             {
365              *(dst++) = *(src++); /* Copy '[' */
366 
367               while (*src == ':'
368                      || (*src >= '0' && (*src <= '9'))
369                      || (*src >= 'a' && (*src <= 'f'))
370                      || (*src >= 'A' && (*src <= 'F')))
371                 {
372                   *(dst++) = canonicalize_to_lower((*src++));
373                 }
374 
375               if (*src == ']')
376                 *(dst++) = *(src++); /* Copy ']' */
377             }
378           else
379             while (*src && (*src != '/') && (*src != ':'))
380               *(dst++) = canonicalize_to_lower((*src++));
381 
382           if (*src == ':')
383             {
384               /* We probably have a port number: Is it a default portnumber
385                  which doesn't belong in a canonical url? */
386               if (src[1] == '8' && src[2] == '0'
387                   && (src[3]== '/'|| !src[3])
388                   && !strncmp(canon, "http:", 5))
389                 {
390                   src += 3;
391                 }
392               else if (src[1] == '4' && src[2] == '4' && src[3] == '3'
393                        && (src[4]== '/'|| !src[4])
394                        && !strncmp(canon, "https:", 6))
395                 {
396                   src += 4;
397                 }
398               else if (src[1] == '3' && src[2] == '6'
399                        && src[3] == '9' && src[4] == '0'
400                        && (src[5]== '/'|| !src[5])
401                        && !strncmp(canon, "svn:", 4))
402                 {
403                   src += 5;
404                 }
405               else if (src[1] == '/' || !src[1])
406                 {
407                   src += 1;
408                 }
409 
410               while (*src && (*src != '/'))
411                 *(dst++) = canonicalize_to_lower((*src++));
412             }
413 
414           /* Copy trailing slash, or null-terminator. */
415           *(dst) = *(src);
416 
417           /* Move src and dst forward only if we are not
418            * at null-terminator yet. */
419           if (*src)
420             {
421               src++;
422               dst++;
423               schema_data = dst;
424             }
425 
426           canon_segments = 1;
427         }
428     }
429 
430   /* Copy to DST any separator or drive letter that must come before the
431      first regular path segment. */
432   if (! url && type != type_relpath)
433     {
434       src = path;
435       /* If this is an absolute path, then just copy over the initial
436          separator character. */
437       if (*src == '/')
438         {
439           *(dst++) = *(src++);
440 
441 #ifdef SVN_USE_DOS_PATHS
442           /* On Windows permit two leading separator characters which means an
443            * UNC path. */
444           if ((type == type_dirent) && *src == '/')
445             *(dst++) = *(src++);
446 #endif /* SVN_USE_DOS_PATHS */
447         }
448 #ifdef SVN_USE_DOS_PATHS
449       /* On Windows the first segment can be a drive letter, which we normalize
450          to upper case. */
451       else if (type == type_dirent &&
452                ((*src >= 'a' && *src <= 'z') ||
453                 (*src >= 'A' && *src <= 'Z')) &&
454                (src[1] == ':'))
455         {
456           *(dst++) = canonicalize_to_upper(*(src++));
457           /* Leave the ':' to be processed as (or as part of) a path segment
458              by the following code block, so we need not care whether it has
459              a slash after it. */
460         }
461 #endif /* SVN_USE_DOS_PATHS */
462     }
463 
464   while (*src)
465     {
466       /* Parse each segment, finding the closing '/' (which might look
467          like '%2F' for URIs).  */
468       const char *next = src;
469       apr_size_t slash_len = 0;
470 
471       while (*next
472              && (next[0] != '/')
473              && (! (type == type_uri && next[0] == '%' && next[1] == '2' &&
474                     canonicalize_to_upper(next[2]) == 'F')))
475         {
476           ++next;
477         }
478 
479       /* Record how long our "slash" is. */
480       if (next[0] == '/')
481         slash_len = 1;
482       else if (type == type_uri && next[0] == '%')
483         slash_len = 3;
484 
485       seglen = next - src;
486 
487       if (seglen == 0
488           || (seglen == 1 && src[0] == '.')
489           || (type == type_uri && seglen == 3 && src[0] == '%' && src[1] == '2'
490               && canonicalize_to_upper(src[2]) == 'E'))
491         {
492           /* Empty or noop segment, so do nothing.  (For URIs, '%2E'
493              is equivalent to '.').  */
494         }
495 #ifdef SVN_USE_DOS_PATHS
496       /* If this is the first path segment of a file:// URI and it contains a
497          windows drive letter, convert the drive letter to upper case. */
498       else if (url && canon_segments == 1 && seglen == 2 &&
499                (strncmp(canon, "file:", 5) == 0) &&
500                src[0] >= 'a' && src[0] <= 'z' && src[1] == ':')
501         {
502           *(dst++) = canonicalize_to_upper(src[0]);
503           *(dst++) = ':';
504           if (*next)
505             *(dst++) = *next;
506           canon_segments++;
507         }
508 #endif /* SVN_USE_DOS_PATHS */
509       else
510         {
511           /* An actual segment, append it to the destination path */
512           memcpy(dst, src, seglen);
513           dst += seglen;
514           if (slash_len)
515             *(dst++) = '/';
516           canon_segments++;
517         }
518 
519       /* Skip over trailing slash to the next segment. */
520       src = next + slash_len;
521     }
522 
523   /* Remove the trailing slash if there was at least one
524    * canonical segment and the last segment ends with a slash.
525    *
526    * But keep in mind that, for URLs, the scheme counts as a
527    * canonical segment -- so if path is ONLY a scheme (such
528    * as "https://") we should NOT remove the trailing slash. */
529   if ((canon_segments > 0 && *(dst - 1) == '/')
530       && ! (url && path[schemelen] == '\0'))
531     {
532       dst --;
533     }
534 
535   *dst = '\0';
536 
537 #ifdef SVN_USE_DOS_PATHS
538   /* Skip leading double slashes when there are less than 2
539    * canon segments. UNC paths *MUST* have two segments. */
540   if ((type == type_dirent) && canon[0] == '/' && canon[1] == '/')
541     {
542       if (canon_segments < 2)
543         return canon + 1;
544       else
545         {
546           /* Now we're sure this is a valid UNC path, convert the server name
547              (the first path segment) to lowercase as Windows treats it as case
548              insensitive.
549              Note: normally the share name is treated as case insensitive too,
550              but it seems to be possible to configure Samba to treat those as
551              case sensitive, so better leave that alone. */
552           for (dst = canon + 2; *dst && *dst != '/'; dst++)
553             *dst = canonicalize_to_lower(*dst);
554         }
555     }
556 #endif /* SVN_USE_DOS_PATHS */
557 
558   /* Check the normalization of characters in a uri */
559   if (schema_data)
560     {
561       int need_extra = 0;
562       src = schema_data;
563 
564       while (*src)
565         {
566           switch (*src)
567             {
568               case '/':
569                 break;
570               case '%':
571                 if (!svn_ctype_isxdigit(*(src+1)) ||
572                     !svn_ctype_isxdigit(*(src+2)))
573                   need_extra += 2;
574                 else
575                   src += 2;
576                 break;
577               default:
578                 if (!svn_uri__char_validity[(unsigned char)*src])
579                   need_extra += 2;
580                 break;
581             }
582           src++;
583         }
584 
585       if (need_extra > 0)
586         {
587           apr_size_t pre_schema_size = (apr_size_t)(schema_data - canon);
588 
589           dst = apr_palloc(pool, (apr_size_t)(src - canon) + need_extra + 1);
590           memcpy(dst, canon, pre_schema_size);
591           canon = dst;
592 
593           dst += pre_schema_size;
594         }
595       else
596         dst = schema_data;
597 
598       src = schema_data;
599 
600       while (*src)
601         {
602           switch (*src)
603             {
604               case '/':
605                 *(dst++) = '/';
606                 break;
607               case '%':
608                 if (!svn_ctype_isxdigit(*(src+1)) ||
609                     !svn_ctype_isxdigit(*(src+2)))
610                   {
611                     *(dst++) = '%';
612                     *(dst++) = '2';
613                     *(dst++) = '5';
614                   }
615                 else
616                   {
617                     char digitz[3];
618                     int val;
619 
620                     digitz[0] = *(++src);
621                     digitz[1] = *(++src);
622                     digitz[2] = 0;
623 
624                     val = (int)strtol(digitz, NULL, 16);
625 
626                     if (svn_uri__char_validity[(unsigned char)val])
627                       *(dst++) = (char)val;
628                     else
629                       {
630                         *(dst++) = '%';
631                         *(dst++) = canonicalize_to_upper(digitz[0]);
632                         *(dst++) = canonicalize_to_upper(digitz[1]);
633                       }
634                   }
635                 break;
636               default:
637                 if (!svn_uri__char_validity[(unsigned char)*src])
638                   {
639                     apr_snprintf(dst, 4, "%%%02X", (unsigned char)*src);
640                     dst += 3;
641                   }
642                 else
643                   *(dst++) = *src;
644                 break;
645             }
646           src++;
647         }
648       *dst = '\0';
649     }
650 
651   return canon;
652 }
653 
654 /* Return the string length of the longest common ancestor of PATH1 and PATH2.
655  * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if
656  * PATH1 and PATH2 are regular paths.
657  *
658  * If the two paths do not share a common ancestor, return 0.
659  *
660  * New strings are allocated in POOL.
661  */
662 static apr_size_t
get_longest_ancestor_length(path_type_t types,const char * path1,const char * path2,apr_pool_t * pool)663 get_longest_ancestor_length(path_type_t types,
664                             const char *path1,
665                             const char *path2,
666                             apr_pool_t *pool)
667 {
668   apr_size_t path1_len, path2_len;
669   apr_size_t i = 0;
670   apr_size_t last_dirsep = 0;
671 #ifdef SVN_USE_DOS_PATHS
672   svn_boolean_t unc = FALSE;
673 #endif
674 
675   path1_len = strlen(path1);
676   path2_len = strlen(path2);
677 
678   if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
679     return 0;
680 
681   while (path1[i] == path2[i])
682     {
683       /* Keep track of the last directory separator we hit. */
684       if (path1[i] == '/')
685         last_dirsep = i;
686 
687       i++;
688 
689       /* If we get to the end of either path, break out. */
690       if ((i == path1_len) || (i == path2_len))
691         break;
692     }
693 
694   /* two special cases:
695      1. '/' is the longest common ancestor of '/' and '/foo' */
696   if (i == 1 && path1[0] == '/' && path2[0] == '/')
697     return 1;
698   /* 2. '' is the longest common ancestor of any non-matching
699    * strings 'foo' and 'bar' */
700   if (types == type_dirent && i == 0)
701     return 0;
702 
703   /* Handle some windows specific cases */
704 #ifdef SVN_USE_DOS_PATHS
705   if (types == type_dirent)
706     {
707       /* don't count the '//' from UNC paths */
708       if (last_dirsep == 1 && path1[0] == '/' && path1[1] == '/')
709         {
710           last_dirsep = 0;
711           unc = TRUE;
712         }
713 
714       /* X:/ and X:/foo */
715       if (i == 3 && path1[2] == '/' && path1[1] == ':')
716         return i;
717 
718       /* Cannot use SVN_ERR_ASSERT here, so we'll have to crash, sorry.
719        * Note that this assertion triggers only if the code above has
720        * been broken. The code below relies on this assertion, because
721        * it uses [i - 1] as index. */
722       assert(i > 0);
723 
724       /* X: and X:/ */
725       if ((path1[i - 1] == ':' && path2[i] == '/') ||
726           (path2[i - 1] == ':' && path1[i] == '/'))
727           return 0;
728       /* X: and X:foo */
729       if (path1[i - 1] == ':' || path2[i - 1] == ':')
730           return i;
731     }
732 #endif /* SVN_USE_DOS_PATHS */
733 
734   /* last_dirsep is now the offset of the last directory separator we
735      crossed before reaching a non-matching byte.  i is the offset of
736      that non-matching byte, and is guaranteed to be <= the length of
737      whichever path is shorter.
738      If one of the paths is the common part return that. */
739   if (((i == path1_len) && (path2[i] == '/'))
740            || ((i == path2_len) && (path1[i] == '/'))
741            || ((i == path1_len) && (i == path2_len)))
742     return i;
743   else
744     {
745       /* Nothing in common but the root folder '/' or 'X:/' for Windows
746          dirents. */
747 #ifdef SVN_USE_DOS_PATHS
748       if (! unc)
749         {
750           /* X:/foo and X:/bar returns X:/ */
751           if ((types == type_dirent) &&
752               last_dirsep == 2 && path1[1] == ':' && path1[2] == '/'
753                                && path2[1] == ':' && path2[2] == '/')
754             return 3;
755 #endif /* SVN_USE_DOS_PATHS */
756           if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
757             return 1;
758 #ifdef SVN_USE_DOS_PATHS
759         }
760 #endif
761     }
762 
763   return last_dirsep;
764 }
765 
766 /* Determine whether PATH2 is a child of PATH1.
767  *
768  * PATH2 is a child of PATH1 if
769  * 1) PATH1 is empty, and PATH2 is not empty and not an absolute path.
770  * or
771  * 2) PATH2 is has n components, PATH1 has x < n components,
772  *    and PATH1 matches PATH2 in all its x components.
773  *    Components are separated by a slash, '/'.
774  *
775  * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if
776  * PATH1 and PATH2 are regular paths.
777  *
778  * If PATH2 is not a child of PATH1, return NULL.
779  *
780  * If PATH2 is a child of PATH1, and POOL is not NULL, allocate a copy
781  * of the child part of PATH2 in POOL and return a pointer to the
782  * newly allocated child part.
783  *
784  * If PATH2 is a child of PATH1, and POOL is NULL, return a pointer
785  * pointing to the child part of PATH2.
786  * */
787 static const char *
is_child(path_type_t type,const char * path1,const char * path2,apr_pool_t * pool)788 is_child(path_type_t type, const char *path1, const char *path2,
789          apr_pool_t *pool)
790 {
791   apr_size_t i;
792 
793   /* Allow "" and "foo" or "H:foo" to be parent/child */
794   if (SVN_PATH_IS_EMPTY(path1))               /* "" is the parent  */
795     {
796       if (SVN_PATH_IS_EMPTY(path2))            /* "" not a child    */
797         return NULL;
798 
799       /* check if this is an absolute path */
800       if ((type == type_uri) ||
801           (type == type_dirent && dirent_is_rooted(path2)))
802         return NULL;
803       else
804         /* everything else is child */
805         return pool ? apr_pstrdup(pool, path2) : path2;
806     }
807 
808   /* Reach the end of at least one of the paths.  How should we handle
809      things like path1:"foo///bar" and path2:"foo/bar/baz"?  It doesn't
810      appear to arise in the current Subversion code, it's not clear to me
811      if they should be parent/child or not. */
812   /* Hmmm... aren't paths assumed to be canonical in this function?
813    * How can "foo///bar" even happen if the paths are canonical? */
814   for (i = 0; path1[i] && path2[i]; i++)
815     if (path1[i] != path2[i])
816       return NULL;
817 
818   /* FIXME: This comment does not really match
819    * the checks made in the code it refers to: */
820   /* There are two cases that are parent/child
821           ...      path1[i] == '\0'
822           .../foo  path2[i] == '/'
823       or
824           /        path1[i] == '\0'
825           /foo     path2[i] != '/'
826 
827      Other root paths (like X:/) fall under the former case:
828           X:/        path1[i] == '\0'
829           X:/foo     path2[i] != '/'
830 
831      Check for '//' to avoid matching '/' and '//srv'.
832   */
833   if (path1[i] == '\0' && path2[i])
834     {
835       if (path1[i - 1] == '/'
836 #ifdef SVN_USE_DOS_PATHS
837           || ((type == type_dirent) && path1[i - 1] == ':')
838 #endif
839            )
840         {
841           if (path2[i] == '/')
842             /* .../
843              * ..../
844              *     i   */
845             return NULL;
846           else
847             /* .../
848              * .../foo
849              *     i    */
850             return pool ? apr_pstrdup(pool, path2 + i) : path2 + i;
851         }
852       else if (path2[i] == '/')
853         {
854           if (path2[i + 1])
855             /* ...
856              * .../foo
857              *    i    */
858             return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1;
859           else
860             /* ...
861              * .../
862              *    i    */
863             return NULL;
864         }
865     }
866 
867   /* Otherwise, path2 isn't a child. */
868   return NULL;
869 }
870 
871 
872 /**** Public API functions ****/
873 
874 const char *
svn_dirent_internal_style(const char * dirent,apr_pool_t * pool)875 svn_dirent_internal_style(const char *dirent, apr_pool_t *pool)
876 {
877   return svn_dirent_canonicalize(internal_style(dirent, pool), pool);
878 }
879 
880 const char *
svn_dirent_local_style(const char * dirent,apr_pool_t * pool)881 svn_dirent_local_style(const char *dirent, apr_pool_t *pool)
882 {
883   /* Internally, Subversion represents the current directory with the
884      empty string.  But users like to see "." . */
885   if (SVN_PATH_IS_EMPTY(dirent))
886     return ".";
887 
888 #if '/' != SVN_PATH_LOCAL_SEPARATOR
889     {
890       char *p = apr_pstrdup(pool, dirent);
891       dirent = p;
892 
893       /* Convert all canonical separators to the local-style ones. */
894       for (; *p != '\0'; ++p)
895         if (*p == '/')
896           *p = SVN_PATH_LOCAL_SEPARATOR;
897     }
898 #endif
899 
900   return dirent;
901 }
902 
903 const char *
svn_relpath__internal_style(const char * relpath,apr_pool_t * pool)904 svn_relpath__internal_style(const char *relpath,
905                             apr_pool_t *pool)
906 {
907   return svn_relpath_canonicalize(internal_style(relpath, pool), pool);
908 }
909 
910 
911 /* We decided against using apr_filepath_root here because of the negative
912    performance impact (creating a pool and converting strings ). */
913 svn_boolean_t
svn_dirent_is_root(const char * dirent,apr_size_t len)914 svn_dirent_is_root(const char *dirent, apr_size_t len)
915 {
916 #ifdef SVN_USE_DOS_PATHS
917   /* On Windows and Cygwin, 'H:' or 'H:/' (where 'H' is any letter)
918      are also root directories */
919   if ((len == 2 || ((len == 3) && (dirent[2] == '/'))) &&
920       (dirent[1] == ':') &&
921       ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
922        (dirent[0] >= 'a' && dirent[0] <= 'z')))
923     return TRUE;
924 
925   /* On Windows and Cygwin //server/share is a root directory,
926      and on Cygwin //drive is a drive alias */
927   if (len >= 2 && dirent[0] == '/' && dirent[1] == '/'
928       && dirent[len - 1] != '/')
929     {
930       int segments = 0;
931       apr_size_t i;
932       for (i = len; i >= 2; i--)
933         {
934           if (dirent[i] == '/')
935             {
936               segments ++;
937               if (segments > 1)
938                 return FALSE;
939             }
940         }
941 #ifdef __CYGWIN__
942       return (segments <= 1);
943 #else
944       return (segments == 1); /* //drive is invalid on plain Windows */
945 #endif
946     }
947 #endif
948 
949   /* directory is root if it's equal to '/' */
950   if (len == 1 && dirent[0] == '/')
951     return TRUE;
952 
953   return FALSE;
954 }
955 
956 svn_boolean_t
svn_uri_is_root(const char * uri,apr_size_t len)957 svn_uri_is_root(const char *uri, apr_size_t len)
958 {
959   assert(svn_uri_is_canonical(uri, NULL));
960   return (len == uri_schema_root_length(uri, len));
961 }
962 
/* Join BASE and COMPONENT into a single dirent allocated from POOL.
   Both inputs are asserted to be canonical.

   - An absolute COMPONENT, or any COMPONENT joined to an empty BASE,
     is returned as a copy of COMPONENT.
   - An empty COMPONENT yields a copy of BASE.
   - Otherwise the two are concatenated, with a '/' inserted unless BASE
     already ends in a separator.

   With SVN_USE_DOS_PATHS, a COMPONENT starting with '/' is treated as
   relative to the root of BASE, and any other rooted COMPONENT replaces
   BASE entirely. */
char *svn_dirent_join(const char *base,
                      const char *component,
                      apr_pool_t *pool)
{
  apr_size_t base_len = strlen(base);
  apr_size_t comp_len = strlen(component);
  char *joined;
  char last;
  int need_slash;

  assert(svn_dirent_is_canonical(base, pool));
  assert(svn_dirent_is_canonical(component, pool));

  /* An absolute component wins outright; an empty base contributes
     nothing, so the component is the answer in that case as well. */
  if (svn_dirent_is_absolute(component) || SVN_PATH_IS_EMPTY(base))
    return apr_pmemdup(pool, component, comp_len + 1);

  /* Nothing to append: the base is the answer. */
  if (SVN_PATH_IS_EMPTY(component))
    return apr_pmemdup(pool, base, base_len + 1);

#ifdef SVN_USE_DOS_PATHS
  if (component[0] == '/')
    {
      /* '/' is drive relative on Windows, not absolute like on Posix */
      if (! dirent_is_rooted(base))
        return apr_pmemdup(pool, component, comp_len + 1);

      /* Keep only the root of BASE and drop the leading '/' of
         COMPONENT before joining the two. */
      base_len = dirent_root_length(base, base_len);
      component++;
      comp_len--;

      if (base_len == 2 && base[1] == ':') /* "C:" case */
        {
          /* Build "C:/"; the copy is not NUL-terminated, and does not
             need to be since only BASE_LEN bytes of it are used. */
          char *root = apr_pmemdup(pool, base, 3);
          root[2] = '/';

          base = root;
          base_len = 3;
        }

      if (comp_len == 0)
        return apr_pstrndup(pool, base, base_len);
    }
  else if (dirent_is_rooted(component))
    return apr_pmemdup(pool, component, comp_len + 1);
#endif /* SVN_USE_DOS_PATHS */

  /* A '/' is needed only when BASE does not already end in a separator. */
  last = base[base_len - 1];
  need_slash = (last != '/'
#ifdef SVN_USE_DOS_PATHS
                && last != ':'
#endif
               );

  /* Assemble BASE [+ '/'] + COMPONENT, including COMPONENT's NUL. */
  joined = apr_palloc(pool, base_len + need_slash + comp_len + 1);
  memcpy(joined, base, base_len);
  if (need_slash)
    joined[base_len] = '/';
  memcpy(joined + base_len + need_slash, component, comp_len + 1);

  return joined;
}
1033 
/* Join BASE with every following `const char *' argument, up to a
   terminating NULL argument, and return the result allocated from POOL.

   BASE and every component are asserted to be canonical.  Empty
   components are skipped.  A rooted component discards everything
   joined so far and becomes the new starting point (with
   SVN_USE_DOS_PATHS, a rooted but non-absolute component is first
   resolved against the current base via svn_dirent_join()).

   The function makes two passes over the argument list: the first
   computes the total length (caching the first few component lengths in
   SAVED_LENGTHS), the second copies the components into the result. */
char *svn_dirent_join_many(apr_pool_t *pool, const char *base, ...)
{
#define MAX_SAVED_LENGTHS 10
  apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
  apr_size_t total_len;
  int nargs;
  va_list va;
  const char *s;
  apr_size_t len;
  char *dirent;
  char *p;
  int add_separator;
  int base_arg = 0;

  total_len = strlen(base);

  assert(svn_dirent_is_canonical(base, pool));

  /* if last character of base is already a separator, don't add a '/' */
  add_separator = 1;
  if (total_len == 0
       || base[total_len - 1] == '/'
#ifdef SVN_USE_DOS_PATHS
       || base[total_len - 1] == ':'
#endif
        )
          add_separator = 0;

  /* Slot 0 always holds the length of the current base. */
  saved_lengths[0] = total_len;

  /* Compute the length of the resulting string. */

  nargs = 0;
  va_start(va, base);
  while ((s = va_arg(va, const char *)) != NULL)
    {
      len = strlen(s);

      assert(svn_dirent_is_canonical(s, pool));

      if (SVN_PATH_IS_EMPTY(s))
        continue;

      /* Cache this component's length in slot NARGS (1-based).  NARGS is
         incremented before the bounds check so that the check applies to
         the slot actually written; components beyond the cache are
         re-measured with strlen() in the copy pass below. */
      ++nargs;
      if (nargs < MAX_SAVED_LENGTHS)
        saved_lengths[nargs] = len;

      if (dirent_is_rooted(s))
        {
          /* A rooted component restarts the result at this argument. */
          total_len = len;
          base_arg = nargs;

#ifdef SVN_USE_DOS_PATHS
          if (!svn_dirent_is_absolute(s)) /* Handle non absolute roots */
            {
              /* Set new base and skip the current argument */
              base = s = svn_dirent_join(base, s, pool);
              base_arg++;
              saved_lengths[0] = total_len = len = strlen(s);
            }
          else
#endif /* SVN_USE_DOS_PATHS */
            {
              base = ""; /* Don't add base */
              saved_lengths[0] = 0;
            }

          /* Recompute whether a '/' must follow the new starting point. */
          add_separator = 1;
          if (s[len - 1] == '/'
#ifdef SVN_USE_DOS_PATHS
             || s[len - 1] == ':'
#endif
              )
             add_separator = 0;
        }
      else if (nargs <= base_arg + 1)
        {
          /* First component after the starting point: a separator is
             only needed if the starting point does not end in one. */
          total_len += add_separator + len;
        }
      else
        {
          total_len += 1 + len;
        }
    }
  va_end(va);

  /* base == "/" and no further components. just return that. */
  if (add_separator == 0 && total_len == 1)
    return apr_pmemdup(pool, "/", 2);

  /* we got the total size. allocate it, with room for a NULL character. */
  dirent = p = apr_palloc(pool, total_len + 1);

  /* if we aren't supposed to skip forward to an absolute component, and if
     this is not an empty base that we are skipping, then copy the base
     into the output. */
  if (! SVN_PATH_IS_EMPTY(base))
    {
      memcpy(p, base, len = saved_lengths[0]);
      p += len;
    }

  nargs = 0;
  va_start(va, base);
  while ((s = va_arg(va, const char *)) != NULL)
    {
      if (SVN_PATH_IS_EMPTY(s))
        continue;

      /* Skip everything that precedes the starting point chosen above. */
      if (++nargs < base_arg)
        continue;

      if (nargs < MAX_SAVED_LENGTHS)
        len = saved_lengths[nargs];
      else
        len = strlen(s);

      /* insert a separator if we aren't copying in the first component
         (which can happen when base_arg is set). also, don't put in a slash
         if the prior character is a slash (occurs when prior component
         is "/"). */
      if (p != dirent &&
          ( ! (nargs - 1 <= base_arg) || add_separator))
        *p++ = '/';

      /* copy the new component and advance the pointer */
      memcpy(p, s, len);
      p += len;
    }
  va_end(va);

  *p = '\0';
  assert((apr_size_t)(p - dirent) == total_len);

  return dirent;
}
1169 
1170 char *
svn_relpath_join(const char * base,const char * component,apr_pool_t * pool)1171 svn_relpath_join(const char *base,
1172                  const char *component,
1173                  apr_pool_t *pool)
1174 {
1175   apr_size_t blen = strlen(base);
1176   apr_size_t clen = strlen(component);
1177   char *path;
1178 
1179   assert(relpath_is_canonical(base));
1180   assert(relpath_is_canonical(component));
1181 
1182   /* If either is empty return the other */
1183   if (blen == 0)
1184     return apr_pmemdup(pool, component, clen + 1);
1185   if (clen == 0)
1186     return apr_pmemdup(pool, base, blen + 1);
1187 
1188   path = apr_palloc(pool, blen + 1 + clen + 1);
1189   memcpy(path, base, blen);
1190   path[blen] = '/';
1191   memcpy(path + blen + 1, component, clen + 1);
1192 
1193   return path;
1194 }
1195 
1196 char *
svn_dirent_dirname(const char * dirent,apr_pool_t * pool)1197 svn_dirent_dirname(const char *dirent, apr_pool_t *pool)
1198 {
1199   apr_size_t len = strlen(dirent);
1200 
1201   assert(svn_dirent_is_canonical(dirent, pool));
1202 
1203   if (len == dirent_root_length(dirent, len))
1204     return apr_pstrmemdup(pool, dirent, len);
1205   else
1206     return apr_pstrmemdup(pool, dirent, dirent_previous_segment(dirent, len));
1207 }
1208 
1209 const char *
svn_dirent_basename(const char * dirent,apr_pool_t * pool)1210 svn_dirent_basename(const char *dirent, apr_pool_t *pool)
1211 {
1212   apr_size_t len = strlen(dirent);
1213   apr_size_t start;
1214 
1215   assert(!pool || svn_dirent_is_canonical(dirent, pool));
1216 
1217   if (svn_dirent_is_root(dirent, len))
1218     return "";
1219   else
1220     {
1221       start = len;
1222       while (start > 0 && dirent[start - 1] != '/'
1223 #ifdef SVN_USE_DOS_PATHS
1224              && dirent[start - 1] != ':'
1225 #endif
1226             )
1227         --start;
1228     }
1229 
1230   if (pool)
1231     return apr_pstrmemdup(pool, dirent + start, len - start);
1232   else
1233     return dirent + start;
1234 }
1235 
1236 void
svn_dirent_split(const char ** dirpath,const char ** base_name,const char * dirent,apr_pool_t * pool)1237 svn_dirent_split(const char **dirpath,
1238                  const char **base_name,
1239                  const char *dirent,
1240                  apr_pool_t *pool)
1241 {
1242   assert(dirpath != base_name);
1243 
1244   if (dirpath)
1245     *dirpath = svn_dirent_dirname(dirent, pool);
1246 
1247   if (base_name)
1248     *base_name = svn_dirent_basename(dirent, pool);
1249 }
1250 
1251 char *
svn_relpath_dirname(const char * relpath,apr_pool_t * pool)1252 svn_relpath_dirname(const char *relpath,
1253                     apr_pool_t *pool)
1254 {
1255   apr_size_t len = strlen(relpath);
1256 
1257   assert(relpath_is_canonical(relpath));
1258 
1259   return apr_pstrmemdup(pool, relpath,
1260                         relpath_previous_segment(relpath, len));
1261 }
1262 
1263 const char *
svn_relpath_basename(const char * relpath,apr_pool_t * pool)1264 svn_relpath_basename(const char *relpath,
1265                      apr_pool_t *pool)
1266 {
1267   apr_size_t len = strlen(relpath);
1268   apr_size_t start;
1269 
1270   assert(relpath_is_canonical(relpath));
1271 
1272   start = len;
1273   while (start > 0 && relpath[start - 1] != '/')
1274     --start;
1275 
1276   if (pool)
1277     return apr_pstrmemdup(pool, relpath + start, len - start);
1278   else
1279     return relpath + start;
1280 }
1281 
1282 void
svn_relpath_split(const char ** dirpath,const char ** base_name,const char * relpath,apr_pool_t * pool)1283 svn_relpath_split(const char **dirpath,
1284                   const char **base_name,
1285                   const char *relpath,
1286                   apr_pool_t *pool)
1287 {
1288   assert(dirpath != base_name);
1289 
1290   if (dirpath)
1291     *dirpath = svn_relpath_dirname(relpath, pool);
1292 
1293   if (base_name)
1294     *base_name = svn_relpath_basename(relpath, pool);
1295 }
1296 
1297 char *
svn_uri_dirname(const char * uri,apr_pool_t * pool)1298 svn_uri_dirname(const char *uri, apr_pool_t *pool)
1299 {
1300   apr_size_t len = strlen(uri);
1301 
1302   assert(svn_uri_is_canonical(uri, pool));
1303 
1304   if (svn_uri_is_root(uri, len))
1305     return apr_pstrmemdup(pool, uri, len);
1306   else
1307     return apr_pstrmemdup(pool, uri, uri_previous_segment(uri, len));
1308 }
1309 
1310 const char *
svn_uri_basename(const char * uri,apr_pool_t * pool)1311 svn_uri_basename(const char *uri, apr_pool_t *pool)
1312 {
1313   apr_size_t len = strlen(uri);
1314   apr_size_t start;
1315 
1316   assert(svn_uri_is_canonical(uri, NULL));
1317 
1318   if (svn_uri_is_root(uri, len))
1319     return "";
1320 
1321   start = len;
1322   while (start > 0 && uri[start - 1] != '/')
1323     --start;
1324 
1325   return svn_path_uri_decode(uri + start, pool);
1326 }
1327 
1328 void
svn_uri_split(const char ** dirpath,const char ** base_name,const char * uri,apr_pool_t * pool)1329 svn_uri_split(const char **dirpath,
1330               const char **base_name,
1331               const char *uri,
1332               apr_pool_t *pool)
1333 {
1334   assert(dirpath != base_name);
1335 
1336   if (dirpath)
1337     *dirpath = svn_uri_dirname(uri, pool);
1338 
1339   if (base_name)
1340     *base_name = svn_uri_basename(uri, pool);
1341 }
1342 
1343 char *
svn_dirent_get_longest_ancestor(const char * dirent1,const char * dirent2,apr_pool_t * pool)1344 svn_dirent_get_longest_ancestor(const char *dirent1,
1345                                 const char *dirent2,
1346                                 apr_pool_t *pool)
1347 {
1348   return apr_pstrndup(pool, dirent1,
1349                       get_longest_ancestor_length(type_dirent, dirent1,
1350                                                   dirent2, pool));
1351 }
1352 
1353 char *
svn_relpath_get_longest_ancestor(const char * relpath1,const char * relpath2,apr_pool_t * pool)1354 svn_relpath_get_longest_ancestor(const char *relpath1,
1355                                  const char *relpath2,
1356                                  apr_pool_t *pool)
1357 {
1358   assert(relpath_is_canonical(relpath1));
1359   assert(relpath_is_canonical(relpath2));
1360 
1361   return apr_pstrndup(pool, relpath1,
1362                       get_longest_ancestor_length(type_relpath, relpath1,
1363                                                   relpath2, pool));
1364 }
1365 
1366 char *
svn_uri_get_longest_ancestor(const char * uri1,const char * uri2,apr_pool_t * pool)1367 svn_uri_get_longest_ancestor(const char *uri1,
1368                              const char *uri2,
1369                              apr_pool_t *pool)
1370 {
1371   apr_size_t uri_ancestor_len;
1372   apr_size_t i = 0;
1373 
1374   assert(svn_uri_is_canonical(uri1, NULL));
1375   assert(svn_uri_is_canonical(uri2, NULL));
1376 
1377   /* Find ':' */
1378   while (1)
1379     {
1380       /* No shared protocol => no common prefix */
1381       if (uri1[i] != uri2[i])
1382         return apr_pmemdup(pool, SVN_EMPTY_PATH,
1383                            sizeof(SVN_EMPTY_PATH));
1384 
1385       if (uri1[i] == ':')
1386         break;
1387 
1388       /* They're both URLs, so EOS can't come before ':' */
1389       assert((uri1[i] != '\0') && (uri2[i] != '\0'));
1390 
1391       i++;
1392     }
1393 
1394   i += 3;  /* Advance past '://' */
1395 
1396   uri_ancestor_len = get_longest_ancestor_length(type_uri, uri1 + i,
1397                                                  uri2 + i, pool);
1398 
1399   if (uri_ancestor_len == 0 ||
1400       (uri_ancestor_len == 1 && (uri1 + i)[0] == '/'))
1401     return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
1402   else
1403     return apr_pstrndup(pool, uri1, uri_ancestor_len + i);
1404 }
1405 
1406 const char *
svn_dirent_is_child(const char * parent_dirent,const char * child_dirent,apr_pool_t * pool)1407 svn_dirent_is_child(const char *parent_dirent,
1408                     const char *child_dirent,
1409                     apr_pool_t *pool)
1410 {
1411   return is_child(type_dirent, parent_dirent, child_dirent, pool);
1412 }
1413 
1414 const char *
svn_dirent_skip_ancestor(const char * parent_dirent,const char * child_dirent)1415 svn_dirent_skip_ancestor(const char *parent_dirent,
1416                          const char *child_dirent)
1417 {
1418   apr_size_t len = strlen(parent_dirent);
1419   apr_size_t root_len;
1420 
1421   if (0 != strncmp(parent_dirent, child_dirent, len))
1422     return NULL; /* parent_dirent is no ancestor of child_dirent */
1423 
1424   if (child_dirent[len] == 0)
1425     return ""; /* parent_dirent == child_dirent */
1426 
1427   /* Child == parent + more-characters */
1428 
1429   root_len = dirent_root_length(child_dirent, strlen(child_dirent));
1430   if (root_len > len)
1431     /* Different root, e.g. ("" "/...") or ("//z" "//z/share") */
1432     return NULL;
1433 
1434   /* Now, child == [root-of-parent] + [rest-of-parent] + more-characters.
1435    * It must be one of the following forms.
1436    *
1437    * rlen parent    child       bad?  rlen=len? c[len]=/?
1438    *  0   ""        "foo"               *
1439    *  0   "b"       "bad"         !
1440    *  0   "b"       "b/foo"                       *
1441    *  1   "/"       "/foo"              *
1442    *  1   "/b"      "/bad"        !
1443    *  1   "/b"      "/b/foo"                      *
1444    *  2   "a:"      "a:foo"             *
1445    *  2   "a:b"     "a:bad"       !
1446    *  2   "a:b"     "a:b/foo"                     *
1447    *  3   "a:/"     "a:/foo"            *
1448    *  3   "a:/b"    "a:/bad"      !
1449    *  3   "a:/b"    "a:/b/foo"                    *
1450    *  5   "//s/s"   "//s/s/foo"         *         *
1451    *  5   "//s/s/b" "//s/s/bad"   !
1452    *  5   "//s/s/b" "//s/s/b/foo"                 *
1453    */
1454 
1455   if (child_dirent[len] == '/')
1456     /* "parent|child" is one of:
1457      * "[a:]b|/foo" "[a:]/b|/foo" "//s/s|/foo" "//s/s/b|/foo" */
1458     return child_dirent + len + 1;
1459 
1460   if (root_len == len)
1461     /* "parent|child" is "|foo" "/|foo" "a:|foo" "a:/|foo" "//s/s|/foo" */
1462     return child_dirent + len;
1463 
1464   return NULL;
1465 }
1466 
1467 const char *
svn_relpath_skip_ancestor(const char * parent_relpath,const char * child_relpath)1468 svn_relpath_skip_ancestor(const char *parent_relpath,
1469                           const char *child_relpath)
1470 {
1471   apr_size_t len = strlen(parent_relpath);
1472 
1473   assert(relpath_is_canonical(parent_relpath));
1474   assert(relpath_is_canonical(child_relpath));
1475 
1476   if (len == 0)
1477     return child_relpath;
1478 
1479   if (0 != strncmp(parent_relpath, child_relpath, len))
1480     return NULL; /* parent_relpath is no ancestor of child_relpath */
1481 
1482   if (child_relpath[len] == 0)
1483     return ""; /* parent_relpath == child_relpath */
1484 
1485   if (child_relpath[len] == '/')
1486     return child_relpath + len + 1;
1487 
1488   return NULL;
1489 }
1490 
1491 
1492 /* */
1493 static const char *
uri_skip_ancestor(const char * parent_uri,const char * child_uri)1494 uri_skip_ancestor(const char *parent_uri,
1495                   const char *child_uri)
1496 {
1497   apr_size_t len = strlen(parent_uri);
1498 
1499   assert(svn_uri_is_canonical(parent_uri, NULL));
1500   assert(svn_uri_is_canonical(child_uri, NULL));
1501 
1502   if (0 != strncmp(parent_uri, child_uri, len))
1503     return NULL; /* parent_uri is no ancestor of child_uri */
1504 
1505   if (child_uri[len] == 0)
1506     return ""; /* parent_uri == child_uri */
1507 
1508   if (child_uri[len] == '/')
1509     return child_uri + len + 1;
1510 
1511   return NULL;
1512 }
1513 
1514 const char *
svn_uri_skip_ancestor(const char * parent_uri,const char * child_uri,apr_pool_t * result_pool)1515 svn_uri_skip_ancestor(const char *parent_uri,
1516                       const char *child_uri,
1517                       apr_pool_t *result_pool)
1518 {
1519   const char *result = uri_skip_ancestor(parent_uri, child_uri);
1520 
1521   return result ? svn_path_uri_decode(result, result_pool) : NULL;
1522 }
1523 
1524 svn_boolean_t
svn_dirent_is_ancestor(const char * parent_dirent,const char * child_dirent)1525 svn_dirent_is_ancestor(const char *parent_dirent, const char *child_dirent)
1526 {
1527   return svn_dirent_skip_ancestor(parent_dirent, child_dirent) != NULL;
1528 }
1529 
1530 svn_boolean_t
svn_uri__is_ancestor(const char * parent_uri,const char * child_uri)1531 svn_uri__is_ancestor(const char *parent_uri, const char *child_uri)
1532 {
1533   return uri_skip_ancestor(parent_uri, child_uri) != NULL;
1534 }
1535 
1536 
1537 svn_boolean_t
svn_dirent_is_absolute(const char * dirent)1538 svn_dirent_is_absolute(const char *dirent)
1539 {
1540   if (! dirent)
1541     return FALSE;
1542 
1543   /* dirent is absolute if it starts with '/' on non-Windows platforms
1544      or with '//' on Windows platforms */
1545   if (dirent[0] == '/'
1546 #ifdef SVN_USE_DOS_PATHS
1547       && dirent[1] == '/' /* Single '/' depends on current drive */
1548 #endif
1549       )
1550     return TRUE;
1551 
1552   /* On Windows, dirent is also absolute when it starts with 'H:/'
1553      where 'H' is any letter. */
1554 #ifdef SVN_USE_DOS_PATHS
1555   if (((dirent[0] >= 'A' && dirent[0] <= 'Z')) &&
1556       (dirent[1] == ':') && (dirent[2] == '/'))
1557      return TRUE;
1558 #endif /* SVN_USE_DOS_PATHS */
1559 
1560   return FALSE;
1561 }
1562 
1563 svn_error_t *
svn_dirent_get_absolute(const char ** pabsolute,const char * relative,apr_pool_t * pool)1564 svn_dirent_get_absolute(const char **pabsolute,
1565                         const char *relative,
1566                         apr_pool_t *pool)
1567 {
1568   char *buffer;
1569   apr_status_t apr_err;
1570   const char *path_apr;
1571 
1572   SVN_ERR_ASSERT(! svn_path_is_url(relative));
1573 
1574   /* Merge the current working directory with the relative dirent. */
1575   SVN_ERR(svn_path_cstring_from_utf8(&path_apr, relative, pool));
1576 
1577   apr_err = apr_filepath_merge(&buffer, NULL,
1578                                path_apr,
1579                                APR_FILEPATH_NOTRELATIVE,
1580                                pool);
1581   if (apr_err)
1582     {
1583       /* In some cases when the passed path or its ancestor(s) do not exist
1584          or no longer exist apr returns an error.
1585 
1586          In many of these cases we would like to return a path anyway, when the
1587          passed path was already a safe absolute path. So check for that now to
1588          avoid an error.
1589 
1590          svn_dirent_is_absolute() doesn't perform the necessary checks to see
1591          if the path doesn't need post processing to be in the canonical absolute
1592          format.
1593          */
1594 
1595       if (svn_dirent_is_absolute(relative)
1596           && svn_dirent_is_canonical(relative, pool)
1597           && !svn_path_is_backpath_present(relative))
1598         {
1599           *pabsolute = apr_pstrdup(pool, relative);
1600           return SVN_NO_ERROR;
1601         }
1602 
1603       return svn_error_createf(SVN_ERR_BAD_FILENAME,
1604                                svn_error_create(apr_err, NULL, NULL),
1605                                _("Couldn't determine absolute path of '%s'"),
1606                                svn_dirent_local_style(relative, pool));
1607     }
1608 
1609   SVN_ERR(svn_path_cstring_to_utf8(pabsolute, buffer, pool));
1610   *pabsolute = svn_dirent_canonicalize(*pabsolute, pool);
1611   return SVN_NO_ERROR;
1612 }
1613 
1614 const char *
svn_uri_canonicalize(const char * uri,apr_pool_t * pool)1615 svn_uri_canonicalize(const char *uri, apr_pool_t *pool)
1616 {
1617   return canonicalize(type_uri, uri, pool);
1618 }
1619 
1620 const char *
svn_relpath_canonicalize(const char * relpath,apr_pool_t * pool)1621 svn_relpath_canonicalize(const char *relpath, apr_pool_t *pool)
1622 {
1623   return canonicalize(type_relpath, relpath, pool);
1624 }
1625 
1626 const char *
svn_dirent_canonicalize(const char * dirent,apr_pool_t * pool)1627 svn_dirent_canonicalize(const char *dirent, apr_pool_t *pool)
1628 {
1629   const char *dst = canonicalize(type_dirent, dirent, pool);
1630 
1631 #ifdef SVN_USE_DOS_PATHS
1632   /* Handle a specific case on Windows where path == "X:/". Here we have to
1633      append the final '/', as svn_path_canonicalize will chop this of. */
1634   if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
1635         (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
1636         dirent[1] == ':' && dirent[2] == '/' &&
1637         dst[3] == '\0')
1638     {
1639       char *dst_slash = apr_pcalloc(pool, 4);
1640       dst_slash[0] = canonicalize_to_upper(dirent[0]);
1641       dst_slash[1] = ':';
1642       dst_slash[2] = '/';
1643       dst_slash[3] = '\0';
1644 
1645       return dst_slash;
1646     }
1647 #endif /* SVN_USE_DOS_PATHS */
1648 
1649   return dst;
1650 }
1651 
1652 svn_boolean_t
svn_dirent_is_canonical(const char * dirent,apr_pool_t * scratch_pool)1653 svn_dirent_is_canonical(const char *dirent, apr_pool_t *scratch_pool)
1654 {
1655   const char *ptr = dirent;
1656   if (*ptr == '/')
1657     {
1658       ptr++;
1659 #ifdef SVN_USE_DOS_PATHS
1660       /* Check for UNC paths */
1661       if (*ptr == '/')
1662         {
1663           /* TODO: Scan hostname and sharename and fall back to part code */
1664 
1665           /* ### Fall back to old implementation */
1666           return (strcmp(dirent, svn_dirent_canonicalize(dirent, scratch_pool))
1667                   == 0);
1668         }
1669 #endif /* SVN_USE_DOS_PATHS */
1670     }
1671 #ifdef SVN_USE_DOS_PATHS
1672   else if (((*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z')) &&
1673            (ptr[1] == ':'))
1674     {
1675       /* The only canonical drive names are "A:"..."Z:", no lower case */
1676       if (*ptr < 'A' || *ptr > 'Z')
1677         return FALSE;
1678 
1679       ptr += 2;
1680 
1681       if (*ptr == '/')
1682         ptr++;
1683     }
1684 #endif /* SVN_USE_DOS_PATHS */
1685 
1686   return relpath_is_canonical(ptr);
1687 }
1688 
1689 static svn_boolean_t
relpath_is_canonical(const char * relpath)1690 relpath_is_canonical(const char *relpath)
1691 {
1692   const char *ptr = relpath, *seg = relpath;
1693 
1694   /* RELPATH is canonical if it has:
1695    *  - no '.' segments
1696    *  - no start and closing '/'
1697    *  - no '//'
1698    */
1699 
1700   if (*relpath == '\0')
1701     return TRUE;
1702 
1703   if (*ptr == '/')
1704     return FALSE;
1705 
1706   /* Now validate the rest of the path. */
1707   while(1)
1708     {
1709       apr_size_t seglen = ptr - seg;
1710 
1711       if (seglen == 1 && *seg == '.')
1712         return FALSE;  /*  /./   */
1713 
1714       if (*ptr == '/' && *(ptr+1) == '/')
1715         return FALSE;  /*  //    */
1716 
1717       if (! *ptr && *(ptr - 1) == '/')
1718         return FALSE;  /* foo/  */
1719 
1720       if (! *ptr)
1721         break;
1722 
1723       if (*ptr == '/')
1724         ptr++;
1725       seg = ptr;
1726 
1727       while (*ptr && (*ptr != '/'))
1728         ptr++;
1729     }
1730 
1731   return TRUE;
1732 }
1733 
1734 svn_boolean_t
svn_relpath_is_canonical(const char * relpath)1735 svn_relpath_is_canonical(const char *relpath)
1736 {
1737   return relpath_is_canonical(relpath);
1738 }
1739 
1740 svn_boolean_t
svn_uri_is_canonical(const char * uri,apr_pool_t * scratch_pool)1741 svn_uri_is_canonical(const char *uri, apr_pool_t *scratch_pool)
1742 {
1743   const char *ptr = uri, *seg = uri;
1744   const char *schema_data = NULL;
1745 
1746   /* URI is canonical if it has:
1747    *  - lowercase URL scheme
1748    *  - lowercase URL hostname
1749    *  - no '.' segments
1750    *  - no closing '/'
1751    *  - no '//'
1752    *  - uppercase hex-encoded pair digits ("%AB", not "%ab")
1753    */
1754 
1755   if (*uri == '\0')
1756     return FALSE;
1757 
1758   if (! svn_path_is_url(uri))
1759     return FALSE;
1760 
1761   /* Skip the scheme. */
1762   while (*ptr && (*ptr != '/') && (*ptr != ':'))
1763     ptr++;
1764 
1765   /* No scheme?  No good. */
1766   if (! (*ptr == ':' && *(ptr+1) == '/' && *(ptr+2) == '/'))
1767     return FALSE;
1768 
1769   /* Found a scheme, check that it's all lowercase. */
1770   ptr = uri;
1771   while (*ptr != ':')
1772     {
1773       if (*ptr >= 'A' && *ptr <= 'Z')
1774         return FALSE;
1775       ptr++;
1776     }
1777   /* Skip :// */
1778   ptr += 3;
1779 
1780   /* Scheme only?  That works. */
1781   if (! *ptr)
1782     return TRUE;
1783 
1784   /* This might be the hostname */
1785   seg = ptr;
1786   while (*ptr && (*ptr != '/') && (*ptr != '@'))
1787     ptr++;
1788 
1789   if (*ptr == '@')
1790     seg = ptr + 1;
1791 
1792   /* Found a hostname, check that it's all lowercase. */
1793   ptr = seg;
1794 
1795   if (*ptr == '[')
1796     {
1797       ptr++;
1798       while (*ptr == ':'
1799              || (*ptr >= '0' && *ptr <= '9')
1800              || (*ptr >= 'a' && *ptr <= 'f'))
1801         {
1802           ptr++;
1803         }
1804 
1805       if (*ptr != ']')
1806         return FALSE;
1807       ptr++;
1808     }
1809   else
1810     while (*ptr && *ptr != '/' && *ptr != ':')
1811       {
1812         if (*ptr >= 'A' && *ptr <= 'Z')
1813           return FALSE;
1814         ptr++;
1815       }
1816 
1817   /* Found a portnumber */
1818   if (*ptr == ':')
1819     {
1820       apr_int64_t port = 0;
1821 
1822       ptr++;
1823       schema_data = ptr;
1824 
1825       while (*ptr >= '0' && *ptr <= '9')
1826         {
1827           port = 10 * port + (*ptr - '0');
1828           ptr++;
1829         }
1830 
1831       if (ptr == schema_data)
1832         return FALSE; /* Fail on "http://host:" */
1833 
1834       if (*ptr && *ptr != '/')
1835         return FALSE; /* Not a port number */
1836 
1837       if (port == 80 && strncmp(uri, "http:", 5) == 0)
1838         return FALSE;
1839       else if (port == 443 && strncmp(uri, "https:", 6) == 0)
1840         return FALSE;
1841       else if (port == 3690 && strncmp(uri, "svn:", 4) == 0)
1842         return FALSE;
1843     }
1844 
1845   schema_data = ptr;
1846 
1847 #ifdef SVN_USE_DOS_PATHS
1848   if (schema_data && *ptr == '/')
1849     {
1850       /* If this is a file url, ptr now points to the third '/' in
1851          file:///C:/path. Check that if we have such a URL the drive
1852          letter is in uppercase. */
1853       if (strncmp(uri, "file:", 5) == 0 &&
1854           ! (*(ptr+1) >= 'A' && *(ptr+1) <= 'Z') &&
1855           *(ptr+2) == ':')
1856         return FALSE;
1857     }
1858 #endif /* SVN_USE_DOS_PATHS */
1859 
1860   /* Now validate the rest of the URI. */
1861   seg = ptr;
1862   while (*ptr && (*ptr != '/'))
1863     ptr++;
1864   while(1)
1865     {
1866       apr_size_t seglen = ptr - seg;
1867 
1868       if (seglen == 1 && *seg == '.')
1869         return FALSE;  /*  /./   */
1870 
1871       if (*ptr == '/' && *(ptr+1) == '/')
1872         return FALSE;  /*  //    */
1873 
1874       if (! *ptr && *(ptr - 1) == '/' && ptr - 1 != uri)
1875         return FALSE;  /* foo/  */
1876 
1877       if (! *ptr)
1878         break;
1879 
1880       if (*ptr == '/')
1881         ptr++;
1882 
1883       seg = ptr;
1884       while (*ptr && (*ptr != '/'))
1885         ptr++;
1886     }
1887 
1888   ptr = schema_data;
1889 
1890   while (*ptr)
1891     {
1892       if (*ptr == '%')
1893         {
1894           char digitz[3];
1895           int val;
1896 
1897           /* Can't usesvn_ctype_isxdigit() because lower case letters are
1898              not in our canonical format */
1899           if (((*(ptr+1) < '0' || *(ptr+1) > '9'))
1900               && (*(ptr+1) < 'A' || *(ptr+1) > 'F'))
1901             return FALSE;
1902           else if (((*(ptr+2) < '0' || *(ptr+2) > '9'))
1903                    && (*(ptr+2) < 'A' || *(ptr+2) > 'F'))
1904             return FALSE;
1905 
1906           digitz[0] = *(++ptr);
1907           digitz[1] = *(++ptr);
1908           digitz[2] = '\0';
1909           val = (int)strtol(digitz, NULL, 16);
1910 
1911           if (svn_uri__char_validity[val])
1912             return FALSE; /* Should not have been escaped */
1913         }
1914       else if (*ptr != '/' && !svn_uri__char_validity[(unsigned char)*ptr])
1915         return FALSE; /* Character should have been escaped */
1916       ptr++;
1917     }
1918 
1919   return TRUE;
1920 }
1921 
1922 svn_error_t *
svn_dirent_condense_targets(const char ** pcommon,apr_array_header_t ** pcondensed_targets,const apr_array_header_t * targets,svn_boolean_t remove_redundancies,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1923 svn_dirent_condense_targets(const char **pcommon,
1924                             apr_array_header_t **pcondensed_targets,
1925                             const apr_array_header_t *targets,
1926                             svn_boolean_t remove_redundancies,
1927                             apr_pool_t *result_pool,
1928                             apr_pool_t *scratch_pool)
1929 {
1930   int i, num_condensed = targets->nelts;
1931   svn_boolean_t *removed;
1932   apr_array_header_t *abs_targets;
1933 
1934   /* Early exit when there's no data to work on. */
1935   if (targets->nelts <= 0)
1936     {
1937       *pcommon = NULL;
1938       if (pcondensed_targets)
1939         *pcondensed_targets = NULL;
1940       return SVN_NO_ERROR;
1941     }
1942 
1943   /* Get the absolute path of the first target. */
1944   SVN_ERR(svn_dirent_get_absolute(pcommon,
1945                                   APR_ARRAY_IDX(targets, 0, const char *),
1946                                   scratch_pool));
1947 
1948   /* Early exit when there's only one dirent to work on. */
1949   if (targets->nelts == 1)
1950     {
1951       *pcommon = apr_pstrdup(result_pool, *pcommon);
1952       if (pcondensed_targets)
1953         *pcondensed_targets = apr_array_make(result_pool, 0,
1954                                              sizeof(const char *));
1955       return SVN_NO_ERROR;
1956     }
1957 
1958   /* Copy the targets array, but with absolute dirents instead of
1959      relative.  Also, find the pcommon argument by finding what is
1960      common in all of the absolute dirents. NOTE: This is not as
1961      efficient as it could be.  The calculation of the basedir could
1962      be done in the loop below, which would save some calls to
1963      svn_dirent_get_longest_ancestor.  I decided to do it this way
1964      because I thought it would be simpler, since this way, we don't
1965      even do the loop if we don't need to condense the targets. */
1966 
1967   removed = apr_pcalloc(scratch_pool, (targets->nelts *
1968                                           sizeof(svn_boolean_t)));
1969   abs_targets = apr_array_make(scratch_pool, targets->nelts,
1970                                sizeof(const char *));
1971 
1972   APR_ARRAY_PUSH(abs_targets, const char *) = *pcommon;
1973 
1974   for (i = 1; i < targets->nelts; ++i)
1975     {
1976       const char *rel = APR_ARRAY_IDX(targets, i, const char *);
1977       const char *absolute;
1978       SVN_ERR(svn_dirent_get_absolute(&absolute, rel, scratch_pool));
1979       APR_ARRAY_PUSH(abs_targets, const char *) = absolute;
1980       *pcommon = svn_dirent_get_longest_ancestor(*pcommon, absolute,
1981                                                  scratch_pool);
1982     }
1983 
1984   *pcommon = apr_pstrdup(result_pool, *pcommon);
1985 
1986   if (pcondensed_targets != NULL)
1987     {
1988       size_t basedir_len;
1989 
1990       if (remove_redundancies)
1991         {
1992           /* Find the common part of each pair of targets.  If
1993              common part is equal to one of the dirents, the other
1994              is a child of it, and can be removed.  If a target is
1995              equal to *pcommon, it can also be removed. */
1996 
1997           /* First pass: when one non-removed target is a child of
1998              another non-removed target, remove the child. */
1999           for (i = 0; i < abs_targets->nelts; ++i)
2000             {
2001               int j;
2002 
2003               if (removed[i])
2004                 continue;
2005 
2006               for (j = i + 1; j < abs_targets->nelts; ++j)
2007                 {
2008                   const char *abs_targets_i;
2009                   const char *abs_targets_j;
2010                   const char *ancestor;
2011 
2012                   if (removed[j])
2013                     continue;
2014 
2015                   abs_targets_i = APR_ARRAY_IDX(abs_targets, i, const char *);
2016                   abs_targets_j = APR_ARRAY_IDX(abs_targets, j, const char *);
2017 
2018                   ancestor = svn_dirent_get_longest_ancestor
2019                     (abs_targets_i, abs_targets_j, scratch_pool);
2020 
2021                   if (*ancestor == '\0')
2022                     continue;
2023 
2024                   if (strcmp(ancestor, abs_targets_i) == 0)
2025                     {
2026                       removed[j] = TRUE;
2027                       num_condensed--;
2028                     }
2029                   else if (strcmp(ancestor, abs_targets_j) == 0)
2030                     {
2031                       removed[i] = TRUE;
2032                       num_condensed--;
2033                     }
2034                 }
2035             }
2036 
2037           /* Second pass: when a target is the same as *pcommon,
2038              remove the target. */
2039           for (i = 0; i < abs_targets->nelts; ++i)
2040             {
2041               const char *abs_targets_i = APR_ARRAY_IDX(abs_targets, i,
2042                                                         const char *);
2043 
2044               if ((strcmp(abs_targets_i, *pcommon) == 0) && (! removed[i]))
2045                 {
2046                   removed[i] = TRUE;
2047                   num_condensed--;
2048                 }
2049             }
2050         }
2051 
2052       /* Now create the return array, and copy the non-removed items */
2053       basedir_len = strlen(*pcommon);
2054       *pcondensed_targets = apr_array_make(result_pool, num_condensed,
2055                                            sizeof(const char *));
2056 
2057       for (i = 0; i < abs_targets->nelts; ++i)
2058         {
2059           const char *rel_item = APR_ARRAY_IDX(abs_targets, i, const char *);
2060 
2061           /* Skip this if it's been removed. */
2062           if (removed[i])
2063             continue;
2064 
2065           /* If a common prefix was found, condensed_targets are given
2066              relative to that prefix.  */
2067           if (basedir_len > 0)
2068             {
2069               /* Only advance our pointer past a dirent separator if
2070                  REL_ITEM isn't the same as *PCOMMON.
2071 
2072                  If *PCOMMON is a root dirent, basedir_len will already
2073                  include the closing '/', so never advance the pointer
2074                  here.
2075                  */
2076               rel_item += basedir_len;
2077               if (rel_item[0] &&
2078                   ! svn_dirent_is_root(*pcommon, basedir_len))
2079                 rel_item++;
2080             }
2081 
2082           APR_ARRAY_PUSH(*pcondensed_targets, const char *)
2083             = apr_pstrdup(result_pool, rel_item);
2084         }
2085     }
2086 
2087   return SVN_NO_ERROR;
2088 }
2089 
2090 svn_error_t *
svn_uri_condense_targets(const char ** pcommon,apr_array_header_t ** pcondensed_targets,const apr_array_header_t * targets,svn_boolean_t remove_redundancies,apr_pool_t * result_pool,apr_pool_t * scratch_pool)2091 svn_uri_condense_targets(const char **pcommon,
2092                          apr_array_header_t **pcondensed_targets,
2093                          const apr_array_header_t *targets,
2094                          svn_boolean_t remove_redundancies,
2095                          apr_pool_t *result_pool,
2096                          apr_pool_t *scratch_pool)
2097 {
2098   int i, num_condensed = targets->nelts;
2099   apr_array_header_t *uri_targets;
2100   svn_boolean_t *removed;
2101 
2102   /* Early exit when there's no data to work on. */
2103   if (targets->nelts <= 0)
2104     {
2105       *pcommon = NULL;
2106       if (pcondensed_targets)
2107         *pcondensed_targets = NULL;
2108       return SVN_NO_ERROR;
2109     }
2110 
2111   *pcommon = svn_uri_canonicalize(APR_ARRAY_IDX(targets, 0, const char *),
2112                                   scratch_pool);
2113 
2114   /* Early exit when there's only one uri to work on. */
2115   if (targets->nelts == 1)
2116     {
2117       *pcommon = apr_pstrdup(result_pool, *pcommon);
2118       if (pcondensed_targets)
2119         *pcondensed_targets = apr_array_make(result_pool, 0,
2120                                              sizeof(const char *));
2121       return SVN_NO_ERROR;
2122     }
2123 
2124   /* Find the pcommon argument by finding what is common in all of the
2125      uris. NOTE: This is not as efficient as it could be.  The calculation
2126      of the basedir could be done in the loop below, which would
2127      save some calls to svn_uri_get_longest_ancestor.  I decided to do it
2128      this way because I thought it would be simpler, since this way, we don't
2129      even do the loop if we don't need to condense the targets. */
2130 
2131   removed = apr_pcalloc(scratch_pool, (targets->nelts *
2132                                           sizeof(svn_boolean_t)));
2133   uri_targets = apr_array_make(scratch_pool, targets->nelts,
2134                                sizeof(const char *));
2135 
2136   APR_ARRAY_PUSH(uri_targets, const char *) = *pcommon;
2137 
2138   for (i = 1; i < targets->nelts; ++i)
2139     {
2140       const char *uri = svn_uri_canonicalize(
2141                            APR_ARRAY_IDX(targets, i, const char *),
2142                            scratch_pool);
2143       APR_ARRAY_PUSH(uri_targets, const char *) = uri;
2144 
2145       /* If the commonmost ancestor so far is empty, there's no point
2146          in continuing to search for a common ancestor at all.  But
2147          we'll keep looping for the sake of canonicalizing the
2148          targets, I suppose.  */
2149       if (**pcommon != '\0')
2150         *pcommon = svn_uri_get_longest_ancestor(*pcommon, uri,
2151                                                 scratch_pool);
2152     }
2153 
2154   *pcommon = apr_pstrdup(result_pool, *pcommon);
2155 
2156   if (pcondensed_targets != NULL)
2157     {
2158       size_t basedir_len;
2159 
2160       if (remove_redundancies)
2161         {
2162           /* Find the common part of each pair of targets.  If
2163              common part is equal to one of the dirents, the other
2164              is a child of it, and can be removed.  If a target is
2165              equal to *pcommon, it can also be removed. */
2166 
2167           /* First pass: when one non-removed target is a child of
2168              another non-removed target, remove the child. */
2169           for (i = 0; i < uri_targets->nelts; ++i)
2170             {
2171               int j;
2172 
2173               if (removed[i])
2174                 continue;
2175 
2176               for (j = i + 1; j < uri_targets->nelts; ++j)
2177                 {
2178                   const char *uri_i;
2179                   const char *uri_j;
2180                   const char *ancestor;
2181 
2182                   if (removed[j])
2183                     continue;
2184 
2185                   uri_i = APR_ARRAY_IDX(uri_targets, i, const char *);
2186                   uri_j = APR_ARRAY_IDX(uri_targets, j, const char *);
2187 
2188                   ancestor = svn_uri_get_longest_ancestor(uri_i,
2189                                                           uri_j,
2190                                                           scratch_pool);
2191 
2192                   if (*ancestor == '\0')
2193                     continue;
2194 
2195                   if (strcmp(ancestor, uri_i) == 0)
2196                     {
2197                       removed[j] = TRUE;
2198                       num_condensed--;
2199                     }
2200                   else if (strcmp(ancestor, uri_j) == 0)
2201                     {
2202                       removed[i] = TRUE;
2203                       num_condensed--;
2204                     }
2205                 }
2206             }
2207 
2208           /* Second pass: when a target is the same as *pcommon,
2209              remove the target. */
2210           for (i = 0; i < uri_targets->nelts; ++i)
2211             {
2212               const char *uri_targets_i = APR_ARRAY_IDX(uri_targets, i,
2213                                                         const char *);
2214 
2215               if ((strcmp(uri_targets_i, *pcommon) == 0) && (! removed[i]))
2216                 {
2217                   removed[i] = TRUE;
2218                   num_condensed--;
2219                 }
2220             }
2221         }
2222 
2223       /* Now create the return array, and copy the non-removed items */
2224       basedir_len = strlen(*pcommon);
2225       *pcondensed_targets = apr_array_make(result_pool, num_condensed,
2226                                            sizeof(const char *));
2227 
2228       for (i = 0; i < uri_targets->nelts; ++i)
2229         {
2230           const char *rel_item = APR_ARRAY_IDX(uri_targets, i, const char *);
2231 
2232           /* Skip this if it's been removed. */
2233           if (removed[i])
2234             continue;
2235 
2236           /* If a common prefix was found, condensed_targets are given
2237              relative to that prefix.  */
2238           if (basedir_len > 0)
2239             {
2240               /* Only advance our pointer past a dirent separator if
2241                  REL_ITEM isn't the same as *PCOMMON.
2242 
2243                  If *PCOMMON is a root dirent, basedir_len will already
2244                  include the closing '/', so never advance the pointer
2245                  here.
2246                  */
2247               rel_item += basedir_len;
2248               if ((rel_item[0] == '/') ||
2249                   (rel_item[0] && !svn_uri_is_root(*pcommon, basedir_len)))
2250                 {
2251                   rel_item++;
2252                 }
2253             }
2254 
2255           APR_ARRAY_PUSH(*pcondensed_targets, const char *)
2256             = svn_path_uri_decode(rel_item, result_pool);
2257         }
2258     }
2259 
2260   return SVN_NO_ERROR;
2261 }
2262 
2263 svn_error_t *
svn_dirent_is_under_root(svn_boolean_t * under_root,const char ** result_path,const char * base_path,const char * path,apr_pool_t * result_pool)2264 svn_dirent_is_under_root(svn_boolean_t *under_root,
2265                          const char **result_path,
2266                          const char *base_path,
2267                          const char *path,
2268                          apr_pool_t *result_pool)
2269 {
2270   apr_status_t status;
2271   char *full_path;
2272 
2273   *under_root = FALSE;
2274   if (result_path)
2275     *result_path = NULL;
2276 
2277   status = apr_filepath_merge(&full_path,
2278                               base_path,
2279                               path,
2280                               APR_FILEPATH_NOTABOVEROOT
2281                               | APR_FILEPATH_SECUREROOTTEST,
2282                               result_pool);
2283 
2284   if (status == APR_SUCCESS)
2285     {
2286       if (result_path)
2287         *result_path = svn_dirent_canonicalize(full_path, result_pool);
2288       *under_root = TRUE;
2289       return SVN_NO_ERROR;
2290     }
2291   else if (status == APR_EABOVEROOT)
2292     {
2293       *under_root = FALSE;
2294       return SVN_NO_ERROR;
2295     }
2296 
2297   return svn_error_wrap_apr(status, NULL);
2298 }
2299 
2300 svn_error_t *
svn_uri_get_dirent_from_file_url(const char ** dirent,const char * url,apr_pool_t * pool)2301 svn_uri_get_dirent_from_file_url(const char **dirent,
2302                                  const char *url,
2303                                  apr_pool_t *pool)
2304 {
2305   const char *hostname, *path;
2306 
2307   SVN_ERR_ASSERT(svn_uri_is_canonical(url, pool));
2308 
2309   /* Verify that the URL is well-formed (loosely) */
2310 
2311   /* First, check for the "file://" prefix. */
2312   if (strncmp(url, "file://", 7) != 0)
2313     return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2314                              _("Local URL '%s' does not contain 'file://' "
2315                                "prefix"), url);
2316 
2317   /* Find the HOSTNAME portion and the PATH portion of the URL.  The host
2318      name is between the "file://" prefix and the next occurence of '/'.  We
2319      are considering everything from that '/' until the end of the URL to be
2320      the absolute path portion of the URL.
2321      If we got just "file://", treat it the same as "file:///". */
2322   hostname = url + 7;
2323   path = strchr(hostname, '/');
2324   if (path)
2325     hostname = apr_pstrmemdup(pool, hostname, path - hostname);
2326   else
2327     path = "/";
2328 
2329   /* URI-decode HOSTNAME, and set it to NULL if it is "" or "localhost". */
2330   if (*hostname == '\0')
2331     hostname = NULL;
2332   else
2333     {
2334       hostname = svn_path_uri_decode(hostname, pool);
2335       if (strcmp(hostname, "localhost") == 0)
2336         hostname = NULL;
2337     }
2338 
2339   /* Duplicate the URL, starting at the top of the path.
2340      At the same time, we URI-decode the path. */
2341 #ifdef SVN_USE_DOS_PATHS
2342   /* On Windows, we'll typically have to skip the leading / if the
2343      path starts with a drive letter.  Like most Web browsers, We
2344      support two variants of this scheme:
2345 
2346          file:///X:/path    and
2347          file:///X|/path
2348 
2349     Note that, at least on WinNT and above,  file:////./X:/path  will
2350     also work, so we must make sure the transformation doesn't break
2351     that, and  file:///path  (that looks within the current drive
2352     only) should also keep working.
2353     If we got a non-empty hostname other than localhost, we convert this
2354     into an UNC path.  In this case, we obviously don't strip the slash
2355     even if the path looks like it starts with a drive letter.
2356   */
2357   {
2358     static const char valid_drive_letters[] =
2359       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
2360     /* Casting away const! */
2361     char *dup_path = (char *)svn_path_uri_decode(path, pool);
2362 
2363     /* This check assumes ':' and '|' are already decoded! */
2364     if (!hostname && dup_path[1] && strchr(valid_drive_letters, dup_path[1])
2365         && (dup_path[2] == ':' || dup_path[2] == '|'))
2366       {
2367         /* Skip the leading slash. */
2368         ++dup_path;
2369 
2370         if (dup_path[1] == '|')
2371           dup_path[1] = ':';
2372 
2373         if (dup_path[2] == '/' || dup_path[2] == '\0')
2374           {
2375             if (dup_path[2] == '\0')
2376               {
2377                 /* A valid dirent for the driveroot must be like "C:/" instead of
2378                    just "C:" or svn_dirent_join() will use the current directory
2379                    on the drive instead */
2380                 char *new_path = apr_pcalloc(pool, 4);
2381                 new_path[0] = dup_path[0];
2382                 new_path[1] = ':';
2383                 new_path[2] = '/';
2384                 new_path[3] = '\0';
2385                 dup_path = new_path;
2386               }
2387           }
2388       }
2389     if (hostname)
2390       {
2391         if (dup_path[0] == '/' && dup_path[1] == '\0')
2392           return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2393                                    _("Local URL '%s' contains only a hostname, "
2394                                      "no path"), url);
2395 
2396         /* We still know that the path starts with a slash. */
2397         *dirent = apr_pstrcat(pool, "//", hostname, dup_path, NULL);
2398       }
2399     else
2400       *dirent = dup_path;
2401   }
2402 #else /* !SVN_USE_DOS_PATHS */
2403   /* Currently, the only hostnames we are allowing on non-Win32 platforms
2404      are the empty string and 'localhost'. */
2405   if (hostname)
2406     return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2407                              _("Local URL '%s' contains unsupported hostname"),
2408                              url);
2409 
2410   *dirent = svn_path_uri_decode(path, pool);
2411 #endif /* SVN_USE_DOS_PATHS */
2412   return SVN_NO_ERROR;
2413 }
2414 
2415 svn_error_t *
svn_uri_get_file_url_from_dirent(const char ** url,const char * dirent,apr_pool_t * pool)2416 svn_uri_get_file_url_from_dirent(const char **url,
2417                                  const char *dirent,
2418                                  apr_pool_t *pool)
2419 {
2420   assert(svn_dirent_is_canonical(dirent, pool));
2421 
2422   SVN_ERR(svn_dirent_get_absolute(&dirent, dirent, pool));
2423 
2424   dirent = svn_path_uri_encode(dirent, pool);
2425 
2426 #ifndef SVN_USE_DOS_PATHS
2427   if (dirent[0] == '/' && dirent[1] == '\0')
2428     dirent = NULL; /* "file://" is the canonical form of "file:///" */
2429 
2430   *url = apr_pstrcat(pool, "file://", dirent, (char *)NULL);
2431 #else
2432   if (dirent[0] == '/')
2433     {
2434       /* Handle UNC paths //server/share -> file://server/share */
2435       assert(dirent[1] == '/'); /* Expect UNC, not non-absolute */
2436 
2437       *url = apr_pstrcat(pool, "file:", dirent, NULL);
2438     }
2439   else
2440     {
2441       char *uri = apr_pstrcat(pool, "file:///", dirent, NULL);
2442       apr_size_t len = 8 /* strlen("file:///") */ + strlen(dirent);
2443 
2444       /* "C:/" is a canonical dirent on Windows,
2445          but "file:///C:/" is not a canonical uri */
2446       if (uri[len-1] == '/')
2447         uri[len-1] = '\0';
2448 
2449       *url = uri;
2450     }
2451 #endif
2452 
2453   return SVN_NO_ERROR;
2454 }
2455 
2456 
2457 
2458 /* -------------- The fspath API (see private/svn_fspath.h) -------------- */
2459 
2460 svn_boolean_t
svn_fspath__is_canonical(const char * fspath)2461 svn_fspath__is_canonical(const char *fspath)
2462 {
2463   return fspath[0] == '/' && relpath_is_canonical(fspath + 1);
2464 }
2465 
2466 
2467 const char *
svn_fspath__canonicalize(const char * fspath,apr_pool_t * pool)2468 svn_fspath__canonicalize(const char *fspath,
2469                          apr_pool_t *pool)
2470 {
2471   if ((fspath[0] == '/') && (fspath[1] == '\0'))
2472     return "/";
2473 
2474   return apr_pstrcat(pool, "/", svn_relpath_canonicalize(fspath, pool),
2475                      (char *)NULL);
2476 }
2477 
2478 
2479 svn_boolean_t
svn_fspath__is_root(const char * fspath,apr_size_t len)2480 svn_fspath__is_root(const char *fspath, apr_size_t len)
2481 {
2482   /* directory is root if it's equal to '/' */
2483   return (len == 1 && fspath[0] == '/');
2484 }
2485 
2486 
/* Strip the leading '/' from both canonical fspaths and hand them to
   svn_relpath_skip_ancestor(), returning whatever it returns. */
const char *
svn_fspath__skip_ancestor(const char *parent_fspath,
                          const char *child_fspath)
{
  const char *parent_relpath;
  const char *child_relpath;

  assert(svn_fspath__is_canonical(parent_fspath));
  assert(svn_fspath__is_canonical(child_fspath));

  /* Step over the leading slash of each path. */
  parent_relpath = parent_fspath + 1;
  child_relpath = child_fspath + 1;

  return svn_relpath_skip_ancestor(parent_relpath, child_relpath);
}
2496 
2497 
2498 const char *
svn_fspath__dirname(const char * fspath,apr_pool_t * pool)2499 svn_fspath__dirname(const char *fspath,
2500                     apr_pool_t *pool)
2501 {
2502   assert(svn_fspath__is_canonical(fspath));
2503 
2504   if (fspath[0] == '/' && fspath[1] == '\0')
2505     return apr_pstrdup(pool, fspath);
2506   else
2507     return apr_pstrcat(pool, "/", svn_relpath_dirname(fspath + 1, pool),
2508                        (char *)NULL);
2509 }
2510 
2511 
2512 const char *
svn_fspath__basename(const char * fspath,apr_pool_t * pool)2513 svn_fspath__basename(const char *fspath,
2514                      apr_pool_t *pool)
2515 {
2516   const char *result;
2517   assert(svn_fspath__is_canonical(fspath));
2518 
2519   result = svn_relpath_basename(fspath + 1, pool);
2520 
2521   assert(strchr(result, '/') == NULL);
2522   return result;
2523 }
2524 
2525 void
svn_fspath__split(const char ** dirpath,const char ** base_name,const char * fspath,apr_pool_t * result_pool)2526 svn_fspath__split(const char **dirpath,
2527                   const char **base_name,
2528                   const char *fspath,
2529                   apr_pool_t *result_pool)
2530 {
2531   assert(dirpath != base_name);
2532 
2533   if (dirpath)
2534     *dirpath = svn_fspath__dirname(fspath, result_pool);
2535 
2536   if (base_name)
2537     *base_name = svn_fspath__basename(fspath, result_pool);
2538 }
2539 
2540 char *
svn_fspath__join(const char * fspath,const char * relpath,apr_pool_t * result_pool)2541 svn_fspath__join(const char *fspath,
2542                  const char *relpath,
2543                  apr_pool_t *result_pool)
2544 {
2545   char *result;
2546   assert(svn_fspath__is_canonical(fspath));
2547   assert(svn_relpath_is_canonical(relpath));
2548 
2549   if (relpath[0] == '\0')
2550     result = apr_pstrdup(result_pool, fspath);
2551   else if (fspath[1] == '\0')
2552     result = apr_pstrcat(result_pool, "/", relpath, (char *)NULL);
2553   else
2554     result = apr_pstrcat(result_pool, fspath, "/", relpath, (char *)NULL);
2555 
2556   assert(svn_fspath__is_canonical(result));
2557   return result;
2558 }
2559 
2560 char *
svn_fspath__get_longest_ancestor(const char * fspath1,const char * fspath2,apr_pool_t * result_pool)2561 svn_fspath__get_longest_ancestor(const char *fspath1,
2562                                  const char *fspath2,
2563                                  apr_pool_t *result_pool)
2564 {
2565   char *result;
2566   assert(svn_fspath__is_canonical(fspath1));
2567   assert(svn_fspath__is_canonical(fspath2));
2568 
2569   result = apr_pstrcat(result_pool, "/",
2570                        svn_relpath_get_longest_ancestor(fspath1 + 1,
2571                                                         fspath2 + 1,
2572                                                         result_pool),
2573                        (char *)NULL);
2574 
2575   assert(svn_fspath__is_canonical(result));
2576   return result;
2577 }
2578 
2579 
2580 
2581 
2582 /* -------------- The urlpath API (see private/svn_fspath.h) ------------- */
2583 
2584 const char *
svn_urlpath__canonicalize(const char * uri,apr_pool_t * pool)2585 svn_urlpath__canonicalize(const char *uri,
2586                           apr_pool_t *pool)
2587 {
2588   if (svn_path_is_url(uri))
2589     {
2590       uri = svn_uri_canonicalize(uri, pool);
2591     }
2592   else
2593     {
2594       uri = svn_fspath__canonicalize(uri, pool);
2595       /* Do a little dance to normalize hex encoding. */
2596       uri = svn_path_uri_decode(uri, pool);
2597       uri = svn_path_uri_encode(uri, pool);
2598     }
2599   return uri;
2600 }
2601 
2602 
2603 /* -------------- The cert API (see private/svn_cert.h) ------------- */
2604 
2605 svn_boolean_t
svn_cert__match_dns_identity(svn_string_t * pattern,svn_string_t * hostname)2606 svn_cert__match_dns_identity(svn_string_t *pattern, svn_string_t *hostname)
2607 {
2608   apr_size_t pattern_pos = 0, hostname_pos = 0;
2609 
2610   /* support leading wildcards that composed of the only character in the
2611    * left-most label. */
2612   if (pattern->len >= 2 &&
2613       pattern->data[pattern_pos] == '*' &&
2614       pattern->data[pattern_pos + 1] == '.')
2615     {
2616       while (hostname_pos < hostname->len &&
2617              hostname->data[hostname_pos] != '.')
2618         {
2619           hostname_pos++;
2620         }
2621       /* Assume that the wildcard must match something.  Rule 2 says
2622        * that *.example.com should not match example.com.  If the wildcard
2623        * ends up not matching anything then it matches .example.com which
2624        * seems to be essentially the same as just example.com */
2625       if (hostname_pos == 0)
2626         return FALSE;
2627 
2628       pattern_pos++;
2629     }
2630 
2631   while (pattern_pos < pattern->len && hostname_pos < hostname->len)
2632     {
2633       char pattern_c = pattern->data[pattern_pos];
2634       char hostname_c = hostname->data[hostname_pos];
2635 
2636       /* fold case as described in RFC 4343.
2637        * Note: We actually convert to lowercase, since our URI
2638        * canonicalization code converts to lowercase and generally
2639        * most certs are issued with lowercase DNS names, meaning
2640        * this avoids the fold operation in most cases.  The RFC
2641        * suggests the opposite transformation, but doesn't require
2642        * any specific implementation in any case.  It is critical
2643        * that this folding be locale independent so you can't use
2644        * tolower(). */
2645       pattern_c = canonicalize_to_lower(pattern_c);
2646       hostname_c = canonicalize_to_lower(hostname_c);
2647 
2648       if (pattern_c != hostname_c)
2649         {
2650           /* doesn't match */
2651           return FALSE;
2652         }
2653       else
2654         {
2655           /* characters match so skip both */
2656           pattern_pos++;
2657           hostname_pos++;
2658         }
2659     }
2660 
2661   /* ignore a trailing period on the hostname since this has no effect on the
2662    * security of the matching.  See the following for the long explanation as
2663    * to why:
2664    * https://bugzilla.mozilla.org/show_bug.cgi?id=134402#c28
2665    */
2666   if (pattern_pos == pattern->len &&
2667       hostname_pos == hostname->len - 1 &&
2668       hostname->data[hostname_pos] == '.')
2669     hostname_pos++;
2670 
2671   if (pattern_pos != pattern->len || hostname_pos != hostname->len)
2672     {
2673       /* end didn't match */
2674       return FALSE;
2675     }
2676 
2677   return TRUE;
2678 }
2679