xref: /dragonfly/lib/libfetch/fetch.c (revision c4e275d8e4e2c8b827d1f5bd091f6c26b75052fe)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1998-2004 Dag-Erling Smørgrav
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer
12  *    in this position and unchanged.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * $FreeBSD: head/lib/libfetch/fetch.c 357579 2020-02-05 16:55:00Z emaste $
31  */
32 
33 #include <sys/cdefs.h>
34 
35 #include <sys/param.h>
36 
37 #include <netinet/in.h>
38 
39 #include <errno.h>
40 #include <ctype.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 
45 #include "fetch.h"
46 #include "common.h"
47 
48 auth_t     fetchAuthMethod;
49 int        fetchLastErrCode;
50 char       fetchLastErrString[MAXERRSTRING];
51 int        fetchTimeout;
52 int        fetchRestartCalls = 1;
53 int        fetchDebug;
54 
55 
56 /*** Local data **************************************************************/
57 
58 /*
59  * Error messages for parser errors
60  */
61 #define URL_MALFORMED                   1
62 #define URL_BAD_SCHEME                  2
63 #define URL_BAD_PORT                    3
64 static struct fetcherr url_errlist[] = {
65           { URL_MALFORMED,    FETCH_URL,          "Malformed URL" },
66           { URL_BAD_SCHEME,   FETCH_URL,          "Invalid URL scheme" },
67           { URL_BAD_PORT,               FETCH_URL,          "Invalid server port" },
68           { -1,                         FETCH_UNKNOWN,      "Unknown parser error" }
69 };
70 
71 
72 /*** Public API **************************************************************/
73 
74 /*
75  * Select the appropriate protocol for the URL scheme, and return a
76  * read-only stream connected to the document referenced by the URL.
77  * Also fill out the struct url_stat.
78  */
79 FILE *
fetchXGet(struct url * URL,struct url_stat * us,const char * flags)80 fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
81 {
82 
83           if (us != NULL) {
84                     us->size = -1;
85                     us->atime = us->mtime = 0;
86           }
87           if (strcmp(URL->scheme, SCHEME_FILE) == 0)
88                     return (fetchXGetFile(URL, us, flags));
89           else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
90                     return (fetchXGetFTP(URL, us, flags));
91           else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
92                     return (fetchXGetHTTP(URL, us, flags));
93           else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
94                     return (fetchXGetHTTP(URL, us, flags));
95           url_seterr(URL_BAD_SCHEME);
96           return (NULL);
97 }
98 
/*
 * Same as fetchXGet(), but without a struct url_stat to fill in:
 * NULL is passed in its place.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
108 
109 /*
110  * Select the appropriate protocol for the URL scheme, and return a
111  * write-only stream connected to the document referenced by the URL.
112  */
113 FILE *
fetchPut(struct url * URL,const char * flags)114 fetchPut(struct url *URL, const char *flags)
115 {
116 
117           if (strcmp(URL->scheme, SCHEME_FILE) == 0)
118                     return (fetchPutFile(URL, flags));
119           else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
120                     return (fetchPutFTP(URL, flags));
121           else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
122                     return (fetchPutHTTP(URL, flags));
123           else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
124                     return (fetchPutHTTP(URL, flags));
125           url_seterr(URL_BAD_SCHEME);
126           return (NULL);
127 }
128 
129 /*
130  * Select the appropriate protocol for the URL scheme, and return the
131  * size of the document referenced by the URL if it exists.
132  */
133 int
fetchStat(struct url * URL,struct url_stat * us,const char * flags)134 fetchStat(struct url *URL, struct url_stat *us, const char *flags)
135 {
136 
137           if (us != NULL) {
138                     us->size = -1;
139                     us->atime = us->mtime = 0;
140           }
141           if (strcmp(URL->scheme, SCHEME_FILE) == 0)
142                     return (fetchStatFile(URL, us, flags));
143           else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
144                     return (fetchStatFTP(URL, us, flags));
145           else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
146                     return (fetchStatHTTP(URL, us, flags));
147           else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
148                     return (fetchStatHTTP(URL, us, flags));
149           url_seterr(URL_BAD_SCHEME);
150           return (-1);
151 }
152 
153 /*
154  * Select the appropriate protocol for the URL scheme, and return a
155  * list of files in the directory pointed to by the URL.
156  */
157 struct url_ent *
fetchList(struct url * URL,const char * flags)158 fetchList(struct url *URL, const char *flags)
159 {
160 
161           if (strcmp(URL->scheme, SCHEME_FILE) == 0)
162                     return (fetchListFile(URL, flags));
163           else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
164                     return (fetchListFTP(URL, flags));
165           else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
166                     return (fetchListHTTP(URL, flags));
167           else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
168                     return (fetchListHTTP(URL, flags));
169           url_seterr(URL_BAD_SCHEME);
170           return (NULL);
171 }
172 
/*
 * String front end to fetchXGet(): parse URL, hand the parsed form to
 * fetchXGet(), release the parsed form and return the stream.
 * Returns NULL if the URL cannot be parsed.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);

	return (stream);
}
190 
/*
 * Same as fetchXGetURL(), but without a struct url_stat to fill in:
 * NULL is passed in its place.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetURL(URL, NULL, flags);
	return (stream);
}
199 
/*
 * String front end to fetchPut(): parse URL, hand the parsed form to
 * fetchPut(), release the parsed form and return the stream.
 * Returns NULL if the URL cannot be parsed.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);

	return (stream);
}
217 
218 /*
219  * Attempt to parse the given URL; if successful, call fetchStat().
220  */
221 int
fetchStatURL(const char * URL,struct url_stat * us,const char * flags)222 fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
223 {
224           struct url *u;
225           int s;
226 
227           if ((u = fetchParseURL(URL)) == NULL)
228                     return (-1);
229 
230           s = fetchStat(u, us, flags);
231 
232           fetchFreeURL(u);
233           return (s);
234 }
235 
236 /*
237  * Attempt to parse the given URL; if successful, call fetchList().
238  */
239 struct url_ent *
fetchListURL(const char * URL,const char * flags)240 fetchListURL(const char *URL, const char *flags)
241 {
242           struct url *u;
243           struct url_ent *ue;
244 
245           if ((u = fetchParseURL(URL)) == NULL)
246                     return (NULL);
247 
248           ue = fetchList(u, flags);
249 
250           fetchFreeURL(u);
251           return (ue);
252 }
253 
254 /*
255  * Make a URL
256  */
257 struct url *
fetchMakeURL(const char * scheme,const char * host,int port,const char * doc,const char * user,const char * pwd)258 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
259     const char *user, const char *pwd)
260 {
261           struct url *u;
262 
263           if (!scheme || (!host && !doc)) {
264                     url_seterr(URL_MALFORMED);
265                     return (NULL);
266           }
267 
268           if (port < 0 || port > 65535) {
269                     url_seterr(URL_BAD_PORT);
270                     return (NULL);
271           }
272 
273           /* allocate struct url */
274           if ((u = calloc(1, sizeof(*u))) == NULL) {
275                     fetch_syserr();
276                     return (NULL);
277           }
278           u->netrcfd = -1;
279 
280           if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
281                     fetch_syserr();
282                     free(u);
283                     return (NULL);
284           }
285 
286 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
287           seturl(scheme);
288           seturl(host);
289           seturl(user);
290           seturl(pwd);
291 #undef seturl
292           u->port = port;
293 
294           return (u);
295 }
296 
/*
 * Convert a single hexadecimal digit, in either case, to its numeric
 * value (0 through 15).  Any other character yields -1.
 */
static int
fetch_hexval(char ch)
{
	int value = -1;

	if ('0' <= ch && ch <= '9')
		value = ch - '0';
	else if ('a' <= ch && ch <= 'f')
		value = 10 + (ch - 'a');
	else if ('A' <= ch && ch <= 'F')
		value = 10 + (ch - 'A');

	return (value);
}
312 
/*
 * Copy a URL component from src to dst, turning each %XX escape into the
 * byte it encodes.  Copying stops at the end of the string or at the
 * first '@' or ':' separator, and a pointer to that stopping character
 * is returned.  dst is not NUL-terminated; the caller must do that.
 *
 * NULL is returned if an escape is malformed or encodes a NUL byte
 * (%00), or if more than dlen bytes would have to be written.
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	const char *in = src;
	size_t room = dlen;
	char out;
	int hi, lo;

	while (*in != '\0' && *in != '@' && *in != ':') {
		if (*in != '%') {
			/* Ordinary character: copy it verbatim. */
			out = *in;
			in += 1;
		} else {
			/* Look at the second digit only if the first is valid. */
			hi = fetch_hexval(in[1]);
			lo = (hi >= 0) ? fetch_hexval(in[2]) : -1;
			if (hi < 0 || lo < 0 || (hi == 0 && lo == 0))
				return (NULL);
			out = hi << 4 | lo;
			in += 3;
		}

		/* Refuse to write past the end of the destination. */
		if (room == 0)
			return (NULL);
		room--;
		*dst++ = out;
	}

	return (in);
}
344 
345 /*
346  * Split an URL into components. URL syntax is:
347  * [method:/][/[user[:pwd]@]host[:port]/][document]
348  * This almost, but not quite, RFC1738 URL syntax.
349  */
350 struct url *
fetchParseURL(const char * URL)351 fetchParseURL(const char *URL)
352 {
353           char *doc;
354           const char *p, *q;
355           struct url *u;
356           int i, n;
357 
358           /* allocate struct url */
359           if ((u = calloc(1, sizeof(*u))) == NULL) {
360                     fetch_syserr();
361                     return (NULL);
362           }
363           u->netrcfd = -1;
364 
365           /* scheme name */
366           if ((p = strstr(URL, ":/"))) {
367                 if (p - URL > URL_SCHEMELEN)
368                         goto ouch;
369                 for (i = 0; URL + i < p; i++)
370                         u->scheme[i] = tolower((unsigned char)URL[i]);
371                     URL = ++p;
372                     /*
373                      * Only one slash: no host, leave slash as part of document
374                      * Two slashes: host follows, strip slashes
375                      */
376                     if (URL[1] == '/')
377                               URL = (p += 2);
378           } else {
379                     p = URL;
380           }
381           if (!*URL || *URL == '/' || *URL == '.' ||
382               (u->scheme[0] == '\0' &&
383                     strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
384                     goto nohost;
385 
386           p = strpbrk(URL, "/@");
387           if (p && *p == '@') {
388                     /* username */
389                     q = fetch_pctdecode(u->user, URL, URL_USERLEN);
390                     if (q == NULL)
391                               goto ouch;
392 
393                     /* password */
394                     if (*q == ':') {
395                               q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
396                               if (q == NULL)
397                                         goto ouch;
398                     }
399                     p++;
400           } else {
401                     p = URL;
402           }
403 
404           /* hostname */
405           if (*p == '[') {
406                     q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef");
407                     if (*q++ != ']')
408                               goto ouch;
409           } else {
410                     /* valid characters in a DNS name */
411                     q = p + strspn(p, "-." "0123456789"
412                         "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_"
413                         "abcdefghijklmnopqrstuvwxyz");
414           }
415           if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN)
416                     goto ouch;
417           for (i = 0; p + i < q; i++)
418                     u->host[i] = tolower((unsigned char)p[i]);
419           u->host[i] = '\0';
420           p = q;
421 
422           /* port */
423           if (*p == ':') {
424                     for (n = 0, q = ++p; *q && (*q != '/'); q++) {
425                               if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) {
426                                         n = n * 10 + (*q - '0');
427                               } else {
428                                         /* invalid port */
429                                         url_seterr(URL_BAD_PORT);
430                                         goto ouch;
431                               }
432                     }
433                     if (n < 1 || n > IPPORT_MAX)
434                               goto ouch;
435                     u->port = n;
436                     p = q;
437           }
438 
439 nohost:
440           /* document */
441           if (!*p)
442                     p = "/";
443 
444           if (strcmp(u->scheme, SCHEME_HTTP) == 0 ||
445               strcmp(u->scheme, SCHEME_HTTPS) == 0) {
446                     const char hexnums[] = "0123456789abcdef";
447 
448                     /* percent-escape whitespace. */
449                     if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
450                               fetch_syserr();
451                               goto ouch;
452                     }
453                     u->doc = doc;
454                     /* fragments are reserved for client-side processing, see
455                      * https://www.rfc-editor.org/rfc/rfc9110.html#section-7.1
456                      */
457                     while (*p != '\0' && *p != '#') {
458                               if (!isspace((unsigned char)*p)) {
459                                         *doc++ = *p++;
460                               } else {
461                                         *doc++ = '%';
462                                         *doc++ = hexnums[((unsigned int)*p) >> 4];
463                                         *doc++ = hexnums[((unsigned int)*p) & 0xf];
464                                         p++;
465                               }
466                     }
467                     *doc = '\0';
468           } else if ((u->doc = strdup(p)) == NULL) {
469                     fetch_syserr();
470                     goto ouch;
471           }
472 
473           DEBUGF("scheme:   \"%s\"\n"
474               "user:     \"%s\"\n"
475               "password: \"%s\"\n"
476               "host:     \"%s\"\n"
477               "port:     \"%d\"\n"
478               "document: \"%s\"\n",
479               u->scheme, u->user, u->pwd,
480               u->host, u->port, u->doc);
481 
482           return (u);
483 
484 ouch:
485           free(u);
486           return (NULL);
487 }
488 
489 /*
490  * Free a URL
491  */
492 void
fetchFreeURL(struct url * u)493 fetchFreeURL(struct url *u)
494 {
495           free(u->doc);
496           free(u);
497 }
498