1 /*
2  * $LynxId: HTAccess.c,v 1.78 2013/04/30 22:59:03 tom Exp $
3  *
4  *		Access Manager					HTAccess.c
5  *		==============
6  *
7  *  Authors
8  *	TBL	Tim Berners-Lee timbl@info.cern.ch
9  *	JFG	Jean-Francois Groff jfg@dxcern.cern.ch
10  *	DD	Denis DeLaRoca (310) 825-4580  <CSP1DWD@mvs.oac.ucla.edu>
11  *	FM	Foteos Macrides macrides@sci.wfeb.edu
12  *	PDM	Danny Mayer mayer@ljo.dec.com
13  *
14  *  History
15  *	 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
16  *	26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
17  *	 6 Oct 92 Moved HTClientHost and logfile into here. TBL
18  *	17 Dec 92 Tn3270 added, bug fix. DD
19  *	 4 Feb 93 Access registration, Search escapes bad chars TBL
20  *		  PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
21  *	28 May 93 WAIS gateway explicit if no WAIS library linked in.
22  *	31 May 94 Added DIRECT_WAIS support for VMS. FM
23  *	27 Jan 95 Fixed proxy support to use NNTPSERVER for checking
24  *		  whether or not to use the proxy server. PDM
25  *	27 Jan 95 Ensured that proxy service will be overridden for files
26  *		  on the local host (because HTLoadFile() doesn't try ftp
27  *		  for those) and will substitute ftp for remote files. FM
28  *	28 Jan 95 Tweaked PDM's proxy override mods to handle port info
29  *		  for news and wais URLs. FM
30  *
31  *  Bugs
 *	This module assumes that the graphic object is hypertext, as it
33  *	needs to select it when it has been loaded.  A superclass needs to be
34  *	defined which accepts select and select_anchor.
35  */
36 
37 #ifdef VMS
38 #define DIRECT_WAIS
39 #endif /* VMS */
40 
41 #include <HTUtils.h>
42 #include <HTTP.h>
43 #include <HTAlert.h>
44 /*
45  *  Implements:
46  */
47 #include <HTAccess.h>
48 
49 /*
50  *  Uses:
51  */
52 #include <HTParse.h>
53 #include <HTML.h>		/* SCW */
54 
55 #ifndef NO_RULES
56 #include <HTRules.h>
57 #endif
58 
59 #include <HTList.h>
60 #include <HText.h>		/* See bugs above */
61 #include <HTCJK.h>
62 #include <UCMap.h>
63 #include <GridText.h>
64 
65 #include <LYGlobalDefs.h>
66 #include <LYexit.h>
67 #include <LYStrings.h>
68 #include <LYUtils.h>
69 #include <LYLeaks.h>
70 
71 /*
72  *  These flags may be set to modify the operation of this module
73  */
char *HTClientHost = NULL;	/* Name of remote login host if any; logged as "local" when NULL */
FILE *HTlogfile = NULL;		/* File to which to output one-liners; no logging when NULL */
BOOL HTSecure = NO;		/* Disable access for telnet users? */
BOOL HTPermitRedir = NO;	/* Always allow redirection in getfile()? */

BOOL using_proxy = NO;		/* are we using a proxy gateway? (reset by each get_physical() call) */

/*
 *  To generate other things, play with these:
 */
HTFormat HTOutputFormat = NULL;
HTStream *HTOutputStream = NULL;	/* For non-interactive, set this */

static HTList *protocols = NULL;	/* List of registered protocol descriptors */

/* Target of a pending redirection; set and freed by HTLoadDocument() */
char *use_this_url_instead = NULL;

/* State saved by LYUCPushAssumed() for LYUCPopAssumed(); -1 / NULL means nothing pushed */
static int pushed_assume_LYhndl = -1;	/* see LYUC* functions below - kw */
static char *pushed_assume_MIMEname = NULL;
93 
94 #ifdef LY_FIND_LEAKS
free_protocols(void)95 static void free_protocols(void)
96 {
97     HTList_delete(protocols);
98     protocols = NULL;
99     FREE(pushed_assume_MIMEname);	/* shouldn't happen, just in case - kw */
100 }
101 #endif /* LY_FIND_LEAKS */
102 
103 /*	Register a Protocol.				HTRegisterProtocol()
104  *	--------------------
105  */
HTRegisterProtocol(HTProtocol * protocol)106 BOOL HTRegisterProtocol(HTProtocol * protocol)
107 {
108     if (!protocols) {
109 	protocols = HTList_new();
110 #ifdef LY_FIND_LEAKS
111 	atexit(free_protocols);
112 #endif
113     }
114     HTList_addObject(protocols, protocol);
115     return YES;
116 }
117 
118 /*	Register all known protocols.			HTAccessInit()
119  *	-----------------------------
120  *
121  *	Add to or subtract from this list if you add or remove protocol
122  *	modules.  This routine is called the first time the protocol list
123  *	is needed, unless any protocols are already registered, in which
124  *	case it is not called.	Therefore the application can override
125  *	this list.
126  *
127  *	Compiling with NO_INIT prevents all known protocols from being
128  *	forced in at link time.
129  */
130 #ifndef NO_INIT
/*
 * Declarations of the protocol descriptors that HTAccessInit() registers.
 * The same set is declared in two spellings, selected by
 * GLOBALREF_IS_MACRO.  The DECNET, DISABLE_* and DIRECT_WAIS conditionals
 * match the ones used around the registration calls in HTAccessInit().
 */
#ifdef GLOBALREF_IS_MACRO
extern GLOBALREF (HTProtocol, HTTP);
extern GLOBALREF (HTProtocol, HTTPS);
extern GLOBALREF (HTProtocol, HTFile);
extern GLOBALREF (HTProtocol, HTTelnet);
extern GLOBALREF (HTProtocol, HTTn3270);
extern GLOBALREF (HTProtocol, HTRlogin);

#ifndef DECNET
#ifndef DISABLE_FTP
extern GLOBALREF (HTProtocol, HTFTP);
#endif /* DISABLE_FTP */
#ifndef DISABLE_NEWS
extern GLOBALREF (HTProtocol, HTNews);
extern GLOBALREF (HTProtocol, HTNNTP);
extern GLOBALREF (HTProtocol, HTNewsPost);
extern GLOBALREF (HTProtocol, HTNewsReply);
extern GLOBALREF (HTProtocol, HTSNews);
extern GLOBALREF (HTProtocol, HTSNewsPost);
extern GLOBALREF (HTProtocol, HTSNewsReply);
#endif /* not DISABLE_NEWS */
#ifndef DISABLE_GOPHER
extern GLOBALREF (HTProtocol, HTGopher);
extern GLOBALREF (HTProtocol, HTCSO);
#endif /* not DISABLE_GOPHER */
#ifndef DISABLE_FINGER
extern GLOBALREF (HTProtocol, HTFinger);
#endif /* not DISABLE_FINGER */
#ifdef DIRECT_WAIS
extern GLOBALREF (HTProtocol, HTWAIS);
#endif /* DIRECT_WAIS */
#endif /* !DECNET */
#else
/* GLOBALREF used as a plain storage-class keyword rather than a macro call */
GLOBALREF HTProtocol HTTP, HTTPS, HTFile, HTTelnet, HTTn3270, HTRlogin;

#ifndef DECNET
#ifndef DISABLE_FTP
GLOBALREF HTProtocol HTFTP;
#endif /* DISABLE_FTP */
#ifndef DISABLE_NEWS
GLOBALREF HTProtocol HTNews, HTNNTP, HTNewsPost, HTNewsReply;
GLOBALREF HTProtocol HTSNews, HTSNewsPost, HTSNewsReply;
#endif /* not DISABLE_NEWS */
#ifndef DISABLE_GOPHER
GLOBALREF HTProtocol HTGopher, HTCSO;
#endif /* not DISABLE_GOPHER */
#ifndef DISABLE_FINGER
GLOBALREF HTProtocol HTFinger;
#endif /* not DISABLE_FINGER */
#ifdef DIRECT_WAIS
GLOBALREF HTProtocol HTWAIS;
#endif /* DIRECT_WAIS */
#endif /* !DECNET */
#endif /* GLOBALREF_IS_MACRO */
185 
HTAccessInit(void)186 static void HTAccessInit(void)	/* Call me once */
187 {
188     HTRegisterProtocol(&HTTP);
189     HTRegisterProtocol(&HTTPS);
190     HTRegisterProtocol(&HTFile);
191     HTRegisterProtocol(&HTTelnet);
192     HTRegisterProtocol(&HTTn3270);
193     HTRegisterProtocol(&HTRlogin);
194 #ifndef DECNET
195 #ifndef DISABLE_FTP
196     HTRegisterProtocol(&HTFTP);
197 #endif /* DISABLE_FTP */
198 #ifndef DISABLE_NEWS
199     HTRegisterProtocol(&HTNews);
200     HTRegisterProtocol(&HTNNTP);
201     HTRegisterProtocol(&HTNewsPost);
202     HTRegisterProtocol(&HTNewsReply);
203     HTRegisterProtocol(&HTSNews);
204     HTRegisterProtocol(&HTSNewsPost);
205     HTRegisterProtocol(&HTSNewsReply);
206 #endif /* not DISABLE_NEWS */
207 #ifndef DISABLE_GOPHER
208     HTRegisterProtocol(&HTGopher);
209     HTRegisterProtocol(&HTCSO);
210 #endif /* not DISABLE_GOPHER */
211 #ifndef DISABLE_FINGER
212     HTRegisterProtocol(&HTFinger);
213 #endif /* not DISABLE_FINGER */
214 #ifdef DIRECT_WAIS
215     HTRegisterProtocol(&HTWAIS);
216 #endif /* DIRECT_WAIS */
217 #endif /* !DECNET */
218     LYRegisterLynxProtocols();
219 }
220 #endif /* !NO_INIT */
221 
222 /*	Check for proxy override.			override_proxy()
223  *	-------------------------
224  *
225  *	Check the no_proxy environment variable to get the list
226  *	of hosts for which proxy server is not consulted.
227  *
228  *	no_proxy is a comma- or space-separated list of machine
229  *	or domain names, with optional :port part.  If no :port
230  *	part is present, it applies to all ports on that domain.
231  *
232  *  Example:
233  *	    no_proxy="cern.ch,some.domain:8001"
234  *
235  *  Use "*" to override all proxy service:
236  *	     no_proxy="*"
237  */
override_proxy(const char * addr)238 BOOL override_proxy(const char *addr)
239 {
240     const char *no_proxy = getenv("no_proxy");
241     char *p = NULL;
242     char *at = NULL;
243     char *host = NULL;
244     char *Host = NULL;
245     char *acc_method = NULL;
246     int port = 0;
247     int h_len = 0;
248 
249     /*
250      * Check for global override.
251      */
252     if (no_proxy) {
253 	if (!strcmp(no_proxy, "*"))
254 	    return YES;
255     }
256 
257     /*
258      * Never proxy file:// URLs if they are on the local host.  HTLoadFile()
259      * will not attempt ftp for those if direct access fails.  We'll check that
260      * first, in case no_proxy hasn't been defined.  - FM
261      */
262     if (!addr)
263 	return NO;
264     if (!(host = HTParse(addr, "", PARSE_HOST)))
265 	return NO;
266     if (!*host) {
267 	FREE(host);
268 	return NO;
269     }
270     Host = (((at = strchr(host, '@')) != NULL) ? (at + 1) : host);
271 
272     if ((acc_method = HTParse(addr, "", PARSE_ACCESS))) {
273 	if (!strcmp("file", acc_method) &&
274 	    (LYSameHostname(Host, "localhost") ||
275 	     LYSameHostname(Host, HTHostName()))) {
276 	    FREE(host);
277 	    FREE(acc_method);
278 	    return YES;
279 	}
280 	FREE(acc_method);
281     }
282 
283     if (!no_proxy) {
284 	FREE(host);
285 	return NO;
286     }
287 
288     if (NULL != (p = HTParsePort(Host, &port))) {	/* Port specified */
289 	*p = 0;			/* Chop off port */
290     } else {			/* Use default port */
291 	acc_method = HTParse(addr, "", PARSE_ACCESS);
292 	if (acc_method != NULL) {
293 	    /* *INDENT-OFF* */
294 	    if	    (!strcmp(acc_method, "http"))	port = 80;
295 	    else if (!strcmp(acc_method, "https"))	port = 443;
296 	    else if (!strcmp(acc_method, "ftp"))	port = 21;
297 #ifndef DISABLE_GOPHER
298 	    else if (!strcmp(acc_method, "gopher"))	port = 70;
299 #endif
300 	    else if (!strcmp(acc_method, "cso"))	port = 105;
301 #ifndef DISABLE_NEWS
302 	    else if (!strcmp(acc_method, "news"))	port = 119;
303 	    else if (!strcmp(acc_method, "nntp"))	port = 119;
304 	    else if (!strcmp(acc_method, "newspost"))	port = 119;
305 	    else if (!strcmp(acc_method, "newsreply"))	port = 119;
306 	    else if (!strcmp(acc_method, "snews"))	port = 563;
307 	    else if (!strcmp(acc_method, "snewspost"))	port = 563;
308 	    else if (!strcmp(acc_method, "snewsreply")) port = 563;
309 #endif
310 	    else if (!strcmp(acc_method, "wais"))	port = 210;
311 #ifndef DISABLE_FINGER
312 	    else if (!strcmp(acc_method, "finger"))	port = 79;
313 #endif
314 	    else if (!strcmp(acc_method, "telnet"))	port = 23;
315 	    else if (!strcmp(acc_method, "tn3270"))	port = 23;
316 	    else if (!strcmp(acc_method, "rlogin"))	port = 513;
317 	    /* *INDENT-ON* */
318 
319 	    FREE(acc_method);
320 	}
321     }
322     if (!port)
323 	port = 80;		/* Default */
324     h_len = (int) strlen(Host);
325 
326     while (*no_proxy) {
327 	const char *end;
328 	const char *colon = NULL;
329 	int templ_port = 0;
330 	int t_len;
331 	int brackets = 0;
332 
333 	while (*no_proxy && (WHITE(*no_proxy) || *no_proxy == ','))
334 	    no_proxy++;		/* Skip whitespace and separators */
335 
336 	end = no_proxy;
337 	while (*end && !WHITE(*end) && *end != ',') {	/* Find separator */
338 	    if (!brackets && (*end == ':'))
339 		colon = end;	/* Port number given */
340 	    else if (*end == '[')
341 		++brackets;
342 	    else if (*end == ']')
343 		--brackets;
344 	    end++;
345 	}
346 
347 	if (colon) {
348 	    /* unlike HTParsePort(), this may be followed by non-digits */
349 	    templ_port = atoi(colon + 1);
350 	    t_len = (int) (colon - no_proxy);
351 	} else {
352 	    t_len = (int) (end - no_proxy);
353 	}
354 
355 	if ((!templ_port || templ_port == port) &&
356 	    (t_len > 0 && t_len <= h_len &&
357 	     !strncasecomp(Host + h_len - t_len, no_proxy, t_len))) {
358 	    FREE(host);
359 	    return YES;
360 	}
361 #ifdef CJK_EX			/* ASATAKU PROXY HACK */
362 	if ((!templ_port || templ_port == port) &&
363 	    (t_len > 0 && t_len <= h_len &&
364 	     isdigit(UCH(*no_proxy)) &&
365 	     !StrNCmp(host, no_proxy, t_len))) {
366 	    FREE(host);
367 	    return YES;
368 	}
369 #endif /* ASATAKU PROXY HACK */
370 
371 	if (*end)
372 	    no_proxy = (end + 1);
373 	else
374 	    break;
375     }
376 
377     FREE(host);
378     return NO;
379 }
380 
381 /*	Find physical name and access protocol		get_physical()
382  *	--------------------------------------
383  *
384  *  On entry,
385  *	addr		must point to the fully qualified hypertext reference.
386  *	anchor		a parent anchor with whose address is addr
387  *
388  *  On exit,
389  *	returns		HT_NO_ACCESS		Error has occurred.
390  *			HT_OK			Success
391  */
get_physical(const char * addr,HTParentAnchor * anchor)392 static int get_physical(const char *addr,
393 			HTParentAnchor *anchor)
394 {
395     int result;
396     char *acc_method = NULL;	/* Name of access method */
397     char *physical = NULL;
398     char *Server_addr = NULL;
399     BOOL override_flag = NO;
400 
401     CTRACE((tfp, "get_physical %s\n", addr));
402 
403     /*
404      * Make sure the using_proxy variable is FALSE.
405      */
406     using_proxy = NO;
407 
408 #ifndef NO_RULES
409     if ((physical = HTTranslate(addr)) == 0) {
410 	if (redirecting_url) {
411 	    return HT_REDIRECTING;
412 	}
413 	return HT_FORBIDDEN;
414     }
415     if (anchor->isISMAPScript == TRUE) {
416 	StrAllocCat(physical, "?0,0");
417 	CTRACE((tfp, "HTAccess: Appending '?0,0' coordinate pair.\n"));
418     }
419     if (!StrNCmp(physical, "Proxied=", 8)) {
420 	HTAnchor_setPhysical(anchor, physical + 8);
421 	using_proxy = YES;
422     } else if (!StrNCmp(physical, "NoProxy=", 8)) {
423 	HTAnchor_setPhysical(anchor, physical + 8);
424 	override_flag = YES;
425     } else {
426 	HTAnchor_setPhysical(anchor, physical);
427     }
428     FREE(physical);		/* free our copy */
429 #else
430     if (anchor->isISMAPScript == TRUE) {
431 	StrAllocCopy(physical, addr);
432 	StrAllocCat(physical, "?0,0");
433 	CTRACE((tfp, "HTAccess: Appending '?0,0' coordinate pair.\n"));
434 	HTAnchor_setPhysical(anchor, physical);
435 	FREE(physical);		/* free our copy */
436     } else {
437 	HTAnchor_setPhysical(anchor, addr);
438     }
439 #endif /* NO_RULES */
440 
441     acc_method = HTParse(HTAnchor_physical(anchor), STR_FILE_URL, PARSE_ACCESS);
442 
443     /*
444      * Check whether gateway access has been set up for this.
445      *
446      * This function can be replaced by the rule system above.
447      *
448      * If the rule system has already determined that we should use a proxy, or
449      * that we shouldn't, ignore proxy-related settings, don't use no_proxy
450      * either.
451      */
452 #define USE_GATEWAYS
453 #ifdef USE_GATEWAYS
454 
455     if (!override_flag && !using_proxy) {	/* else ignore no_proxy env var */
456 	char *host = NULL;
457 	int port;
458 
459 	if (!strcasecomp(acc_method, "news")) {
460 	    /*
461 	     * News is different, so we need to check the name of the server,
462 	     * as well as the default port for selective exclusions.
463 	     */
464 	    if ((host = HTParse(addr, "", PARSE_HOST))) {
465 		if (HTParsePort(host, &port) == NULL) {
466 		    StrAllocCopy(Server_addr, "news://");
467 		    StrAllocCat(Server_addr, host);
468 		    StrAllocCat(Server_addr, ":119/");
469 		}
470 		FREE(host);
471 	    } else if (LYGetEnv("NNTPSERVER") != NULL) {
472 		StrAllocCopy(Server_addr, "news://");
473 		StrAllocCat(Server_addr, LYGetEnv("NNTPSERVER"));
474 		StrAllocCat(Server_addr, ":119/");
475 	    }
476 	} else if (!strcasecomp(acc_method, "wais")) {
477 	    /*
478 	     * Wais also needs checking of the default port for selective
479 	     * exclusions.
480 	     */
481 	    if ((host = HTParse(addr, "", PARSE_HOST))) {
482 		if (!(HTParsePort(host, &port))) {
483 		    StrAllocCopy(Server_addr, "wais://");
484 		    StrAllocCat(Server_addr, host);
485 		    StrAllocCat(Server_addr, ":210/");
486 		}
487 		FREE(host);
488 	    } else
489 		StrAllocCopy(Server_addr, addr);
490 	} else {
491 	    StrAllocCopy(Server_addr, addr);
492 	}
493 	override_flag = override_proxy(Server_addr);
494     }
495 
496     if (!override_flag && !using_proxy) {
497 	char *gateway_parameter = NULL, *gateway, *proxy;
498 
499 	/*
500 	 * Search for gateways.
501 	 */
502 	HTSprintf0(&gateway_parameter, "WWW_%s_GATEWAY", acc_method);
503 	gateway = LYGetEnv(gateway_parameter);	/* coerce for decstation */
504 
505 	/*
506 	 * Search for proxy servers.
507 	 */
508 	if (!strcmp(acc_method, "file"))
509 	    /*
510 	     * If we got to here, a file URL is for ftp on a remote host. - FM
511 	     */
512 	    strcpy(gateway_parameter, "ftp_proxy");
513 	else
514 	    sprintf(gateway_parameter, "%s_proxy", acc_method);
515 	proxy = LYGetEnv(gateway_parameter);
516 	FREE(gateway_parameter);
517 
518 	if (gateway)
519 	    CTRACE((tfp, "Gateway found: %s\n", gateway));
520 	if (proxy)
521 	    CTRACE((tfp, "proxy server found: %s\n", proxy));
522 
523 	/*
524 	 * Proxy servers have precedence over gateway servers.
525 	 */
526 	if (proxy) {
527 	    char *gatewayed = NULL;
528 
529 	    StrAllocCopy(gatewayed, proxy);
530 	    if (!StrNCmp(gatewayed, "http", 4)) {
531 		char *cp = strrchr(gatewayed, '/');
532 
533 		/* Append a slash to the proxy specification if it doesn't
534 		 * end in one but otherwise looks normal (starts with "http",
535 		 * has no '/' other than ones before the hostname). - kw */
536 		if (cp && (cp - gatewayed) <= 7)
537 		    LYAddHtmlSep(&gatewayed);
538 	    }
539 	    /*
540 	     * Ensure that the proxy server uses ftp for file URLs. - FM
541 	     */
542 	    if (!StrNCmp(addr, "file", 4)) {
543 		StrAllocCat(gatewayed, "ftp");
544 		StrAllocCat(gatewayed, (addr + 4));
545 	    } else
546 		StrAllocCat(gatewayed, addr);
547 	    using_proxy = YES;
548 	    if (anchor->isISMAPScript == TRUE)
549 		StrAllocCat(gatewayed, "?0,0");
550 	    HTAnchor_setPhysical(anchor, gatewayed);
551 	    FREE(gatewayed);
552 	    FREE(acc_method);
553 
554 	    acc_method = HTParse(HTAnchor_physical(anchor),
555 				 STR_HTTP_URL, PARSE_ACCESS);
556 
557 	} else if (gateway) {
558 	    char *path = HTParse(addr, "",
559 				 PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
560 
561 	    /* Chop leading / off to make host into part of path */
562 	    char *gatewayed = HTParse(path + 1, gateway, PARSE_ALL);
563 
564 	    FREE(path);
565 	    HTAnchor_setPhysical(anchor, gatewayed);
566 	    FREE(gatewayed);
567 	    FREE(acc_method);
568 
569 	    acc_method = HTParse(HTAnchor_physical(anchor),
570 				 STR_HTTP_URL, PARSE_ACCESS);
571 	}
572     }
573     FREE(Server_addr);
574 #endif /* use gateways */
575 
576     /*
577      * Search registered protocols to find suitable one.
578      */
579     result = HT_NO_ACCESS;
580     {
581 	int i, n;
582 
583 #ifndef NO_INIT
584 	if (!protocols)
585 	    HTAccessInit();
586 #endif
587 	n = HTList_count(protocols);
588 	for (i = 0; i < n; i++) {
589 	    HTProtocol *p = (HTProtocol *) HTList_objectAt(protocols, i);
590 
591 	    if (!strcmp(p->name, acc_method)) {
592 		HTAnchor_setProtocol(anchor, p);
593 		FREE(acc_method);
594 		result = HT_OK;
595 		break;
596 	    }
597 	}
598     }
599 
600     FREE(acc_method);
601     return result;
602 }
603 
604 /*
605  * Temporarily set the int UCLYhndl_for_unspec and string UCLYhndl_for_unspec
606  * used for charset "assuming" to the values implied by a HTParentAnchor's
607  * UCStages, after saving the current values for later restoration.  - kw @@@
608  * These functions may not really belong here, but where else?  I want the
609  * "pop" to occur as soon as possible after loading has finished.  - kw @@@
610  */
LYUCPushAssumed(HTParentAnchor * anchor)611 void LYUCPushAssumed(HTParentAnchor *anchor)
612 {
613     int anchor_LYhndl = -1;
614     LYUCcharset *anchor_UCI = NULL;
615 
616     if (anchor) {
617 	anchor_LYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER);
618 	if (anchor_LYhndl >= 0)
619 	    anchor_UCI = HTAnchor_getUCInfoStage(anchor,
620 						 UCT_STAGE_PARSER);
621 	if (anchor_UCI && anchor_UCI->MIMEname) {
622 	    pushed_assume_MIMEname = UCAssume_MIMEcharset;
623 	    UCAssume_MIMEcharset = NULL;
624 	    if (HTCJK == JAPANESE)
625 		StrAllocCopy(UCAssume_MIMEcharset, pushed_assume_MIMEname);
626 	    else
627 		StrAllocCopy(UCAssume_MIMEcharset, anchor_UCI->MIMEname);
628 	    pushed_assume_LYhndl = anchor_LYhndl;
629 	    /* some diagnostics */
630 	    if (UCLYhndl_for_unspec != anchor_LYhndl)
631 		CTRACE((tfp,
632 			"LYUCPushAssumed: UCLYhndl_for_unspec changed %d -> %d\n",
633 			UCLYhndl_for_unspec,
634 			anchor_LYhndl));
635 	    UCLYhndl_for_unspec = anchor_LYhndl;
636 	    return;
637 	}
638     }
639     pushed_assume_LYhndl = -1;
640     FREE(pushed_assume_MIMEname);
641 }
642 
643 /*
644  * Restore the int UCLYhndl_for_unspec and string UCLYhndl_for_unspec used for
645  * charset "assuming" from the values saved by LYUCPushAssumed, if any.  - kw
646  */
LYUCPopAssumed(void)647 int LYUCPopAssumed(void)
648 {
649     if (pushed_assume_LYhndl >= 0) {
650 	/* some diagnostics */
651 	if (UCLYhndl_for_unspec != pushed_assume_LYhndl)
652 	    CTRACE((tfp,
653 		    "LYUCPopAssumed: UCLYhndl_for_unspec changed %d -> %d\n",
654 		    UCLYhndl_for_unspec,
655 		    pushed_assume_LYhndl));
656 	UCLYhndl_for_unspec = pushed_assume_LYhndl;
657 	pushed_assume_LYhndl = -1;
658 	FREE(UCAssume_MIMEcharset);
659 	UCAssume_MIMEcharset = pushed_assume_MIMEname;
660 	pushed_assume_MIMEname = NULL;
661 	return UCLYhndl_for_unspec;
662     }
663     return -1;
664 }
665 
666 /*	Load a document					HTLoad()
667  *	---------------
668  *
669  *	This is an internal routine, which has an address AND a matching
670  *	anchor.  (The public routines are called with one OR the other.)
671  *
672  *  On entry,
673  *	addr		must point to the fully qualified hypertext reference.
674  *	anchor		a parent anchor with whose address is addr
675  *
676  *  On exit,
677  *	returns		<0		Error has occurred.
678  *			HT_LOADED	Success
679  *			HT_NO_DATA	Success, but no document loaded.
680  *					(telnet session started etc)
681  */
HTLoad(const char * addr,HTParentAnchor * anchor,HTFormat format_out,HTStream * sink)682 static int HTLoad(const char *addr,
683 		  HTParentAnchor *anchor,
684 		  HTFormat format_out,
685 		  HTStream *sink)
686 {
687     HTProtocol *p;
688     int status = get_physical(addr, anchor);
689 
690     if (reloading) {
691 	FREE(anchor->charset);
692 	FREE(anchor->UCStages);
693     }
694 
695     if (status == HT_FORBIDDEN) {
696 	/* prevent crash if telnet or similar was forbidden by rule. - kw */
697 	LYFixCursesOn("show alert:");
698 	status = HTLoadError(sink, 500, gettext("Access forbidden by rule"));
699     } else if (status == HT_REDIRECTING) {
700 	;			/* fake redirection by rule, to redirecting_url */
701     } else if (status >= 0) {
702 	/* prevent crash if telnet or similar mapped or proxied by rule. - kw */
703 	LYFixCursesOnForAccess(addr, HTAnchor_physical(anchor));
704 	p = (HTProtocol *) HTAnchor_protocol(anchor);
705 	anchor->parent->underway = TRUE;	/* Hack to deal with caching */
706 	status = p->load(HTAnchor_physical(anchor),
707 			 anchor, format_out, sink);
708 	anchor->parent->underway = FALSE;
709 	LYUCPopAssumed();
710     }
711     return status;
712 }
713 
714 /*	Get a save stream for a document		HTSaveStream()
715  *	--------------------------------
716  */
HTSaveStream(HTParentAnchor * anchor)717 HTStream *HTSaveStream(HTParentAnchor *anchor)
718 {
719     HTProtocol *p = (HTProtocol *) HTAnchor_protocol(anchor);
720 
721     if (!p)
722 	return NULL;
723 
724     return p->saveStream(anchor);
725 }
726 
/*
 * Number of redirections followed for the current request.  HTLoadDocument
 * gives up (and resets this to 0) once it exceeds 10.
 */
int redirection_attempts = 0;	/* counter in HTLoadDocument */
728 
729 /*	Load a document - with logging etc		HTLoadDocument()
730  *	----------------------------------
731  *
 *	- Checks for documents already loaded
733  *	- Logs the access
734  *	- Allows stdin filter option
735  *	- Trace output and error messages
736  *
737  *  On Entry,
738  *	  anchor	    is the node_anchor for the document
739  *	  full_address	    The address of the document to be accessed.
740  *	  filter	    if YES, treat stdin as HTML
741  *
742  *  On Exit,
743  *	  returns    YES     Success in opening document
744  *		     NO      Failure
745  */
HTLoadDocument(const char * full_address,HTParentAnchor * anchor,HTFormat format_out,HTStream * sink)746 static BOOL HTLoadDocument(const char *full_address,	/* may include #fragment */
747 			   HTParentAnchor *anchor,
748 			   HTFormat format_out,
749 			   HTStream *sink)
750 {
751     int status;
752     HText *text;
753     const char *address_to_load = full_address;
754     char *cp;
755     BOOL ForcingNoCache = LYforce_no_cache;
756 
757     CTRACE((tfp, "HTAccess: loading document %s\n", address_to_load));
758 
759     /*
760      * Free use_this_url_instead and reset permanent_redirection if not done
761      * elsewhere.  - FM
762      */
763     FREE(use_this_url_instead);
764     permanent_redirection = FALSE;
765 
766     /*
767      * Make sure some yoyo doesn't send us 'round in circles with redirecting
768      * URLs that point back to themselves.  We'll set the original Lynx limit
769      * of 10 redirections per requested URL from a user, because the HTTP/1.1
770      * will no longer specify a restriction to 5, but will leave it up to the
771      * browser's discretion, in deference to Microsoft.  - FM
772      */
773     if (redirection_attempts > 10) {
774 	redirection_attempts = 0;
775 	HTAlert(TOO_MANY_REDIRECTIONS);
776 	return NO;
777     }
778 
779     /*
780      * If this is marked as an internal link but we don't have the document
781      * loaded any more, and we haven't explicitly flagged that we want to
782      * reload with LYforce_no_cache, then something has disappeared from the
783      * cache when we expected it to be still there.  The user probably doesn't
784      * expect a new network access.  So if we have POST data and safe is not
785      * set in the anchor, ask for confirmation, and fail if not granted.  The
786      * exception are LYNXIMGMAP documents, for which we defer to LYLoadIMGmap
787      * for prompting if necessary.  - kw
788      */
789     text = (HText *) HTAnchor_document(anchor);
790     if (LYinternal_flag && !text && !LYforce_no_cache &&
791 	anchor->post_data && !anchor->safe &&
792 	!isLYNXIMGMAP(full_address) &&
793 	HTConfirm(gettext("Document with POST content not found in cache.  Resubmit?"))
794 	!= TRUE) {
795 	return NO;
796     }
797 
798     /*
799      * If we don't have POST content, check whether this is a previous
800      * redirecting URL, and keep re-checking until we get to the final
801      * destination or redirection limit.  If we do have POST content, we didn't
802      * allow permanent redirection, and an interactive user will be deciding
803      * whether to keep redirecting.  - FM
804      */
805     if (!anchor->post_data) {
806 	while ((cp = HTAnchor_physical(anchor)) != NULL &&
807 	       !StrNCmp(cp, "Location=", 9)) {
808 	    DocAddress NewDoc;
809 
810 	    CTRACE((tfp, "HTAccess: '%s' is a redirection URL.\n",
811 		    anchor->address));
812 	    CTRACE((tfp, "HTAccess: Redirecting to '%s'\n", cp + 9));
813 
814 	    /*
815 	     * Don't exceed the redirection_attempts limit.  - FM
816 	     */
817 	    if (++redirection_attempts > 10) {
818 		HTAlert(TOO_MANY_REDIRECTIONS);
819 		redirection_attempts = 0;
820 		FREE(use_this_url_instead);
821 		return NO;
822 	    }
823 
824 	    /*
825 	     * Set up the redirection. - FM
826 	     */
827 	    StrAllocCopy(use_this_url_instead, cp + 9);
828 	    NewDoc.address = use_this_url_instead;
829 	    NewDoc.post_data = NULL;
830 	    NewDoc.post_content_type = NULL;
831 	    NewDoc.bookmark = anchor->bookmark;
832 	    NewDoc.isHEAD = anchor->isHEAD;
833 	    NewDoc.safe = anchor->safe;
834 	    anchor = HTAnchor_findAddress(&NewDoc);
835 	}
836     }
837     /*
838      * If we had previous redirection, go back and check out that the URL under
839      * the current restrictions.  - FM
840      */
841     if (use_this_url_instead) {
842 	FREE(redirecting_url);
843 	return (NO);
844     }
845 
846     /*
847      * See if we can use an already loaded document.
848      */
849     text = (HText *) HTAnchor_document(anchor);
850     if (text && !LYforce_no_cache) {
851 	/*
852 	 * We have a cached rendition of the target document.  Check if it's OK
853 	 * to re-use it.  We consider it OK if:
854 	 * (1) the anchor does not have the no_cache element set, or
855 	 * (2) we've overridden it, e.g., because we are acting on a PREV_DOC
856 	 * command or a link in the History Page and it's not a reply from a
857 	 * POST with the LYresubmit_posts flag set, or
858 	 * (3) we are repositioning within the currently loaded document based
859 	 * on the target anchor's address (URL_Reference).
860 	 *
861 	 * If track_internal_links is false, HText_AreDifferent() is
862 	 * used to determine whether (3) applies.  If the target address
863 	 * differs from that of the current document only by a fragment and the
864 	 * target address has an appended fragment, repositioning without
865 	 * reloading is always assumed.  Note that HText_AreDifferent()
866 	 * currently always returns TRUE if the target has a LYNXIMGMAP URL, so
867 	 * that an internally generated pseudo-document will normally not be
868 	 * re-used unless condition (2) applies.  (Condition (1) cannot apply
869 	 * since in LYMap.c, no_cache is always set in the anchor object).
870 	 * This doesn't guarantee that the resource from which the MAP element
871 	 * is taken will be read again (reloaded) when the list of links for a
872 	 * client-side image map is regenerated, when in some cases it should
873 	 * (e.g., user requested RELOAD, or HTTP response with no-cache header
874 	 * and we are not overriding).
875 	 *
876 	 * If track_internal_links is true, a target address that
877 	 * points to the same URL as the current document may still result in
878 	 * reloading, depending on whether the original URL-Reference was given
879 	 * as an internal link in the context of the previously loaded
880 	 * document.  HText_AreDifferent() is not used here for testing whether
881 	 * we are just repositioning.  For an internal link, the potential
882 	 * callers of this function from mainloop() down will either avoid
883 	 * making the call (and do the repositioning differently) or set
884 	 * LYinternal_flag (or LYoverride_no_cache).  Note that (a) LYNXIMGMAP
885 	 * pseudo-documents and (b) The "List Page" document are treated
886 	 * logically as being part of the document on which they are based, for
887 	 * the purpose of whether to treat a link as internal, but the logic
888 	 * for this (by setting LYinternal_flag as necessary) is implemented
889 	 * elsewhere.  There is a specific test for LYNXIMGMAP here so that the
890 	 * generated pseudo-document will not be re-used unless
891 	 * LYoverride_no_cache is set.  The same caveat as above applies w.r.t.
892 	 * reloading of the underlying resource.
893 	 *
894 	 * We also should be checking other aspects of cache regulation (e.g.,
895 	 * based on an If-Modified-Since check, etc.) but the code for doing
896 	 * those other things isn't available yet.
897 	 */
898 	if ((reloading != REAL_RELOAD) &&
899 	    (LYoverride_no_cache ||
900 	     ((!track_internal_links &&
901 	       (!HText_hasNoCacheSet(text) ||
902 		!HText_AreDifferent(anchor, full_address))) ||
903 	      (track_internal_links &&
904 	       (((LYinternal_flag || !HText_hasNoCacheSet(text)) &&
905 		 !isLYNXIMGMAP(full_address))))))) {
906 	    CTRACE((tfp, "HTAccess: Document already in memory.\n"));
907 	    HText_select(text);
908 
909 #ifdef DIRED_SUPPORT
910 	    if (HTAnchor_format(anchor) == WWW_DIRED)
911 		lynx_edit_mode = TRUE;
912 #endif
913 	    redirection_attempts = 0;
914 	    return YES;
915 	} else {
916 	    ForcingNoCache = YES;
917 	    BStrFree(anchor->post_data);
918 	    CTRACE((tfp, "HTAccess: Auto-reloading document.\n"));
919 	}
920     }
921 
922     if (HText_HaveUserChangedForms(text)) {
923 	/*
924 	 * Issue a warning.  User forms content will be lost.
925 	 * Will not restore changed forms, currently.
926 	 */
927 	HTAlert(RELOADING_FORM);
928     }
929 
930     /*
931      * Get the document from the net.  If we are auto-reloading, the mutable
932      * anchor elements from the previous rendition should be freed in
933      * conjunction with loading of the new rendition.  - FM
934      */
935     LYforce_no_cache = NO;	/* reset after each time through */
936     if (ForcingNoCache) {
937 	FREE(anchor->title);	/* ??? */
938     }
939     status = HTLoad(address_to_load, anchor, format_out, sink);
940     CTRACE((tfp, "HTAccess:  status=%d\n", status));
941 
942     /*
943      * RECOVERY:  if the loading failed, and we had a cached HText copy, and no
944      * new HText created - use a previous copy, issue a warning.
945      */
946     if (text && status < 0 && (HText *) HTAnchor_document(anchor) == text) {
947 	HTAlert(gettext("Loading failed, use a previous copy."));
948 	CTRACE((tfp, "HTAccess: Loading failed, use a previous copy.\n"));
949 	HText_select(text);
950 
951 #ifdef DIRED_SUPPORT
952 	if (HTAnchor_format(anchor) == WWW_DIRED)
953 	    lynx_edit_mode = TRUE;
954 #endif
955 	redirection_attempts = 0;
956 	return YES;
957     }
958 
959     /*
960      * Log the access if necessary.
961      */
962     if (HTlogfile) {
963 	time_t theTime;
964 
965 	time(&theTime);
966 	fprintf(HTlogfile, "%24.24s %s %s %s\n",
967 		ctime(&theTime),
968 		HTClientHost ? HTClientHost : "local",
969 		status < 0 ? "FAIL" : "GET",
970 		full_address);
971 	fflush(HTlogfile);	/* Actually update it on disk */
972 	CTRACE((tfp, "Log: %24.24s %s %s %s\n",
973 		ctime(&theTime),
974 		HTClientHost ? HTClientHost : "local",
975 		status < 0 ? "FAIL" : "GET",
976 		full_address));
977     }
978 
979     /*
980      * Check out what we received from the net.
981      */
982     if (status == HT_REDIRECTING) {
983 	/* Exported from HTMIME.c, of all places.  */
984 	/* NO!! - FM */
985 	/*
986 	 * Doing this via HTMIME.c meant that the redirection cover page was
987 	 * already loaded before we learned that we want a different URL.
988 	 * Also, changing anchor->address, as Lynx was doing, meant we could
989 	 * never again access its hash table entry, creating an insolvable
990 	 * memory leak.  Instead, if we had a 301 status and set
991 	 * permanent_redirection, we'll load the new URL in anchor->physical,
992 	 * preceded by a token, which we can check to make replacements on
993 	 * subsequent access attempts.  We'll check recursively, and retrieve
994 	 * the final URL if we had multiple redirections to it.  If we just
995 	 * went to HTLoad now, as Lou originally had this, we couldn't do
996 	 * Lynx' security checks and alternate handling of some URL types.
997 	 * So, instead, we'll go all the way back to the top of getfile in
998 	 * LYGetFile.c when the status is HT_REDIRECTING.  This may seem
999 	 * bizarre, but it works like a charm!  - FM
1000 	 *
1001 	 * Actually, the location header for redirections is now again picked
1002 	 * up in HTMIME.c.  But that's an internal matter between HTTP.c and
1003 	 * HTMIME.c, is still under control of HTLoadHTTP for http URLs, is
1004 	 * done in a way that doesn't load the redirection response's body
1005 	 * (except when wanted as an error fallback), and thus need not concern
1006 	 * us here.  - kw 1999-12-02
1007 	 */
1008 	CTRACE((tfp, "HTAccess: '%s' is a redirection URL.\n",
1009 		address_to_load));
1010 	CTRACE((tfp, "HTAccess: Redirecting to '%s'\n",
1011 		redirecting_url));
1012 	/*
1013 	 * Prevent circular references.
1014 	 */
1015 	if (strcmp(address_to_load, redirecting_url)) {		/* if different */
1016 	    /*
1017 	     * Load token and redirecting url into anchor->physical if we had
1018 	     * 301 Permanent redirection.  HTTP.c does not allow this if we
1019 	     * have POST content.  - FM
1020 	     */
1021 	    if (permanent_redirection) {
1022 		StrAllocCopy(anchor->physical, "Location=");
1023 		StrAllocCat(anchor->physical, redirecting_url);
1024 	    }
1025 
1026 	    /*
1027 	     * Set up flags before return to getfile.  - FM
1028 	     */
1029 	    StrAllocCopy(use_this_url_instead, redirecting_url);
1030 	    if (ForcingNoCache)
1031 		LYforce_no_cache = YES;
1032 	    ++redirection_attempts;
1033 	    FREE(redirecting_url);
1034 	    permanent_redirection = FALSE;
1035 	    return (NO);
1036 	}
1037 	++redirection_attempts;
1038 	FREE(redirecting_url);
1039 	permanent_redirection = FALSE;
1040 	return (YES);
1041     }
1042 
1043     /*
1044      * We did not receive a redirecting URL.  - FM
1045      */
1046     redirection_attempts = 0;
1047     FREE(redirecting_url);
1048     permanent_redirection = FALSE;
1049 
1050     if (status == HT_LOADED) {
1051 	CTRACE((tfp, "HTAccess: `%s' has been accessed.\n",
1052 		full_address));
1053 	return YES;
1054     }
1055     if (status == HT_PARTIAL_CONTENT) {
1056 	HTAlert(gettext("Loading incomplete."));
1057 	CTRACE((tfp, "HTAccess: `%s' has been accessed, partial content.\n",
1058 		full_address));
1059 	return YES;
1060     }
1061 
1062     if (status == HT_NO_DATA) {
1063 	CTRACE((tfp, "HTAccess: `%s' has been accessed, No data left.\n",
1064 		full_address));
1065 	return NO;
1066     }
1067 
1068     if (status == HT_NOT_LOADED) {
1069 	CTRACE((tfp, "HTAccess: `%s' has been accessed, No data loaded.\n",
1070 		full_address));
1071 	return NO;
1072     }
1073 
1074     if (status == HT_INTERRUPTED) {
1075 	CTRACE((tfp,
1076 		"HTAccess: `%s' has been accessed, transfer interrupted.\n",
1077 		full_address));
1078 	return NO;
1079     }
1080 
1081     if (status > 0) {
1082 	/*
1083 	 * If you get this, then please find which routine is returning a
1084 	 * positive unrecognized error code!
1085 	 */
1086 	fprintf(stderr,
1087 		gettext("**** HTAccess: socket or file number returned by obsolete load routine!\n"));
1088 	fprintf(stderr,
1089 		gettext("**** HTAccess: Internal software error.  Please mail lynx-dev@nongnu.org!\n"));
1090 	fprintf(stderr, gettext("**** HTAccess: Status returned was: %d\n"), status);
1091 	exit_immediately(EXIT_FAILURE);
1092     }
1093 
1094     /* Failure in accessing a document */
1095     cp = NULL;
1096     StrAllocCopy(cp, gettext("Can't Access"));
1097     StrAllocCat(cp, " `");
1098     StrAllocCat(cp, full_address);
1099     StrAllocCat(cp, "'");
1100     _HTProgress(cp);
1101     FREE(cp);
1102 
1103     CTRACE((tfp, "HTAccess: Can't access `%s'\n", full_address));
1104     HTLoadError(sink, 500, gettext("Unable to access document."));
1105     return NO;
1106 }				/* HTLoadDocument */
1107 
1108 /*	Load a document from absolute name.		HTLoadAbsolute()
1109  *	-----------------------------------
1110  *
1111  *  On Entry,
1112  *	  addr	   The absolute address of the document to be accessed.
1113  *	  filter   if YES, treat document as HTML
1114  *
1115  *  On Exit,
1116  *	  returns    YES     Success in opening document
1117  *		     NO      Failure
1118  */
HTLoadAbsolute(const DocAddress * docaddr)1119 BOOL HTLoadAbsolute(const DocAddress *docaddr)
1120 {
1121     return HTLoadDocument(docaddr->address,
1122 			  HTAnchor_findAddress(docaddr),
1123 			  (HTOutputFormat ? HTOutputFormat : WWW_PRESENT),
1124 			  HTOutputStream);
1125 }
1126 
1127 #ifdef NOT_USED_CODE
1128 /*	Load a document from absolute name to stream.	HTLoadToStream()
1129  *	---------------------------------------------
1130  *
1131  *  On Entry,
1132  *	  addr	   The absolute address of the document to be accessed.
1133  *	  sink	   if non-NULL, send data down this stream
1134  *
1135  *  On Exit,
1136  *	  returns    YES     Success in opening document
1137  *		     NO      Failure
1138  */
HTLoadToStream(const char * addr,BOOL filter,HTStream * sink)1139 BOOL HTLoadToStream(const char *addr,
1140 		    BOOL filter,
1141 		    HTStream *sink)
1142 {
1143     return HTLoadDocument(addr,
1144 			  HTAnchor_findSimpleAddress(addr),
1145 			  (HTOutputFormat ? HTOutputFormat : WWW_PRESENT),
1146 			  sink);
1147 }
1148 #endif /* NOT_USED_CODE */
1149 
1150 /*	Load a document from relative name.		HTLoadRelative()
1151  *	-----------------------------------
1152  *
1153  *  On Entry,
1154  *	  relative_name     The relative address of the document
1155  *			    to be accessed.
1156  *
1157  *  On Exit,
1158  *	  returns    YES     Success in opening document
1159  *		     NO      Failure
1160  */
HTLoadRelative(const char * relative_name,HTParentAnchor * here)1161 BOOL HTLoadRelative(const char *relative_name,
1162 		    HTParentAnchor *here)
1163 {
1164     DocAddress full_address;
1165     BOOL result;
1166     char *mycopy = NULL;
1167     char *stripped = NULL;
1168 
1169     full_address.address = NULL;
1170     full_address.post_data = NULL;
1171     full_address.post_content_type = NULL;
1172     full_address.bookmark = NULL;
1173     full_address.isHEAD = FALSE;
1174     full_address.safe = FALSE;
1175 
1176     StrAllocCopy(mycopy, relative_name);
1177 
1178     stripped = HTStrip(mycopy);
1179     full_address.address =
1180 	HTParse(stripped,
1181 		here->address,
1182 		PARSE_ALL_WITHOUT_ANCHOR);
1183     result = HTLoadAbsolute(&full_address);
1184     /*
1185      * If we got redirection, result will be NO, but use_this_url_instead will
1186      * be set.  The calling routine should check both and do whatever is
1187      * appropriate.  - FM
1188      */
1189     FREE(full_address.address);
1190     FREE(mycopy);		/* Memory leak fixed 10/7/92 -- JFG */
1191     return result;
1192 }
1193 
1194 /*	Load if necessary, and select an anchor.	HTLoadAnchor()
1195  *	----------------------------------------
1196  *
1197  *  On Entry,
1198  *	  destination		    The child or parent anchor to be loaded.
1199  *
1200  *  On Exit,
1201  *	  returns    YES     Success
1202  *		     NO      Failure
1203  */
HTLoadAnchor(HTAnchor * destination)1204 BOOL HTLoadAnchor(HTAnchor * destination)
1205 {
1206     HTParentAnchor *parent;
1207     BOOL loaded = NO;
1208 
1209     if (!destination)
1210 	return NO;		/* No link */
1211 
1212     parent = HTAnchor_parent(destination);
1213 
1214     if (HTAnchor_document(parent) == NULL) {	/* If not already loaded */
1215 	/* TBL 921202 */
1216 	BOOL result;
1217 
1218 	result = HTLoadDocument(parent->address,
1219 				parent,
1220 				HTOutputFormat ?
1221 				HTOutputFormat : WWW_PRESENT,
1222 				HTOutputStream);
1223 	if (!result)
1224 	    return NO;
1225 	loaded = YES;
1226     } {
1227 	HText *text = (HText *) HTAnchor_document(parent);
1228 
1229 	if ((destination != (HTAnchor *) parent) &&
1230 	    (destination != (HTAnchor *) (parent->parent))) {
1231 	    /* If child anchor */
1232 	    HText_selectAnchor(text,	/* Double display? @@ */
1233 			       (HTChildAnchor *) destination);
1234 	} else {
1235 	    if (!loaded)
1236 		HText_select(text);
1237 	}
1238     }
1239     return YES;
1240 
1241 }				/* HTLoadAnchor */
1242 
1243 /*	Search.						HTSearch()
1244  *	-------
1245  *
1246  *	Performs a keyword search on word given by the user.  Adds the
1247  *	keyword to the end of the current address and attempts to open
1248  *	the new address.
1249  *
1250  *  On Entry,
1251  *	 *keywords	space-separated keyword list or similar search list
 *	here		is the anchor on which the search is to be done.
1253  */
/*
 * Return the uppercase hexadecimal digit for the value i.
 *
 * i is used directly as an index into the 16-digit table and is not
 * range-checked, so it must lie in 0..15.
 */
static char hex(int i)
{
    return "0123456789ABCDEF"[i];
}
1260 
HTSearch(const char * keywords,HTParentAnchor * here)1261 BOOL HTSearch(const char *keywords,
1262 	      HTParentAnchor *here)
1263 {
1264 #define acceptable \
1265 "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
1266 
1267     char *q, *u;
1268     const char *p, *s, *e;	/* Pointers into keywords */
1269     char *address = NULL;
1270     BOOL result;
1271     char *escaped = typecallocn(char, (strlen(keywords) * 3) + 1);
1272     static const BOOL isAcceptable[96] =
1273     /* *INDENT-OFF* */
1274     /*	 0 1 2 3 4 5 6 7 8 9 A B C D E F */
1275     {	 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,	/* 2x	!"#$%&'()*+,-./  */
1276 	 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,	/* 3x  0123456789:;<=>?  */
1277 	 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,	/* 4x  @ABCDEFGHIJKLMNO  */
1278 	 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,	/* 5X  PQRSTUVWXYZ[\]^_  */
1279 	 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,	/* 6x  `abcdefghijklmno  */
1280 	 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };	/* 7X  pqrstuvwxyz{\}~	DEL */
1281     /* *INDENT-ON* */
1282 
1283     if (escaped == NULL)
1284 	outofmem(__FILE__, "HTSearch");
1285 
1286     assert(escaped != NULL);
1287 
1288     StrAllocCopy(address, here->isIndexAction);
1289 
1290     /*
1291      * Convert spaces to + and hex escape unacceptable characters.
1292      */
1293     for (s = keywords; *s && WHITE(*s); s++)	/* Scan */
1294 	;			/* Skip white space */
1295     for (e = s + strlen(s); e > s && WHITE(*(e - 1)); e--)	/* Scan */
1296 	;			/* Skip trailers */
1297     for (q = escaped, p = s; p < e; p++) {	/* Scan stripped field */
1298 	unsigned char c = UCH(TOASCII(*p));
1299 
1300 	if (WHITE(*p)) {
1301 	    *q++ = '+';
1302 	} else if (IS_CJK_TTY) {
1303 	    *q++ = *p;
1304 	} else if (c >= 32 && c <= UCH(127) && isAcceptable[c - 32]) {
1305 	    *q++ = *p;		/* 930706 TBL for MVS bug */
1306 	} else {
1307 	    *q++ = '%';
1308 	    *q++ = hex((int) (c >> 4));
1309 	    *q++ = hex((int) (c & 15));
1310 	}
1311     }				/* Loop over string */
1312     *q = '\0';			/* Terminate escaped string */
1313     u = strchr(address, '?');	/* Find old search string */
1314     if (u != NULL)
1315 	*u = '\0';		/* Chop old search off */
1316 
1317     StrAllocCat(address, "?");
1318     StrAllocCat(address, escaped);
1319     FREE(escaped);
1320     result = HTLoadRelative(address, here);
1321     FREE(address);
1322 
1323     /*
1324      * If we got redirection, result will be NO, but use_this_url_instead will
1325      * be set.  The calling routine should check both and do whatever is
1326      * appropriate.  Only an http server (not a gopher or wais server) could
1327      * return redirection.  Lynx will go all the way back to its mainloop() and
1328      * subject a redirecting URL to all of its security and restrictions
1329      * checks.  - FM
1330      */
1331     return result;
1332 }
1333 
1334 /*	Search Given Indexname.			HTSearchAbsolute()
1335  *	-----------------------
1336  *
1337  *	Performs a keyword search on word given by the user.  Adds the
1338  *	keyword to the end of the current address and attempts to open
1339  *	the new address.
1340  *
1341  *  On Entry,
1342  *	*keywords	space-separated keyword list or similar search list
1343  *	*indexname	is name of object search is to be done on.
1344  */
HTSearchAbsolute(const char * keywords,char * indexname)1345 BOOL HTSearchAbsolute(const char *keywords,
1346 		      char *indexname)
1347 {
1348     DocAddress abs_doc;
1349     HTParentAnchor *anchor;
1350 
1351     abs_doc.address = indexname;
1352     abs_doc.post_data = NULL;
1353     abs_doc.post_content_type = NULL;
1354     abs_doc.bookmark = NULL;
1355     abs_doc.isHEAD = FALSE;
1356     abs_doc.safe = FALSE;
1357 
1358     anchor = HTAnchor_findAddress(&abs_doc);
1359     return HTSearch(keywords, anchor);
1360 }
1361 
1362 #ifdef NOT_USED_CODE
1363 /*	Generate the anchor for the home page.		HTHomeAnchor()
1364  *	--------------------------------------
1365  *
1366  *	As it involves file access, this should only be done once
1367  *	when the program first runs.
1368  *	This is a default algorithm -- browser don't HAVE to use this.
1369  *	But consistency between browsers is STRONGLY recommended!
1370  *
1371  *  Priority order is:
1372  *		1	WWW_HOME environment variable (logical name, etc)
1373  *		2	~/WWW/default.html
1374  *		3	/usr/local/bin/default.html
1375  *		4	http://www.w3.org/default.html
1376  */
HTHomeAnchor(void)1377 HTParentAnchor *HTHomeAnchor(void)
1378 {
1379     char *my_home_document = NULL;
1380     char *home = LYGetEnv(LOGICAL_DEFAULT);
1381     char *ref;
1382     HTParentAnchor *anchor;
1383 
1384     if (home) {
1385 	StrAllocCopy(my_home_document, home);
1386 #define MAX_FILE_NAME 1024	/* @@@ */
1387     } else if (HTClientHost) {	/* Telnet server */
1388 	/*
1389 	 * Someone telnets in, they get a special home.
1390 	 */
1391 	FILE *fp = fopen(REMOTE_POINTER, "r");
1392 	char *status;
1393 
1394 	if (fp) {
1395 	    my_home_document = typecallocn(char, MAX_FILE_NAME);
1396 
1397 	    if (my_home_document == NULL)
1398 		outofmem(__FILE__, "HTHomeAnchor");
1399 	    status = fgets(my_home_document, MAX_FILE_NAME, fp);
1400 	    if (!status) {
1401 		FREE(my_home_document);
1402 	    }
1403 	    fclose(fp);
1404 	}
1405 	if (my_home_document == NULL)
1406 	    StrAllocCopy(my_home_document, REMOTE_ADDRESS);
1407     }
1408 #ifdef UNIX
1409     if (my_home_document == NULL) {
1410 	FILE *fp = NULL;
1411 	char *home = LYGetEnv("HOME");
1412 
1413 	if (home != 0) {
1414 	    HTSprintf0(&my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
1415 	    fp = fopen(my_home_document, "r");
1416 	}
1417 
1418 	if (!fp) {
1419 	    StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
1420 	    fp = fopen(my_home_document, "r");
1421 	}
1422 	if (fp) {
1423 	    fclose(fp);
1424 	} else {
1425 	    CTRACE((tfp, "HTBrowse: No local home document ~/%s or %s\n",
1426 		    PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE));
1427 	    FREE(my_home_document);
1428 	}
1429     }
1430 #endif /* UNIX */
1431     ref = HTParse((my_home_document ?
1432 		   my_home_document : (HTClientHost ?
1433 				       REMOTE_ADDRESS : LAST_RESORT)),
1434 		  STR_FILE_URL,
1435 		  PARSE_ALL_WITHOUT_ANCHOR);
1436     if (my_home_document) {
1437 	CTRACE((tfp, "HTAccess: Using custom home page %s i.e., address %s\n",
1438 		my_home_document, ref));
1439 	FREE(my_home_document);
1440     }
1441     anchor = HTAnchor_findSimpleAddress(ref);
1442     FREE(ref);
1443     return anchor;
1444 }
1445 #endif /* NOT_USED_CODE */
1446