1 /*
2  * $LynxId: HTGopher.c,v 1.60 2013/05/01 22:20:13 tom Exp $
3  *
4  *			GOPHER ACCESS				HTGopher.c
5  *			=============
6  *
7  *  History:
8  *	26 Sep 90	Adapted from other accesses (News, HTTP) TBL
9  *	29 Nov 91	Downgraded to C, for portable implementation.
10  *	10 Mar 96	Foteos Macrides (macrides@sci.wfbr.edu).  Added a
11  *			  form-based CSO/PH gateway.  Can be invoked via a
12  *			  "cso://host[:port]/" or "gopher://host:105/2"
13  *			  URL.	If a gopher URL is used with a query token
14  *			  ('?'), the old ISINDEX procedure will be used
15  *			  instead of the form-based gateway.
16  *	15 Mar 96	Foteos Macrides (macrides@sci.wfbr.edu).  Pass
17  *			  port 79, gtype 0 gopher URLs to the finger
18  *			  gateway.
19  */
20 
21 #define HTSTREAM_INTERNAL 1
22 
23 #include <HTUtils.h>		/* Coding convention macros */
24 #include <HTFile.h>		/* For HTFileFormat() */
25 
26 #ifndef DISABLE_GOPHER
27 #include <HTAlert.h>
28 #include <HTParse.h>
29 #include <HTTCP.h>
30 #include <HTFinger.h>
31 
32 /*
33  *  Implements.
34  */
35 #include <HTGopher.h>
36 
#define GOPHER_PORT 70		/* See protocol spec */
#define CSO_PORT 105		/* See protocol spec */
#define BIG 1024		/* Size of the line[] buffers in parse_menu() and parse_cso(); longer lines are truncated */
#define LINE_LENGTH 256		/* Bug */
41 
42 /*
43  *  Gopher entity types.
44  */
45 #define GOPHER_TEXT		'0'
46 #define GOPHER_MENU		'1'
47 #define GOPHER_CSO		'2'
48 #define GOPHER_ERROR		'3'
49 #define GOPHER_MACBINHEX	'4'
50 #define GOPHER_PCBINARY		'5'
51 #define GOPHER_UUENCODED	'6'
52 #define GOPHER_INDEX		'7'
53 #define GOPHER_TELNET		'8'
54 #define GOPHER_BINARY		'9'
55 #define GOPHER_GIF		'g'
56 #define GOPHER_HTML		'h'	/* HTML */
57 #define GOPHER_CHTML		'H'	/* HTML */
58 #define GOPHER_SOUND		's'
59 #define GOPHER_WWW		'w'	/* W3 address */
60 #define GOPHER_IMAGE		'I'
61 #define GOPHER_TN3270		'T'
62 #define GOPHER_INFO		'i'
63 #define GOPHER_DUPLICATE	'+'
64 #define GOPHER_PLUS_IMAGE	':'	/* Addition from Gopher Plus */
65 #define GOPHER_PLUS_MOVIE	';'
66 #define GOPHER_PLUS_SOUND	'<'
67 #define GOPHER_PLUS_PDF		'P'
68 
69 #include <HTFormat.h>
70 
71 /*
72  *  Hypertext object building machinery.
73  */
74 #include <HTML.h>
75 
76 #include <LYStrings.h>
77 #include <LYUtils.h>
78 #include <LYLeaks.h>
79 
/*
 * Output shorthands: each one dispatches through the module-wide
 * targetClass method table, acting on the module-wide target stream.
 * START() passes no attribute arrays; see write_anchor() for a call
 * that does.
 */
#define PUTC(c) (*targetClass.put_character)(target, c)
#define PUTS(s) (*targetClass.put_string)(target, s)
#define START(e) (*targetClass.start_element)(target, e, 0, 0, -1, 0)
#define END(e) (*targetClass.end_element)(target, e, 0)
#define FREE_TARGET (*targetClass._free)(target)

/* Next input character; the parsers in this file loop until it yields EOF. */
#define NEXT_CHAR HTGetCharacter()

/*
 *  Module-wide variables.
 */
static int s;			/* Socket for gopher or CSO host */

/* Only the leading class pointer of the structured stream is spelled out. */
struct _HTStructured {
    const HTStructuredClass *isa;	/* For gopher streams */
    /* ... */
};

static HTStructured *target;	/* the new gopher hypertext */
static HTStructuredClass targetClass;	/* Its action routines */

/* interpret_cso_key() writes through isa->put_block of such a stream. */
struct _HTStream {
    HTStreamClass *isa;		/* For form-based CSO  gateway - FM */
};
104 
/*
 * One field description taken from a CSO/PH "fields" reply, for the
 * form-based CSO gateway - FM
 *
 * Entries are chained in reply order.  The three strings normally live in
 * the small buffers at the end of the structure; a string that does not fit
 * is held in malloc'd storage instead, which is why free_CSOfields()
 * compares each pointer with its buffer before freeing.
 */
struct _CSOfield_info {
    struct _CSOfield_info *next;	/* following entry, 0 at the tail */
    char *name;			/* field name */
    char *attributes;		/* attribute keywords, lower-cased, trailing blank */
    char *description;		/* descriptive text, may be empty */
    int id;			/* field number parsed from the reply */
    int lookup;			/* "lookup" seen; set to 2 by $(NAMEFLD) */
    int indexed;		/* "indexed" seen */
    int url;			/* "url" seen */
    int max_size;		/* value after "max", 32 when absent */
    int defreturn;		/* "default" or "url" seen */
    int explicit_return;	/* only ever cleared in this part of the file */
    int reserved;		/* only ever cleared in this part of the file */
    int gpublic;		/* "public" seen */
    char name_buf[16];		/* Avoid malloc if we can */
    char desc_buf[32];		/* Avoid malloc if we can */
    char attr_buf[80];		/* Avoid malloc if we can */
};

typedef struct _CSOfield_info CSOfield_info;

/* Head of the field list, for the form-based CSO gateway - FM */
static CSOfield_info *CSOfields = NULL;
125 
126 typedef struct _CSOformgen_context {	/* For form-based CSO gateway - FM */
127     const char *host;
128     const char *seek;
129     CSOfield_info *fld;
130     int port;
131     int cur_line;
132     int cur_off;
133     int rep_line;
134     int rep_off;
135     int public_override;
136     int field_select;
137 } CSOformgen_context;
138 
139 /*	Matrix of allowed characters in filenames
140  *	=========================================
141  */
142 static BOOL acceptable[256];
143 static BOOL acceptable_inited = NO;
144 
init_acceptable(void)145 static void init_acceptable(void)
146 {
147     unsigned int i;
148     const char *good =
149     "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
150 
151     for (i = 0; i < 256; i++)
152 	acceptable[i] = NO;
153     for (; *good; good++)
154 	acceptable[(unsigned int) *good] = YES;
155     acceptable_inited = YES;
156 }
157 
/*	Decode one hex character
 *	========================
 *
 *	hex[] maps a value 0..15 to its lower-case digit; from_hex() goes
 *	the other way and accepts either case.  Anything that is not a hex
 *	digit decodes as 0.
 */
static const char hex[17] = "0123456789abcdef";

static char from_hex(int c)
{
    if (c >= '0' && c <= '9') {
	return (char) (c - '0');
    }
    if (c >= 'A' && c <= 'F') {
	return (char) (c - 'A' + 10);
    }
    if (c >= 'a' && c <= 'f') {
	return (char) (c - 'a' + 10);
    }
    return (char) 0;
}
170 
171 /*	Paste in an Anchor
172  *	==================
173  *
174  *	The title of the destination is set, as there is no way
175  *	of knowing what the title is when we arrive.
176  *
177  * On entry,
178  *	HT	is in append mode.
179  *	text	points to the text to be put into the file, 0 terminated.
180  *	addr	points to the hypertext refernce address 0 terminated.
181  */
182 BOOLEAN HT_Is_Gopher_URL = FALSE;
183 
write_anchor(const char * text,const char * addr)184 static void write_anchor(const char *text, const char *addr)
185 {
186     BOOL present[HTML_A_ATTRIBUTES];
187     const char *value[HTML_A_ATTRIBUTES];
188 
189     int i;
190 
191     for (i = 0; i < HTML_A_ATTRIBUTES; i++)
192 	present[i] = 0;
193     present[HTML_A_HREF] = YES;
194     ((const char **) value)[HTML_A_HREF] = addr;
195     present[HTML_A_TITLE] = YES;
196     ((const char **) value)[HTML_A_TITLE] = text;
197 
198     CTRACE((tfp, "HTGopher: adding URL: %s\n", addr));
199 
200     HT_Is_Gopher_URL = TRUE;	/* tell HTML.c that this is a Gopher URL */
201     (*targetClass.start_element) (target, HTML_A, present,
202 				  (const char **) value, -1, 0);
203 
204     PUTS(text);
205     END(HTML_A);
206 }
207 
208 /*	Parse a Gopher Menu document
209  *	============================
210  */
parse_menu(const char * arg GCC_UNUSED,HTParentAnchor * anAnchor)211 static void parse_menu(const char *arg GCC_UNUSED,
212 		       HTParentAnchor *anAnchor)
213 {
214     char gtype;
215     int ich;
216     char line[BIG];
217     char *name = NULL, *selector = NULL;	/* Gopher menu fields */
218     char *host = NULL;
219     char *port;
220     char *p = line;
221     const char *title;
222     int bytes = 0;
223     int BytesReported = 0;
224     char buffer[128];
225 
226 #define TAB		'\t'
227 #define HEX_ESCAPE	'%'
228 
229     START(HTML_HTML);
230     PUTC('\n');
231     START(HTML_HEAD);
232     PUTC('\n');
233     START(HTML_TITLE);
234     if ((title = HTAnchor_title(anAnchor)))
235 	PUTS(title);
236     else
237 	PUTS(GOPHER_MENU_TITLE);
238     END(HTML_TITLE);
239     PUTC('\n');
240     END(HTML_HEAD);
241     PUTC('\n');
242 
243     START(HTML_BODY);
244     PUTC('\n');
245     START(HTML_H1);
246     if ((title = HTAnchor_title(anAnchor)))
247 	PUTS(title);
248     else
249 	PUTS(GOPHER_MENU_TITLE);
250     END(HTML_H1);
251     PUTC('\n');
252     START(HTML_PRE);
253     PUTC('\n');			/* newline after HTML_PRE forces split-line */
254     while ((ich = NEXT_CHAR) != EOF) {
255 
256 	if (interrupted_in_htgetcharacter) {
257 	    CTRACE((tfp,
258 		    "HTGopher: Interrupted in HTGetCharacter, apparently.\n"));
259 	    goto end_html;
260 	}
261 
262 	if ((char) ich != LF) {
263 	    *p = (char) ich;	/* Put character in line */
264 	    if (p < &line[BIG - 1])
265 		p++;
266 
267 	} else {
268 	    *p++ = '\0';	/* Terminate line */
269 	    bytes += (int) (p - line);	/* add size */
270 	    p = line;		/* Scan it to parse it */
271 	    port = 0;		/* Flag "not parsed" */
272 	    CTRACE((tfp, "HTGopher: Menu item: %s\n", line));
273 	    gtype = *p++;
274 
275 	    if (bytes > BytesReported + 1024) {
276 		sprintf(buffer, TRANSFERRED_X_BYTES, bytes);
277 		HTProgress(buffer);
278 		BytesReported = bytes;
279 	    }
280 
281 	    /* Break on line with a dot by itself */
282 	    if ((gtype == '.') && ((*p == '\r') || (*p == 0)))
283 		break;
284 
285 	    if (gtype && *p) {
286 		name = p;
287 		selector = strchr(name, TAB);
288 		if (selector) {
289 		    *selector++ = '\0';		/* Terminate name */
290 		    /*
291 		     * Gopher+ Type=0+ objects can be binary, and will
292 		     * have 9 or 5 beginning their selector.  Make sure
293 		     * we don't trash the terminal by treating them as
294 		     * text. - FM
295 		     */
296 		    if (gtype == GOPHER_TEXT && (*selector == GOPHER_BINARY ||
297 						 *selector == GOPHER_PCBINARY))
298 			gtype = *selector;
299 		    host = strchr(selector, TAB);
300 		    if (host) {
301 			*host++ = '\0';		/* Terminate selector */
302 			port = strchr(host, TAB);
303 			if (port) {
304 			    char *junk;
305 
306 			    port[0] = ':';	/* delimit host a la W3 */
307 			    junk = strchr(port, TAB);
308 			    if (junk)
309 				*junk = '\0';	/* Chop port */
310 			    if ((port[1] == '0') && (!port[2]))
311 				port[0] = '\0';		/* 0 means none */
312 			}	/* no port */
313 		    }		/* host ok */
314 		}		/* selector ok */
315 	    }
316 	    /* gtype and name ok */
317 	    /* Nameless files are a separator line */
318 	    if (name != NULL && gtype == GOPHER_TEXT) {
319 		int i = (int) strlen(name) - 1;
320 
321 		while (name[i] == ' ' && i >= 0)
322 		    name[i--] = '\0';
323 		if (i < 0)
324 		    gtype = GOPHER_INFO;
325 	    }
326 
327 	    if (gtype == GOPHER_WWW) {	/* Gopher pointer to W3 */
328 		PUTS("(HTML) ");
329 		write_anchor(name, selector);
330 
331 	    } else if (gtype == GOPHER_INFO) {
332 		/* Information or separator line */
333 		PUTS("       ");
334 		PUTS(name);
335 
336 	    } else if (port) {	/* Other types need port */
337 		char *address = 0;
338 		const char *format = *selector ? "%s//%s@%s/" : "%s//%s/";
339 
340 		if (gtype == GOPHER_TELNET) {
341 		    PUTS(" (TEL) ");
342 		    HTSprintf0(&address, format, STR_TELNET_URL, selector, host);
343 		} else if (gtype == GOPHER_TN3270) {
344 		    PUTS("(3270) ");
345 		    HTSprintf0(&address, format, STR_TN3270_URL, selector, host);
346 		} else {	/* If parsed ok */
347 		    char *r;
348 
349 		    switch (gtype) {
350 		    case GOPHER_TEXT:
351 			PUTS("(FILE) ");
352 			break;
353 		    case GOPHER_MENU:
354 			PUTS(" (DIR) ");
355 			break;
356 		    case GOPHER_CSO:
357 			PUTS(" (CSO) ");
358 			break;
359 		    case GOPHER_PCBINARY:
360 			PUTS(" (BIN) ");
361 			break;
362 		    case GOPHER_UUENCODED:
363 			PUTS(" (UUE) ");
364 			break;
365 		    case GOPHER_INDEX:
366 			PUTS("  (?)  ");
367 			break;
368 		    case GOPHER_BINARY:
369 			PUTS(" (BIN) ");
370 			break;
371 		    case GOPHER_GIF:
372 		    case GOPHER_IMAGE:
373 		    case GOPHER_PLUS_IMAGE:
374 			PUTS(" (IMG) ");
375 			break;
376 		    case GOPHER_SOUND:
377 		    case GOPHER_PLUS_SOUND:
378 			PUTS(" (SND) ");
379 			break;
380 		    case GOPHER_MACBINHEX:
381 			PUTS(" (HQX) ");
382 			break;
383 		    case GOPHER_HTML:
384 		    case GOPHER_CHTML:
385 			PUTS("(HTML) ");
386 			break;
387 		    case 'm':
388 			PUTS("(MIME) ");
389 			break;
390 		    case GOPHER_PLUS_MOVIE:
391 			PUTS(" (MOV) ");
392 			break;
393 		    case GOPHER_PLUS_PDF:
394 			PUTS(" (PDF) ");
395 			break;
396 		    default:
397 			PUTS("(UNKN) ");
398 			break;
399 		    }
400 
401 		    HTSprintf0(&address, "//%s/%c", host, gtype);
402 
403 		    for (r = selector; *r; r++) {	/* Encode selector string */
404 			if (acceptable[UCH(*r)]) {
405 			    HTSprintf(&address, "%c", *r);
406 			} else {
407 			    HTSprintf(&address, "%c%c%c",
408 				      HEX_ESCAPE,	/* Means hex coming */
409 				      hex[(TOASCII(*r)) >> 4],
410 				      hex[(TOASCII(*r)) & 15]);
411 			}
412 		    }
413 		}
414 		/* Error response from Gopher doesn't deserve to
415 		   be a hyperlink. */
416 		if (strcmp(address, "gopher://error.host:1/0"))
417 		    write_anchor(name, address);
418 		else
419 		    PUTS(name);
420 		FREE(address);
421 	    } else {		/* parse error */
422 		CTRACE((tfp, "HTGopher: Bad menu item.\n"));
423 		PUTS(line);
424 
425 	    }			/* parse error */
426 
427 	    PUTC('\n');
428 	    p = line;		/* Start again at beginning of line */
429 
430 	}			/* if end of line */
431 
432     }				/* Loop over characters */
433 
434   end_html:
435     END(HTML_PRE);
436     PUTC('\n');
437     END(HTML_BODY);
438     PUTC('\n');
439     END(HTML_HTML);
440     PUTC('\n');
441     FREE_TARGET;
442 
443     return;
444 }
445 
446 /*	Parse a Gopher CSO document from an ISINDEX query.
447  *	==================================================
448  *
449  *   Accepts an open socket to a CSO server waiting to send us
450  *   data and puts it on the screen in a reasonable manner.
451  *
452  *   Perhaps this data can be automatically linked to some
453  *   other source as well???
454  *
455  *  Taken from hacking by Lou Montulli@ukanaix.cc.ukans.edu
456  *  on XMosaic-1.1, and put on libwww 2.11 by Arthur Secret,
457  *  secret@dxcern.cern.ch .
458  */
parse_cso(const char * arg,HTParentAnchor * anAnchor)459 static void parse_cso(const char *arg,
460 		      HTParentAnchor *anAnchor)
461 {
462     int ich;
463     char line[BIG];
464     char *p = line;
465     char *first_colon, *second_colon, last_char = '\0';
466     const char *title;
467 
468     START(HTML_HEAD);
469     PUTC('\n');
470     START(HTML_TITLE);
471     if ((title = HTAnchor_title(anAnchor)))
472 	PUTS(title);
473     else
474 	PUTS(GOPHER_CSO_SEARCH_RESULTS);
475     END(HTML_TITLE);
476     PUTC('\n');
477     END(HTML_HEAD);
478     PUTC('\n');
479     START(HTML_H1);
480     if ((title = HTAnchor_title(anAnchor)))
481 	PUTS(title);
482     else {
483 	PUTS(arg);
484 	PUTS(GOPHER_SEARCH_RESULTS);
485     }
486     END(HTML_H1);
487     PUTC('\n');
488     START(HTML_PRE);
489 
490     /*
491      * Start grabbing chars from the network.
492      */
493     while ((ich = NEXT_CHAR) != EOF) {
494 	if ((char) ich != LF) {
495 	    *p = (char) ich;	/* Put character in line */
496 	    if (p < &line[BIG - 1])
497 		p++;
498 	} else {
499 	    *p = '\0';		/* Terminate line */
500 	    p = line;		/* Scan it to parse it */
501 	    /*
502 	     * OK we now have a line in 'p'.  Lets parse it and print it.
503 	     */
504 
505 	    /*
506 	     * Break on line that begins with a 2.  It's the end of data.
507 	     */
508 	    if (*p == '2')
509 		break;
510 
511 	    /*
512 	     * Lines beginning with 5 are errors.  Print them and quit.
513 	     */
514 	    if (*p == '5') {
515 		START(HTML_H2);
516 		PUTS(p + 4);
517 		END(HTML_H2);
518 		break;
519 	    }
520 
521 	    if (*p == '-') {
522 		/*
523 		 * Data lines look like -200:#:
524 		 * where # is the search result number and can be multiple
525 		 * digits (infinite?).
526 		 * Find the second colon and check the digit to the left of it
527 		 * to see if they are diferent.  If they are then a different
528 		 * person is starting.  Make this line an <h2>.
529 		 */
530 
531 		/*
532 		 * Find the second_colon.
533 		 */
534 		second_colon = NULL;
535 		first_colon = strchr(p, ':');
536 		if (first_colon != NULL) {
537 		    second_colon = strchr(first_colon + 1, ':');
538 		}
539 
540 		if (second_colon != NULL) {	/* error check */
541 
542 		    if (*(second_colon - 1) != last_char)
543 			/* print seperator */
544 		    {
545 			END(HTML_PRE);
546 			START(HTML_H2);
547 		    }
548 
549 		    /*
550 		     * Right now the record appears with the alias (first line)
551 		     * as the header and the rest as <pre> text.
552 		     *
553 		     * It might look better with the name as the header and the
554 		     * rest as a <ul> with <li> tags.  I'm not sure whether the
555 		     * name field comes in any special order or if its even
556 		     * required in a record, so for now the first line is the
557 		     * header no matter what it is (it's almost always the
558 		     * alias).
559 		     *
560 		     * A <dl> with the first line as the <DT> and the rest as
561 		     * some form of <DD> might good also?
562 		     */
563 
564 		    /*
565 		     * Print data.
566 		     */
567 		    PUTS(second_colon + 1);
568 		    PUTC('\n');
569 
570 		    if (*(second_colon - 1) != last_char)
571 			/* end seperator */
572 		    {
573 			END(HTML_H2);
574 			START(HTML_PRE);
575 		    }
576 
577 		    /*
578 		     * Save the char before the second colon for comparison on
579 		     * the next pass.
580 		     */
581 		    last_char = *(second_colon - 1);
582 
583 		}		/* end if second_colon */
584 	    }			/* end if *p == '-' */
585 	}			/* if end of line */
586 
587     }				/* Loop over characters */
588 
589     /* end the text block */
590     PUTC('\n');
591     END(HTML_PRE);
592     PUTC('\n');
593     FREE_TARGET;
594 
595     return;			/* all done */
596 }				/* end of procedure */
597 
598 /*	Display a Gopher CSO ISINDEX cover page.
599  *	========================================
600  */
display_cso(const char * arg,HTParentAnchor * anAnchor)601 static void display_cso(const char *arg,
602 			HTParentAnchor *anAnchor)
603 {
604     const char *title;
605 
606     START(HTML_HEAD);
607     PUTC('\n');
608     START(HTML_TITLE);
609     if ((title = HTAnchor_title(anAnchor)))
610 	PUTS(title);
611     else
612 	PUTS(GOPHER_CSO_INDEX);
613     END(HTML_TITLE);
614     PUTC('\n');
615     START(HTML_ISINDEX);
616     PUTC('\n');
617     END(HTML_HEAD);
618     PUTC('\n');
619     START(HTML_H1);
620     if ((title = HTAnchor_title(anAnchor)))
621 	PUTS(title);
622     else {
623 	PUTS(arg);
624 	PUTS(INDEX_SEGMENT);
625     }
626     END(HTML_H1);
627     PUTS(GOPHER_CSO_INDEX_SUBTITLE);
628     START(HTML_P);
629     PUTS(GOPHER_CSO_SOLICIT_KEYWORDS);
630     START(HTML_P);
631     PUTS(SEGMENT_KEYWORDS_WILL);
632     PUTS(SEGMENT_PERSONS_DB_NAME);
633 
634     if (!HTAnchor_title(anAnchor))
635 	HTAnchor_setTitle(anAnchor, arg);
636 
637     FREE_TARGET;
638     return;
639 }
640 
641 /*	Display a Gopher Index document.
642  *	================================
643  */
display_index(const char * arg,HTParentAnchor * anAnchor)644 static void display_index(const char *arg,
645 			  HTParentAnchor *anAnchor)
646 {
647     const char *title;
648 
649     START(HTML_HEAD);
650     PUTC('\n');
651     PUTC('\n');
652     START(HTML_TITLE);
653     if ((title = HTAnchor_title(anAnchor)))
654 	PUTS(title);
655     else
656 	PUTS(GOPHER_INDEX_TITLE);
657     END(HTML_TITLE);
658     PUTC('\n');
659     START(HTML_ISINDEX);
660     PUTC('\n');
661     END(HTML_HEAD);
662     PUTC('\n');
663     START(HTML_H1);
664     if ((title = HTAnchor_title(anAnchor)))
665 	PUTS(title);
666     else {
667 	PUTS(arg);
668 	PUTS(INDEX_SEGMENT);
669     }
670     END(HTML_H1);
671     PUTS(GOPHER_INDEX_SUBTITLE);
672     START(HTML_P);
673     PUTS(GOPHER_SOLICIT_KEYWORDS);
674 
675     if (!HTAnchor_title(anAnchor))
676 	HTAnchor_setTitle(anAnchor, arg);
677 
678     FREE_TARGET;
679     return;
680 }
681 
682 /*	De-escape a selector into a command.
683  *	====================================
684  *
685  *	The % hex escapes are converted. Otheriwse, the string is copied.
686  */
de_escape(char * command,const char * selector)687 static void de_escape(char *command, const char *selector)
688 {
689     const char *p = selector;
690     char *q;
691 
692     if (command == NULL)
693 	outofmem(__FILE__, "HTLoadGopher");
694 
695     assert(command != NULL);
696 
697     q = command;
698     while (*p) {		/* Decode hex */
699 	if (*p == HEX_ESCAPE) {
700 	    char c;
701 	    unsigned int b;
702 
703 	    p++;
704 	    c = *p++;
705 	    b = UCH(from_hex(c));
706 	    c = *p++;
707 	    if (!c)
708 		break;		/* Odd number of chars! */
709 	    *q++ = (char) FROMASCII((b << 4) + UCH(from_hex(c)));
710 	} else {
711 	    *q++ = *p++;	/* Record */
712 	}
713     }
714     *q = '\0';			/* Terminate command */
715 }
716 
717 /*	Free the CSOfields structures. - FM
718  *	===================================
719  */
free_CSOfields(void)720 static void free_CSOfields(void)
721 {
722     CSOfield_info *cur = CSOfields;
723     CSOfield_info *prev;
724 
725     while (cur) {
726 	if (cur->name != cur->name_buf)
727 	    FREE(cur->name);
728 	if (cur->attributes != cur->attr_buf)
729 	    FREE(cur->attributes);
730 	if (cur->description != cur->desc_buf)
731 	    FREE(cur->description);
732 	prev = cur;
733 	cur = cur->next;
734 	FREE(prev);
735     }
736 
737     return;
738 }
739 
740 /*	Interpret CSO/PH form template keys. - FM
741  *	=========================================
742  */
interpret_cso_key(const char * key,char * buf,size_t bufsize,int * length,CSOformgen_context * ctx,HTStream * Target)743 static void interpret_cso_key(const char *key,
744 			      char *buf,
745 			      size_t bufsize,
746 			      int *length,
747 			      CSOformgen_context * ctx,
748 			      HTStream *Target)
749 {
750     CSOfield_info *fld;
751 
752     if ((fld = ctx->fld) != 0) {
753 	/*
754 	 * Most substitutions only recognized inside of loops.
755 	 */
756 	int error = 0;
757 
758 	if (0 == StrNCmp(key, "$(FID)", 6)) {
759 	    sprintf(buf, "%d", fld->id);
760 	} else if (0 == StrNCmp(key, "$(FDESC)", 8)) {
761 	    sprintf(buf, "%.2046s", fld->description);
762 	} else if (0 == StrNCmp(key, "$(FDEF)", 7)) {
763 	    strcpy(buf, fld->defreturn ? " checked" : "");
764 	} else if (0 == StrNCmp(key, "$(FNDX)", 7)) {
765 	    strcpy(buf, fld->indexed ? "*" : "");
766 	} else if (0 == StrNCmp(key, "$(FSIZE)", 8)) {
767 	    sprintf(buf, " size=%d maxlength=%d",
768 		    fld->max_size > 55 ? 55 : fld->max_size,
769 		    fld->max_size);
770 	} else if (0 == StrNCmp(key, "$(FSIZE2)", 9)) {
771 	    sprintf(buf, " maxlength=%d", fld->max_size);
772 	} else {
773 	    error = 1;
774 	}
775 	if (!error) {
776 	    *length = (int) strlen(buf);
777 	    return;
778 	}
779     }
780     buf[0] = '\0';
781     if (0 == StrNCmp(key, "$(NEXTFLD)", 10)) {
782 	if (!ctx->fld)
783 	    fld = CSOfields;
784 	else
785 	    fld = ctx->fld->next;
786 	switch (ctx->field_select) {
787 	case 0:
788 	    /*
789 	     * 'Query' fields, public and lookup attributes.
790 	     */
791 	    for (; fld; fld = fld->next)
792 		if (fld->gpublic && (fld->lookup == 1))
793 		    break;
794 	    break;
795 	case 1:
796 	    /*
797 	     * 'Query' fields, accept lookup attribute.
798 	     */
799 	    for (; fld; fld = fld->next)
800 		if (fld->lookup == 1)
801 		    break;
802 	    break;
803 	case 2:
804 	    /*
805 	     * 'Return' fields, public only.
806 	     */
807 	    for (; fld; fld = fld->next)
808 		if (fld->gpublic)
809 		    break;
810 	    break;
811 	case 3:
812 	    /*
813 	     * All fields.
814 	     */
815 	    break;
816 	}
817 	if (fld) {
818 	    ctx->cur_line = ctx->rep_line;
819 	    ctx->cur_off = ctx->rep_off;
820 	}
821 	ctx->fld = fld;
822 
823     } else if ((0 == StrNCmp(key, "$(QFIELDS)", 10)) ||
824 	       (0 == StrNCmp(key, "$(RFIELDS)", 10))) {
825 	/*
826 	 * Begin iteration sequence.
827 	 */
828 	ctx->rep_line = ctx->cur_line;
829 	ctx->rep_off = ctx->cur_off;
830 	ctx->fld = (CSOfield_info *) 0;
831 	ctx->seek = "$(NEXTFLD)";
832 	ctx->field_select = (key[2] == 'Q') ? 0 : 2;
833 	if (ctx->public_override)
834 	    ctx->field_select++;
835 
836     } else if (0 == StrNCmp(key, "$(NAMEFLD)", 10)) {
837 	/*
838 	 * Special, locate name field.  Flag lookup so QFIELDS will skip it.
839 	 */
840 	for (fld = CSOfields; fld; fld = fld->next)
841 	    if (strcmp(fld->name, "name") == 0 ||
842 		strcmp(fld->name, "Name") == 0) {
843 		if (fld->lookup)
844 		    fld->lookup = 2;
845 		break;
846 	    }
847 	ctx->fld = fld;
848     } else if (0 == StrNCmp(key, "$(HOST)", 7)) {
849 	strcpy(buf, ctx->host);
850     } else if (0 == StrNCmp(key, "$(PORT)", 7)) {
851 	sprintf(buf, "%d", ctx->port);
852     } else {
853 	/*
854 	 * No match, dump key to buffer so client sees it for debugging.
855 	 */
856 	size_t out = 0;
857 
858 	while (*key && (*key != ')')) {
859 	    buf[out++] = (*key++);
860 	    if (out > bufsize - 2) {
861 		buf[out] = '\0';
862 		(*Target->isa->put_block) (Target, buf, (int) strlen(buf));
863 		out = 0;
864 	    }
865 	}
866 	buf[out++] = ')';
867 	buf[out] = '\0';
868     }
869     *length = (int) strlen(buf);
870     return;
871 }
872 
873 /*	Parse the elements in a CSO/PH fields structure. - FM
874  *	=====================================================
875  */
parse_cso_field_info(CSOfield_info * blk)876 static int parse_cso_field_info(CSOfield_info *blk)
877 {
878     char *info, *max_spec;
879 
880     /*
881      * Initialize all fields to default values.
882      */
883     blk->indexed = blk->lookup = blk->reserved = blk->max_size = blk->url = 0;
884     blk->defreturn = blk->explicit_return = blk->gpublic = 0;
885 
886     /*
887      * Search for keywords in info string and set values.  Attributes are
888      * converted to all lower-case for comparison.
889      */
890     info = blk->attributes;
891     LYLowerCase(info);
892     if (strstr(info, "indexed "))
893 	blk->indexed = 1;
894     if (strstr(info, "default "))
895 	blk->defreturn = 1;
896     if (strstr(info, "public "))
897 	blk->gpublic = 1;
898     if (strstr(info, "lookup "))
899 	blk->lookup = 1;
900     if (strstr(info, "url ")) {
901 	blk->url = 1;
902 	blk->defreturn = 1;
903     }
904     max_spec = strstr(info, "max ");
905     if (max_spec) {
906 	sscanf(&max_spec[4], "%d", &blk->max_size);
907     } else {
908 	blk->max_size = 32;
909     }
910 
911     return 0;
912 }
913 
914 /*	Parse a reply from a CSO/PH fields request. - FM
915  *	================================================
916  */
parse_cso_fields(char * buf,size_t size)917 static int parse_cso_fields(char *buf,
918 			    size_t size)
919 {
920     int ich;
921     char *p = buf;
922     int i, code = 0, prev_code;
923     size_t alen;
924     char *indx, *name;
925     CSOfield_info *last, *newf;
926 
927     last = CSOfields = (CSOfield_info *) 0;
928     prev_code = -2555;
929     buf[0] = '\0';
930 
931     /*
932      * Start grabbing chars from the network.
933      */
934     while ((ich = NEXT_CHAR) != EOF) {
935 	if (interrupted_in_htgetcharacter) {
936 	    CTRACE((tfp,
937 		    "HTLoadCSO: Interrupted in HTGetCharacter, apparently.\n"));
938 	    free_CSOfields();
939 	    buf[0] = '\0';
940 	    return HT_INTERRUPTED;
941 	}
942 
943 	if ((char) ich != LF) {
944 	    *p = (char) ich;	/* Put character in buffer */
945 	    if (p < &buf[size - 1]) {
946 		p++;
947 	    }
948 	} else {
949 	    *p = '\0';		/* Terminate line */
950 	    p = buf;		/* Scan it to parse it */
951 
952 	    /* OK we now have a line in 'p' lets parse it.
953 	     */
954 
955 	    /*
956 	     * Break on line that begins with a 2.  It's the end of data.
957 	     */
958 	    if (*p == '2')
959 		break;
960 
961 	    /*
962 	     * Lines beginning with 5 are errors.  Print them and quit.
963 	     */
964 	    if (*p == '5') {
965 		strcpy(buf, p);
966 		return 5;
967 	    }
968 
969 	    if (*p == '-') {
970 		/*
971 		 * Data lines look like -200:#:
972 		 * where # is the search result number and can be multiple
973 		 * digits (infinite?).
974 		 */
975 
976 		/*
977 		 * Check status, ignore any non-success.
978 		 */
979 		if (p[1] != '2')
980 		    continue;
981 
982 		/*
983 		 * Parse fields within returned line into status, ndx, name,
984 		 * data.
985 		 */
986 		indx = NULL;
987 		name = NULL;
988 		for (i = 0; p[i]; i++) {
989 		    if (p[i] == ':') {
990 			p[i] = '\0';
991 			if (!indx) {
992 			    indx = (char *) &p[i + 1];
993 			    code = atoi(indx);
994 			} else if (!name) {
995 			    name = (char *) &p[i + 1];
996 			} else {
997 			    i++;
998 			    break;
999 			}
1000 		    }
1001 		}
1002 		/*
1003 		 * Add data to field structure.
1004 		 */
1005 		if (name) {
1006 		    if (code == prev_code) {
1007 			/*
1008 			 * Remaining data are description.  Save in current
1009 			 * info block.
1010 			 */
1011 			if (last != NULL) {
1012 			    alen = strlen((char *) &p[i]) + 1;
1013 			    if (alen > sizeof(last->desc_buf)) {
1014 				if (last->description != last->desc_buf)
1015 				    FREE(last->description);
1016 				if (!(last->description = (char *) malloc(alen))) {
1017 				    outofmem(__FILE__, "HTLoadCSO");
1018 				}
1019 			    }
1020 			    strcpy(last->description, (char *) &p[i]);
1021 			}
1022 		    } else {
1023 			/*
1024 			 * Initialize new block, append to end of list to
1025 			 * preserve order.
1026 			 */
1027 			newf = typecalloc(CSOfield_info);
1028 
1029 			if (!newf) {
1030 			    outofmem(__FILE__, "HTLoadCSO");
1031 			}
1032 
1033 			assert(newf != NULL);
1034 
1035 			if (last)
1036 			    last->next = newf;
1037 			else
1038 			    CSOfields = newf;
1039 			last = newf;
1040 
1041 			newf->next = (CSOfield_info *) 0;
1042 			newf->name = newf->name_buf;
1043 			alen = strlen(name) + 1;
1044 			if (alen > sizeof(newf->name_buf)) {
1045 			    if (!(newf->name = (char *) malloc(alen))) {
1046 				outofmem(__FILE__, "HTLoadCSO");
1047 			    }
1048 			}
1049 			strcpy(newf->name, name);
1050 
1051 			newf->attributes = newf->attr_buf;
1052 			alen = strlen((char *) &p[i]) + 2;
1053 			if (alen > sizeof(newf->attr_buf)) {
1054 			    if (!(newf->attributes = (char *) malloc(alen))) {
1055 				outofmem(__FILE__, "HTLoadCSO");
1056 			    }
1057 			}
1058 			strcpy(newf->attributes, (char *) &p[i]);
1059 			strcpy((char *) &newf->attributes[alen - 2], " ");
1060 			newf->description = newf->desc_buf;
1061 			newf->desc_buf[0] = '\0';
1062 			newf->id = atoi(indx);
1063 			/*
1064 			 * Scan for keywords.
1065 			 */
1066 			parse_cso_field_info(newf);
1067 		    }
1068 		    prev_code = code;
1069 		} else
1070 		    break;
1071 	    }			/* end if *p == '-' */
1072 	}			/* if end of line */
1073 
1074     }				/* Loop over characters */
1075 
1076     /* end the text block */
1077 
1078     if (buf[0] == '\0') {
1079 	return -1;		/* no response */
1080     }
1081     buf[0] = '\0';
1082     return 0;			/* all done */
1083 }				/* end of procedure */
1084 
1085 /*	Generate a form for submitting CSO/PH searches. - FM
1086  *	====================================================
1087  */
generate_cso_form(char * host,int port,char * buf,size_t bufsize,HTStream * Target)1088 static int generate_cso_form(char *host,
1089 			     int port,
1090 			     char *buf,
1091 			     size_t bufsize,
1092 			     HTStream *Target)
1093 {
1094     int i, j, length;
1095     size_t out;
1096     int full_flag = 1;
1097     const char *key;
1098     const char *line;
1099     CSOformgen_context ctx;
1100     static const char *ctemplate[] =
1101     {
1102 	"<HTML>\n<HEAD>\n<TITLE>CSO/PH Query Form for $(HOST)</TITLE>\n</HEAD>\n<BODY>",
1103 	"<H2><I>CSO/PH Query Form</I> for <EM>$(HOST)</EM></H2>",
1104 	"To search the database for a name, fill in one or more of the fields",
1105 	"in the form below and activate the 'Submit query' button.  At least",
1106 	"one of the entered fields must be flagged as indexed.",
1107 	"<HR><FORM method=\"POST\" action=\"cso://$(HOST)/\">",
1108 	"[ <input type=\"submit\" value=\"Submit query\"> | ",
1109 	"<input type=\"reset\" value=\"Clear fields\"> ]",
1110 	"<P><DL>",
1111 	"   <DT>Search parameters (* indicates indexed field):",
1112 	"   <DD>",
1113 	"$(NAMEFLD)    <DL COMPACT>\n    <DT><I>$(FDESC)</I>$(FNDX)",
1114 	"    <DD>Last: <input name=\"q_$(FID)\" type=\"text\" size=49$(FSIZE2)>",
1115 	"    <DD>First: <input name=\"q_$(FID)\" type=\"text\" size=48$(FSIZE2)>",
1116 	"$(QFIELDS)    <DT><I>$(FDESC)</I>$(FNDX)",
1117 	"    <DD><input name=\"q_$(FID)\" type=\"text\" $(FSIZE)>\n$(NEXTFLD)",
1118 	"    </DL>",
1119 	"   </DL>\n<P><DL>",
1120 	"   <DT>Output format:",
1121 	"   <DD>Returned data option: <select name=\"return\">",
1122 	"    <option>default<option selected>all<option>selected</select><BR>",
1123 	"$(RFIELDS)    <input type=\"checkbox\" name=\"r_$(FID)\"$(FDEF)> $(FDESC)<BR>",
1124 	"$(NEXTFLD)    ",
1125 	"   </DL></FORM><HR>\n</BODY>\n</HTML>",
1126 	(char *) 0
1127     };
1128 
1129     memset(&ctx, 0, sizeof(ctx));
1130     ctx.host = host;
1131     ctx.seek = (char *) 0;
1132     ctx.port = port;
1133     ctx.fld = (CSOfield_info *) 0;
1134     ctx.public_override = full_flag;
1135     /*
1136      * Parse the strings in the template array to produce HTML document to send
1137      * to client.  First line is skipped for 'full' lists.
1138      */
1139     out = 0;
1140     buf[out] = '\0';
1141     for (i = 0; ctemplate[i]; i++) {
1142 	/*
1143 	 * Search the current string for substitution, flagged by $(
1144 	 */
1145 	for (line = ctemplate[i], j = 0; line[j]; j++) {
1146 	    if ((line[j] == '$') && (line[j + 1] == '(')) {
1147 		/*
1148 		 * Command detected, flush output buffer and find closing ')'
1149 		 * that delimits the command.
1150 		 */
1151 		buf[out] = '\0';
1152 		if (out > 0)
1153 		    (*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1154 		for (key = &line[j]; line[j + 1] && (line[j] != ')'); j++) {
1155 		    ;
1156 		}
1157 		/*
1158 		 * Save context, interpet command and restore updated context.
1159 		 */
1160 		ctx.cur_line = i;
1161 		ctx.cur_off = j;
1162 		interpret_cso_key(key, buf, bufsize, &length, &ctx, Target);
1163 		i = ctx.cur_line;
1164 		j = ctx.cur_off;
1165 		line = ctemplate[i];
1166 		out = (size_t) length;
1167 
1168 		if (ctx.seek) {
1169 		    /*
1170 		     * Command wants us to skip (forward) to indicated token.
1171 		     * Start at current position.
1172 		     */
1173 		    size_t slen = strlen(ctx.seek);
1174 
1175 		    for (; ctemplate[i]; i++) {
1176 			for (line = ctemplate[i]; line[j]; j++) {
1177 			    if (line[j] == '$')
1178 				if (0 == StrNCmp(ctx.seek, &line[j], slen)) {
1179 				    if (j == 0)
1180 					j = (int) strlen(ctemplate[--i]) - 1;
1181 				    else
1182 					--j;
1183 				    line = ctemplate[i];
1184 				    ctx.seek = (char *) 0;
1185 				    break;
1186 				}
1187 			}
1188 			if (!ctx.seek)
1189 			    break;
1190 			j = 0;
1191 		    }
1192 		    if (ctx.seek) {
1193 			char *temp = 0;
1194 
1195 			HTSprintf0(&temp, GOPHER_CSO_SEEK_FAILED, ctx.seek);
1196 			(*Target->isa->put_block) (Target, temp, (int) strlen(temp));
1197 			FREE(temp);
1198 		    }
1199 		}
1200 	    } else {
1201 		/*
1202 		 * Non-command text, add to output buffer.
1203 		 */
1204 		buf[out++] = line[j];
1205 		if (out > (bufsize - 3)) {
1206 		    buf[out] = '\0';
1207 		    (*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1208 		    out = 0;
1209 		}
1210 	    }
1211 	}
1212 	buf[out++] = '\n';
1213 	buf[out] = '\0';
1214     }
1215     if (out > 0)
1216 	(*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1217 
1218     return 0;
1219 }
1220 
1221 /*	Generate a results report for CSO/PH form-based searches. - FM
1222  *	==============================================================
1223  */
generate_cso_report(HTStream * Target)1224 static int generate_cso_report(HTStream *Target)
1225 {
1226     int ich;
1227     char line[BIG];
1228     char *buf = 0;
1229     char *p = line, *href = NULL;
1230     int len, i, prev_ndx, ndx;
1231     char *rcode, *ndx_str, *fname, *fvalue, *l;
1232     CSOfield_info *fld;
1233     BOOL stop = FALSE;
1234 
1235     /*
1236      * Read lines until non-negative status.
1237      */
1238     prev_ndx = -100;
1239     /*
1240      * Start grabbing chars from the network.
1241      */
1242     while (!stop && (ich = NEXT_CHAR) != EOF) {
1243 	if (interrupted_in_htgetcharacter) {
1244 	    CTRACE((tfp,
1245 		    "HTLoadCSO: Interrupted in HTGetCharacter, apparently.\n"));
1246 	    _HTProgress(CONNECTION_INTERRUPTED);
1247 	    goto end_CSOreport;
1248 	}
1249 
1250 	if ((char) ich != LF) {
1251 	    *p = (char) ich;	/* Put character in line */
1252 	    if (p < &line[BIG - 1]) {
1253 		p++;
1254 	    }
1255 	} else {
1256 	    *p = '\0';		/* Terminate line */
1257 	    /*
1258 	     * OK we now have a line.  Load it as 'p' and parse it.
1259 	     */
1260 	    p = line;
1261 	    if (p[0] != '-' && p[0] != '1') {
1262 		stop = TRUE;
1263 	    }
1264 	    rcode = (p[0] == '-') ? &p[1] : p;
1265 	    ndx_str = fname = NULL;
1266 	    len = (int) strlen(p);
1267 	    for (i = 0; i < len; i++) {
1268 		if (p[i] == ':') {
1269 		    p[i] = '\0';
1270 		    if (!ndx_str) {
1271 			fname = ndx_str = &p[i + 1];
1272 		    } else {
1273 			fname = &p[i + 1];
1274 			break;
1275 		    }
1276 		}
1277 	    }
1278 	    if (ndx_str) {
1279 		ndx = atoi(ndx_str);
1280 		if (prev_ndx != ndx) {
1281 		    if (prev_ndx != -100) {
1282 			HTSprintf0(&buf, "</DL></DL>\n");
1283 			(*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1284 		    }
1285 		    if (ndx == 0) {
1286 			HTSprintf0(&buf,
1287 				   "<HR><DL><DT>Information/status<DD><DL><DT>\n");
1288 			(*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1289 		    } else {
1290 			HTSprintf0(&buf,
1291 				   "<HR><DL><DT>Entry %d:<DD><DL COMPACT><DT>\n", ndx);
1292 			(*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1293 		    }
1294 		    prev_ndx = ndx;
1295 		}
1296 	    } else {
1297 		HTSprintf0(&buf, "<DD>%s\n", rcode);
1298 		(*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1299 		continue;
1300 	    }
1301 	    if ((*rcode >= '2') && (*rcode <= '5') && (fname != ndx_str)) {
1302 		while (*fname == ' ') {
1303 		    fname++;	/* trim leading spaces */
1304 		}
1305 		for (fvalue = fname; *fvalue; fvalue++) {
1306 		    if (*fvalue == ':') {
1307 			*fvalue++ = '\0';
1308 			i = (int) strlen(fname) - 1;
1309 			while (i >= 0 && fname[i] == ' ') {
1310 			    fname[i--] = '\0';	/* trim trailing */
1311 			}
1312 			break;
1313 		    }
1314 		}
1315 		if (fvalue) {
1316 		    while (*fvalue == ' ') {
1317 			fvalue++;	/* trim leading spaces */
1318 		    }
1319 		}
1320 		if (*fname) {
1321 		    for (fld = CSOfields; fld; fld = fld->next) {
1322 			if (!strcmp(fld->name, fname)) {
1323 			    if (fld->description) {
1324 				fname = fld->description;
1325 			    }
1326 			    break;
1327 			}
1328 		    }
1329 		    if (fld && fld->url) {
1330 			HTSprintf0(&buf,
1331 				   "<DT><I>%s</I><DD><A HREF=\"%s\">%s</A>\n",
1332 				   fname, fvalue, fvalue);
1333 			(*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1334 		    } else {
1335 			HTSprintf0(&buf, "<DT><I>%s</I><DD>", fname);
1336 			(*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1337 			buf[0] = '\0';
1338 			l = fvalue;
1339 			while (*l) {
1340 			    if (*l == '<') {
1341 				StrAllocCat(buf, "&lt;");
1342 				l++;
1343 			    } else if (*l == '>') {
1344 				StrAllocCat(buf, "&gt;");
1345 				l++;
1346 			    } else if (StrNCmp(l, STR_NEWS_URL, LEN_NEWS_URL) &&
1347 				       StrNCmp(l, "snews://", 8) &&
1348 				       StrNCmp(l, "nntp://", 7) &&
1349 				       StrNCmp(l, "snewspost:", 10) &&
1350 				       StrNCmp(l, "snewsreply:", 11) &&
1351 				       StrNCmp(l, "newspost:", 9) &&
1352 				       StrNCmp(l, "newsreply:", 10) &&
1353 				       StrNCmp(l, "ftp://", 6) &&
1354 				       StrNCmp(l, "file:/", 6) &&
1355 				       StrNCmp(l, "finger://", 9) &&
1356 				       StrNCmp(l, "http://", 7) &&
1357 				       StrNCmp(l, "https://", 8) &&
1358 				       StrNCmp(l, "wais://", 7) &&
1359 				       StrNCmp(l, STR_MAILTO_URL,
1360 					       LEN_MAILTO_URL) &&
1361 				       StrNCmp(l, "cso://", 6) &&
1362 				       StrNCmp(l, "gopher://", 9)) {
1363 				HTSprintf(&buf, "%c", *l++);
1364 			    } else {
1365 				StrAllocCat(buf, "<a href=\"");
1366 				StrAllocCopy(href, l);
1367 				StrAllocCat(buf, strtok(href, " \r\n\t,>)\""));
1368 				StrAllocCat(buf, "\">");
1369 				while (*l && !strchr(" \r\n\t,>)\"", *l)) {
1370 				    HTSprintf(&buf, "%c", *l++);
1371 				}
1372 				StrAllocCat(buf, "</a>");
1373 				FREE(href);
1374 			    }
1375 			}
1376 			StrAllocCat(buf, "\n");
1377 			(*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1378 		    }
1379 		} else {
1380 		    HTSprintf0(&buf, "<DD>");
1381 		    (*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1382 		    buf[0] = '\0';
1383 		    l = fvalue;
1384 		    while (*l) {
1385 			if (*l == '<') {
1386 			    StrAllocCat(buf, "&lt;");
1387 			    l++;
1388 			} else if (*l == '>') {
1389 			    StrAllocCat(buf, "&gt;");
1390 			    l++;
1391 			} else if (StrNCmp(l, STR_NEWS_URL, LEN_NEWS_URL) &&
1392 				   StrNCmp(l, "snews://", 8) &&
1393 				   StrNCmp(l, "nntp://", 7) &&
1394 				   StrNCmp(l, "snewspost:", 10) &&
1395 				   StrNCmp(l, "snewsreply:", 11) &&
1396 				   StrNCmp(l, "newspost:", 9) &&
1397 				   StrNCmp(l, "newsreply:", 10) &&
1398 				   StrNCmp(l, "ftp://", 6) &&
1399 				   StrNCmp(l, "file:/", 6) &&
1400 				   StrNCmp(l, "finger://", 9) &&
1401 				   StrNCmp(l, "http://", 7) &&
1402 				   StrNCmp(l, "https://", 8) &&
1403 				   StrNCmp(l, "wais://", 7) &&
1404 				   StrNCmp(l, STR_MAILTO_URL, LEN_MAILTO_URL) &&
1405 				   StrNCmp(l, "cso://", 6) &&
1406 				   StrNCmp(l, "gopher://", 9)) {
1407 			    HTSprintf(&buf, "%c", *l++);
1408 			} else {
1409 			    StrAllocCat(buf, "<a href=\"");
1410 			    StrAllocCopy(href, l);
1411 			    StrAllocCat(buf, strtok(href, " \r\n\t,>)\""));
1412 			    StrAllocCat(buf, "\">");
1413 			    while (*l && !strchr(" \r\n\t,>)\"", *l)) {
1414 				HTSprintf(&buf, "%c", *l++);
1415 			    }
1416 			    StrAllocCat(buf, "</a>");
1417 			    FREE(href);
1418 			}
1419 		    }
1420 		    StrAllocCat(buf, "\n");
1421 		    (*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1422 		}
1423 	    } else {
1424 		HTSprintf0(&buf, "<DD>%s\n", fname);
1425 		(*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1426 	    }
1427 	}
1428     }
1429   end_CSOreport:
1430     if (prev_ndx != -100) {
1431 	HTSprintf0(&buf, "</DL></DL>\n");
1432 	(*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1433     }
1434     FREE(buf);
1435     return 0;
1436 }
1437 
1438 /*	CSO/PH form-based search gateway - FM			HTLoadCSO
1439  *	=====================================
1440  */
HTLoadCSO(const char * arg,HTParentAnchor * anAnchor,HTFormat format_out,HTStream * sink)1441 static int HTLoadCSO(const char *arg,
1442 		     HTParentAnchor *anAnchor,
1443 		     HTFormat format_out,
1444 		     HTStream *sink)
1445 {
1446     static const char end_form[] = "</BODY>\n</HTML>\n";
1447     char *host, *cp, *data;
1448     int port = CSO_PORT;
1449     int status;			/* tcp return */
1450     bstring *command = NULL;
1451     bstring *content = NULL;
1452     int len, i, j, start, finish, flen, ndx;
1453     int return_type, has_indexed;
1454     CSOfield_info *fld;
1455     char buf[2048];
1456     HTFormat format_in = WWW_HTML;
1457     HTStream *Target = NULL;
1458 
1459     if (!acceptable_inited)
1460 	init_acceptable();
1461 
1462     if (!arg)
1463 	return -3;		/* Bad if no name sepcified     */
1464     if (!*arg)
1465 	return -2;		/* Bad if name had zero length  */
1466     CTRACE((tfp, "HTLoadCSO: Looking for %s\n", arg));
1467 
1468     /*
1469      * Set up a socket to the server for the data.
1470      */
1471     status = HTDoConnect(arg, "cso", CSO_PORT, &s);
1472     if (status == HT_INTERRUPTED) {
1473 	/*
1474 	 * Interrupt cleanly.
1475 	 */
1476 	CTRACE((tfp,
1477 		"HTLoadCSO: Interrupted on connect; recovering cleanly.\n"));
1478 	_HTProgress(CONNECTION_INTERRUPTED);
1479 	return HT_NOT_LOADED;
1480     }
1481     if (status < 0) {
1482 	CTRACE((tfp, "HTLoadCSO: Unable to connect to remote host for `%s'.\n",
1483 		arg));
1484 	return HTInetStatus("connect");
1485     }
1486 
1487     HTInitInput(s);		/* Set up input buffering */
1488 
1489     HTBprintf(&command, "fields%c%c", CR, LF);
1490     if (TRACE) {
1491 	CTRACE((tfp, "HTLoadCSO: Connected, writing command `"));
1492 	trace_bstring(command);
1493 	CTRACE((tfp, "' to socket %d\n", s));
1494     }
1495     _HTProgress(GOPHER_SENDING_CSO_REQUEST);
1496     status = (int) NETWRITE(s, BStrData(command), BStrLen(command));
1497     BStrFree(command);
1498     if (status < 0) {
1499 	CTRACE((tfp, "HTLoadCSO: Unable to send command.\n"));
1500 	return HTInetStatus("send");
1501     }
1502     _HTProgress(GOPHER_SENT_CSO_REQUEST);
1503 
1504     /*
1505      * Now read the data from the socket.
1506      */
1507     status = parse_cso_fields(buf, sizeof(buf));
1508     if (status) {
1509 	NETCLOSE(s);
1510 	if (status == HT_INTERRUPTED) {
1511 	    _HTProgress(CONNECTION_INTERRUPTED);
1512 	} else if (buf[0] != '\0') {
1513 	    HTAlert(buf);
1514 	} else {
1515 	    HTAlert(FAILED_NO_RESPONSE);
1516 	}
1517 	return HT_NOT_LOADED;
1518     }
1519     Target = HTStreamStack(format_in,
1520 			   format_out,
1521 			   sink, anAnchor);
1522     if (!Target || Target == NULL) {
1523 	char *temp = 0;
1524 
1525 	HTSprintf0(&temp, CANNOT_CONVERT_I_TO_O,
1526 		   HTAtom_name(format_in), HTAtom_name(format_out));
1527 	HTAlert(temp);
1528 	FREE(temp);
1529 	NETCLOSE(s);
1530 	return HT_NOT_LOADED;
1531     }
1532     host = HTParse(arg, "", PARSE_HOST);
1533     if ((cp = HTParsePort(host, &port)) != NULL) {
1534 	if (port == CSO_PORT) {
1535 	    *cp = '\0';
1536 	}
1537     }
1538     anAnchor->safe = TRUE;
1539     if (isBEmpty(anAnchor->post_data)) {
1540 	generate_cso_form(host, port, buf, sizeof(buf), Target);
1541 	(*Target->isa->_free) (Target);
1542 	FREE(host);
1543 	NETCLOSE(s);
1544 	free_CSOfields();
1545 	return HT_LOADED;
1546     }
1547 
1548     HTBprintf(&command,
1549 	      "<HTML>\n<HEAD>\n<TITLE>CSO/PH Results on %s</TITLE>\n</HEAD>\n<BODY>\n",
1550 	      host);
1551     (*Target->isa->put_block) (Target, BStrData(command), BStrLen(command));
1552     BStrFree(command);
1553     FREE(host);
1554 
1555     BStrCopy(content, anAnchor->post_data);
1556     assert(content != NULL);
1557 
1558     if (BStrData(content)[BStrLen(content) - 1] != '&')
1559 	BStrCat0(content, "&");
1560 
1561     data = BStrData(content);
1562     len = BStrLen(content);
1563     for (i = 0; i < len; i++) {
1564 	if (data[i] == '+') {
1565 	    data[i] = ' ';
1566 	}
1567     }
1568 
1569     data = BStrData(content);
1570     HTUnEscape(data);		/* FIXME: could it have embedded null? */
1571     len = BStrLen(content);
1572 
1573     return_type = 0;
1574     has_indexed = 0;
1575     start = 0;
1576     for (i = 0; i < len; i++) {
1577 	if (!data[i] || data[i] == '&') {
1578 	    /*
1579 	     * Value parsed.  Unescape characters and look for first '=' to
1580 	     * delimit field name from value.
1581 	     */
1582 	    flen = i - start;
1583 	    finish = start + flen;
1584 	    data[finish] = '\0';
1585 	    for (j = start; j < finish; j++) {
1586 		if (data[j] == '=') {
1587 		    /*
1588 		     * data[start..j-1] is field name,
1589 		     * [j+1..finish-1] is value.
1590 		     */
1591 		    if ((data[start + 1] == '_') &&
1592 			((data[start] == 'r') || (data[start] == 'q'))) {
1593 			/*
1594 			 * Decode fields number and lookup field info.
1595 			 */
1596 			sscanf(&data[start + 2], "%d=", &ndx);
1597 			for (fld = CSOfields; fld; fld = fld->next) {
1598 			    if (ndx == fld->id) {
1599 				if ((j + 1) >= finish)
1600 				    break;	/* ignore nulls */
1601 				if (data[start] == 'q') {
1602 				    /*
1603 				     * Append field to query line.
1604 				     */
1605 				    if (fld->lookup) {
1606 					if (fld->indexed)
1607 					    has_indexed = 1;
1608 					if (isBEmpty(command)) {
1609 					    BStrCopy0(command, "query ");
1610 					} else {
1611 					    BStrCat0(command, " ");
1612 					}
1613 					HTBprintf(&command, "%s=\"%s\"",
1614 						  fld->name, &data[j + 1]);
1615 				    } else {
1616 					strcpy(buf,
1617 					       "Warning: non-lookup field ignored<BR>\n");
1618 					(*Target->isa->put_block) (Target,
1619 								   buf,
1620 								   (int)
1621 								   strlen(buf));
1622 				    }
1623 				} else if (data[start] == 'r') {
1624 				    fld->explicit_return = 1;
1625 				}
1626 				break;
1627 			    }
1628 			}
1629 		    } else if (!StrNCmp(&data[start], "return=", 7)) {
1630 			if (!strcmp(&data[start + 7], "all")) {
1631 			    return_type = 1;
1632 			} else if (!strcmp(&data[start + 7], "selected")) {
1633 			    return_type = 2;
1634 			}
1635 		    }
1636 		}
1637 	    }
1638 	    start = i + 1;
1639 	}
1640     }
1641     BStrFree(content);
1642     if (isBEmpty(command) || !has_indexed) {
1643 	NETCLOSE(s);
1644 	strcpy(buf,
1645 	       "<EM>Error:</EM> At least one indexed field value must be specified!\n");
1646 	(*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1647 	strcpy(buf, "</BODY>\n</HTML>\n");
1648 	(*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1649 	(*Target->isa->_free) (Target);
1650 	free_CSOfields();
1651 	return HT_LOADED;
1652     }
1653     /*
1654      * Append return fields.
1655      */
1656     if (return_type == 1) {
1657 	BStrCat0(command, " return all");
1658     } else if (return_type == 2) {
1659 	BStrCat0(command, " return");
1660 	for (fld = CSOfields; fld; fld = fld->next) {
1661 	    if (fld->explicit_return) {
1662 		HTBprintf(&command, " %s", fld->name);
1663 	    }
1664 	}
1665     }
1666     HTBprintf(&command, "%c%c", CR, LF);
1667     strcpy(buf, "<H2>\n<EM>CSO/PH command:</EM> ");
1668     (*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1669     (*Target->isa->put_block) (Target, BStrData(command), BStrLen(command));
1670     strcpy(buf, "</H2>\n");
1671     (*Target->isa->put_block) (Target, buf, (int) strlen(buf));
1672     if (TRACE) {
1673 	CTRACE((tfp, "HTLoadCSO: Writing command `"));
1674 	trace_bstring(command);
1675 	CTRACE((tfp, "' to socket %d\n", s));
1676     }
1677     status = (int) NETWRITE(s, BStrData(command), BStrLen(command));
1678     BStrFree(command);
1679     if (status < 0) {
1680 	CTRACE((tfp, "HTLoadCSO: Unable to send command.\n"));
1681 	free_CSOfields();
1682 	return HTInetStatus("send");
1683     }
1684     generate_cso_report(Target);
1685     NETCLOSE(s);
1686     (*Target->isa->put_block) (Target, end_form, (int) sizeof(end_form) - 1);
1687     (*Target->isa->_free) (Target);
1688     FREE(host);
1689     free_CSOfields();
1690     return HT_LOADED;
1691 }
1692 
1693 /*	Load by name.						HTLoadGopher
1694  *	=============
1695  *
1696  */
HTLoadGopher(const char * arg,HTParentAnchor * anAnchor,HTFormat format_out,HTStream * sink)1697 static int HTLoadGopher(const char *arg,
1698 			HTParentAnchor *anAnchor,
1699 			HTFormat format_out,
1700 			HTStream *sink)
1701 {
1702     char *command;		/* The whole command */
1703     int status;			/* tcp return */
1704     char gtype;			/* Gopher Node type */
1705     char *selector;		/* Selector string */
1706 
1707     if (!acceptable_inited)
1708 	init_acceptable();
1709 
1710     if (!arg)
1711 	return -3;		/* Bad if no name sepcified     */
1712     if (!*arg)
1713 	return -2;		/* Bad if name had zero length  */
1714     CTRACE((tfp, "HTGopher: Looking for %s\n", arg));
1715 
1716     /*
1717      * If it's a port 105 GOPHER_CSO gtype with no ISINDEX token ('?'), use the
1718      * form-based CSO gateway (otherwise, return an ISINDEX cover page or do
1719      * the ISINDEX search).  - FM
1720      */
1721     {
1722 	size_t len;
1723 
1724 	if ((len = strlen(arg)) > 5) {
1725 	    if (0 == strcmp((const char *) &arg[len - 6], ":105/2")) {
1726 		/* Use CSO gateway. */
1727 		CTRACE((tfp, "HTGopher: Passing to CSO/PH gateway.\n"));
1728 		return HTLoadCSO(arg, anAnchor, format_out, sink);
1729 	    }
1730 	}
1731     }
1732 
1733     /*
1734      * If it's a port 79/0[/...] URL, use the finger gateway.  - FM
1735      */
1736     if (strstr(arg, ":79/0") != NULL) {
1737 #ifndef DISABLE_FINGER
1738 	CTRACE((tfp, "HTGopher: Passing to finger gateway.\n"));
1739 	return HTLoadFinger(arg, anAnchor, format_out, sink);
1740 #else /* finger is disabled */
1741 	HTAlert(COULD_NOT_ACCESS_DOCUMENT);
1742 	return HT_NOT_LOADED;
1743 #endif /* DISABLE_FINGER */
1744     }
1745 
1746     /*
1747      * Get entity type, and selector string.
1748      */
1749     {
1750 	char *p1 = HTParse(arg, "", PARSE_PATH | PARSE_PUNCTUATION);
1751 
1752 	gtype = '1';		/* Default = menu */
1753 	selector = p1;
1754 	if ((*selector++ == '/') && (*selector)) {	/* Skip first slash */
1755 	    gtype = *selector++;	/* Pick up gtype */
1756 	}
1757 	if (gtype == GOPHER_INDEX) {
1758 	    char *query;
1759 
1760 	    /*
1761 	     * Search is allowed.
1762 	     */
1763 	    HTAnchor_setIndex(anAnchor, anAnchor->address);
1764 	    query = strchr(selector, '?');	/* Look for search string */
1765 	    if (!query || !query[1]) {	/* No search required */
1766 		target = HTML_new(anAnchor, format_out, sink);
1767 		targetClass = *target->isa;
1768 		display_index(arg, anAnchor);	/* Display "cover page" */
1769 		return HT_LOADED;	/* Local function only */
1770 	    }
1771 	    *query++ = '\0';	/* Skip '?'     */
1772 	    command =
1773 		(char *) malloc(strlen(selector) + 1 + strlen(query) + 2 + 1);
1774 	    if (command == NULL)
1775 		outofmem(__FILE__, "HTLoadGopher");
1776 
1777 	    assert(command != NULL);
1778 
1779 	    de_escape(command, selector);	/* Bug fix TBL 921208 */
1780 
1781 	    strcat(command, "\t");
1782 
1783 	    {			/* Remove plus signs 921006 */
1784 		char *p;
1785 
1786 		for (p = query; *p; p++) {
1787 		    if (*p == '+')
1788 			*p = ' ';
1789 		}
1790 	    }
1791 
1792 	    de_escape(&command[strlen(command)], query);	/* bug fix LJM 940415 */
1793 	} else if (gtype == GOPHER_CSO) {
1794 	    char *query;
1795 
1796 	    /*
1797 	     * Search is allowed.
1798 	     */
1799 	    query = strchr(selector, '?');	/* Look for search string */
1800 	    if (!query || !query[1]) {	/* No search required */
1801 		target = HTML_new(anAnchor, format_out, sink);
1802 		targetClass = *target->isa;
1803 		display_cso(arg, anAnchor);	/* Display "cover page" */
1804 		return HT_LOADED;	/* Local function only */
1805 	    }
1806 	    HTAnchor_setIndex(anAnchor, anAnchor->address);
1807 	    *query++ = '\0';	/* Skip '?'     */
1808 	    command = (char *) malloc(strlen("query") + 1 +
1809 				      strlen(query) + 2 + 1);
1810 	    if (command == NULL)
1811 		outofmem(__FILE__, "HTLoadGopher");
1812 
1813 	    assert(command != NULL);
1814 
1815 	    de_escape(command, selector);	/* Bug fix TBL 921208 */
1816 
1817 	    strcpy(command, "query ");
1818 
1819 	    {			/* Remove plus signs 921006 */
1820 		char *p;
1821 
1822 		for (p = query; *p; p++) {
1823 		    if (*p == '+')
1824 			*p = ' ';
1825 		}
1826 	    }
1827 	    de_escape(&command[strlen(command)], query);	/* bug fix LJM 940415 */
1828 
1829 	} else {		/* Not index */
1830 	    command = (char *) malloc(strlen(selector) + 2 + 1);
1831 	    if (command == NULL)
1832 		outofmem(__FILE__, "HTLoadGopher");
1833 
1834 	    assert(command != NULL);
1835 
1836 	    de_escape(command, selector);
1837 	}
1838 	FREE(p1);
1839     }
1840 
1841     {
1842 	char *p = command + strlen(command);
1843 
1844 	*p++ = CR;		/* Macros to be correct on Mac */
1845 	*p++ = LF;
1846 	*p = '\0';
1847     }
1848 
1849     /*
1850      * Set up a socket to the server for the data.
1851      */
1852     status = HTDoConnect(arg, "gopher", GOPHER_PORT, &s);
1853     if (status == HT_INTERRUPTED) {
1854 	/*
1855 	 * Interrupt cleanly.
1856 	 */
1857 	CTRACE((tfp, "HTGopher: Interrupted on connect; recovering cleanly.\n"));
1858 	_HTProgress(CONNECTION_INTERRUPTED);
1859 	FREE(command);
1860 	return HT_NOT_LOADED;
1861     }
1862     if (status < 0) {
1863 	CTRACE((tfp, "HTGopher: Unable to connect to remote host for `%s'.\n",
1864 		arg));
1865 	FREE(command);
1866 	return HTInetStatus("connect");
1867     }
1868 
1869     HTInitInput(s);		/* Set up input buffering */
1870 
1871     CTRACE((tfp, "HTGopher: Connected, writing command `%s' to socket %d\n",
1872 	    command, s));
1873 
1874 #ifdef NOT_ASCII
1875     {
1876 	char *p;
1877 
1878 	for (p = command; *p; p++) {
1879 	    *p = TOASCII(*p);
1880 	}
1881     }
1882 #endif
1883 
1884     _HTProgress(GOPHER_SENDING_REQUEST);
1885 
1886     status = (int) NETWRITE(s, command, (int) strlen(command));
1887     FREE(command);
1888     if (status < 0) {
1889 	CTRACE((tfp, "HTGopher: Unable to send command.\n"));
1890 	return HTInetStatus("send");
1891     }
1892 
1893     _HTProgress(GOPHER_SENT_REQUEST);
1894 
1895     /*
1896      * Now read the data from the socket.
1897      */
1898     switch (gtype) {
1899 
1900     case GOPHER_TEXT:
1901 	HTParseSocket(WWW_PLAINTEXT, format_out, anAnchor, s, sink);
1902 	break;
1903 
1904     case GOPHER_HTML:
1905     case GOPHER_CHTML:
1906 	HTParseSocket(WWW_HTML, format_out, anAnchor, s, sink);
1907 	break;
1908 
1909     case GOPHER_GIF:
1910     case GOPHER_IMAGE:
1911     case GOPHER_PLUS_IMAGE:
1912 	HTParseSocket(HTAtom_for("image/gif"),
1913 		      format_out, anAnchor, s, sink);
1914 	break;
1915 
1916     case GOPHER_MENU:
1917     case GOPHER_INDEX:
1918 	target = HTML_new(anAnchor, format_out, sink);
1919 	targetClass = *target->isa;
1920 	parse_menu(arg, anAnchor);
1921 	break;
1922 
1923     case GOPHER_CSO:
1924 	target = HTML_new(anAnchor, format_out, sink);
1925 	targetClass = *target->isa;
1926 	parse_cso(arg, anAnchor);
1927 	break;
1928 
1929     case GOPHER_SOUND:
1930     case GOPHER_PLUS_SOUND:
1931 	HTParseSocket(WWW_AUDIO, format_out, anAnchor, s, sink);
1932 	break;
1933 
1934     case GOPHER_PLUS_MOVIE:
1935 	HTParseSocket(HTAtom_for("video/mpeg"), format_out, anAnchor, s, sink);
1936 	break;
1937 
1938     case GOPHER_PLUS_PDF:
1939 	HTParseSocket(HTAtom_for("application/pdf"), format_out, anAnchor,
1940 		      s, sink);
1941 	break;
1942 
1943     default:
1944 	{
1945 	    HTAtom *encoding = 0;
1946 	    const char *desc = 0;
1947 	    HTFormat format = HTFileFormat(arg, &encoding, &desc);
1948 
1949 	    /*
1950 	     * Ignore WWW_BINARY (since that is returned by HTFileFormat when
1951 	     * it does not have a representation), but otherwise use the
1952 	     * result.
1953 	     */
1954 	    if (format != WWW_BINARY) {
1955 		HTParseSocket(format, format_out, anAnchor, s, sink);
1956 		break;
1957 	    }
1958 	}
1959 	/* FALL-THRU */
1960 
1961     case GOPHER_MACBINHEX:
1962     case GOPHER_PCBINARY:
1963     case GOPHER_UUENCODED:
1964     case GOPHER_BINARY:
1965 	/*
1966 	 * Specifying WWW_UNKNOWN forces dump to local disk.
1967 	 */
1968 	HTParseSocket(WWW_UNKNOWN, format_out, anAnchor, s, sink);
1969 	break;
1970 
1971     }				/* switch(gtype) */
1972 
1973     NETCLOSE(s);
1974     return HT_LOADED;
1975 }
1976 
/*
 * Protocol descriptors: HTGopher ties the "gopher" scheme name to
 * HTLoadGopher() and HTCSO ties the "cso" scheme name to HTLoadCSO().  The
 * third member is left NULL in both.  The same two objects are defined in
 * either branch; only the GLOBALDEF spelling differs.
 */
#ifdef GLOBALDEF_IS_MACRO
#define _HTGOPHER_C_1_INIT { "gopher", HTLoadGopher, NULL }
GLOBALDEF(HTProtocol, HTGopher, _HTGOPHER_C_1_INIT);
#define _HTCSO_C_1_INIT { "cso", HTLoadCSO, NULL }
GLOBALDEF(HTProtocol, HTCSO, _HTCSO_C_1_INIT);
#else
GLOBALDEF HTProtocol HTGopher =
{"gopher", HTLoadGopher, NULL};
GLOBALDEF HTProtocol HTCSO =
{"cso", HTLoadCSO, NULL};
#endif /* GLOBALDEF_IS_MACRO */
1988 
1989 #endif /* not DISABLE_GOPHER */
1990