1 /*
2  * $LynxId: HTFile.c,v 1.136 2013/05/01 10:48:11 tom Exp $
3  *
4  *			File Access				HTFile.c
5  *			===========
6  *
7  *	This is unix-specific code in general, with some VMS bits.
8  *	These are routines for file access used by browsers.
9  *	Development of this module for Unix DIRED_SUPPORT in Lynx
10  *	 regrettably has been conducted in a manner which now
11  *	 creates a major impediment for hopes of adapting Lynx to
12  *	 a newer version of the library.
13  *
14  *  History:
15  *	   Feb 91	Written Tim Berners-Lee CERN/CN
16  *	   Apr 91	vms-vms access included using DECnet syntax
17  *	26 Jun 92 (JFG) When running over DECnet, suppressed FTP.
18  *			Fixed access bug for relative names on VMS.
19  *	   Sep 93 (MD)	Access to VMS files allows sharing.
20  *	15 Nov 93 (MD)	Moved HTVMSname to HTVMSUTILS.C
21  *	27 Dec 93 (FM)	FTP now works with VMS hosts.
22  *			FTP path must be Unix-style and cannot include
23  *			the device or top directory.
24  */
25 
26 #include <HTUtils.h>
27 
28 #ifndef VMS
29 #if defined(DOSPATH)
30 #undef LONG_LIST
31 #define LONG_LIST		/* Define this for long style unix listings (ls -l),
32 				   the actual style is configurable from lynx.cfg */
33 #endif
34 /* #define NO_PARENT_DIR_REFERENCE */
35 /* Define this for no parent links */
36 #endif /* !VMS */
37 
38 #if defined(DOSPATH)
39 #define HAVE_READDIR 1
40 #define USE_DIRENT
41 #endif
42 
43 #if defined(USE_DOS_DRIVES)
44 #include <HTDOS.h>
45 #endif
46 
47 #include <HTFile.h>		/* Implemented here */
48 
49 #ifdef VMS
50 #include <stat.h>
51 #endif /* VMS */
52 
53 #if defined (USE_ZLIB) || defined (USE_BZLIB)
54 #include <GridText.h>
55 #endif
56 
57 #define MULTI_SUFFIX ".multi"	/* Extension for scanning formats */
58 
59 #include <HTParse.h>
60 #include <HTTCP.h>
61 #ifndef DECNET
62 #include <HTFTP.h>
63 #endif /* !DECNET */
64 #include <HTAnchor.h>
65 #include <HTAtom.h>
66 #include <HTAAProt.h>
67 #include <HTFWriter.h>
68 #include <HTInit.h>
69 #include <HTBTree.h>
70 #include <HTAlert.h>
71 #include <HTCJK.h>
72 #include <UCDefs.h>
73 #include <UCMap.h>
74 #include <UCAux.h>
75 
76 #include <LYexit.h>
77 #include <LYCharSets.h>
78 #include <LYGlobalDefs.h>
79 #include <LYStrings.h>
80 #include <LYUtils.h>
81 
82 #ifdef USE_PRETTYSRC
83 # include <LYPrettySrc.h>
84 #endif
85 
86 #include <LYLeaks.h>
87 
/*
 * One suffix-table entry: associates a filename suffix with a
 * representation, an encoding, a description and a quality value.
 * Entries are filled in by HTSetSuffix5().
 */
88 typedef struct _HTSuffix {
89     char *suffix;		/* suffix text compared against the end of file names */
90     HTAtom *rep;		/* representation atom; may be NULL for encoding-only entries */
91     HTAtom *encoding;		/* encoding atom, obtained with HTAtom_for() */
92     char *desc;		/* description string, copied with StrAllocCopy() */
93     float quality;		/* stored from the "value" argument of HTSetSuffix5() */
94 } HTSuffix;
95 
/*
 * Per-entry data used when formatting a directory listing line.
 */
96 typedef struct {
97     struct stat file_info;	/* LYListFmtParse() treats st_mode == 0 as "could not stat" */
98     char sort_tags;		/* NOTE(review): not used in this part of the file - presumably a sort key */
99     char file_name[1];		/* on the end of the struct, since its length varies */
100 } DIRED;
101 
102 #ifndef NGROUPS
103 #ifdef NGROUPS_MAX
104 #define NGROUPS NGROUPS_MAX
105 #else
106 #define NGROUPS 32
107 #endif /* NGROUPS_MAX */
108 #endif /* NGROUPS */
109 
110 #ifndef GETGROUPS_T
111 #define GETGROUPS_T int
112 #endif
113 
114 #include <HTML.h>		/* For directory object building */
115 
/*
 * Shorthands for driving a structured output object.  All of them expect
 * a variable named "target" to be in scope and dispatch through the
 * function pointers in target->isa.
 */
116 #define PUTC(c)      (*target->isa->put_character)(target, c)
117 #define PUTS(s)      (*target->isa->put_string)(target, s)
118 #define START(e)     (*target->isa->start_element)(target, e, 0, 0, -1, 0)
119 #define END(e)       (*target->isa->end_element)(target, e, 0)
/*
 * MAYBE_END only closes elements whose DTD content type is not SGML_EMPTY.
 * It expands to a bare "if" statement, so it is not safe directly before
 * an "else".
 */
120 #define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \
121 			(*target->isa->end_element)(target, e, 0)
122 #define FREE_TARGET  (*target->isa->_free)(target)
/*
 * ABORT_TARGET goes through "targetClass" (not visible in this part of the
 * file) and already ends with its own semicolon.
 */
123 #define ABORT_TARGET (*targetClass._abort)(target, NULL);
124 
/*
 * Local view of the structured-object type: only the class pointer used
 * by the macros above is spelled out here.
 */
125 struct _HTStructured {
126     const HTStructuredClass *isa;
127     /* ... */
128 };
129 
130 /*
131  *  Controlling globals.
132  */
133 int HTDirAccess = HT_DIR_OK;
134 
/* The default for HTDirReadme depends on whether DIRED_SUPPORT is compiled in. */
135 #ifdef DIRED_SUPPORT
136 int HTDirReadme = HT_DIR_README_NONE;
137 
138 #else
139 int HTDirReadme = HT_DIR_README_TOP;
140 #endif /* DIRED_SUPPORT */
141 
/* Prefix that WWW_nameOfFile() strips from local names to recover a host part. */
142 static const char *HTMountRoot = "/Net/";	/* Where to find mounts */
143 
/* Leading component of the names built by HTCacheFileName(). */
144 #ifdef VMS
145 static const char *HTCacheRoot = "/WWW$SCRATCH";	/* Where to cache things */
146 
147 #else
148 static const char *HTCacheRoot = "/tmp/W3_Cache_";	/* Where to cache things */
149 #endif /* VMS */
150 
/*
 * Special suffix names: HTSetSuffix5() maps "*" to no_suffix and "*.*" to
 * unknown_suffix instead of adding them to the HTSuffixes list.
 */
151 static char s_no_suffix[] = "*";
152 static char s_unknown_suffix[] = "*.*";
153 
154 /*
155  *  Suffix registration.
156  */
157 static HTList *HTSuffixes = 0;
158 
/* Default entry; HTFileFormat() falls back to it when no suffix matches. */
159 static HTSuffix no_suffix =
160 {
161     s_no_suffix, NULL, NULL, NULL, 1.0
162 };
163 
/*
 * Default for names that contain a '.' but match no registered suffix;
 * HTFileFormat() uses it only when its rep has been set.
 */
164 static HTSuffix unknown_suffix =
165 {
166     s_unknown_suffix, NULL, NULL, NULL, 1.0
167 };
168 
169 /*	To free up the suffixes at program exit.
170  *	----------------------------------------
171  */
172 #ifdef LY_FIND_LEAKS
173 static void free_suffixes(void);
174 #endif
175 
176 #ifdef LONG_LIST
/*
 * Put a string field into *bufp and return *bufp.
 *
 * start is the flags/width text that followed the '%' in the list format.
 * When it is non-empty the entry is rendered through "%<start>s".  When it
 * is empty, an empty (or NULL) entry truncates an existing buffer in
 * place, and any other non-NULL entry is copied into the buffer.
 */
static char *FormatStr(char **bufp,
		       char *start,
		       const char *entry)
{
    if (*start != '\0') {
	char spec[512];

	/* the precision limits how much of start lands in spec */
	sprintf(spec, "%%%.*ss", (int) sizeof(spec) - 3, start);
	HTSprintf0(bufp, spec, entry);
	return *bufp;
    }

    if (*bufp != NULL && (entry == NULL || *entry == '\0')) {
	**bufp = '\0';
    } else if (entry != NULL) {
	StrAllocCopy(*bufp, entry);
    }
    return *bufp;
}
193 
FormatSize(char ** bufp,char * start,off_t entry)194 static char *FormatSize(char **bufp,
195 			char *start,
196 			off_t entry)
197 {
198     char fmt[512];
199 
200     if (*start) {
201 	sprintf(fmt, "%%%.*s" PRI_off_t, (int) sizeof(fmt) - 3, start);
202 
203 	HTSprintf0(bufp, fmt, entry);
204     } else {
205 	sprintf(fmt, "%" PRI_off_t, entry);
206 
207 	StrAllocCopy(*bufp, fmt);
208     }
209     return *bufp;
210 }
211 
/*
 * Put an integer field into *bufp and return *bufp.
 *
 * A non-empty start (flags/width text from the list format) is applied
 * as "%<start>d"; an empty one yields the plain decimal text.
 */
static char *FormatNum(char **bufp,
		       char *start,
		       int entry)
{
    char text[512];

    if (*start == '\0') {
	sprintf(text, "%d", entry);
	StrAllocCopy(*bufp, text);
	return *bufp;
    }

    /* here text holds the conversion specification, not the result */
    sprintf(text, "%%%.*sd", (int) sizeof(text) - 3, start);
    HTSprintf0(bufp, text, entry);
    return *bufp;
}
227 
/*	Emit one line of a long-format directory listing.
 *	--------------------------------------------------
 *
 *  On entry,
 *	fmtstr	is the list format: literal text mixed with %-directives
 *		(optional printf-style flags/width, then a format char).
 *	data	holds the entry's stat data and file name; when st_mode
 *		is 0 the format is replaced by "    %a".
 *	file	is the path handed to readlink() and HTFileFormat().
 *	target	receives the output.
 *	tail	is passed on to HTDirEntry() when the anchor is written.
 *
 *  Format characters:
 *	a, A	anchor for the file name ('a' also shows a symlink target)
 *	t, T	MIME type description / MIME type name
 *	d	modification date
 *	s	size in bytes
 *	k, K	size in Kilobytes ('K' leaves directories blank)
 *	p	unix-style permission bits
 *	o, g	owner, group (empty when built with NOUSERS)
 *	l	link count
 *	%	literal percent sign, honoring flags/width
 *
 *  A directive that produces no text clears the work buffer so that the
 *  previous field is not written a second time.
 */
static void LYListFmtParse(const char *fmtstr,
			   DIRED * data,
			   char *file,
			   HTStructured * target,
			   char *tail)
{
    char c;
    char *s;
    char *end;
    char *start;
    char *str = NULL;
    char *buf = NULL;
    char tmp[LY_MAXPATH];
    char type;

#ifndef NOUSERS
    const char *name;
#endif
    time_t now;
    char *datestr;

#ifdef S_IFLNK
    int len;
#endif
#define SEC_PER_YEAR	(60 * 60 * 24 * 365)

#ifdef _WINDOWS			/* 1998/01/06 (Tue) 21:20:53 */
    static const char *pbits[] =
    {
	"---", "--x", "-w-", "-wx",
	"r--", "r-x", "rw-", "rwx",
	0};

#define PBIT(a, n, s)  pbits[((a) >> (n)) & 0x7]

#else
    static const char *pbits[] =
    {"---", "--x", "-w-", "-wx",
     "r--", "r-x", "rw-", "rwx", 0};
    static const char *psbits[] =
    {"--S", "--s", "-wS", "-ws",
     "r-S", "r-s", "rwS", "rws", 0};

#define PBIT(a, n, s)  (s) ? psbits[((a) >> (n)) & 0x7] : \
	pbits[((a) >> (n)) & 0x7]
#endif
#if defined(S_ISVTX) && !defined(_WINDOWS)
    static const char *ptbits[] =
    {"--T", "--t", "-wT", "-wt",
     "r-T", "r-t", "rwT", "rwt", 0};

#define PTBIT(a, s)  (s) ? ptbits[(a) & 0x7] : pbits[(a) & 0x7]
#else
#define PTBIT(a, s)  PBIT(a, 0, 0)
#endif

    if (data->file_info.st_mode == 0)
	fmtstr = "    %a";	/* can't stat so just do anchor */

    /* work on a private copy: the parser plants NULs in it */
    StrAllocCopy(str, fmtstr);
    s = str;
    end = str + strlen(str);
    while (*s) {
	start = s;
	while (*s) {
	    if (*s == '%') {
		if (*(s + 1) == '%')	/* literal % */
		    s++;
		else
		    break;
	    }
	    s++;
	}
	/* s is positioned either at a % or at \0 */
	*s = '\0';
	if (s > start) {	/* some literal chars. */
	    PUTS(start);
	}
	if (s == end)
	    break;
	/* collect the flags/width part of the directive */
	start = ++s;
	while (isdigit(UCH(*s)) || *s == '.' || *s == '-' || *s == ' ' ||
	       *s == '#' || *s == '+' || *s == '\'')
	    s++;
	c = *s;			/* the format char. or \0 */
	*s = '\0';

	switch (c) {
	case '\0':
	    PUTS(start);
	    continue;

	case 'A':
	case 'a':		/* anchor */
	    HTDirEntry(target, tail, data->file_name);
	    FormatStr(&buf, start, data->file_name);
	    PUTS(buf);
	    END(HTML_A);
	    *buf = '\0';	/* already written, don't repeat it below */
#ifdef S_IFLNK
	    if (c != 'A' && S_ISLNK(data->file_info.st_mode) &&
		(len = (int) readlink(file, tmp, sizeof(tmp) - 1)) >= 0) {
		PUTS(" -> ");
		tmp[len] = '\0';
		PUTS(tmp);
	    }
#endif
	    break;

	case 'T':		/* MIME type */
	case 't':		/* MIME type description */
	    if (S_ISDIR(data->file_info.st_mode)) {
		if (c != 'T') {
		    FormatStr(&buf, start, ENTRY_IS_DIRECTORY);
		} else {
		    FormatStr(&buf, start, "");
		}
	    } else {
		const char *cp2;
		HTFormat format;

		format = HTFileFormat(file, NULL, &cp2);

		if (c != 'T') {
		    if (cp2 == NULL) {
			/* no description: derive one from the type name */
			if (!StrNCmp(HTAtom_name(format),
				     "application", 11)) {
			    cp2 = HTAtom_name(format) + 12;
			    if (!StrNCmp(cp2, "x-", 2))
				cp2 += 2;
			} else {
			    cp2 = HTAtom_name(format);
			}
		    }
		    FormatStr(&buf, start, cp2);
		} else {
		    FormatStr(&buf, start, HTAtom_name(format));
		}
	    }
	    break;

	case 'd':		/* date */
	    now = time(0);
	    datestr = ctime(&data->file_info.st_mtime);
	    if (datestr == NULL) {
		/* ctime() could not convert this timestamp */
		tmp[0] = '\0';
	    } else if ((now - data->file_info.st_mtime) < SEC_PER_YEAR / 2) {
		/*
		 * MMM DD HH:MM
		 */
		sprintf(tmp, "%.12s", datestr + 4);
	    } else {
		/*
		 * MMM DD YYYY
		 */
		sprintf(tmp, "%.7s %.4s ", datestr + 4,
			datestr + 20);
	    }
	    FormatStr(&buf, start, tmp);
	    break;

	case 's':		/* size in bytes */
	    FormatSize(&buf, start, data->file_info.st_size);
	    break;

	case 'K':		/* size in Kilobytes but not for directories */
	    if (S_ISDIR(data->file_info.st_mode)) {
		FormatStr(&buf, start, "");
		StrAllocCat(buf, " ");
		break;
	    }
	    /* FALL THROUGH */
	case 'k':		/* size in Kilobytes */
	    FormatSize(&buf, start, ((data->file_info.st_size + 1023) / 1024));
	    StrAllocCat(buf, "K");
	    break;

	case 'p':		/* unix-style permission bits */
	    switch (data->file_info.st_mode & S_IFMT) {
#if defined(_MSC_VER) && defined(_S_IFIFO)
	    case _S_IFIFO:
		type = 'p';
		break;
#else
	    case S_IFIFO:
		type = 'p';
		break;
#endif
	    case S_IFCHR:
		type = 'c';
		break;
	    case S_IFDIR:
		type = 'd';
		break;
	    case S_IFREG:
		type = '-';
		break;
#ifdef S_IFBLK
	    case S_IFBLK:
		type = 'b';
		break;
#endif
#ifdef S_IFLNK
	    case S_IFLNK:
		type = 'l';
		break;
#endif
#ifdef S_IFSOCK
# ifdef S_IFIFO			/* some older machines (e.g., apollo) have a conflict */
#  if S_IFIFO != S_IFSOCK
	    case S_IFSOCK:
		type = 's';
		break;
#  endif
# else
	    case S_IFSOCK:
		type = 's';
		break;
# endif
#endif /* S_IFSOCK */
	    default:
		type = '?';
		break;
	    }
#ifdef _WINDOWS
	    sprintf(tmp, "%c%s", type,
		    PBIT(data->file_info.st_mode, 6, data->file_info.st_mode & S_IRWXU));
#else
	    sprintf(tmp, "%c%s%s%s", type,
		    PBIT(data->file_info.st_mode, 6, data->file_info.st_mode & S_ISUID),
		    PBIT(data->file_info.st_mode, 3, data->file_info.st_mode & S_ISGID),
		    PTBIT(data->file_info.st_mode, data->file_info.st_mode & S_ISVTX));
#endif
	    FormatStr(&buf, start, tmp);
	    break;

	case 'o':		/* owner */
#ifndef NOUSERS
	    name = HTAA_UidToName((int) data->file_info.st_uid);
	    if (*name) {
		FormatStr(&buf, start, name);
	    } else {
		FormatNum(&buf, start, (int) data->file_info.st_uid);
	    }
#else
	    if (buf)
		*buf = '\0';	/* nothing to show; drop the previous field */
#endif
	    break;

	case 'g':		/* group */
#ifndef NOUSERS
	    name = HTAA_GidToName((int) data->file_info.st_gid);
	    if (*name) {
		FormatStr(&buf, start, name);
	    } else {
		FormatNum(&buf, start, (int) data->file_info.st_gid);
	    }
#else
	    if (buf)
		*buf = '\0';	/* nothing to show; drop the previous field */
#endif
	    break;

	case 'l':		/* link count */
	    FormatNum(&buf, start, (int) data->file_info.st_nlink);
	    break;

	case '%':		/* literal % with flags/width */
	    FormatStr(&buf, start, "%");
	    break;

	default:
	    fprintf(stderr,
		    "Unknown format character `%c' in list format\n", c);
	    if (buf)
		*buf = '\0';	/* don't write the previous field again */
	    break;
	}
	if (buf)
	    PUTS(buf);

	s++;
    }
    FREE(buf);
    PUTC('\n');
    FREE(str);
}
505 #endif /* LONG_LIST */
506 
507 /*	Define the representation associated with a file suffix.
508  *	--------------------------------------------------------
509  *
510  *	Calling this with suffix set to "*" will set the default
511  *	representation.
512  *	Calling this with suffix set to "*.*" will set the default
513  *	representation for unknown suffix files which contain a ".".
514  *
515  *	The encoding parameter can give a trivial (8bit, 7bit, binary)
516  *	or real (gzip, compress) encoding.
517  *
518  *	If filename suffix is already defined with the same encoding
519  *	its previous definition is overridden.
520  */
HTSetSuffix5(const char * suffix,const char * representation,const char * encoding,const char * desc,double value)521 void HTSetSuffix5(const char *suffix,
522 		  const char *representation,
523 		  const char *encoding,
524 		  const char *desc,
525 		  double value)
526 {
527     HTSuffix *suff;
528     BOOL trivial_enc = (BOOL) IsUnityEncStr(encoding);
529 
530     if (strcmp(suffix, s_no_suffix) == 0)
531 	suff = &no_suffix;
532     else if (strcmp(suffix, s_unknown_suffix) == 0)
533 	suff = &unknown_suffix;
534     else {
535 	HTList *cur = HTSuffixes;
536 
537 	while (NULL != (suff = (HTSuffix *) HTList_nextObject(cur))) {
538 	    if (suff->suffix && 0 == strcmp(suff->suffix, suffix) &&
539 		((trivial_enc && IsUnityEnc(suff->encoding)) ||
540 		 (!trivial_enc && !IsUnityEnc(suff->encoding) &&
541 		  strcmp(encoding, HTAtom_name(suff->encoding)) == 0)))
542 		break;
543 	}
544 	if (!suff) {		/* Not found -- create a new node */
545 	    suff = typecalloc(HTSuffix);
546 	    if (suff == NULL)
547 		outofmem(__FILE__, "HTSetSuffix");
548 
549 	    assert(suff != NULL);
550 
551 	    if (!HTSuffixes) {
552 		HTSuffixes = HTList_new();
553 #ifdef LY_FIND_LEAKS
554 		atexit(free_suffixes);
555 #endif
556 	    }
557 
558 	    HTList_addObject(HTSuffixes, suff);
559 
560 	    StrAllocCopy(suff->suffix, suffix);
561 	}
562     }
563 
564     if (representation)
565 	suff->rep = HTAtom_for(representation);
566 
567     /*
568      * Memory leak fixed.
569      * 05-28-94 Lynx 2-3-1 Garrett Arch Blythe
570      * Invariant code removed.
571      */
572     suff->encoding = HTAtom_for(encoding);
573 
574     StrAllocCopy(suff->desc, desc);
575 
576     suff->quality = (float) value;
577 }
578 
579 #ifdef LY_FIND_LEAKS
580 /*
581  *	Purpose:	Free all added suffixes.
582  *	Arguments:	void
583  *	Return Value:	void
584  *	Remarks/Portability/Dependencies/Restrictions:
585  *		To be used at program exit.
586  *	Revision History:
587  *		05-28-94	created Lynx 2-3-1 Garrett Arch Blythe
588  */
free_suffixes(void)589 static void free_suffixes(void)
590 {
591     HTSuffix *suff = NULL;
592 
593     /*
594      * Loop through all suffixes.
595      */
596     while (!HTList_isEmpty(HTSuffixes)) {
597 	/*
598 	 * Free off each item and its members if need be.
599 	 */
600 	suff = (HTSuffix *) HTList_removeLastObject(HTSuffixes);
601 	FREE(suff->suffix);
602 	FREE(suff->desc);
603 	FREE(suff);
604     }
605     /*
606      * Free off the list itself.
607      */
608     HTList_delete(HTSuffixes);
609     HTSuffixes = NULL;
610 }
611 #endif /* LY_FIND_LEAKS */
612 
613 /*	Make the cache file name for a W3 document.
614  *	-------------------------------------------
615  *	Make up a suitable name for saving the node in
616  *
617  *	E.g.	/tmp/WWW_Cache_news/1234@cernvax.cern.ch
618  *		/tmp/WWW_Cache_http/crnvmc/FIND/xx.xxx.xx
619  *
620  *  On exit:
621  *	Returns a malloc'ed string which must be freed by the caller.
622  */
HTCacheFileName(const char * name)623 char *HTCacheFileName(const char *name)
624 {
625     char *acc_method = HTParse(name, "", PARSE_ACCESS);
626     char *host = HTParse(name, "", PARSE_HOST);
627     char *path = HTParse(name, "", PARSE_PATH + PARSE_PUNCTUATION);
628     char *result = NULL;
629 
630     HTSprintf0(&result, "%s/WWW/%s/%s%s", HTCacheRoot, acc_method, host, path);
631 
632     FREE(path);
633     FREE(acc_method);
634     FREE(host);
635     return result;
636 }
637 
638 /*	Open a file for write, creating the path.
639  *	-----------------------------------------
640  */
641 #ifdef NOT_IMPLEMENTED
/*
 * Placeholder: creating the path is not implemented, so every call
 * reports failure (-1) without looking at its argument.
 */
static int HTCreatePath(const char *path)
{
    (void) path;		/* deliberately unused */

    return -1;
}
646 #endif /* NOT_IMPLEMENTED */
647 
648 /*	Convert filename from URL-path syntax to local path format
649  *	----------------------------------------------------------
650  *	Input name is assumed to be the URL-path of a local file
651  *      URL, i.e. what comes after the "file://localhost".
652  *      '#'-fragments to be treated as such must already be stripped.
653  *      If expand_all is FALSE, unescape only escaped '/'. - kw
654  *
655  *  On exit:
656  *	Returns a malloc'ed string which must be freed by the caller.
657  */
HTURLPath_toFile(const char * name,int expand_all,int is_remote GCC_UNUSED)658 char *HTURLPath_toFile(const char *name,
659 		       int expand_all,
660 		       int is_remote GCC_UNUSED)
661 {
662     char *path = NULL;
663     char *result = NULL;
664 
665     StrAllocCopy(path, name);
666     if (expand_all)
667 	HTUnEscape(path);	/* Interpret all % signs */
668     else
669 	HTUnEscapeSome(path, "/");	/* Interpret % signs for path delims */
670 
671     CTRACE((tfp, "URLPath `%s' means path `%s'\n", name, path));
672 #if defined(USE_DOS_DRIVES)
673     StrAllocCopy(result, is_remote ? path : HTDOS_name(path));
674 #else
675     StrAllocCopy(result, path);
676 #endif
677 
678     FREE(path);
679 
680     return result;
681 }
682 /*	Convert filenames between local and WWW formats.
683  *	------------------------------------------------
684  *	Make up a suitable name for saving the node in
685  *
686  *	E.g.	$(HOME)/WWW/news/1234@cernvax.cern.ch
687  *		$(HOME)/WWW/http/crnvmc/FIND/xx.xxx.xx
688  *
689  *  On exit:
690  *	Returns a malloc'ed string which must be freed by the caller.
691  */
692 /* NOTE: Don't use this function if you know that the input is a URL path
693 	 rather than a full URL, use HTURLPath_toFile instead.  Otherwise
694 	 this function will return the wrong thing for some unusual
695 	 paths (like ones containing "//", possibly escaped). - kw
696 */
HTnameOfFile_WWW(const char * name,int WWW_prefix,int expand_all)697 char *HTnameOfFile_WWW(const char *name,
698 		       int WWW_prefix,
699 		       int expand_all)
700 {
701     char *acc_method = HTParse(name, "", PARSE_ACCESS);
702     char *host = HTParse(name, "", PARSE_HOST);
703     char *path = HTParse(name, "", PARSE_PATH + PARSE_PUNCTUATION);
704     const char *home;
705     char *result = NULL;
706 
707     if (expand_all) {
708 	HTUnEscape(path);	/* Interpret all % signs */
709     } else
710 	HTUnEscapeSome(path, "/");	/* Interpret % signs for path delims */
711 
712     if (0 == strcmp(acc_method, "file")		/* local file */
713 	||!*acc_method) {	/* implicitly local? */
714 	if ((0 == strcasecomp(host, HTHostName())) ||
715 	    (0 == strcasecomp(host, "localhost")) || !*host) {
716 	    CTRACE((tfp, "Node `%s' means path `%s'\n", name, path));
717 	    StrAllocCopy(result, HTSYS_name(path));
718 	} else if (WWW_prefix) {
719 	    HTSprintf0(&result, "%s%s%s", "/Net/", host, path);
720 	    CTRACE((tfp, "Node `%s' means file `%s'\n", name, result));
721 	} else {
722 	    StrAllocCopy(result, path);
723 	}
724     } else if (WWW_prefix) {	/* other access */
725 #ifdef VMS
726 	if ((home = LYGetEnv("HOME")) == NULL)
727 	    home = HTCacheRoot;
728 	else
729 	    home = HTVMS_wwwName(home);
730 #else
731 #if defined(_WINDOWS)		/* 1997/10/16 (Thu) 20:42:51 */
732 	home = Home_Dir();
733 #else
734 	home = LYGetEnv("HOME");
735 #endif
736 	if (home == NULL)
737 	    home = "/tmp";
738 #endif /* VMS */
739 	HTSprintf0(&result, "%s/WWW/%s/%s%s", home, acc_method, host, path);
740     } else {
741 	StrAllocCopy(result, path);
742     }
743 
744     FREE(host);
745     FREE(path);
746     FREE(acc_method);
747 
748     CTRACE((tfp, "HTnameOfFile_WWW(%s,%d,%d) = %s\n",
749 	    name, WWW_prefix, expand_all, result));
750 
751     return result;
752 }
753 
754 /*	Make a WWW name from a full local path name.
755  *	--------------------------------------------
756  *
757  *  Bugs:
758  *	At present, only the names of two network root nodes are hand-coded
759  *	in and valid for the NeXT only.  This should be configurable in
760  *	the general case.
761  */
WWW_nameOfFile(const char * name)762 char *WWW_nameOfFile(const char *name)
763 {
764     char *result = NULL;
765 
766 #ifdef NeXT
767     if (0 == StrNCmp("/private/Net/", name, 13)) {
768 	HTSprintf0(&result, "%s//%s", STR_FILE_URL, name + 13);
769     } else
770 #endif /* NeXT */
771     if (0 == StrNCmp(HTMountRoot, name, 5)) {
772 	HTSprintf0(&result, "%s//%s", STR_FILE_URL, name + 5);
773     } else {
774 	HTSprintf0(&result, "%s//%s%s", STR_FILE_URL, HTHostName(), name);
775     }
776     CTRACE((tfp, "File `%s'\n\tmeans node `%s'\n", name, result));
777     return result;
778 }
779 
780 /*	Determine a suitable suffix, given the representation.
781  *	------------------------------------------------------
782  *
783  *  On entry,
784  *	rep	is the atomized MIME style representation
785  *	enc	is an encoding, trivial (8bit, binary, etc.) or gzip etc.
786  *
787  *  On exit:
788  *	Returns a pointer to a suitable suffix string if one has been
789  *	found, else "".
790  */
HTFileSuffix(HTAtom * rep,const char * enc)791 const char *HTFileSuffix(HTAtom *rep,
792 			 const char *enc)
793 {
794     HTSuffix *suff;
795 
796 #ifdef FNAMES_8_3
797     HTSuffix *first_found = NULL;
798 #endif
799     BOOL trivial_enc;
800     int n;
801     int i;
802 
803 #define NO_INIT			/* don't init anymore since I do it in Lynx at startup */
804 #ifndef NO_INIT
805     if (!HTSuffixes)
806 	HTFileInit();
807 #endif /* !NO_INIT */
808 
809     trivial_enc = (BOOL) IsUnityEncStr(enc);
810     n = HTList_count(HTSuffixes);
811     for (i = 0; i < n; i++) {
812 	suff = (HTSuffix *) HTList_objectAt(HTSuffixes, i);
813 	if (suff->rep == rep &&
814 #if defined(VMS) || defined(FNAMES_8_3)
815 	/*  Don't return a suffix whose first char is a dot, and which
816 	   has more dots or asterisks after that, for
817 	   these systems - kw */
818 	    (!suff->suffix || !suff->suffix[0] || suff->suffix[0] != '.' ||
819 	     (strchr(suff->suffix + 1, '.') == NULL &&
820 	      strchr(suff->suffix + 1, '*') == NULL)) &&
821 #endif
822 	    ((trivial_enc && IsUnityEnc(suff->encoding)) ||
823 	     (!trivial_enc && !IsUnityEnc(suff->encoding) &&
824 	      strcmp(enc, HTAtom_name(suff->encoding)) == 0))) {
825 #ifdef FNAMES_8_3
826 	    if (suff->suffix && (strlen(suff->suffix) <= 4)) {
827 		/*
828 		 * If length of suffix (including dot) is 4 or smaller, return
829 		 * this one even if we found a longer one earlier - kw
830 		 */
831 		return suff->suffix;
832 	    } else if (!first_found) {
833 		first_found = suff;	/* remember this one */
834 	    }
835 #else
836 	    return suff->suffix;	/* OK -- found */
837 #endif
838 	}
839     }
840 #ifdef FNAMES_8_3
841     if (first_found)
842 	return first_found->suffix;
843 #endif
844     return "";			/* Dunno */
845 }
846 
847 /*
848  * Trim version from VMS filenames to avoid confusing comparisons.
849  */
850 #ifdef VMS
/*
 * Return filename without its ";version" part.  A name without a ';' is
 * returned unchanged.  Otherwise the result lives in a static buffer
 * which is reused by the next call, so it must not be freed and is only
 * valid until then.
 */
static const char *VMS_trim_version(const char *filename)
{
    static char *stripped;
    const char *semicolon = strchr(filename, ';');

    if (semicolon == 0)
	return filename;

    StrAllocCopy(stripped, filename);
    stripped[semicolon - filename] = '\0';
    return (const char *) stripped;
}
865 #define VMS_DEL_VERSION(name) name = VMS_trim_version(name)
866 #else
867 #define VMS_DEL_VERSION(name)	/* nothing */
868 #endif
869 
870 /*	Determine file format from file name.
871  *	-------------------------------------
872  *
873  *	This version will return the representation and also set
874  *	a variable for the encoding.
875  *
876  *	Encoding may be a unity encoding (binary, 8bit, etc.) or
877  *	a content-coding like gzip, compress.
878  *
879  *	It will handle for example  x.txt, x.txt,Z, x.Z
880  */
HTFileFormat(const char * filename,HTAtom ** pencoding,const char ** pdesc)881 HTFormat HTFileFormat(const char *filename,
882 		      HTAtom **pencoding,
883 		      const char **pdesc)
884 {
885     HTSuffix *suff;
886     int n;
887     int i;
888     int lf;
889 
890     VMS_DEL_VERSION(filename);
891 
892     if (pencoding)
893 	*pencoding = NULL;
894     if (pdesc)
895 	*pdesc = NULL;
896     if (LYforce_HTML_mode) {
897 	if (pencoding)
898 	    *pencoding = WWW_ENC_8BIT;
899 	return WWW_HTML;
900     }
901 #ifndef NO_INIT
902     if (!HTSuffixes)
903 	HTFileInit();
904 #endif /* !NO_INIT */
905     lf = (int) strlen(filename);
906     n = HTList_count(HTSuffixes);
907     for (i = 0; i < n; i++) {
908 	int ls;
909 
910 	suff = (HTSuffix *) HTList_objectAt(HTSuffixes, i);
911 	ls = (int) strlen(suff->suffix);
912 	if ((ls <= lf) && 0 == strcasecomp(suff->suffix, filename + lf - ls)) {
913 	    int j;
914 
915 	    if (pencoding)
916 		*pencoding = suff->encoding;
917 	    if (pdesc)
918 		*pdesc = suff->desc;
919 	    if (suff->rep) {
920 		return suff->rep;	/* OK -- found */
921 	    }
922 	    for (j = 0; j < n; j++) {	/* Got encoding, need representation */
923 		int ls2;
924 
925 		suff = (HTSuffix *) HTList_objectAt(HTSuffixes, j);
926 		ls2 = (int) strlen(suff->suffix);
927 		if ((ls + ls2 <= lf) &&
928 		    !strncasecomp(suff->suffix,
929 				  filename + lf - ls - ls2, ls2)) {
930 		    if (suff->rep) {
931 			if (pdesc && !(*pdesc))
932 			    *pdesc = suff->desc;
933 			if (pencoding && IsUnityEnc(*pencoding) &&
934 			    *pencoding != WWW_ENC_7BIT &&
935 			    !IsUnityEnc(suff->encoding))
936 			    *pencoding = suff->encoding;
937 			return suff->rep;
938 		    }
939 		}
940 	    }
941 
942 	}
943     }
944 
945     /* defaults tree */
946 
947     suff = (strchr(filename, '.')
948 	    ? (unknown_suffix.rep
949 	       ? &unknown_suffix
950 	       : &no_suffix)
951 	    : &no_suffix);
952 
953     /*
954      * Set default encoding unless found with suffix already.
955      */
956     if (pencoding && !*pencoding) {
957 	*pencoding = (suff->encoding
958 		      ? suff->encoding
959 		      : HTAtom_for("binary"));
960     }
961     return suff->rep ? suff->rep : WWW_BINARY;
962 }
963 
964 /*	Revise the file format in relation to the Lynx charset. - FM
965  *	-------------------------------------------------------
966  *
967  *	This checks the format associated with an anchor for
968  *	an extended MIME Content-Type, and if a charset is
969  *	indicated, sets Lynx up for proper handling in relation
970  *	to the currently selected character set. - FM
971  */
HTCharsetFormat(HTFormat format,HTParentAnchor * anchor,int default_LYhndl)972 HTFormat HTCharsetFormat(HTFormat format,
973 			 HTParentAnchor *anchor,
974 			 int default_LYhndl)
975 {
976     char *cp = NULL, *cp1, *cp2, *cp3 = NULL, *cp4;
977     BOOL chartrans_ok = FALSE;
978     int chndl = -1;
979 
980     FREE(anchor->charset);
981     StrAllocCopy(cp, format->name);
982     LYLowerCase(cp);
983     if (((cp1 = strchr(cp, ';')) != NULL) &&
984 	(cp2 = strstr(cp1, "charset")) != NULL) {
985 	CTRACE((tfp, "HTCharsetFormat: Extended MIME Content-Type is %s\n",
986 		format->name));
987 	cp2 += 7;
988 	while (*cp2 == ' ' || *cp2 == '=')
989 	    cp2++;
990 	StrAllocCopy(cp3, cp2);	/* copy to mutilate more */
991 	for (cp4 = cp3; (*cp4 != '\0' && *cp4 != '"' &&
992 			 *cp4 != ';' && *cp4 != ':' &&
993 			 !WHITE(*cp4)); cp4++) {
994 	    ;			/* do nothing */
995 	}
996 	*cp4 = '\0';
997 	cp4 = cp3;
998 	chndl = UCGetLYhndl_byMIME(cp3);
999 	if (UCCanTranslateFromTo(chndl, current_char_set)) {
1000 	    chartrans_ok = YES;
1001 	    *cp1 = '\0';
1002 	    format = HTAtom_for(cp);
1003 	    StrAllocCopy(anchor->charset, cp4);
1004 	    HTAnchor_setUCInfoStage(anchor, chndl,
1005 				    UCT_STAGE_MIME,
1006 				    UCT_SETBY_MIME);
1007 	} else if (chndl < 0) {
1008 	    /*
1009 	     * Got something but we don't recognize it.
1010 	     */
1011 	    chndl = UCLYhndl_for_unrec;
1012 	    if (chndl < 0)
1013 		/*
1014 		 * UCLYhndl_for_unrec not defined :-( fallback to
1015 		 * UCLYhndl_for_unspec which always valid.
1016 		 */
1017 		chndl = UCLYhndl_for_unspec;	/* always >= 0 */
1018 	    if (UCCanTranslateFromTo(chndl, current_char_set)) {
1019 		chartrans_ok = YES;
1020 		HTAnchor_setUCInfoStage(anchor, chndl,
1021 					UCT_STAGE_MIME,
1022 					UCT_SETBY_DEFAULT);
1023 	    }
1024 	}
1025 	if (chartrans_ok) {
1026 	    LYUCcharset *p_in = HTAnchor_getUCInfoStage(anchor,
1027 							UCT_STAGE_MIME);
1028 	    LYUCcharset *p_out = HTAnchor_setUCInfoStage(anchor,
1029 							 current_char_set,
1030 							 UCT_STAGE_HTEXT,
1031 							 UCT_SETBY_DEFAULT);
1032 
1033 	    if (!p_out) {
1034 		/*
1035 		 * Try again.
1036 		 */
1037 		p_out = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT);
1038 	    }
1039 	    if (!strcmp(p_in->MIMEname, "x-transparent")) {
1040 		HTPassEightBitRaw = TRUE;
1041 		HTAnchor_setUCInfoStage(anchor,
1042 					HTAnchor_getUCLYhndl(anchor,
1043 							     UCT_STAGE_HTEXT),
1044 					UCT_STAGE_MIME,
1045 					UCT_SETBY_DEFAULT);
1046 	    }
1047 	    if (!strcmp(p_out->MIMEname, "x-transparent")) {
1048 		HTPassEightBitRaw = TRUE;
1049 		HTAnchor_setUCInfoStage(anchor,
1050 					HTAnchor_getUCLYhndl(anchor,
1051 							     UCT_STAGE_MIME),
1052 					UCT_STAGE_HTEXT,
1053 					UCT_SETBY_DEFAULT);
1054 	    }
1055 	    if (p_in->enc != UCT_ENC_CJK) {
1056 		HTCJK = NOCJK;
1057 		if (!(p_in->codepoints &
1058 		      UCT_CP_SUBSETOF_LAT1) &&
1059 		    chndl == current_char_set) {
1060 		    HTPassEightBitRaw = TRUE;
1061 		}
1062 	    } else if (p_out->enc == UCT_ENC_CJK) {
1063 		Set_HTCJK(p_in->MIMEname, p_out->MIMEname);
1064 	    }
1065 	} else {
1066 	    /*
1067 	     * Cannot translate.  If according to some heuristic the given
1068 	     * charset and the current display character both are likely to be
1069 	     * like ISO-8859 in structure, pretend we have some kind of match.
1070 	     */
1071 	    BOOL given_is_8859 = (BOOL) (!StrNCmp(cp4, "iso-8859-", 9) &&
1072 					 isdigit(UCH(cp4[9])));
1073 	    BOOL given_is_8859like = (BOOL) (given_is_8859 ||
1074 					     !StrNCmp(cp4, "windows-", 8) ||
1075 					     !StrNCmp(cp4, "cp12", 4) ||
1076 					     !StrNCmp(cp4, "cp-12", 5));
1077 	    BOOL given_and_display_8859like = (BOOL) (given_is_8859like &&
1078 						      (strstr(LYchar_set_names[current_char_set],
1079 							      "ISO-8859") ||
1080 						       strstr(LYchar_set_names[current_char_set],
1081 							      "windows-")));
1082 
1083 	    if (given_and_display_8859like) {
1084 		*cp1 = '\0';
1085 		format = HTAtom_for(cp);
1086 	    }
1087 	    if (given_is_8859) {
1088 		cp1 = &cp4[10];
1089 		while (*cp1 &&
1090 		       isdigit(UCH(*cp1)))
1091 		    cp1++;
1092 		*cp1 = '\0';
1093 	    }
1094 	    if (given_and_display_8859like) {
1095 		StrAllocCopy(anchor->charset, cp4);
1096 		HTPassEightBitRaw = TRUE;
1097 	    }
1098 	    HTAlert(*cp4 ? cp4 : anchor->charset);
1099 	}
1100 	FREE(cp3);
1101     } else if (cp1 != NULL) {
1102 	/*
1103 	 * No charset parameter is present.  Ignore all other parameters, as we
1104 	 * do when charset is present.  - FM
1105 	 */
1106 	*cp1 = '\0';
1107 	format = HTAtom_for(cp);
1108     }
1109     FREE(cp);
1110 
1111     /*
1112      * Set up defaults, if needed.  - FM
1113      */
1114     if (!chartrans_ok && !anchor->charset && default_LYhndl >= 0) {
1115 	HTAnchor_setUCInfoStage(anchor, default_LYhndl,
1116 				UCT_STAGE_MIME,
1117 				UCT_SETBY_DEFAULT);
1118     }
1119     HTAnchor_copyUCInfoStage(anchor,
1120 			     UCT_STAGE_PARSER,
1121 			     UCT_STAGE_MIME,
1122 			     -1);
1123 
1124     return format;
1125 }
1126 
1127 /*	Get various pieces of meta info from file name.
1128  *	-----------------------------------------------
1129  *
1130  *  LYGetFileInfo fills in information that can be determined without
1131  *  an actual (new) access to the filesystem, based on current suffix
1132  *  and character set configuration.  If the file has been loaded and
1133  *  parsed before  (with the same URL generated here!) and the anchor
1134  *  is still around, some results may be influenced by that (in
1135  *  particular, charset info from a META tag - this is not actually
1136  *  tested!).
1137  *  The caller should not keep pointers to the returned objects around
1138  *  for too long, the valid lifetimes vary. In particular, the returned
1139  *  charset string should be copied if necessary.  If return of the
1140  *  file_anchor is requested, that one can be used to retrieve
1141  *  additional bits of info that are stored in the anchor object and
1142  *  are not covered here; as usual, don't keep pointers to the
1143  *  file_anchor longer than necessary since the object may disappear
1144  *  through HTuncache_current_document or at the next document load.
1145  *  - kw
1146  */
LYGetFileInfo(const char * filename,HTParentAnchor ** pfile_anchor,HTFormat * pformat,HTAtom ** pencoding,const char ** pdesc,const char ** pcharset,int * pfile_cs)1147 void LYGetFileInfo(const char *filename,
1148 		   HTParentAnchor **pfile_anchor,
1149 		   HTFormat *pformat,
1150 		   HTAtom **pencoding,
1151 		   const char **pdesc,
1152 		   const char **pcharset,
1153 		   int *pfile_cs)
1154 {
1155     char *Afn;
1156     char *Aname = NULL;
1157     HTFormat format;
1158     HTAtom *myEnc = NULL;
1159     HTParentAnchor *file_anchor;
1160     const char *file_csname;
1161     int file_cs;
1162 
1163     /*
1164      * Convert filename to URL.  Note that it is always supposed to be a
1165      * filename, not maybe-filename-maybe-URL, so we don't use
1166      * LYFillLocalFileURL and LYEnsureAbsoluteURL.  - kw
1167      */
1168     Afn = HTEscape(filename, URL_PATH);
1169     LYLocalFileToURL(&Aname, Afn);
1170     file_anchor = HTAnchor_findSimpleAddress(Aname);
1171 
1172     format = HTFileFormat(filename, &myEnc, pdesc);
1173     format = HTCharsetFormat(format, file_anchor, UCLYhndl_HTFile_for_unspec);
1174     file_cs = HTAnchor_getUCLYhndl(file_anchor, UCT_STAGE_MIME);
1175     file_csname = file_anchor->charset;
1176     if (!file_csname) {
1177 	if (file_cs >= 0)
1178 	    file_csname = LYCharSet_UC[file_cs].MIMEname;
1179 	else
1180 	    file_csname = "display character set";
1181     }
1182     CTRACE((tfp, "GetFileInfo: '%s' is a%s %s %s file, charset=%s (%d).\n",
1183 	    filename,
1184 	    ((myEnc && *HTAtom_name(myEnc) == '8') ? "n" : myEnc ? "" :
1185 	     *HTAtom_name(format) == 'a' ? "n" : ""),
1186 	    myEnc ? HTAtom_name(myEnc) : "",
1187 	    HTAtom_name(format),
1188 	    file_csname,
1189 	    file_cs));
1190     FREE(Afn);
1191     FREE(Aname);
1192     if (pfile_anchor)
1193 	*pfile_anchor = file_anchor;
1194     if (pformat)
1195 	*pformat = format;
1196     if (pencoding)
1197 	*pencoding = myEnc;
1198     if (pcharset)
1199 	*pcharset = file_csname;
1200     if (pfile_cs)
1201 	*pfile_cs = file_cs;
1202 }
1203 
1204 /*	Determine value from file name.
1205  *	-------------------------------
1206  *
1207  */
HTFileValue(const char * filename)1208 float HTFileValue(const char *filename)
1209 {
1210     HTSuffix *suff;
1211     int n;
1212     int i;
1213     int lf = (int) strlen(filename);
1214 
1215 #ifndef NO_INIT
1216     if (!HTSuffixes)
1217 	HTFileInit();
1218 #endif /* !NO_INIT */
1219     n = HTList_count(HTSuffixes);
1220     for (i = 0; i < n; i++) {
1221 	int ls;
1222 
1223 	suff = (HTSuffix *) HTList_objectAt(HTSuffixes, i);
1224 	ls = (int) strlen(suff->suffix);
1225 	if ((ls <= lf) && 0 == strcmp(suff->suffix, filename + lf - ls)) {
1226 	    CTRACE((tfp, "File: Value of %s is %.3f\n",
1227 		    filename, suff->quality));
1228 	    return suff->quality;	/* OK -- found */
1229 	}
1230     }
1231     return (float) 0.3;		/* Dunno! */
1232 }
1233 
1234 /*
1235  *  Determine compression type from file name, by looking at its suffix.
1236  *  Sets as side-effect a pointer to the "dot" that begins the suffix.
1237  */
HTCompressFileType(const char * filename,const char * dots,int * rootlen)1238 CompressFileType HTCompressFileType(const char *filename,
1239 				    const char *dots,
1240 				    int *rootlen)
1241 {
1242     CompressFileType result = cftNone;
1243     size_t len = strlen(filename);
1244     const char *ftype = filename + len;
1245 
1246     VMS_DEL_VERSION(filename);
1247 
1248     if ((len > 4)
1249 	&& !strcasecomp((ftype - 3), "bz2")
1250 	&& strchr(dots, ftype[-4]) != 0) {
1251 	result = cftBzip2;
1252 	ftype -= 4;
1253     } else if ((len > 3)
1254 	       && !strcasecomp((ftype - 2), "gz")
1255 	       && strchr(dots, ftype[-3]) != 0) {
1256 	result = cftGzip;
1257 	ftype -= 3;
1258     } else if ((len > 3)
1259 	       && !strcasecomp((ftype - 2), "zz")
1260 	       && strchr(dots, ftype[-3]) != 0) {
1261 	result = cftDeflate;
1262 	ftype -= 3;
1263     } else if ((len > 2)
1264 	       && !strcmp((ftype - 1), "Z")
1265 	       && strchr(dots, ftype[-2]) != 0) {
1266 	result = cftCompress;
1267 	ftype -= 2;
1268     }
1269 
1270     *rootlen = (int) (ftype - filename);
1271 
1272     CTRACE((tfp, "HTCompressFileType(%s) returns %d:%s\n",
1273 	    filename, (int) result, filename + *rootlen));
1274     return result;
1275 }
1276 
1277 /*
1278  *  Determine expected file-suffix from the compression method.
1279  */
HTCompressTypeToSuffix(CompressFileType method)1280 const char *HTCompressTypeToSuffix(CompressFileType method)
1281 {
1282     const char *result = "";
1283 
1284     switch (method) {
1285     default:
1286     case cftNone:
1287 	result = "";
1288 	break;
1289     case cftGzip:
1290 	result = ".gz";
1291 	break;
1292     case cftCompress:
1293 	result = ".Z";
1294 	break;
1295     case cftBzip2:
1296 	result = ".bz2";
1297 	break;
1298     case cftDeflate:
1299 	result = ".zz";
1300 	break;
1301     }
1302     return result;
1303 }
1304 
1305 /*
1306  *  Determine compression encoding from the compression method.
1307  */
HTCompressTypeToEncoding(CompressFileType method)1308 const char *HTCompressTypeToEncoding(CompressFileType method)
1309 {
1310     const char *result = NULL;
1311 
1312     switch (method) {
1313     default:
1314     case cftNone:
1315 	result = NULL;
1316 	break;
1317     case cftGzip:
1318 	result = "gzip";
1319 	break;
1320     case cftCompress:
1321 	result = "compress";
1322 	break;
1323     case cftBzip2:
1324 	result = "bzip2";
1325 	break;
1326     case cftDeflate:
1327 	result = "deflate";
1328 	break;
1329     }
1330     return result;
1331 }
1332 
1333 /*
1334  * Check if the token from "Content-Encoding" corresponds to a compression
1335  * type.  RFC 2068 (and cut/paste into RFC 2616) lists these:
1336  *	gzip
1337  *	compress
1338  *	deflate
1339  * as well as "identity" (but that does nothing).
1340  */
HTEncodingToCompressType(const char * coding)1341 CompressFileType HTEncodingToCompressType(const char *coding)
1342 {
1343     CompressFileType result = cftNone;
1344 
1345     if (coding == NULL) {
1346 	result = cftNone;
1347     } else if (!strcasecomp(coding, "gzip") ||
1348 	       !strcasecomp(coding, "x-gzip")) {
1349 	result = cftGzip;
1350     } else if (!strcasecomp(coding, "compress") ||
1351 	       !strcasecomp(coding, "x-compress")) {
1352 	result = cftCompress;
1353     } else if (!strcasecomp(coding, "bzip2") ||
1354 	       !strcasecomp(coding, "x-bzip2")) {
1355 	result = cftBzip2;
1356     } else if (!strcasecomp(coding, "deflate") ||
1357 	       !strcasecomp(coding, "x-deflate")) {
1358 	result = cftDeflate;
1359     }
1360     return result;
1361 }
1362 
HTContentTypeToCompressType(const char * ct)1363 CompressFileType HTContentTypeToCompressType(const char *ct)
1364 {
1365     CompressFileType method = cftNone;
1366 
1367     if (ct == NULL) {
1368 	method = cftNone;
1369     } else if (!strncasecomp(ct, "application/gzip", 16) ||
1370 	       !strncasecomp(ct, "application/x-gzip", 18)) {
1371 	method = cftGzip;
1372     } else if (!strncasecomp(ct, "application/compress", 20) ||
1373 	       !strncasecomp(ct, "application/x-compress", 22)) {
1374 	method = cftCompress;
1375     } else if (!strncasecomp(ct, "application/bzip2", 17) ||
1376 	       !strncasecomp(ct, "application/x-bzip2", 19)) {
1377 	method = cftBzip2;
1378     }
1379     return method;
1380 }
1381 
1382 /*
1383  * Check the anchor's content_type and content_encoding elements for a gzip or
1384  * Unix compressed file -FM, TD
1385  */
HTContentToCompressType(HTParentAnchor * anchor)1386 CompressFileType HTContentToCompressType(HTParentAnchor *anchor)
1387 {
1388     CompressFileType method = cftNone;
1389     const char *ct = HTAnchor_content_type(anchor);
1390     const char *ce = HTAnchor_content_encoding(anchor);
1391 
1392     if (ce == NULL && ct != 0) {
1393 	method = HTContentTypeToCompressType(ct);
1394     } else if (ce != 0) {
1395 	method = HTEncodingToCompressType(ce);
1396     }
1397     return method;
1398 }
1399 
1400 /*	Determine write access to a file.
1401  *	---------------------------------
1402  *
1403  *  On exit:
1404  *	Returns YES if file can be accessed and can be written to.
1405  *
1406  *  Bugs:
1407  *	1.	No code for non-unix systems.
1408  *	2.	Isn't there a quicker way?
1409  */
HTEditable(const char * filename GCC_UNUSED)1410 BOOL HTEditable(const char *filename GCC_UNUSED)
1411 {
1412 #ifndef NO_GROUPS
1413     GETGROUPS_T groups[NGROUPS];
1414     uid_t myUid;
1415     int ngroups;		/* The number of groups  */
1416     struct stat fileStatus;
1417     int i;
1418 
1419     if (stat(filename, &fileStatus))	/* Get details of filename */
1420 	return NO;		/* Can't even access file! */
1421 
1422     ngroups = getgroups(NGROUPS, groups);	/* Groups to which I belong  */
1423     myUid = geteuid();		/* Get my user identifier */
1424 
1425     if (TRACE) {
1426 	int i2;
1427 
1428 	fprintf(tfp,
1429 		"File mode is 0%o, uid=%d, gid=%d. My uid=%d, %d groups (",
1430 		(unsigned int) fileStatus.st_mode,
1431 		(int) fileStatus.st_uid,
1432 		(int) fileStatus.st_gid,
1433 		(int) myUid,
1434 		(int) ngroups);
1435 	for (i2 = 0; i2 < ngroups; i2++)
1436 	    fprintf(tfp, " %d", (int) groups[i2]);
1437 	fprintf(tfp, ")\n");
1438     }
1439 
1440     if (fileStatus.st_mode & 0002)	/* I can write anyway? */
1441 	return YES;
1442 
1443     if ((fileStatus.st_mode & 0200)	/* I can write my own file? */
1444 	&&(fileStatus.st_uid == myUid))
1445 	return YES;
1446 
1447     if (fileStatus.st_mode & 0020)	/* Group I am in can write? */
1448     {
1449 	for (i = 0; i < ngroups; i++) {
1450 	    if (groups[i] == fileStatus.st_gid)
1451 		return YES;
1452 	}
1453     }
1454     CTRACE((tfp, "\tFile is not editable.\n"));
1455 #endif /* NO_GROUPS */
1456     return NO;			/* If no excuse, can't do */
1457 }
1458 
1459 /*	Make a save stream.
1460  *	-------------------
1461  *
1462  *	The stream must be used for writing back the file.
1463  *	@@@ no backup done
1464  */
HTFileSaveStream(HTParentAnchor * anchor)1465 HTStream *HTFileSaveStream(HTParentAnchor *anchor)
1466 {
1467     const char *addr = anchor->address;
1468     char *localname = HTLocalName(addr);
1469     FILE *fp = fopen(localname, BIN_W);
1470 
1471     FREE(localname);
1472     if (!fp)
1473 	return NULL;
1474 
1475     return HTFWriter_new(fp);
1476 }
1477 
1478 /*	Output one directory entry.
1479  *	---------------------------
1480  */
HTDirEntry(HTStructured * target,const char * tail,const char * entry)1481 void HTDirEntry(HTStructured * target, const char *tail,
1482 		const char *entry)
1483 {
1484     char *relative = NULL;
1485     char *stripped = NULL;
1486     char *escaped = NULL;
1487     int len;
1488 
1489     StrAllocCopy(escaped, entry);
1490     LYTrimPathSep(escaped);
1491     if (strcmp(escaped, "..") != 0) {
1492 	stripped = escaped;
1493 	escaped = HTEscape(stripped, URL_XPALPHAS);
1494 	if (((len = (int) strlen(escaped)) > 2) &&
1495 	    escaped[(len - 3)] == '%' &&
1496 	    escaped[(len - 2)] == '2' &&
1497 	    TOUPPER(escaped[(len - 1)]) == 'F') {
1498 	    escaped[(len - 3)] = '\0';
1499 	}
1500     }
1501 
1502     if (isEmpty(tail)) {
1503 	/*
1504 	 * Handle extra slash at end of path.
1505 	 */
1506 	HTStartAnchor(target, NULL, (escaped[0] != '\0' ? escaped : "/"));
1507     } else {
1508 	/*
1509 	 * If empty tail, gives absolute ref below.
1510 	 */
1511 	relative = 0;
1512 	HTSprintf0(&relative, "%s%s%s",
1513 		   tail,
1514 		   (*escaped != '\0' ? "/" : ""),
1515 		   escaped);
1516 	HTStartAnchor(target, NULL, relative);
1517 	FREE(relative);
1518     }
1519     FREE(stripped);
1520     FREE(escaped);
1521 }
1522 
view_structured(HTFormat format_out)1523 static BOOL view_structured(HTFormat format_out)
1524 {
1525     BOOL result = FALSE;
1526 
1527 #ifdef USE_PRETTYSRC
1528     if (psrc_view
1529 	|| (format_out == HTAtom_for("www/dump")))
1530 	result = TRUE;
1531 #else
1532     if (format_out == WWW_SOURCE)
1533 	result = TRUE;
1534 #endif
1535     return result;
1536 }
1537 
1538 /*
1539  * Write a DOCTYPE to the given stream if we happen to want to see the
1540  * source view, or are dumping source.  This is not needed when the source
1541  * is not visible, since the document is rendered from a HTStructured object.
1542  */
HTStructured_doctype(HTStructured * target,HTFormat format_out)1543 void HTStructured_doctype(HTStructured * target, HTFormat format_out)
1544 {
1545     if (view_structured(format_out))
1546 	PUTS("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n");
1547 }
1548 
/*
 * Write a META element giving the content type (text/html, with charset
 * iso-8859-1) to the given stream, under the same condition as the DOCTYPE:
 * only when the generated source will be visible.
 */
void HTStructured_meta(HTStructured * target, HTFormat format_out)
{
    if (!view_structured(format_out))
	return;

    PUTS("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n");
}
/*	Output parent directory entry.
 *	------------------------------
 *
 *    This gives the document head (DOCTYPE and META when the source is
 *    visible, and the TITLE), the H1 header (H2 with DIRED_SUPPORT), and
 *    also a link to the parent directory if appropriate.
 *
 *  On entry:
 *	target		structured stream which receives the markup.
 *	anchor		anchor for the directory; its address gives the path.
 *	format_out	passed on to HTStructured_doctype() and
 *			HTStructured_meta().
 *	tildeIsTop	if nonzero, a leading "/~" is removed from the path
 *			(a path of just "/~" becomes "/").
 *
 *  On exit:
 *	Returns TRUE if an "Up to <parent>" link was not created
 *	for a readable local directory because LONG_LIST is defined
 *	and NO_PARENT_DIR_REFERENCE is not defined, so that the
 *	calling function should use LYListFmtParse() to create a link
 *	to the parent directory.  Otherwise, it returns FALSE. - FM
 */
BOOL HTDirTitles(HTStructured * target, HTParentAnchor *anchor,
		 HTFormat format_out,
		 int tildeIsTop)
{
    const char *logical = anchor->address;
    char *path = HTParse(logical, "", PARSE_PATH + PARSE_PUNCTUATION);
    char *current;
    char *cp = NULL;
    BOOL need_parent_link = FALSE;
    int i;

#if defined(USE_DOS_DRIVES)
    /*
     * A local link here is a "file://localhost/" URL whose path starts with
     * a DOS drive; anything else is treated as remote.
     */
    BOOL local_link = (strlen(logical) > 18
		       && !strncasecomp(logical, "file://localhost/", 17)
		       && LYIsDosDrive(logical + 17));
    BOOL is_remote = !local_link;

#else
    /*
     * Without DOS drives, is_remote is a constant.  Note that this macro is
     * not #undef'd within this function.
     */
#define is_remote TRUE
#endif

    /*
     * Check tildeIsTop for treating home directory as Welcome (assume the
     * tilde is not followed by a username).  - FM
     */
    if (tildeIsTop && !StrNCmp(path, "/~", 2)) {
	if (path[2] == '\0') {
	    path[1] = '\0';
	} else {
	    /*
	     * Shift the rest of the path down over the "/~".
	     */
	    for (i = 0; path[(i + 2)]; i++) {
		path[i] = path[(i + 2)];
	    }
	    path[i] = '\0';
	}
    }

    /*
     * Trim out the ;type= parameter, if present.  - FM
     */
    if ((cp = strrchr(path, ';')) != NULL) {
	if (!strncasecomp((cp + 1), "type=", 5)) {
	    if (TOUPPER(*(cp + 6)) == 'D' ||
		TOUPPER(*(cp + 6)) == 'A' ||
		TOUPPER(*(cp + 6)) == 'I')
		*cp = '\0';
	}
	cp = NULL;
    }
    current = LYPathLeaf(path);	/* last part or "" */

    /*
     * Write the head and the heading.  The text shown is the whole path
     * converted to a file name with DIRED_SUPPORT, otherwise the unescaped
     * last part of the path; if that is empty, WELCOME_MSG is used instead.
     */
    {
	char *printable = NULL;

#ifdef DIRED_SUPPORT
	printable = HTURLPath_toFile(((!strncasecomp(path, "/%2F", 4))	/* "//" ? */
				      ? (path + 1)
				      : path),
				     TRUE,
				     is_remote);
	/*
	 * Drop the leading slash in front of these two prefixes.
	 */
	if (0 == strncasecomp(printable, "/vmsysu:", 8) ||
	    0 == strncasecomp(printable, "/anonymou.", 10)) {
	    StrAllocCopy(cp, (printable + 1));
	    StrAllocCopy(printable, cp);
	    FREE(cp);
	}
#else
	StrAllocCopy(printable, current);
	HTUnEscape(printable);
#endif /* DIRED_SUPPORT */

	HTStructured_doctype(target, format_out);

	START(HTML_HEAD);
	PUTC('\n');
	START(HTML_TITLE);
	PUTS(*printable ? printable : WELCOME_MSG);
	PUTS(SEGMENT_DIRECTORY);
	END(HTML_TITLE);
	PUTC('\n');
	HTStructured_meta(target, format_out);
	END(HTML_HEAD);
	PUTC('\n');

	START(HTML_BODY);
	PUTC('\n');

#ifdef DIRED_SUPPORT
	START(HTML_H2);
	PUTS(*printable ? SEGMENT_CURRENT_DIR : "");
	PUTS(*printable ? printable : WELCOME_MSG);
	END(HTML_H2);
	PUTC('\n');
#else
	START(HTML_H1);
	PUTS(*printable ? printable : WELCOME_MSG);
	END(HTML_H1);
	PUTC('\n');
#endif /* DIRED_SUPPORT */
	/*
	 * No parent link is made for a "vmsysu:" name which has no slash
	 * after its first dot, nor for an "anonymou." name which has no
	 * slash at all.
	 */
	if (((0 == strncasecomp(printable, "vmsysu:", 7)) &&
	     (cp = strchr(printable, '.')) != NULL &&
	     strchr(cp, '/') == NULL) ||
	    (0 == strncasecomp(printable, "anonymou.", 9) &&
	     strchr(printable, '/') == NULL)) {
	    FREE(printable);
	    FREE(path);
	    return (need_parent_link);
	}
	FREE(printable);
    }

#ifndef NO_PARENT_DIR_REFERENCE
    /*
     * Make link back to parent directory.
     */
    if (current - path > 0
	&& LYIsPathSep(current[-1])
	&& current[0] != '\0') {	/* was a slash AND something else too */
	char *parent = NULL;
	char *relative = NULL;

	/*
	 * Cut the path in front of its last part, so that path now holds
	 * the parent's path.
	 */
	current[-1] = '\0';
	parent = strrchr(path, '/');	/* penultimate slash */

	/*
	 * No link if the parent is ".." or begins with an escaped slash, or
	 * if the last part itself begins with an escaped slash.
	 */
	if ((parent &&
	     (!strcmp(parent, "/..") ||
	      !strncasecomp(parent, "/%2F", 4))) ||
	    !strncasecomp(current, "%2F", 3)) {
	    FREE(path);
	    return (need_parent_link);
	}

	relative = 0;
	HTSprintf0(&relative, "%s/..", current);

	/*
	 * Note:  the "else" below, when compiled in, applies to the block
	 * inside the "#if !defined (VMS)" which follows it.
	 */
#if defined(DOSPATH) || defined(__EMX__)
	if (local_link) {
	    if (parent != 0 && strlen(parent) == 3) {
		StrAllocCat(relative, "/.");
	    }
	} else
#endif

#if !defined (VMS)
	{
	    /*
	     * On Unix, if it's not ftp and the directory cannot be read, don't
	     * put out a link.
	     *
	     * On VMS, this problem is dealt with internally by
	     * HTVMSBrowseDir().
	     */
	    DIR *dp = NULL;

	    if (LYisLocalFile(logical)) {
		/*
		 * We need an absolute file path for the opendir.  We also need
		 * to unescape for this test.  Don't worry about %2F now, they
		 * presumably have been dealt with above, and shouldn't appear
		 * for local files anyway...  Assume OS / filesystem will just
		 * ignore superfluous slashes.  - KW
		 */
		char *fullparentpath = NULL;

		/*
		 * Path has been shortened above.
		 */
		StrAllocCopy(fullparentpath, *path ? path : "/");

		/*
		 * Guard against weirdness.
		 */
		if (0 == strcmp(current, "..")) {
		    StrAllocCat(fullparentpath, "/../..");
		} else if (0 == strcmp(current, ".")) {
		    StrAllocCat(fullparentpath, "/..");
		}

		HTUnEscape(fullparentpath);
		if ((dp = opendir(fullparentpath)) == NULL) {
		    FREE(fullparentpath);
		    FREE(relative);
		    FREE(path);
		    return (need_parent_link);
		}
		closedir(dp);
		FREE(fullparentpath);
#ifdef LONG_LIST
		/*
		 * The parent is readable, but the link is left for the
		 * caller to make.
		 */
		need_parent_link = TRUE;
		FREE(path);
		FREE(relative);
		return (need_parent_link);
#endif /* LONG_LIST */
	    }
	}
#endif /* !VMS */
	HTStartAnchor(target, "", relative);
	FREE(relative);

	PUTS(SEGMENT_UP_TO);
	if (parent) {
	    if ((0 == strcmp(current, ".")) ||
		(0 == strcmp(current, ".."))) {
		/*
		 * Should not happen, but if it does, at least avoid giving
		 * misleading info.  - KW
		 */
		PUTS("..");
	    } else {
		char *printable = NULL;

		/*
		 * Show the parent's own name, unescaped.
		 */
		StrAllocCopy(printable, parent + 1);
		HTUnEscape(printable);
		PUTS(printable);
		FREE(printable);
	    }
	} else {
	    PUTC('/');
	}
	END(HTML_A);
	PUTC('\n');
    }
#endif /* !NO_PARENT_DIR_REFERENCE */

    FREE(path);
    return (need_parent_link);
}
1793 
1794 #if defined HAVE_READDIR
1795 /*	Send README file.
1796  *	-----------------
1797  *
1798  *  If a README file exists, then it is inserted into the document here.
1799  */
do_readme(HTStructured * target,const char * localname)1800 static void do_readme(HTStructured * target, const char *localname)
1801 {
1802     FILE *fp;
1803     char *readme_file_name = NULL;
1804     int ch;
1805 
1806     HTSprintf0(&readme_file_name, "%s/%s", localname, HT_DIR_README_FILE);
1807 
1808     fp = fopen(readme_file_name, "r");
1809 
1810     if (fp) {
1811 	START(HTML_PRE);
1812 	while ((ch = fgetc(fp)) != EOF) {
1813 	    PUTC((char) ch);
1814 	}
1815 	END(HTML_PRE);
1816 	HTDisplayPartial();
1817 	fclose(fp);
1818     }
1819     FREE(readme_file_name);
1820 }
1821 
/*
 * Field accessors for an object pointer which is known to refer to a DIRED
 * record:  its sort tag and its file name.
 */
#define DIRED_BLOK(obj) (((DIRED *)(obj))->sort_tags)
#define DIRED_NAME(obj) (((DIRED *)(obj))->file_name)

/*
 * Three-way numeric comparison, giving -1, 0 or 1.  The arguments can be
 * evaluated more than once, so they should be free of side effects.
 */
#define NM_cmp(a,b) ((a) < (b) ? -1 : ((a) > (b) ? 1 : 0))
1826 
1827 #if defined(LONG_LIST) && defined(DIRED_SUPPORT)
/*
 * Find the "type" part of a file name for sorting:  skip any leading dots,
 * then return a pointer to the first dot which follows (so the result
 * includes that dot and everything after it).  If there is no such dot, an
 * empty string constant is returned.
 */
static const char *file_type(const char *path)
{
    const char *scan = path;

    /* leading dots do not start a type */
    while (*scan == '.')
	++scan;

    for (; *scan != '\0'; ++scan) {
	if (*scan == '.')
	    return scan;
    }
    return "";
}
1839 #endif /* LONG_LIST && DIRED_SUPPORT */
1840 
dired_cmp(void * a,void * b)1841 static int dired_cmp(void *a, void *b)
1842 {
1843     DIRED *p = (DIRED *) a;
1844     DIRED *q = (DIRED *) b;
1845     int code = p->sort_tags - q->sort_tags;
1846 
1847 #if defined(LONG_LIST) && defined(DIRED_SUPPORT)
1848     if (code == 0) {
1849 	switch (dir_list_order) {
1850 	case ORDER_BY_SIZE:
1851 	    code = -NM_cmp(p->file_info.st_size, q->file_info.st_size);
1852 	    break;
1853 	case ORDER_BY_DATE:
1854 	    code = -NM_cmp(p->file_info.st_mtime, q->file_info.st_mtime);
1855 	    break;
1856 	case ORDER_BY_MODE:
1857 	    code = NM_cmp(p->file_info.st_mode, q->file_info.st_mode);
1858 	    break;
1859 	case ORDER_BY_USER:
1860 	    code = NM_cmp(p->file_info.st_uid, q->file_info.st_uid);
1861 	    break;
1862 	case ORDER_BY_GROUP:
1863 	    code = NM_cmp(p->file_info.st_gid, q->file_info.st_gid);
1864 	    break;
1865 	case ORDER_BY_TYPE:
1866 	    code = AS_cmp(file_type(p->file_name), file_type(q->file_name));
1867 	    break;
1868 	default:
1869 	    code = 0;
1870 	    break;
1871 	}
1872     }
1873 #endif /* LONG_LIST && DIRED_SUPPORT */
1874     if (code == 0)
1875 	code = AS_cmp(p->file_name, q->file_name);
1876 #if 0
1877     CTRACE((tfp, "dired_cmp(%d) ->%d\n\t%c:%s (%s)\n\t%c:%s (%s)\n",
1878 	    dir_list_order,
1879 	    code,
1880 	    p->sort_tags, p->file_name, file_type(p->file_name),
1881 	    q->sort_tags, q->file_name, file_type(q->file_name)));
1882 #endif
1883     return code;
1884 }
1885 
print_local_dir(DIR * dp,char * localname,HTParentAnchor * anchor,HTFormat format_out,HTStream * sink)1886 static int print_local_dir(DIR *dp, char *localname,
1887 			   HTParentAnchor *anchor,
1888 			   HTFormat format_out,
1889 			   HTStream *sink)
1890 {
1891     HTStructured *target;	/* HTML object */
1892     HTBTree *bt;
1893     HTStructuredClass targetClass;
1894     STRUCT_DIRENT *dirbuf;
1895     char *pathname = NULL;
1896     char *tail = NULL;
1897     const char *p;
1898     char *tmpfilename = NULL;
1899     BOOL need_parent_link = FALSE;
1900     BOOL preformatted = FALSE;
1901     int status;
1902     struct stat *actual_info;
1903 
1904 #ifdef DISP_PARTIAL
1905     int num_of_entries = 0;	/* lines counter */
1906 #endif
1907 
1908 #ifdef S_IFLNK
1909     struct stat link_info;
1910 #endif
1911 
1912     CTRACE((tfp, "print_local_dir() started\n"));
1913 
1914     pathname = HTParse(anchor->address, "",
1915 		       PARSE_PATH + PARSE_PUNCTUATION);
1916 
1917     if ((p = strrchr(pathname, '/')) == NULL)
1918 	p = "/";
1919     StrAllocCopy(tail, (p + 1));
1920     FREE(pathname);
1921 
1922     if (UCLYhndl_HTFile_for_unspec >= 0) {
1923 	HTAnchor_setUCInfoStage(anchor,
1924 				UCLYhndl_HTFile_for_unspec,
1925 				UCT_STAGE_PARSER,
1926 				UCT_SETBY_DEFAULT);
1927     }
1928 
1929     target = HTML_new(anchor, format_out, sink);
1930     targetClass = *target->isa;	/* Copy routine entry points */
1931 
    /*
     * The need_parent_link flag will be set if an "Up to <parent>" link was
     * not created for a readable parent in HTDirTitles() because LONG_LIST is
     * defined and NO_PARENT_DIR_REFERENCE is not defined, so that we need to
     * create the link via an LYListFmtParse() call.  - FM
     */
1938     need_parent_link = HTDirTitles(target, anchor, format_out, FALSE);
1939 
1940 #ifdef DIRED_SUPPORT
1941     if (!isLYNXCGI(anchor->address)) {
1942 	HTAnchor_setFormat(anchor, WWW_DIRED);
1943 	lynx_edit_mode = TRUE;
1944     }
1945 #endif /* DIRED_SUPPORT */
1946     if (HTDirReadme == HT_DIR_README_TOP)
1947 	do_readme(target, localname);
1948 
1949     bt = HTBTree_new(dired_cmp);
1950 
1951     _HTProgress(READING_DIRECTORY);
1952     status = HT_LOADED;		/* assume we don't get interrupted */
1953     while ((dirbuf = readdir(dp)) != NULL) {
1954 	/*
1955 	 * While there are directory entries to be read...
1956 	 */
1957 	DIRED *data = NULL;
1958 
1959 #ifdef STRUCT_DIRENT__D_INO
1960 	if (dirbuf->d_ino == 0)
1961 	    /*
1962 	     * If the entry is not being used, skip it.
1963 	     */
1964 	    continue;
1965 #endif
	/*
	 * Skip self; skip the parent unless a link to it is still needed
	 * here (need_parent_link is FALSE when HTDirTitles() has handled it
	 * or when NO_PARENT_DIR_REFERENCE is defined); and skip any other
	 * dot files if no_dotfiles is set or show_dotfiles is not set.  - FM
	 */
1971 	if (!strcmp(dirbuf->d_name, ".") /* self       */ ||
1972 	    (!strcmp(dirbuf->d_name, "..") /* parent */ &&
1973 	     need_parent_link == FALSE) ||
1974 	    ((strcmp(dirbuf->d_name, "..")) &&
1975 	     (dirbuf->d_name[0] == '.' &&
1976 	      (no_dotfiles || !show_dotfiles))))
1977 	    continue;
1978 
1979 	StrAllocCopy(tmpfilename, localname);
1980 	/*
1981 	 * If filename is not root directory, add trailing separator.
1982 	 */
1983 	LYAddPathSep(&tmpfilename);
1984 
1985 	StrAllocCat(tmpfilename, dirbuf->d_name);
1986 	data = (DIRED *) malloc(sizeof(DIRED) + strlen(dirbuf->d_name) + 4);
1987 	if (data == NULL) {
1988 	    status = HT_PARTIAL_CONTENT;
1989 	    break;
1990 	}
1991 	LYTrimPathSep(tmpfilename);
1992 
1993 	actual_info = &(data->file_info);
1994 #ifdef S_IFLNK
1995 	if (lstat(tmpfilename, actual_info) < 0) {
1996 	    actual_info->st_mode = 0;
1997 	} else {
1998 	    if (S_ISLNK(actual_info->st_mode)) {
1999 		actual_info = &link_info;
2000 		if (stat(tmpfilename, actual_info) < 0)
2001 		    actual_info->st_mode = 0;
2002 	    }
2003 	}
2004 #else
2005 	if (stat(tmpfilename, actual_info) < 0)
2006 	    actual_info->st_mode = 0;
2007 #endif
2008 
2009 	strcpy(data->file_name, dirbuf->d_name);
2010 #ifndef DIRED_SUPPORT
2011 	if (S_ISDIR(actual_info->st_mode)) {
2012 	    data->sort_tags = 'D';
2013 	} else {
2014 	    data->sort_tags = 'F';
2015 	    /* D & F to have first directories, then files */
2016 	}
2017 #else
2018 	if (S_ISDIR(actual_info->st_mode)) {
2019 	    if (dir_list_style == MIXED_STYLE) {
2020 		data->sort_tags = ' ';
2021 		LYAddPathSep0(data->file_name);
2022 	    } else if (!strcmp(dirbuf->d_name, "..")) {
2023 		data->sort_tags = 'A';
2024 	    } else {
2025 		data->sort_tags = 'D';
2026 	    }
2027 	} else if (dir_list_style == MIXED_STYLE) {
2028 	    data->sort_tags = ' ';
2029 	} else if (dir_list_style == FILES_FIRST) {
2030 	    data->sort_tags = 'C';
2031 	    /* C & D to have first files, then directories */
2032 	} else {
2033 	    data->sort_tags = 'F';
2034 	}
2035 #endif /* !DIRED_SUPPORT */
2036 	/*
2037 	 * Sort dirname in the tree bt.
2038 	 */
2039 	HTBTree_add(bt, data);
2040 
2041 #ifdef DISP_PARTIAL
2042 	/* optimize for expensive operation: */
2043 	if (num_of_entries % (partial_threshold > 0 ?
2044 			      partial_threshold : display_lines) == 0) {
2045 	    if (HTCheckForInterrupt()) {
2046 		status = HT_PARTIAL_CONTENT;
2047 		break;
2048 	    }
2049 	}
2050 	num_of_entries++;
2051 #endif /* DISP_PARTIAL */
2052 
2053     }				/* end while directory entries left to read */
2054 
2055     if (status != HT_PARTIAL_CONTENT)
2056 	_HTProgress(OPERATION_OK);
2057     else
2058 	CTRACE((tfp, "Reading the directory interrupted by user\n"));
2059 
2060     /*
2061      * Run through tree printing out in order.
2062      */
2063     {
2064 	HTBTElement *next_element = HTBTree_next(bt, NULL);
2065 
2066 	/* pick up the first element of the list */
2067 	int num_of_entries_output = 0;	/* lines counter */
2068 
2069 	char state;
2070 
2071 	/* I for initial (.. file),
2072 	   D for directory file,
2073 	   F for file */
2074 
2075 #ifdef DIRED_SUPPORT
2076 	char test;
2077 #endif /* DIRED_SUPPORT */
2078 	state = 'I';
2079 
2080 	while (next_element != NULL) {
2081 	    DIRED *entry;
2082 
2083 #ifndef DISP_PARTIAL
2084 	    if (num_of_entries_output % HTMAX(display_lines, 10) == 0) {
2085 		if (HTCheckForInterrupt()) {
2086 		    _HTProgress(TRANSFER_INTERRUPTED);
2087 		    status = HT_PARTIAL_CONTENT;
2088 		    break;
2089 		}
2090 	    }
2091 #endif
2092 	    StrAllocCopy(tmpfilename, localname);
2093 	    /*
2094 	     * If filename is not root directory.
2095 	     */
2096 	    LYAddPathSep(&tmpfilename);
2097 
2098 	    entry = (DIRED *) (HTBTree_object(next_element));
2099 	    /*
2100 	     * Append the current entry's filename to the path.
2101 	     */
2102 	    StrAllocCat(tmpfilename, entry->file_name);
2103 	    HTSimplify(tmpfilename);
2104 	    /*
2105 	     * Output the directory entry.
2106 	     */
2107 	    if (strcmp(DIRED_NAME(HTBTree_object(next_element)), "..")) {
2108 #ifdef DIRED_SUPPORT
2109 		test =
2110 		    (char) (DIRED_BLOK(HTBTree_object(next_element))
2111 			    == 'D' ? 'D' : 'F');
2112 		if (state != test) {
2113 #ifndef LONG_LIST
2114 		    if (dir_list_style == FILES_FIRST) {
2115 			if (state == 'F') {
2116 			    END(HTML_DIR);
2117 			    PUTC('\n');
2118 			}
2119 		    } else if (dir_list_style != MIXED_STYLE)
2120 			if (state == 'D') {
2121 			    END(HTML_DIR);
2122 			    PUTC('\n');
2123 			}
2124 #endif /* !LONG_LIST */
2125 		    state =
2126 			(char) (DIRED_BLOK(HTBTree_object(next_element))
2127 				== 'D' ? 'D' : 'F');
2128 		    if (preformatted) {
2129 			END(HTML_PRE);
2130 			PUTC('\n');
2131 			preformatted = FALSE;
2132 		    }
2133 		    START(HTML_H2);
2134 		    if (dir_list_style != MIXED_STYLE) {
2135 			START(HTML_EM);
2136 			PUTS(state == 'D'
2137 			     ? LABEL_SUBDIRECTORIES
2138 			     : LABEL_FILES);
2139 			END(HTML_EM);
2140 		    }
2141 		    END(HTML_H2);
2142 		    PUTC('\n');
2143 #ifndef LONG_LIST
2144 		    START(HTML_DIR);
2145 		    PUTC('\n');
2146 #endif /* !LONG_LIST */
2147 		}
2148 #else
2149 		if (state != DIRED_BLOK(HTBTree_object(next_element))) {
2150 #ifndef LONG_LIST
2151 		    if (state == 'D') {
2152 			END(HTML_DIR);
2153 			PUTC('\n');
2154 		    }
2155 #endif /* !LONG_LIST */
2156 		    state =
2157 			(char) (DIRED_BLOK(HTBTree_object(next_element))
2158 				== 'D' ? 'D' : 'F');
2159 		    if (preformatted) {
2160 			END(HTML_PRE);
2161 			PUTC('\n');
2162 			preformatted = FALSE;
2163 		    }
2164 		    START(HTML_H2);
2165 		    START(HTML_EM);
2166 		    PUTS(state == 'D'
2167 			 ? LABEL_SUBDIRECTORIES
2168 			 : LABEL_FILES);
2169 		    END(HTML_EM);
2170 		    END(HTML_H2);
2171 		    PUTC('\n');
2172 #ifndef LONG_LIST
2173 		    START(HTML_DIR);
2174 		    PUTC('\n');
2175 #endif /* !LONG_LIST */
2176 		}
2177 #endif /* DIRED_SUPPORT */
2178 #ifndef LONG_LIST
2179 		START(HTML_LI);
2180 #endif /* !LONG_LIST */
2181 	    }
2182 	    if (!preformatted) {
2183 		START(HTML_PRE);
2184 		PUTC('\n');
2185 		preformatted = TRUE;
2186 	    }
2187 #ifdef LONG_LIST
2188 	    LYListFmtParse(list_format, entry, tmpfilename, target, tail);
2189 #else
2190 	    HTDirEntry(target, tail, entry->file_name);
2191 	    PUTS(entry->file_name);
2192 	    END(HTML_A);
2193 	    MAYBE_END(HTML_LI);
2194 	    PUTC('\n');
2195 #endif /* LONG_LIST */
2196 
2197 	    next_element = HTBTree_next(bt, next_element);
2198 	    /* pick up the next element of the list;
2199 	       if none, return NULL */
2200 
2201 	    /* optimize for expensive operation: */
2202 #ifdef DISP_PARTIAL
2203 	    if (num_of_entries_output %
2204 		((partial_threshold > 0)
2205 		 ? partial_threshold
2206 		 : display_lines) == 0) {
2207 		/* num_of_entries, num_of_entries_output... */
2208 		HTDisplayPartial();
2209 
2210 		if (HTCheckForInterrupt()) {
2211 		    _HTProgress(TRANSFER_INTERRUPTED);
2212 		    status = HT_PARTIAL_CONTENT;
2213 		    break;
2214 		}
2215 	    }
2216 	    num_of_entries_output++;
2217 #endif /* DISP_PARTIAL */
2218 
2219 	}			/* end while next_element */
2220 
2221 	if (status == HT_LOADED) {
2222 	    if (state == 'I') {
2223 		START(HTML_P);
2224 		PUTS("Empty Directory");
2225 	    }
2226 #ifndef LONG_LIST
2227 	    else
2228 		END(HTML_DIR);
2229 #endif /* !LONG_LIST */
2230 	}
2231     }				/* end printing out the tree in order */
2232     if (preformatted) {
2233 	END(HTML_PRE);
2234 	PUTC('\n');
2235     }
2236     END(HTML_BODY);
2237     PUTC('\n');
2238 
2239     FREE(tmpfilename);
2240     FREE(tail);
2241     HTBTreeAndObject_free(bt);
2242 
2243     if (status == HT_LOADED) {
2244 	if (HTDirReadme == HT_DIR_README_BOTTOM)
2245 	    do_readme(target, localname);
2246 	FREE_TARGET;
2247     } else {
2248 	ABORT_TARGET;
2249     }
2250     HTFinishDisplayPartial();
2251     return status;		/* document loaded, maybe partial */
2252 }
2253 #endif /* HAVE_READDIR */
2254 
2255 #ifndef VMS
/*
 * Wrapper for stat() which copes with names ending in a path separator:  such
 * a name is retried with "." appended.  Fills in *data and returns the result
 * of the underlying stat() call (-1 on failure).
 */
int HTStat(const char *filename,
	   struct stat *data)
{
    size_t length = strlen(filename);
    int rc;

    if (length == 0 || !LYIsPathSep(filename[length - 1])) {
	/* ordinary name: ask the system directly */
	rc = stat(filename, data);
#ifdef _WINDOWS
	/*
	 * stat() is reported to fail for directories on Windows; if the name
	 * is nevertheless accessible, report it as a directory.
	 */
	if (rc == -1 && access(filename, 0) == 0) {
	    data->st_mode = S_IFDIR;
	    rc = 0;
	}
#endif
    } else {
	/* trailing separator: examine "<name>." instead */
	char *dotted = NULL;

	HTSprintf0(&dotted, "%s.", filename);
	rc = HTStat(dotted, data);
	FREE(dotted);
    }
    return rc;
}
2284 #endif
2285 
2286 #if defined(USE_ZLIB) || defined(USE_BZLIB)
sniffStream(FILE * fp,char * buffer,size_t needed)2287 static BOOL sniffStream(FILE *fp, char *buffer, size_t needed)
2288 {
2289     long offset = ftell(fp);
2290     BOOL result = FALSE;
2291 
2292     if (offset >= 0) {
2293 	if (fread(buffer, sizeof(char), needed, fp) == needed) {
2294 	    result = TRUE;
2295 	}
2296 	if (fseek(fp, offset, SEEK_SET) < 0) {
2297 	    CTRACE((tfp, "error seeking in stream\n"));
2298 	    result = FALSE;
2299 	}
2300     }
2301     return result;
2302 }
2303 #endif
2304 
2305 #ifdef USE_ZLIB
isGzipStream(FILE * fp)2306 static BOOL isGzipStream(FILE *fp)
2307 {
2308     char buffer[3];
2309     BOOL result;
2310 
2311     if (sniffStream(fp, buffer, sizeof(buffer))
2312 	&& !MemCmp(buffer, "\037\213", sizeof(buffer) - 1)) {
2313 	result = TRUE;
2314     } else {
2315 	CTRACE((tfp, "not a gzip-stream\n"));
2316 	result = FALSE;
2317     }
2318     return result;
2319 }
2320 
isDeflateStream(FILE * fp)2321 static BOOL isDeflateStream(FILE *fp)
2322 {
2323     char buffer[3];
2324     BOOL result;
2325 
2326     if (sniffStream(fp, buffer, sizeof(buffer))
2327 	&& !MemCmp(buffer, "\170\234", sizeof(buffer) - 1)) {
2328 	result = TRUE;
2329     } else {
2330 	CTRACE((tfp, "not a deflate-stream\n"));
2331 	result = FALSE;
2332     }
2333     return result;
2334 }
2335 #endif
2336 
2337 #ifdef USE_BZLIB
isBzip2Stream(FILE * fp)2338 static BOOL isBzip2Stream(FILE *fp)
2339 {
2340     char buffer[6];
2341     BOOL result;
2342 
2343     if (sniffStream(fp, buffer, sizeof(buffer))
2344 	&& !MemCmp(buffer, "BZh", 3)
2345 	&& isdigit(UCH(buffer[3]))
2346 	&& isdigit(UCH(buffer[4]))) {
2347 	result = TRUE;
2348     } else {
2349 	CTRACE((tfp, "not a bzip2-stream\n"));
2350 	result = FALSE;
2351     }
2352     return result;
2353 }
2354 #endif
2355 
/*
 * FOPEN_MODE(bin) supplies the mode argument(s) given to fopen() when opening
 * a local file:  on VMS it expands to "r" followed by two extra option
 * strings, ignoring 'bin'; elsewhere it yields BIN_R when 'bin' is nonzero and
 * "r" otherwise.
 *
 * DOT_STRING is handed to HTCompressFileType() together with the local name;
 * presumably it lists the characters which may introduce a suffix -- confirm
 * against HTCompressFileType().
 */
#ifdef VMS
#define FOPEN_MODE(bin) "r", "shr=put", "shr=upd"
#define DOT_STRING "._-"	/* FIXME: should we check if suffix is after ']' or ':' ? */
#else
#define FOPEN_MODE(bin) (bin ? BIN_R : "r")
#define DOT_STRING "."
#endif
2363 
/*
 * Open a local file and pass it to the parser, decompressing it internally
 * when zlib/bzlib support is compiled in and the output format is not
 * "www/download".
 *
 * Parameters:
 *	anchor		anchor for the document; its methods, content_type,
 *			content_encoding and SugFname may be updated here
 *	format_out	requested output format
 *	sink		stream handed on to the HTParse*File() functions
 *	nodename	host name, used only by the VMS code
 *	filename	name of the file to open
 *	myEncoding	encoding determined by the caller; if it is a "unity"
 *			encoding, the file's suffix is examined instead
 *	format		format determined by the caller
 *	statusp		receives the status of parsing, or of the error report
 *			if internal decompression could not be set up
 *
 * Returns TRUE if the file could be opened (the outcome of the load is then
 * in *statusp), FALSE if it could not; in the latter case *statusp is left
 * untouched.
 */
static int decompressAndParse(HTParentAnchor *anchor,
			      HTFormat format_out,
			      HTStream *sink,
			      char *nodename GCC_UNUSED,
			      char *filename,
			      HTAtom *myEncoding,
			      HTFormat format,
			      int *statusp)
{
    HTAtom *encoding = 0;

    /* at most one of zzfp/gzfp/bzfp is set, when decompressing internally */
#ifdef USE_ZLIB
    FILE *zzfp = 0;
    gzFile gzfp = 0;
#endif /* USE_ZLIB */
#ifdef USE_BZLIB
    BZFILE *bzfp = 0;
#endif /* USE_BZLIB */
#if defined(USE_ZLIB) || defined(USE_BZLIB)
    CompressFileType internal_decompress = cftNone;
    BOOL failed_decompress = NO;
#endif
    int rootlen = 0;
    char *localname = filename;
    int bin;
    FILE *fp;
    int result = FALSE;

#ifdef VMS
    /*
     * Assume that the file is in Unix-style syntax if it contains a '/' after
     * the leading one.  @@
     */
    localname = (strchr(localname + 1, '/')
		 ? HTVMS_name(nodename, localname)
		 : localname + 1);
#endif /* VMS */

    /* files with a compression suffix are opened in binary mode */
    bin = HTCompressFileType(filename, ".", &rootlen) != cftNone;
    fp = fopen(localname, FOPEN_MODE(bin));

#ifdef VMS
    /*
     * If the file wasn't VMS syntax, then perhaps it is Ultrix.
     */
    if (!fp) {
	char *ultrixname = 0;

	CTRACE((tfp, "HTLoadFile: Can't open as %s\n", localname));
	HTSprintf0(&ultrixname, "%s::\"%s\"", nodename, filename);
	fp = fopen(ultrixname, FOPEN_MODE(bin));
	if (!fp) {
	    CTRACE((tfp, "HTLoadFile: Can't open as %s\n", ultrixname));
	}
	FREE(ultrixname);
    }
#endif /* VMS */
    CTRACE((tfp, "HTLoadFile: Opening `%s' gives %p\n", localname, (void *) fp));
    if (fp) {			/* Good! */
	/* record "PUT" as an allowed method if the file is editable */
	if (HTEditable(localname)) {
	    HTAtom *put = HTAtom_for("PUT");
	    HTList *methods = HTAnchor_methods(anchor);

	    if (HTList_indexOf(methods, put) == (-1)) {
		HTList_addObject(methods, put);
	    }
	}
	/*
	 * Fake a Content-Encoding for compressed files.  - FM
	 */
	if (!IsUnityEnc(myEncoding)) {
	    /*
	     * We already know from the call to HTFileFormat that
	     * this is a compressed file, no need to look at the filename
	     * again.  - kw
	     */
#if defined(USE_ZLIB) || defined(USE_BZLIB)
	    CompressFileType method = HTEncodingToCompressType(HTAtom_name(myEncoding));
#endif

	    /*
	     * NOTE: despite its name, this is true when the output format is
	     * NOT "www/download" (strcmp() is nonzero) and the compression
	     * method is 'm', i.e., when the data should be decompressed here.
	     */
#define isDOWNLOAD(m) (strcmp(format_out->name, "www/download") && (method == m))
#ifdef USE_ZLIB
	    if (isDOWNLOAD(cftGzip)) {
		/* reopen via zlib only if the content really looks like gzip */
		if (isGzipStream(fp)) {
		    fclose(fp);
		    fp = 0;
		    gzfp = gzopen(localname, BIN_R);

		    CTRACE((tfp, "HTLoadFile: gzopen of `%s' gives %p\n",
			    localname, gzfp));
		}
		internal_decompress = cftGzip;
	    } else if (isDOWNLOAD(cftDeflate)) {
		/* the already-open stream itself is handed to the parser */
		if (isDeflateStream(fp)) {
		    zzfp = fp;
		    fp = 0;

		    CTRACE((tfp, "HTLoadFile: zzopen of `%s' gives %p\n",
			    localname, (void *) zzfp));
		}
		internal_decompress = cftDeflate;
	    } else
#endif /* USE_ZLIB */
#ifdef USE_BZLIB
	    if (isDOWNLOAD(cftBzip2)) {
		if (isBzip2Stream(fp)) {
		    fclose(fp);
		    fp = 0;
		    bzfp = BZ2_bzopen(localname, BIN_R);

		    CTRACE((tfp, "HTLoadFile: bzopen of `%s' gives %p\n",
			    localname, bzfp));
		}
		internal_decompress = cftBzip2;
	    } else
#endif /* USE_BZLIB */
	    {
		/* no internal decompression: present it as compressed data */
		StrAllocCopy(anchor->content_type, format->name);
		StrAllocCopy(anchor->content_encoding, HTAtom_name(myEncoding));
		format = HTAtom_for("www/compressed");
	    }
	} else {
	    /* no encoding from the caller: go by the file's suffix */
	    CompressFileType cft = HTCompressFileType(localname, DOT_STRING, &rootlen);

	    if (cft != cftNone) {
		char *cp = NULL;

		/* determine the format from the name without its suffix */
		StrAllocCopy(cp, localname);
		cp[rootlen] = '\0';
		format = HTFileFormat(cp, &encoding, NULL);
		FREE(cp);
		format = HTCharsetFormat(format, anchor,
					 UCLYhndl_HTFile_for_unspec);
		StrAllocCopy(anchor->content_type, format->name);
	    }

	    switch (cft) {
	    case cftCompress:
		StrAllocCopy(anchor->content_encoding, "x-compress");
		format = HTAtom_for("www/compressed");
		break;
	    case cftDeflate:
		StrAllocCopy(anchor->content_encoding, "x-deflate");
#ifdef USE_ZLIB
		if (strcmp(format_out->name, "www/download") != 0) {
		    if (isDeflateStream(fp)) {
			zzfp = fp;
			fp = 0;

			CTRACE((tfp, "HTLoadFile: zzopen of `%s' gives %p\n",
				localname, (void *) zzfp));
		    }
		    internal_decompress = cftDeflate;
		}
#else /* USE_ZLIB */
		format = HTAtom_for("www/compressed");
#endif /* USE_ZLIB */
		break;
	    case cftGzip:
		StrAllocCopy(anchor->content_encoding, "x-gzip");
#ifdef USE_ZLIB
		if (strcmp(format_out->name, "www/download") != 0) {
		    if (isGzipStream(fp)) {
			fclose(fp);
			fp = 0;
			gzfp = gzopen(localname, BIN_R);

			CTRACE((tfp, "HTLoadFile: gzopen of `%s' gives %p\n",
				localname, gzfp));
		    }
		    internal_decompress = cftGzip;
		}
#else /* USE_ZLIB */
		format = HTAtom_for("www/compressed");
#endif /* USE_ZLIB */
		break;
	    case cftBzip2:
		StrAllocCopy(anchor->content_encoding, "x-bzip2");
#ifdef USE_BZLIB
		if (strcmp(format_out->name, "www/download") != 0) {
		    if (isBzip2Stream(fp)) {
			fclose(fp);
			fp = 0;
			bzfp = BZ2_bzopen(localname, BIN_R);

			CTRACE((tfp, "HTLoadFile: bzopen of `%s' gives %p\n",
				localname, bzfp));
		    }
		    internal_decompress = cftBzip2;
		}
#else /* USE_BZLIB */
		format = HTAtom_for("www/compressed");
#endif /* USE_BZLIB */
		break;
	    case cftNone:
		break;
	    }
	}
#if defined(USE_ZLIB) || defined(USE_BZLIB)
	if (internal_decompress != cftNone) {
	    /*
	     * Internal decompression was selected; it has failed if the
	     * corresponding handle was not obtained above (content did not
	     * match, or the library could not open the file).
	     */
	    switch (internal_decompress) {
#ifdef USE_ZLIB
	    case cftDeflate:
		failed_decompress = (BOOLEAN) (zzfp == NULL);
		break;
	    case cftCompress:
	    case cftGzip:
		failed_decompress = (BOOLEAN) (gzfp == NULL);
		break;
#endif
#ifdef USE_BZLIB
	    case cftBzip2:
		failed_decompress = (BOOLEAN) (bzfp == NULL);
		break;
#endif
	    default:
		failed_decompress = YES;
		break;
	    }
	    if (failed_decompress) {
		*statusp = HTLoadError(NULL,
				       -(HT_ERROR),
				       FAILED_OPEN_COMPRESSED_FILE);
	    } else {
		char *sugfname = NULL;

		/*
		 * Start from the anchor's suggested filename, or else from
		 * the last path component of its address.
		 */
		if (anchor->SugFname) {
		    StrAllocCopy(sugfname, anchor->SugFname);
		} else {
		    char *anchor_path = HTParse(anchor->address, "",
						PARSE_PATH + PARSE_PUNCTUATION);
		    char *lastslash;

		    HTUnEscape(anchor_path);
		    lastslash = strrchr(anchor_path, '/');
		    if (lastslash)
			StrAllocCopy(sugfname, lastslash + 1);
		    FREE(anchor_path);
		}
		/* the data will be decoded here, so drop the encoding */
		FREE(anchor->content_encoding);
		if (sugfname && *sugfname)
		    HTCheckFnameForCompression(&sugfname, anchor,
					       TRUE);
		if (sugfname && *sugfname)
		    StrAllocCopy(anchor->SugFname, sugfname);
		FREE(sugfname);
#ifdef USE_BZLIB
		if (bzfp)
		    *statusp = HTParseBzFile(format, format_out,
					     anchor,
					     bzfp, sink);
#endif
#ifdef USE_ZLIB
		if (gzfp)
		    *statusp = HTParseGzFile(format, format_out,
					     anchor,
					     gzfp, sink);
		else if (zzfp)
		    *statusp = HTParseZzFile(format, format_out,
					     anchor,
					     zzfp, sink);
#endif
	    }
	} else
#endif /* USE_ZLIB || USE_BZLIB */
	{
	    *statusp = HTParseFile(format, format_out, anchor, fp, sink);
	}
	/* fp is 0 here if it was closed or handed over (as zzfp) above */
	if (fp != 0) {
	    fclose(fp);
	    fp = 0;
	}
	result = TRUE;
    }				/* If successful open */
    return result;
}
2640 
2641 /*	Load a document.
2642  *	----------------
2643  *
2644  *  On entry:
2645  *	addr		must point to the fully qualified hypertext reference.
2646  *			This is the physical address of the file
2647  *
2648  *  On exit:
2649  *	returns		<0		Error has occurred.
2650  *			HTLOADED	OK
2651  *
2652  */
HTLoadFile(const char * addr,HTParentAnchor * anchor,HTFormat format_out,HTStream * sink)2653 int HTLoadFile(const char *addr,
2654 	       HTParentAnchor *anchor,
2655 	       HTFormat format_out,
2656 	       HTStream *sink)
2657 {
2658     char *filename = NULL;
2659     char *acc_method = NULL;
2660     HTFormat format;
2661     char *nodename = NULL;
2662     char *newname = NULL;	/* Simplified name of file */
2663     HTAtom *myEncoding = NULL;	/* enc of this file, may be gzip etc. */
2664     int status = -1;
2665 
2666 #ifndef DISABLE_FTP
2667     char *ftp_newhost;
2668 #endif
2669 
2670 #ifdef VMS
2671     struct stat stat_info;
2672 #endif /* VMS */
2673 
2674     /*
2675      * Reduce the filename to a basic form (hopefully unique!).
2676      */
2677     StrAllocCopy(newname, addr);
2678     filename = HTParse(newname, "", PARSE_PATH | PARSE_PUNCTUATION);
2679     nodename = HTParse(newname, "", PARSE_HOST);
2680 
2681     /*
2682      * If access is ftp, or file is on another host, invoke ftp now.
2683      */
2684     acc_method = HTParse(newname, "", PARSE_ACCESS);
2685     if (strcmp("ftp", acc_method) == 0 ||
2686 	(!LYSameHostname("localhost", nodename) &&
2687 	 !LYSameHostname(nodename, HTHostName()))) {
2688 	status = -1;
2689 	FREE(newname);
2690 	FREE(filename);
2691 	FREE(nodename);
2692 	FREE(acc_method);
2693 #ifndef DISABLE_FTP
2694 	ftp_newhost = HTParse(addr, "", PARSE_HOST);
2695 	if (strcmp(ftp_lasthost, ftp_newhost))
2696 	    ftp_local_passive = ftp_passive;
2697 
2698 	status = HTFTPLoad(addr, anchor, format_out, sink);
2699 
2700 	if (ftp_passive == ftp_local_passive) {
2701 	    if ((status >= 400) || (status < 0)) {
2702 		ftp_local_passive = (BOOLEAN) !ftp_passive;
2703 		status = HTFTPLoad(addr, anchor, format_out, sink);
2704 	    }
2705 	}
2706 
2707 	free(ftp_lasthost);
2708 	ftp_lasthost = ftp_newhost;
2709 #endif /* DISABLE_FTP */
2710 	return status;
2711     } else {
2712 	FREE(newname);
2713 	FREE(acc_method);
2714     }
2715 #if defined(VMS) || defined(USE_DOS_DRIVES)
2716     HTUnEscape(filename);
2717 #endif /* VMS */
2718 
2719     /*
2720      * Determine the format and encoding mapped to any suffix.
2721      */
2722     if (anchor->content_type && anchor->content_encoding) {
2723 	/*
2724 	 * If content_type and content_encoding are BOTH already set in the
2725 	 * anchor object, we believe it and don't try to derive format and
2726 	 * encoding from the filename.  - kw
2727 	 */
2728 	format = HTAtom_for(anchor->content_type);
2729 	myEncoding = HTAtom_for(anchor->content_encoding);
2730     } else {
2731 	int default_UCLYhndl = UCLYhndl_HTFile_for_unspec;
2732 
2733 	if (force_old_UCLYhndl_on_reload) {
2734 	    force_old_UCLYhndl_on_reload = FALSE;
2735 	    default_UCLYhndl = forced_UCLYhdnl;
2736 	}
2737 
2738 	format = HTFileFormat(filename, &myEncoding, NULL);
2739 
2740 	/*
2741 	 * Check the format for an extended MIME charset value, and act on it
2742 	 * if present.  Otherwise, assume what is indicated by the last
2743 	 * parameter (fallback will effectively be UCLYhndl_for_unspec, by
2744 	 * default ISO-8859-1).  - kw
2745 	 */
2746 	format = HTCharsetFormat(format, anchor, default_UCLYhndl);
2747     }
2748 
2749 #ifdef VMS
2750     /*
2751      * Check to see if the 'filename' is in fact a directory.  If it is create
2752      * a new hypertext object containing a list of files and subdirectories
2753      * contained in the directory.  All of these are links to the directories
2754      * or files listed.
2755      */
2756     if (HTStat(filename, &stat_info) == -1) {
2757 	CTRACE((tfp, "HTLoadFile: Can't stat %s\n", filename));
2758     } else {
2759 	if (S_ISDIR(stat_info.st_mode)) {
2760 	    if (HTDirAccess == HT_DIR_FORBID) {
2761 		FREE(filename);
2762 		FREE(nodename);
2763 		return HTLoadError(sink, 403, DISALLOWED_DIR_SCAN);
2764 	    }
2765 
2766 	    if (HTDirAccess == HT_DIR_SELECTIVE) {
2767 		char *enable_file_name = NULL;
2768 
2769 		HTSprintf0(&enable_file_name, "%s/%s", filename, HT_DIR_ENABLE_FILE);
2770 		if (HTStat(enable_file_name, &stat_info) == -1) {
2771 		    FREE(filename);
2772 		    FREE(nodename);
2773 		    FREE(enable_file_name);
2774 		    return HTLoadError(sink, 403, DISALLOWED_SELECTIVE_ACCESS);
2775 		}
2776 	    }
2777 
2778 	    FREE(filename);
2779 	    FREE(nodename);
2780 	    return HTVMSBrowseDir(addr, anchor, format_out, sink);
2781 	}
2782     }
2783 
2784     if (decompressAndParse(anchor,
2785 			   format_out,
2786 			   sink,
2787 			   nodename,
2788 			   filename,
2789 			   myEncoding,
2790 			   format,
2791 			   &status)) {
2792 	FREE(nodename);
2793 	FREE(filename);
2794 	return status;
2795     }
2796     FREE(filename);
2797 
2798 #else /* not VMS: */
2799 
2800     FREE(filename);
2801 
2802     /*
2803      * For unix, we try to translate the name into the name of a transparently
2804      * mounted file.
2805      *
2806      * Not allowed in secure (HTClientHost) situations.  TBL 921019
2807      */
2808 #ifndef NO_UNIX_IO
2809     /*  Need protection here for telnet server but not httpd server. */
2810 
2811     if (!HTSecure) {		/* try local file system */
2812 	char *localname = HTLocalName(addr);
2813 	struct stat dir_info;
2814 
2815 #ifdef HAVE_READDIR
2816 	/*
2817 	 * Multiformat handling.
2818 	 *
2819 	 * If needed, scan directory to find a good file.  Bug:  We don't stat
2820 	 * the file to find the length.
2821 	 */
2822 	if ((strlen(localname) > strlen(MULTI_SUFFIX)) &&
2823 	    (0 == strcmp(localname + strlen(localname) - strlen(MULTI_SUFFIX),
2824 			 MULTI_SUFFIX))) {
2825 	    DIR *dp = 0;
2826 	    BOOL forget_multi = NO;
2827 
2828 	    STRUCT_DIRENT *dirbuf;
2829 	    float best = (float) NO_VALUE_FOUND;	/* So far best is bad */
2830 	    HTFormat best_rep = NULL;	/* Set when rep found */
2831 	    HTAtom *best_enc = NULL;
2832 	    char *best_name = NULL;	/* Best dir entry so far */
2833 
2834 	    char *base = strrchr(localname, '/');
2835 	    size_t baselen = 0;
2836 
2837 	    if (!base || base == localname) {
2838 		forget_multi = YES;
2839 	    } else {
2840 		*base++ = '\0';	/* Just got directory name */
2841 		baselen = strlen(base) - strlen(MULTI_SUFFIX);
2842 		base[baselen] = '\0';	/* Chop off suffix */
2843 
2844 		dp = opendir(localname);
2845 	    }
2846 	    if (forget_multi || !dp) {
2847 		FREE(localname);
2848 		FREE(nodename);
2849 		return HTLoadError(sink, 500, FAILED_DIR_SCAN);
2850 	    }
2851 
2852 	    while ((dirbuf = readdir(dp)) != NULL) {
2853 		/*
2854 		 * While there are directory entries to be read...
2855 		 */
2856 #ifdef STRUCT_DIRENT__D_INO
2857 		if (dirbuf->d_ino == 0)
2858 		    continue;	/* if the entry is not being used, skip it */
2859 #endif
2860 		if (strlen(dirbuf->d_name) > baselen &&		/* Match? */
2861 		    !StrNCmp(dirbuf->d_name, base, baselen)) {
2862 		    HTAtom *enc;
2863 		    HTFormat rep = HTFileFormat(dirbuf->d_name, &enc, NULL);
2864 		    float filevalue = HTFileValue(dirbuf->d_name);
2865 		    float value = HTStackValue(rep, format_out,
2866 					       filevalue,
2867 					       0L /* @@@@@@ */ );
2868 
2869 		    if (value <= 0.0) {
2870 			int rootlen = 0;
2871 			const char *atomname = NULL;
2872 			CompressFileType cft =
2873 			HTCompressFileType(dirbuf->d_name, ".", &rootlen);
2874 			char *cp = NULL;
2875 
2876 			enc = NULL;
2877 			if (cft != cftNone) {
2878 			    StrAllocCopy(cp, dirbuf->d_name);
2879 			    cp[rootlen] = '\0';
2880 			    format = HTFileFormat(cp, NULL, NULL);
2881 			    FREE(cp);
2882 			    value = HTStackValue(format, format_out,
2883 						 filevalue, 0L);
2884 			}
2885 			switch (cft) {
2886 			case cftCompress:
2887 			    atomname = "application/x-compressed";
2888 			    break;
2889 			case cftGzip:
2890 			    atomname = "application/x-gzip";
2891 			    break;
2892 			case cftDeflate:
2893 			    atomname = "application/x-deflate";
2894 			    break;
2895 			case cftBzip2:
2896 			    atomname = "application/x-bzip2";
2897 			    break;
2898 			case cftNone:
2899 			    break;
2900 			}
2901 
2902 			if (atomname != NULL) {
2903 			    value = HTStackValue(format, format_out,
2904 						 filevalue, 0L);
2905 			    if (value <= 0.0) {
2906 				format = HTAtom_for(atomname);
2907 				value = HTStackValue(format, format_out,
2908 						     filevalue, 0L);
2909 			    }
2910 			    if (value <= 0.0) {
2911 				format = HTAtom_for("www/compressed");
2912 				value = HTStackValue(format, format_out,
2913 						     filevalue, 0L);
2914 			    }
2915 			}
2916 		    }
2917 		    if (value < NO_VALUE_FOUND) {
2918 			CTRACE((tfp,
2919 				"HTLoadFile: value of presenting %s is %f\n",
2920 				HTAtom_name(rep), value));
2921 			if (value > best) {
2922 			    best_rep = rep;
2923 			    best_enc = enc;
2924 			    best = value;
2925 			    StrAllocCopy(best_name, dirbuf->d_name);
2926 			}
2927 		    }		/* if best so far */
2928 		}
2929 		/* if match */
2930 	    }			/* end while directory entries left to read */
2931 	    closedir(dp);
2932 
2933 	    if (best_rep) {
2934 		format = best_rep;
2935 		myEncoding = best_enc;
2936 		base[-1] = '/';	/* Restore directory name */
2937 		base[0] = '\0';
2938 		StrAllocCat(localname, best_name);
2939 		FREE(best_name);
2940 	    } else {		/* If not found suitable file */
2941 		FREE(localname);
2942 		FREE(nodename);
2943 		return HTLoadError(sink, 403, FAILED_NO_REPRESENTATION);
2944 	    }
2945 	    /*NOTREACHED */
2946 	}
2947 	/* if multi suffix */
2948 	/*
2949 	 * Check to see if the 'localname' is in fact a directory.  If it is
2950 	 * create a new hypertext object containing a list of files and
2951 	 * subdirectories contained in the directory.  All of these are links
2952 	 * to the directories or files listed.  NB This assumes the existence
2953 	 * of a type 'STRUCT_DIRENT', which will hold the directory entry, and
2954 	 * a type 'DIR' which is used to point to the current directory being
2955 	 * read.
2956 	 */
2957 #if defined(USE_DOS_DRIVES)
2958 	if (strlen(localname) == 2 && LYIsDosDrive(localname))
2959 	    LYAddPathSep(&localname);
2960 #endif
2961 	if (HTStat(localname, &dir_info) == -1)		/* get file information */
2962 	{
2963 	    /* if can't read file information */
2964 	    CTRACE((tfp, "HTLoadFile: can't stat %s\n", localname));
2965 
2966 	} else {		/* Stat was OK */
2967 
2968 	    if (S_ISDIR(dir_info.st_mode)) {
2969 		/*
2970 		 * If localname is a directory.
2971 		 */
2972 		DIR *dp;
2973 		struct stat file_info;
2974 
2975 		CTRACE((tfp, "%s is a directory\n", localname));
2976 
2977 		/*
2978 		 * Check directory access.  Selective access means only those
2979 		 * directories containing a marker file can be browsed.
2980 		 */
2981 		if (HTDirAccess == HT_DIR_FORBID) {
2982 		    FREE(localname);
2983 		    FREE(nodename);
2984 		    return HTLoadError(sink, 403, DISALLOWED_DIR_SCAN);
2985 		}
2986 
2987 		if (HTDirAccess == HT_DIR_SELECTIVE) {
2988 		    char *enable_file_name = NULL;
2989 
2990 		    HTSprintf0(&enable_file_name, "%s/%s", localname, HT_DIR_ENABLE_FILE);
2991 		    if (stat(enable_file_name, &file_info) != 0) {
2992 			FREE(localname);
2993 			FREE(nodename);
2994 			FREE(enable_file_name);
2995 			return HTLoadError(sink, 403, DISALLOWED_SELECTIVE_ACCESS);
2996 		    }
2997 		}
2998 
2999 		CTRACE((tfp, "Opening directory %s\n", localname));
3000 		dp = opendir(localname);
3001 		if (!dp) {
3002 		    FREE(localname);
3003 		    FREE(nodename);
3004 		    return HTLoadError(sink, 403, FAILED_DIR_UNREADABLE);
3005 		}
3006 
3007 		/*
3008 		 * Directory access is allowed and possible.
3009 		 */
3010 
3011 		status = print_local_dir(dp, localname,
3012 					 anchor, format_out, sink);
3013 		closedir(dp);
3014 		FREE(localname);
3015 		FREE(nodename);
3016 		return status;	/* document loaded, maybe partial */
3017 
3018 	    }
3019 	    /* end if localname is a directory */
3020 	    if (S_ISREG(dir_info.st_mode)) {
3021 #ifdef LONG_MAX
3022 		if (dir_info.st_size <= LONG_MAX)
3023 #endif
3024 		    anchor->content_length = (long) dir_info.st_size;
3025 	    }
3026 
3027 	}			/* end if file stat worked */
3028 
3029 /* End of directory reading section
3030 */
3031 #endif /* HAVE_READDIR */
3032 	if (decompressAndParse(anchor,
3033 			       format_out,
3034 			       sink,
3035 			       nodename,
3036 			       localname,
3037 			       myEncoding,
3038 			       format,
3039 			       &status)) {
3040 	    FREE(nodename);
3041 	    FREE(localname);
3042 	    return status;
3043 	}
3044 	FREE(localname);
3045     }				/* local unix file system */
3046 #endif /* !NO_UNIX_IO */
3047 #endif /* VMS */
3048 
3049 #ifndef DECNET
3050     /*
3051      * Now, as transparently mounted access has failed, we try FTP.
3052      */
3053     {
3054 	/*
3055 	 * Deal with case-sensitivity differences on VMS versus Unix.
3056 	 */
3057 #ifdef VMS
3058 	if (strcasecomp(nodename, HTHostName()) != 0)
3059 #else
3060 	if (strcmp(nodename, HTHostName()) != 0)
3061 #endif /* VMS */
3062 	{
3063 	    status = -1;
3064 	    FREE(nodename);
3065 	    if (StrNCmp(addr, "file://localhost", 16)) {
3066 		/* never go to ftp site when URL
3067 		 * is file://localhost
3068 		 */
3069 #ifndef DISABLE_FTP
3070 		status = HTFTPLoad(addr, anchor, format_out, sink);
3071 #endif /* DISABLE_FTP */
3072 	    }
3073 	    return status;
3074 	}
3075 	FREE(nodename);
3076     }
3077 #endif /* !DECNET */
3078 
3079     /*
3080      * All attempts have failed.
3081      */
3082     {
3083 	CTRACE((tfp, "Can't open `%s', errno=%d\n", addr, SOCKET_ERRNO));
3084 
3085 	return HTLoadError(sink, 403, FAILED_FILE_UNREADABLE);
3086     }
3087 }
3088 
/*
 * Table of program paths, indexed by ProgramPaths code.  Entries are read
 * with HTGetProgramPath() and stored with HTSetProgramPath().
 */
static const char *program_paths[pp_Last];
3090 
3091 /*
3092  * Given a program number, return its path
3093  */
HTGetProgramPath(ProgramPaths code)3094 const char *HTGetProgramPath(ProgramPaths code)
3095 {
3096     const char *result = NULL;
3097 
3098     if (code > ppUnknown && code < pp_Last)
3099 	result = program_paths[code];
3100     return result;
3101 }
3102 
3103 /*
3104  * Store a program's path.  The caller must allocate the string used for 'path',
3105  * since HTInitProgramPaths() may free it.
3106  */
HTSetProgramPath(ProgramPaths code,const char * path)3107 void HTSetProgramPath(ProgramPaths code, const char *path)
3108 {
3109     if (code > ppUnknown && code < pp_Last) {
3110 	program_paths[code] = isEmpty(path) ? 0 : path;
3111     }
3112 }
3113 
3114 /*
3115  * Reset the list of known program paths to the ones that are compiled-in
3116  */
HTInitProgramPaths(BOOL init)3117 void HTInitProgramPaths(BOOL init)
3118 {
3119     ProgramPaths code;
3120     int n;
3121     const char *path;
3122     const char *test;
3123 
3124     for (n = (int) ppUnknown + 1; n < (int) pp_Last; ++n) {
3125 	switch (code = (ProgramPaths) n) {
3126 #ifdef BZIP2_PATH
3127 	case ppBZIP2:
3128 	    path = BZIP2_PATH;
3129 	    break;
3130 #endif
3131 #ifdef CHMOD_PATH
3132 	case ppCHMOD:
3133 	    path = CHMOD_PATH;
3134 	    break;
3135 #endif
3136 #ifdef COMPRESS_PATH
3137 	case ppCOMPRESS:
3138 	    path = COMPRESS_PATH;
3139 	    break;
3140 #endif
3141 #ifdef COPY_PATH
3142 	case ppCOPY:
3143 	    path = COPY_PATH;
3144 	    break;
3145 #endif
3146 #ifdef CSWING_PATH
3147 	case ppCSWING:
3148 	    path = CSWING_PATH;
3149 	    break;
3150 #endif
3151 #ifdef GZIP_PATH
3152 	case ppGZIP:
3153 	    path = GZIP_PATH;
3154 	    break;
3155 #endif
3156 #ifdef INFLATE_PATH
3157 	case ppINFLATE:
3158 	    path = INFLATE_PATH;
3159 	    break;
3160 #endif
3161 #ifdef INSTALL_PATH
3162 	case ppINSTALL:
3163 	    path = INSTALL_PATH;
3164 	    break;
3165 #endif
3166 #ifdef MKDIR_PATH
3167 	case ppMKDIR:
3168 	    path = MKDIR_PATH;
3169 	    break;
3170 #endif
3171 #ifdef MV_PATH
3172 	case ppMV:
3173 	    path = MV_PATH;
3174 	    break;
3175 #endif
3176 #ifdef RLOGIN_PATH
3177 	case ppRLOGIN:
3178 	    path = RLOGIN_PATH;
3179 	    break;
3180 #endif
3181 #ifdef RM_PATH
3182 	case ppRM:
3183 	    path = RM_PATH;
3184 	    break;
3185 #endif
3186 #ifdef RMDIR_PATH
3187 	case ppRMDIR:
3188 	    path = RMDIR_PATH;
3189 	    break;
3190 #endif
3191 #ifdef SETFONT_PATH
3192 	case ppSETFONT:
3193 	    path = SETFONT_PATH;
3194 	    break;
3195 #endif
3196 #ifdef TAR_PATH
3197 	case ppTAR:
3198 	    path = TAR_PATH;
3199 	    break;
3200 #endif
3201 #ifdef TELNET_PATH
3202 	case ppTELNET:
3203 	    path = TELNET_PATH;
3204 	    break;
3205 #endif
3206 #ifdef TN3270_PATH
3207 	case ppTN3270:
3208 	    path = TN3270_PATH;
3209 	    break;
3210 #endif
3211 #ifdef TOUCH_PATH
3212 	case ppTOUCH:
3213 	    path = TOUCH_PATH;
3214 	    break;
3215 #endif
3216 #ifdef UNCOMPRESS_PATH
3217 	case ppUNCOMPRESS:
3218 	    path = UNCOMPRESS_PATH;
3219 	    break;
3220 #endif
3221 #ifdef UNZIP_PATH
3222 	case ppUNZIP:
3223 	    path = UNZIP_PATH;
3224 	    break;
3225 #endif
3226 #ifdef UUDECODE_PATH
3227 	case ppUUDECODE:
3228 	    path = UUDECODE_PATH;
3229 	    break;
3230 #endif
3231 #ifdef ZCAT_PATH
3232 	case ppZCAT:
3233 	    path = ZCAT_PATH;
3234 	    break;
3235 #endif
3236 #ifdef ZIP_PATH
3237 	case ppZIP:
3238 	    path = ZIP_PATH;
3239 	    break;
3240 #endif
3241 	default:
3242 	    path = NULL;
3243 	    break;
3244 	}
3245 	test = HTGetProgramPath(code);
3246 	if (test != NULL && test != path) {
3247 	    free((char *) test);
3248 	}
3249 	if (init) {
3250 	    HTSetProgramPath(code, path);
3251 	}
3252     }
3253 }
3254 
/*
 *	Protocol descriptors
 *
 *	Defines two HTProtocol objects: HTFTP, whose first entry is "ftp", and
 *	HTFile, whose first entry is "file".  Both list HTLoadFile as the second
 *	entry; only HTFile supplies a third entry (HTFileSaveStream), while
 *	HTFTP leaves it 0.
 *	NOTE(review): the three entries are presumably scheme name, load
 *	function and save-stream function - confirm against the HTProtocol
 *	declaration.
 *
 *	The two preprocessor branches define the same objects with the same
 *	initializers.  They differ only in form: when GLOBALDEF_IS_MACRO is
 *	defined, GLOBALDEF is invoked as a three-argument macro and each
 *	initializer is passed through a helper #define; otherwise GLOBALDEF is
 *	used as a plain prefix on an ordinary definition.
 */
#ifdef GLOBALDEF_IS_MACRO
#define _HTFILE_C_1_INIT { "ftp", HTLoadFile, 0 }
GLOBALDEF(HTProtocol, HTFTP, _HTFILE_C_1_INIT);
#define _HTFILE_C_2_INIT { "file", HTLoadFile, HTFileSaveStream }
GLOBALDEF(HTProtocol, HTFile, _HTFILE_C_2_INIT);
#else
GLOBALDEF HTProtocol HTFTP =
{"ftp", HTLoadFile, 0};
GLOBALDEF HTProtocol HTFile =
{"file", HTLoadFile, HTFileSaveStream};
#endif /* GLOBALDEF_IS_MACRO */
3269