1 /*
2  * $LynxId: HTFormat.h,v 1.33 2010/10/27 00:13:53 tom Exp $
3  *
4  *                                            HTFormat: The format manager in the WWW Library
5  *                          MANAGE DIFFERENT DOCUMENT FORMATS
6  *
7  * Here we describe the functions of the HTFormat module which handles conversion between
8  * different data representations.  (In MIME parlance, a representation is known as a
9  * content-type.  In WWW the term "format" is often used as it is shorter).
10  *
11  * This module is implemented by HTFormat.c.  This hypertext document is used to generate
12  * the HTFormat.h include file.  Part of the WWW library.
13  */
14 #ifndef HTFORMAT_H
15 #define HTFORMAT_H
16 
17 #include <HTStream.h>
18 #include <HTAtom.h>
19 #include <HTList.h>
20 #include <HTAnchor.h>
21 
22 #ifdef USE_SOURCE_CACHE
23 #include <HTChunk.h>
24 #endif
25 
26 #ifdef USE_BZLIB
27 #include <bzlib.h>
28 #endif
29 
30 #ifdef USE_ZLIB
31 #include <zlib.h>
32 #endif
33 
34 #ifdef __cplusplus
35 extern "C" {
36 #endif
37 /*
38 
39    These macros (which used to be constants) define some basic internally
40    referenced representations.  The www/xxx ones are of course not MIME
41    standard.
42 
43    www/source is an output format which leaves the input untouched.  It is
44    useful for diagnostics, and for users who want to see the original, whatever
45    it is.
46 
47 																	 */
48 /* Internal ones */
49 /* #define WWW_SOURCE HTAtom_for("www/source") */
50 /* Whatever it was originally */
51     extern HTAtom *WWW_SOURCE;
52     /* calculated once, heavy used */
53 
/*
 * www/present: the user's perception of the document.  If you convert to
 * www/present, you present the material to the user.
 */
#define WWW_PRESENT     HTAtom_for("www/present")

/*
 * www/debug: the user's perception of debug information, for example sent
 * as an HTML document in an HTTP redirection message.
 */
#define WWW_DEBUG       HTAtom_for("www/debug")

/*
 * www/mime: a MIME message, or a plain text message with no MIME header
 * (the original notes call this the message/rfc822 format).  This is what
 * is returned by an HTTP server.
 */
#define WWW_MIME        HTAtom_for("www/mime")

/*
 * message/x-rfc822-head: for parsing only the header. - kw
 */
#define WWW_MIME_HEAD   HTAtom_for("message/x-rfc822-head")

/*
 * www/print: like www/present, except that it represents a printed copy.
 */
#define WWW_PRINT       HTAtom_for("www/print")

/*
 * www/unknown: a really unknown type.  Some default action is appropriate.
 */
#define WWW_UNKNOWN     HTAtom_for("www/unknown")

#ifdef DIRED_SUPPORT
/*
 * www/dired: signals directory edit mode.
 */
#define WWW_DIRED       HTAtom_for("www/dired")
#endif

/*
 * Regular MIME types.  HTML is assumed to be added by the W3 code.
 *
 * application/octet-stream was mistakenly application/binary in earlier
 * libwww versions (pre 2.11).
 */
#define WWW_PLAINTEXT   HTAtom_for("text/plain")
#define WWW_POSTSCRIPT  HTAtom_for("application/postscript")
#define WWW_RICHTEXT    HTAtom_for("application/rtf")
#define WWW_AUDIO       HTAtom_for("audio/basic")
#define WWW_HTML        HTAtom_for("text/html")
#define WWW_BINARY      HTAtom_for("application/octet-stream")
117 
118     typedef HTAtom *HTEncoding;
119 
120 /*
121  * The following are values for the MIME types:
122  */
123 #define WWW_ENC_7BIT            HTAtom_for("7bit")
124 #define WWW_ENC_8BIT            HTAtom_for("8bit")
125 #define WWW_ENC_BINARY          HTAtom_for("binary")
126 
127 /*
128  * We also add
129  */
130 #define WWW_ENC_COMPRESS        HTAtom_for("compress")
131 
/*
 * IsUnityEncStr(senc): does the string 'senc' fail to designate a real
 * encoding?  The result is nonzero when 'senc' is NULL, is empty, or is one
 * of the "dummy" names "identity", "8bit", "binary" or "7bit" (compared
 * with strcmp, hence case-sensitively), and zero otherwise.
 *
 * 'senc' is evaluated more than once, so pass an expression without side
 * effects.
 */
#define IsUnityEncStr(senc) \
	(NULL == (senc) \
	 || '\0' == *(senc) \
	 || 0 == strcmp((senc), "identity") \
	 || 0 == strcmp((senc), "8bit") \
	 || 0 == strcmp((senc), "binary") \
	 || 0 == strcmp((senc), "7bit"))
139 
/*
 * IsUnityEnc(enc): the same question as IsUnityEncStr, asked of an encoding
 * atom.  The result is nonzero when 'enc' is NULL or is the atom for
 * "identity", "8bit", "binary" or "7bit".
 *
 * 'enc' is evaluated more than once.
 */
#define IsUnityEnc(enc) \
	(NULL == (enc) \
	 || HTAtom_for("identity") == (enc) \
	 || WWW_ENC_8BIT == (enc) \
	 || WWW_ENC_BINARY == (enc) \
	 || WWW_ENC_7BIT == (enc))
143 
144 /*
145 
146 The HTPresentation and HTConverter types
147 
148    This HTPresentation structure represents a possible conversion algorithm
149    from one format to another.  It includes a pointer to a conversion routine.
150    The conversion routine returns a stream to which data should be fed.  See
151    also HTStreamStack which scans the list of registered converters and calls
152    one.  See the initialisation module for a list of conversion routines.
153 
154  */
155     typedef struct _HTPresentation HTPresentation;
156 
157     typedef HTStream *HTConverter (HTPresentation *pres,
158 				   HTParentAnchor *anchor,
159 				   HTStream *sink);
160 
161     struct _HTPresentation {
162 	HTAtom *rep;		/* representation name atomized */
163 	HTAtom *rep_out;	/* resulting representation */
164 	HTConverter *converter;	/* routine to gen the stream stack */
165 	char *command;		/* MIME-format command string */
166 	char *testcommand;	/* MIME-format test string */
167 	float quality;		/* Between 0 (bad) and 1 (good) */
168 	float secs;
169 	float secs_per_byte;
170 	off_t maxbytes;
171 	BOOL get_accept;	/* list in "Accept:" for GET */
172 	int accept_opt;		/* matches against LYAcceptMedia */
173     };
174 
175 /*
176 
177    The list of presentations is kept by this module.  It is also scanned by
178    modules which want to know the set of formats supported.  for example.
179 
180  */
181     extern HTList *HTPresentations;
182 
183 /*
184 
185    The default presentation is used when no other is appropriate
186 
187  */
188     extern HTPresentation *default_presentation;
189 
/*
 * Options used for the "Accept:" string.
 *
 * The basic components are distinct powers of two, so that they can be
 * combined; the remaining names are such combinations.
 */
    typedef enum {
	mediaINT = 1 << 0,	/* internal types predefined in HTInit.c */
	mediaEXT = 1 << 1,	/* external types predefined in HTInit.c */
	mediaCFG = 1 << 2,	/* types, e.g., viewers, from lynx.cfg */
	mediaUSR = 1 << 3,	/* user's mime-types, etc. */
	mediaSYS = 1 << 4,	/* system's mime-types, etc. */

	/* these are useful flavors for the options menu */
	mediaOpt1 = mediaINT,
	mediaOpt2 = mediaOpt1 | mediaCFG,
	mediaOpt3 = mediaOpt2 | mediaUSR,
	mediaOpt4 = mediaOpt3 | mediaSYS,

	/* this is the flavor from pre-2.8.6 */
	mediaALL = mediaOpt4 | mediaEXT
    } AcceptMedia;
208 
/*
 * Options used for the "Accept-Encoding:" string.
 *
 * Each encoding has its own bit; encodingALL is all four of them together.
 */
    typedef enum {
	encodingNONE = 0,
	encodingGZIP = 1 << 0,
	encodingDEFLATE = 1 << 1,
	encodingCOMPRESS = 1 << 2,
	encodingBZIP2 = 1 << 3,
	encodingALL = encodingGZIP | encodingDEFLATE
		      | encodingCOMPRESS | encodingBZIP2
    } AcceptEncoding;
223 
224 /*
225 
226 HTSetPresentation: Register a system command to present a format
227 
228   ON ENTRY,
229 
230   rep                     is the MIME - style format name
231 
232   command                 is the MAILCAP - style command template
233 
234   testcommand             is the MAILCAP - style testcommand template
235 
236   quality                 A degradation faction 0..1.0
237 
238   secs                    A limit on the time user will wait (0.0 for infinity)
239   secs_per_byte
240 
241   maxbytes                A limit on the length acceptable as input (0 infinite)
242 
243   media                   Used in filtering presentation types for "Accept:"
244 
245  */
246     extern void HTSetPresentation(const char *representation,
247 				  const char *command,
248 				  const char *testcommand,
249 				  double quality,
250 				  double secs,
251 				  double secs_per_byte,
252 				  long int maxbytes,
253 				  AcceptMedia media
254     );
255 
256 /*
257 
258 HTSetConversion:   Register a converstion routine
259 
260   ON ENTRY,
261 
262   rep_in                  is the content-type input
263 
264   rep_out                 is the resulting content-type
265 
266   converter               is the routine to make the stream to do it
267 
268  */
269 
270     extern void HTSetConversion(const char *rep_in,
271 				const char *rep_out,
272 				HTConverter *converter,
273 				double quality,
274 				double secs,
275 				double secs_per_byte,
276 				long int maxbytes,
277 				AcceptMedia media
278     );
279 
280 /*
281 
282 HTStreamStack:   Create a stack of streams
283 
284    This is the routine which actually sets up the conversion.  It currently
285    checks only for direct conversions, but multi-stage conversions are forseen.
286    It takes a stream into which the output should be sent in the final format,
287    builds the conversion stack, and returns a stream into which the data in the
288    input format should be fed.  The anchor is passed because hypertxet objects
289    load information into the anchor object which represents them.
290 
291  */
292     extern HTStream *HTStreamStack(HTFormat format_in,
293 				   HTFormat format_out,
294 				   HTStream *stream_out,
295 				   HTParentAnchor *anchor);
296 
297 /*
298 HTReorderPresentation: put presentation near head of list
299 
300     Look up a presentation (exact match only) and, if found, reorder it to the
301     start of the HTPresentations list.  - kw
302     */
303 
304     extern void HTReorderPresentation(HTFormat format_in,
305 				      HTFormat format_out);
306 
/*
 * HTFilterPresentations: set up the 'get_accept' flag
 *
 * The flag denotes those presentations which are not redundant, and which
 * will be listed in the "Accept:" header.
 */
    extern void HTFilterPresentations(void);
312 
313 /*
314 
315 HTStackValue: Find the cost of a filter stack
316 
317    Must return the cost of the same stack which HTStreamStack would set up.
318 
319   ON ENTRY,
320 
321   format_in               The fomat of the data to be converted
322 
323   format_out              The format required
324 
325   initial_value           The intrinsic "value" of the data before conversion on a scale
326                          from 0 to 1
327 
328   length                  The number of bytes expected in the input format
329 
330  */
331     extern float HTStackValue(HTFormat format_in,
332 			      HTFormat rep_out,
333 			      double initial_value,
334 			      long int length);
335 
/*
 * Returned if none found.
 *
 * The replacement list is parenthesized so that the leading minus sign
 * always stays attached to the constant wherever the macro is expanded.
 * The value, and its type (double), are unchanged.
 */
#define NO_VALUE_FOUND  (-1e20)
337 
/*
 * Display the page while transfer is in progress
 * ----------------------------------------------
 *
 * HTDisplayPartial repaints the page only when necessary.  This is a
 * traverse call for HText_pageDisplay() - it works!
 *
 * NOTE(review): HTFinishDisplayPartial has no description in this header;
 * presumably it concludes the partial display - confirm in HTFormat.c.
 */
    extern void HTDisplayPartial(void);
    extern void HTFinishDisplayPartial(void);
348 
349 /*
350 
351 HTCopy:  Copy a socket to a stream
352 
353    This is used by the protocol engines to send data down a stream, typically
354    one which has been generated by HTStreamStack.
355 
356  */
357     extern int HTCopy(HTParentAnchor *anchor,
358 		      int file_number,
359 		      void *handle,
360 		      HTStream *sink);
361 
362 /*
363 
364 HTFileCopy:  Copy a file to a stream
365 
366    This is used by the protocol engines to send data down a stream, typically
367    one which has been generated by HTStreamStack.  It is currently called by
368    HTParseFile
369 
370  */
371     extern int HTFileCopy(FILE *fp,
372 			  HTStream *sink);
373 
#ifdef USE_SOURCE_CACHE
/*
 * HTMemCopy: Copy a memory chunk to a stream
 *
 * This is used by the protocol engines to send data down a stream,
 * typically one which has been generated by HTStreamStack.  It is currently
 * called by HTParseMem.
 */
    extern int HTMemCopy(
	HTChunk *chunk,
	HTStream *sink);
#endif
387 
388 /*
389 
390 HTCopyNoCR: Copy a socket to a stream, stripping CR characters.
391 
392    It is slower than HTCopy .
393 
394  */
395 
396     extern void HTCopyNoCR(HTParentAnchor *anchor,
397 			   int file_number,
398 			   HTStream *sink);
399 
/*
 * HTInitInput: Clear input buffer and set file number
 *
 * This routine and HTGetCharacter provide simple character input from
 * sockets.  (They are left over from the older architecture and may not be
 * used very much.)  The existence of a common routine and buffer saves
 * memory space in small implementations.
 */
    extern void HTInitInput(int file_number);
411 
/*
 * HTGetCharacter: Get next character from buffer
 *
 * NOTE(review): interrupted_in_htgetcharacter is not described in this
 * header; going by its name it presumably records an interruption of
 * HTGetCharacter - confirm in HTFormat.c.
 */
    extern int interrupted_in_htgetcharacter;
    extern int HTGetCharacter(void);
419 
420 /*
421 
422 HTParseSocket: Parse a socket given its format
423 
424    This routine is called by protocol modules to load an object.  uses
425    HTStreamStack and the copy routines above.  Returns HT_LOADED if successful,
426    <0 if not.
427 
428  */
429     extern int HTParseSocket(HTFormat format_in,
430 			     HTFormat format_out,
431 			     HTParentAnchor *anchor,
432 			     int file_number,
433 			     HTStream *sink);
434 
435 /*
436 
437 HTParseFile: Parse a File through a file pointer
438 
439    This routine is called by protocols modules to load an object.  uses
440    HTStreamStack and HTFileCopy.  Returns HT_LOADED if successful, can also
441    return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure.
442 
443  */
444     extern int HTParseFile(HTFormat format_in,
445 			   HTFormat format_out,
446 			   HTParentAnchor *anchor,
447 			   FILE *fp,
448 			   HTStream *sink);
449 
#ifdef USE_SOURCE_CACHE
/*
 * HTParseMem: Parse a document in memory
 *
 * This routine is called by protocol modules to load an object.  It uses
 * HTStreamStack and HTMemCopy.
 *
 * Returns HT_LOADED if successful; can also return <0 for failure.
 */
    extern int HTParseMem(
	HTFormat format_in,
	HTFormat format_out,
	HTParentAnchor *anchor,
	HTChunk *chunk,
	HTStream *sink);
#endif
466 
#ifdef USE_ZLIB
/*
 * HTParseGzFile: Parse a gzip'ed File through a file pointer
 *
 * This routine is called by protocol modules to load an object.  It uses
 * HTStreamStack and HTGzFileCopy.
 *
 * Returns HT_LOADED if successful; can also return HT_PARTIAL_CONTENT,
 * HT_NO_DATA, or other <0 for failure.
 */
    extern int HTParseGzFile(
	HTFormat format_in,
	HTFormat format_out,
	HTParentAnchor *anchor,
	gzFile gzfp,
	HTStream *sink);

/*
 * HTParseZzFile: Parse a deflate'd File through a file pointer
 *
 * This routine is called by protocol modules to load an object.  It uses
 * HTStreamStack and HTZzFileCopy.
 *
 * Returns HT_LOADED if successful; can also return HT_PARTIAL_CONTENT,
 * HT_NO_DATA, or other <0 for failure.
 */
    extern int HTParseZzFile(
	HTFormat format_in,
	HTFormat format_out,
	HTParentAnchor *anchor,
	FILE *zzfp,
	HTStream *sink);

#endif				/* USE_ZLIB */
495 
#ifdef USE_BZLIB
/*
 * HTParseBzFile: Parse a bzip2'ed File through a file pointer
 *
 * This routine is called by protocol modules to load an object.  It uses
 * HTStreamStack and a file copy routine.
 *
 * NOTE(review): the original notes name HTGzFileCopy as that routine, which
 * looks like a slip carried over from HTParseGzFile; presumably the bzip2
 * counterpart is meant - confirm in HTFormat.c.
 *
 * Returns HT_LOADED if successful; can also return HT_PARTIAL_CONTENT,
 * HT_NO_DATA, or other <0 for failure.
 */
    extern int HTParseBzFile(
	HTFormat format_in,
	HTFormat format_out,
	HTParentAnchor *anchor,
	BZFILE * bzfp,
	HTStream *sink);

#endif				/* USE_BZLIB */
511 
512 /*
513 
514 HTNetToText: Convert Net ASCII to local representation
515 
516    This is a filter stream suitable for taking text from a socket and passing
517    it into a stream which expects text in the local C representation.  It does
518    ASCII and newline conversion.  As usual, pass its output stream to it when
519    creating it.
520 
521  */
522     extern HTStream *HTNetToText(HTStream *sink);
523 
/*
 * HTFormatInit: Set up default presentations and conversions
 *
 * These are defined in HTInit.c, or in HTSInit.c if these have been
 * replaced.
 *
 * If you don't call this routine, and you don't define any presentations,
 * then this routine will automatically be called the first time a
 * conversion is needed.  However, if you explicitly add some conversions
 * (e.g. using HTLoadRules) then you may also want to call this explicitly,
 * to get the defaults as well.
 */
    extern void HTFormatInit(void);
537 
538 /*
539 
540 Epilogue
541 
542  */
543     extern BOOL HTOutputSource;	/* Flag: shortcut parser */
544 
545 #ifdef __cplusplus
546 }
547 #endif
548 #endif				/* HTFORMAT_H */
549