1 /* 2 * $LynxId: HTFormat.h,v 1.33 2010/10/27 00:13:53 tom Exp $ 3 * 4 * HTFormat: The format manager in the WWW Library 5 * MANAGE DIFFERENT DOCUMENT FORMATS 6 * 7 * Here we describe the functions of the HTFormat module which handles conversion between 8 * different data representations. (In MIME parlance, a representation is known as a 9 * content-type. In WWW the term "format" is often used as it is shorter). 10 * 11 * This module is implemented by HTFormat.c. This hypertext document is used to generate 12 * the HTFormat.h include file. Part of the WWW library. 13 */ 14 #ifndef HTFORMAT_H 15 #define HTFORMAT_H 16 17 #include <HTStream.h> 18 #include <HTAtom.h> 19 #include <HTList.h> 20 #include <HTAnchor.h> 21 22 #ifdef USE_SOURCE_CACHE 23 #include <HTChunk.h> 24 #endif 25 26 #ifdef USE_BZLIB 27 #include <bzlib.h> 28 #endif 29 30 #ifdef USE_ZLIB 31 #include <zlib.h> 32 #endif 33 34 #ifdef __cplusplus 35 extern "C" { 36 #endif 37 /* 38 39 These macros (which used to be constants) define some basic internally 40 referenced representations. The www/xxx ones are of course not MIME 41 standard. 42 43 www/source is an output format which leaves the input untouched. It is 44 useful for diagnostics, and for users who want to see the original, whatever 45 it is. 46 47 */ 48 /* Internal ones */ 49 /* #define WWW_SOURCE HTAtom_for("www/source") */ 50 /* Whatever it was originally */ 51 extern HTAtom *WWW_SOURCE; 52 /* calculated once, heavy used */ 53 54 /* 55 56 www/present represents the user's perception of the document. If you 57 convert to www/present, you present the material to the user. 58 59 */ 60 #define WWW_PRESENT HTAtom_for("www/present") /* The user's perception */ 61 62 #define WWW_DEBUG HTAtom_for("www/debug") 63 /* 64 65 WWW_DEBUG represents the user's perception of debug information, for example 66 sent as a HTML document in a HTTP redirection message. 67 68 */ 69 70 /* 71 72 The message/rfc822 format means a MIME message or a plain text message with 73 no MIME header. This is what is returned by an HTTP server. 74 75 */ 76 #define WWW_MIME HTAtom_for("www/mime") /* A MIME message */ 77 78 /* 79 For parsing only the header. - kw 80 */ 81 #define WWW_MIME_HEAD HTAtom_for("message/x-rfc822-head") 82 83 /* 84 85 www/print is like www/present except it represents a printed copy. 86 87 */ 88 #define WWW_PRINT HTAtom_for("www/print") /* A printed copy */ 89 90 /* 91 92 www/unknown is a really unknown type. Some default action is appropriate. 93 94 */ 95 #define WWW_UNKNOWN HTAtom_for("www/unknown") 96 97 #ifdef DIRED_SUPPORT 98 /* 99 www/dired signals directory edit mode. 100 */ 101 #define WWW_DIRED HTAtom_for("www/dired") 102 #endif 103 104 /* 105 106 These are regular MIME types. HTML is assumed to be added by the W3 code. 107 application/octet-stream was mistakenly application/binary in earlier libwww 108 versions (pre 2.11). 109 110 */ 111 #define WWW_PLAINTEXT HTAtom_for("text/plain") 112 #define WWW_POSTSCRIPT HTAtom_for("application/postscript") 113 #define WWW_RICHTEXT HTAtom_for("application/rtf") 114 #define WWW_AUDIO HTAtom_for("audio/basic") 115 #define WWW_HTML HTAtom_for("text/html") 116 #define WWW_BINARY HTAtom_for("application/octet-stream") 117 118 typedef HTAtom *HTEncoding; 119 120 /* 121 * The following are values for the MIME types: 122 */ 123 #define WWW_ENC_7BIT HTAtom_for("7bit") 124 #define WWW_ENC_8BIT HTAtom_for("8bit") 125 #define WWW_ENC_BINARY HTAtom_for("binary") 126 127 /* 128 * We also add 129 */ 130 #define WWW_ENC_COMPRESS HTAtom_for("compress") 131 132 /* 133 * Does a string designate a real encoding, or is it just 134 * a "dummy" as for example 7bit, 8bit, and binary? 135 */ 136 #define IsUnityEncStr(senc) \ 137 ((senc)==NULL || *(senc)=='\0' || !strcmp(senc,"identity") ||\ 138 !strcmp(senc,"8bit") || !strcmp(senc,"binary") || !strcmp(senc,"7bit")) 139 140 #define IsUnityEnc(enc) \ 141 ((enc)==NULL || (enc)==HTAtom_for("identity") ||\ 142 (enc)==WWW_ENC_8BIT || (enc)==WWW_ENC_BINARY || (enc)==WWW_ENC_7BIT) 143 144 /* 145 146 The HTPresentation and HTConverter types 147 148 This HTPresentation structure represents a possible conversion algorithm 149 from one format to another. It includes a pointer to a conversion routine. 150 The conversion routine returns a stream to which data should be fed. See 151 also HTStreamStack which scans the list of registered converters and calls 152 one. See the initialisation module for a list of conversion routines. 153 154 */ 155 typedef struct _HTPresentation HTPresentation; 156 157 typedef HTStream *HTConverter (HTPresentation *pres, 158 HTParentAnchor *anchor, 159 HTStream *sink); 160 161 struct _HTPresentation { 162 HTAtom *rep; /* representation name atomized */ 163 HTAtom *rep_out; /* resulting representation */ 164 HTConverter *converter; /* routine to gen the stream stack */ 165 char *command; /* MIME-format command string */ 166 char *testcommand; /* MIME-format test string */ 167 float quality; /* Between 0 (bad) and 1 (good) */ 168 float secs; 169 float secs_per_byte; 170 off_t maxbytes; 171 BOOL get_accept; /* list in "Accept:" for GET */ 172 int accept_opt; /* matches against LYAcceptMedia */ 173 }; 174 175 /* 176 177 The list of presentations is kept by this module. It is also scanned by 178 modules which want to know the set of formats supported. for example. 179 180 */ 181 extern HTList *HTPresentations; 182 183 /* 184 185 The default presentation is used when no other is appropriate 186 187 */ 188 extern HTPresentation *default_presentation; 189 190 /* 191 * Options used for "Accept:" string 192 */ 193 typedef enum { 194 /* make the components powers of two so we can add them */ 195 mediaINT = 1 /* internal types predefined in HTInit.c */ 196 ,mediaEXT = 2 /* external types predefined in HTInit.c */ 197 ,mediaCFG = 4 /* types, e.g., viewers, from lynx.cfg */ 198 ,mediaUSR = 8 /* user's mime-types, etc. */ 199 ,mediaSYS = 16 /* system's mime-types, etc. */ 200 /* these are useful flavors for the options menu */ 201 ,mediaOpt1 = mediaINT 202 ,mediaOpt2 = mediaINT + mediaCFG 203 ,mediaOpt3 = mediaINT + mediaCFG + mediaUSR 204 ,mediaOpt4 = mediaINT + mediaCFG + mediaUSR + mediaSYS 205 /* this is the flavor from pre-2.8.6 */ 206 ,mediaALL = mediaINT + mediaEXT + mediaCFG + mediaUSR + mediaSYS 207 } AcceptMedia; 208 209 /* 210 * Options used for "Accept-Encoding:" string 211 */ 212 typedef enum { 213 encodingNONE = 0 214 ,encodingGZIP = 1 215 ,encodingDEFLATE = 2 216 ,encodingCOMPRESS = 4 217 ,encodingBZIP2 = 8 218 ,encodingALL = (encodingGZIP 219 + encodingDEFLATE 220 + encodingCOMPRESS 221 + encodingBZIP2) 222 } AcceptEncoding; 223 224 /* 225 226 HTSetPresentation: Register a system command to present a format 227 228 ON ENTRY, 229 230 rep is the MIME - style format name 231 232 command is the MAILCAP - style command template 233 234 testcommand is the MAILCAP - style testcommand template 235 236 quality A degradation faction 0..1.0 237 238 secs A limit on the time user will wait (0.0 for infinity) 239 secs_per_byte 240 241 maxbytes A limit on the length acceptable as input (0 infinite) 242 243 media Used in filtering presentation types for "Accept:" 244 245 */ 246 extern void HTSetPresentation(const char *representation, 247 const char *command, 248 const char *testcommand, 249 double quality, 250 double secs, 251 double secs_per_byte, 252 long int maxbytes, 253 AcceptMedia media 254 ); 255 256 /* 257 258 HTSetConversion: Register a converstion routine 259 260 ON ENTRY, 261 262 rep_in is the content-type input 263 264 rep_out is the resulting content-type 265 266 converter is the routine to make the stream to do it 267 268 */ 269 270 extern void HTSetConversion(const char *rep_in, 271 const char *rep_out, 272 HTConverter *converter, 273 double quality, 274 double secs, 275 double secs_per_byte, 276 long int maxbytes, 277 AcceptMedia media 278 ); 279 280 /* 281 282 HTStreamStack: Create a stack of streams 283 284 This is the routine which actually sets up the conversion. It currently 285 checks only for direct conversions, but multi-stage conversions are forseen. 286 It takes a stream into which the output should be sent in the final format, 287 builds the conversion stack, and returns a stream into which the data in the 288 input format should be fed. The anchor is passed because hypertxet objects 289 load information into the anchor object which represents them. 290 291 */ 292 extern HTStream *HTStreamStack(HTFormat format_in, 293 HTFormat format_out, 294 HTStream *stream_out, 295 HTParentAnchor *anchor); 296 297 /* 298 HTReorderPresentation: put presentation near head of list 299 300 Look up a presentation (exact match only) and, if found, reorder it to the 301 start of the HTPresentations list. - kw 302 */ 303 304 extern void HTReorderPresentation(HTFormat format_in, 305 HTFormat format_out); 306 307 /* 308 * Setup 'get_accept' flag to denote presentations that are not redundant, 309 * and will be listed in "Accept:" header. 310 */ 311 extern void HTFilterPresentations(void); 312 313 /* 314 315 HTStackValue: Find the cost of a filter stack 316 317 Must return the cost of the same stack which HTStreamStack would set up. 318 319 ON ENTRY, 320 321 format_in The fomat of the data to be converted 322 323 format_out The format required 324 325 initial_value The intrinsic "value" of the data before conversion on a scale 326 from 0 to 1 327 328 length The number of bytes expected in the input format 329 330 */ 331 extern float HTStackValue(HTFormat format_in, 332 HTFormat rep_out, 333 double initial_value, 334 long int length); 335 336 #define NO_VALUE_FOUND -1e20 /* returned if none found */ 337 338 /* Display the page while transfer in progress 339 * ------------------------------------------- 340 * 341 * Repaint the page only when necessary. 342 * This is a traverse call for HText_pageDispaly() - it works!. 343 * 344 */ 345 extern void HTDisplayPartial(void); 346 347 extern void HTFinishDisplayPartial(void); 348 349 /* 350 351 HTCopy: Copy a socket to a stream 352 353 This is used by the protocol engines to send data down a stream, typically 354 one which has been generated by HTStreamStack. 355 356 */ 357 extern int HTCopy(HTParentAnchor *anchor, 358 int file_number, 359 void *handle, 360 HTStream *sink); 361 362 /* 363 364 HTFileCopy: Copy a file to a stream 365 366 This is used by the protocol engines to send data down a stream, typically 367 one which has been generated by HTStreamStack. It is currently called by 368 HTParseFile 369 370 */ 371 extern int HTFileCopy(FILE *fp, 372 HTStream *sink); 373 374 #ifdef USE_SOURCE_CACHE 375 /* 376 377 HTMemCopy: Copy a memory chunk to a stream 378 379 This is used by the protocol engines to send data down a stream, typically 380 one which has been generated by HTStreamStack. It is currently called by 381 HTParseMem 382 383 */ 384 extern int HTMemCopy(HTChunk *chunk, 385 HTStream *sink); 386 #endif 387 388 /* 389 390 HTCopyNoCR: Copy a socket to a stream, stripping CR characters. 391 392 It is slower than HTCopy . 393 394 */ 395 396 extern void HTCopyNoCR(HTParentAnchor *anchor, 397 int file_number, 398 HTStream *sink); 399 400 /* 401 402 Clear input buffer and set file number 403 404 This routine and the one below provide simple character input from sockets. 405 (They are left over from the older architecture and may not be used very 406 much.) The existence of a common routine and buffer saves memory space in 407 small implementations. 408 409 */ 410 extern void HTInitInput(int file_number); 411 412 /* 413 414 Get next character from buffer 415 416 */ 417 extern int interrupted_in_htgetcharacter; 418 extern int HTGetCharacter(void); 419 420 /* 421 422 HTParseSocket: Parse a socket given its format 423 424 This routine is called by protocol modules to load an object. uses 425 HTStreamStack and the copy routines above. Returns HT_LOADED if successful, 426 <0 if not. 427 428 */ 429 extern int HTParseSocket(HTFormat format_in, 430 HTFormat format_out, 431 HTParentAnchor *anchor, 432 int file_number, 433 HTStream *sink); 434 435 /* 436 437 HTParseFile: Parse a File through a file pointer 438 439 This routine is called by protocols modules to load an object. uses 440 HTStreamStack and HTFileCopy. Returns HT_LOADED if successful, can also 441 return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure. 442 443 */ 444 extern int HTParseFile(HTFormat format_in, 445 HTFormat format_out, 446 HTParentAnchor *anchor, 447 FILE *fp, 448 HTStream *sink); 449 450 #ifdef USE_SOURCE_CACHE 451 /* 452 453 HTParseMem: Parse a document in memory 454 455 This routine is called by protocols modules to load an object. uses 456 HTStreamStack and HTMemCopy. Returns HT_LOADED if successful, can also 457 return <0 for failure. 458 459 */ 460 extern int HTParseMem(HTFormat format_in, 461 HTFormat format_out, 462 HTParentAnchor *anchor, 463 HTChunk *chunk, 464 HTStream *sink); 465 #endif 466 467 #ifdef USE_ZLIB 468 /* 469 HTParseGzFile: Parse a gzip'ed File through a file pointer 470 471 This routine is called by protocols modules to load an object. uses 472 HTStreamStack and HTGzFileCopy. Returns HT_LOADED if successful, can also 473 return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure. 474 */ 475 extern int HTParseGzFile(HTFormat format_in, 476 HTFormat format_out, 477 HTParentAnchor *anchor, 478 gzFile gzfp, 479 HTStream *sink); 480 481 /* 482 HTParseZzFile: Parse a deflate'd File through a file pointer 483 484 This routine is called by protocols modules to load an object. uses 485 HTStreamStack and HTZzFileCopy. Returns HT_LOADED if successful, can also 486 return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure. 487 */ 488 extern int HTParseZzFile(HTFormat format_in, 489 HTFormat format_out, 490 HTParentAnchor *anchor, 491 FILE *zzfp, 492 HTStream *sink); 493 494 #endif /* USE_ZLIB */ 495 496 #ifdef USE_BZLIB 497 /* 498 HTParseBzFile: Parse a bzip2'ed File through a file pointer 499 500 This routine is called by protocols modules to load an object. uses 501 HTStreamStack and HTGzFileCopy. Returns HT_LOADED if successful, can also 502 return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure. 503 */ 504 extern int HTParseBzFile(HTFormat format_in, 505 HTFormat format_out, 506 HTParentAnchor *anchor, 507 BZFILE * bzfp, 508 HTStream *sink); 509 510 #endif /* USE_BZLIB */ 511 512 /* 513 514 HTNetToText: Convert Net ASCII to local representation 515 516 This is a filter stream suitable for taking text from a socket and passing 517 it into a stream which expects text in the local C representation. It does 518 ASCII and newline conversion. As usual, pass its output stream to it when 519 creating it. 520 521 */ 522 extern HTStream *HTNetToText(HTStream *sink); 523 524 /* 525 526 HTFormatInit: Set up default presentations and conversions 527 528 These are defined in HTInit.c or HTSInit.c if these have been replaced. If 529 you don't call this routine, and you don't define any presentations, then 530 this routine will automatically be called the first time a conversion is 531 needed. However, if you explicitly add some conversions (eg using 532 HTLoadRules) then you may want also to explicitly call this to get the 533 defaults as well. 534 535 */ 536 extern void HTFormatInit(void); 537 538 /* 539 540 Epilogue 541 542 */ 543 extern BOOL HTOutputSource; /* Flag: shortcut parser */ 544 545 #ifdef __cplusplus 546 } 547 #endif 548 #endif /* HTFORMAT_H */ 549