1 /* 2 * $LynxId: HTML.h,v 1.33 2011/05/19 09:57:53 tom Exp $ 3 * 4 * HTML to rich text converter for libwww 5 * 6 * THE HTML TO RTF OBJECT CONVERTER 7 * 8 * This interprets the HTML semantics. 9 */ 10 #ifndef HTML_H 11 #define HTML_H 12 13 #ifndef HTUTILS_H 14 #include <HTUtils.h> 15 #endif /* HTUTILS_H */ 16 17 #include <UCDefs.h> 18 #include <UCAux.h> 19 #include <HTAnchor.h> 20 #include <HTMLDTD.h> 21 22 #ifdef __cplusplus 23 extern "C" { 24 #endif 25 /* #define ATTR_CS_IN (me->T.output_utf8 ? me->UCLYhndl : 0) */ 26 #define ATTR_CS_IN me->tag_charset 27 #define TRANSLATE_AND_UNESCAPE_ENTITIES(s, p, h) \ 28 LYUCTranslateHTMLString(s, ATTR_CS_IN, current_char_set, YES, p, h, st_HTML) 29 #define TRANSLATE_AND_UNESCAPE_ENTITIES5(s,cs_from,cs_to,p,h) \ 30 LYUCTranslateHTMLString(s, cs_from, cs_to, YES, p, h, st_HTML) 31 #define TRANSLATE_AND_UNESCAPE_ENTITIES6(s,cs_from,cs_to,spcls,p,h) \ 32 LYUCTranslateHTMLString(s, cs_from, cs_to, spcls, p, h, st_HTML) 33 #define TRANSLATE_HTML(s,p,h) \ 34 LYUCFullyTranslateString(s, me->UCLYhndl, current_char_set, NO, YES, p, h, NO, st_HTML) 35 #define TRANSLATE_HTML5(s,cs_from,cs_to,p,h) \ 36 LYUCFullyTranslateString(s, cs_from, cs_to, NO, YES, p, h, NO, st_HTML) 37 #define TRANSLATE_HTML7(s,cs_from,cs_to,spcls,p,h,Back) \ 38 LYUCFullyTranslateString(s, cs_from, cs_to, NO, spcls, p, h, Back, st_HTML) 39 /* 40 * Strings from attributes which should be converted to some kind of "standard" 41 * representation (character encoding), was Latin-1, esp. URLs (incl. 42 * #fragments) and HTML NAME and ID stuff. 43 */ 44 #define TRANSLATE_AND_UNESCAPE_TO_STD(s) \ 45 LYUCTranslateHTMLString(s, ATTR_CS_IN, ATTR_CS_IN, NO, NO, YES, st_URL) 46 #define UNESCAPE_FIELDNAME_TO_STD(s) \ 47 LYUCTranslateHTMLString(s, ATTR_CS_IN, ATTR_CS_IN, NO, NO, YES, st_HTML) 48 extern const HTStructuredClass HTMLPresentation; 49 50 #ifdef Lynx_HTML_Handler 51 /* 52 * This section is semi-private to HTML.c and its helper modules. - FM 53 * ------------------------------------------------------------------- 54 */ 55 56 typedef struct _stack_element { 57 HTStyle *style; 58 int tag_number; 59 } stack_element; 60 61 /* HTML Object 62 * ----------- 63 */ 64 #define MAX_NESTING 800 /* Should be checked by parser */ 65 66 struct _HTStructured { 67 const HTStructuredClass *isa; 68 HTParentAnchor *node_anchor; 69 HText *text; 70 71 HTStream *target; /* Output stream */ 72 HTStreamClass targetClass; /* Output routines */ 73 74 HTChildAnchor *CurrentA; /* current HTML_A anchor */ 75 int CurrentANum; /* current HTML_A number */ 76 char *base_href; /* current HTML_BASE href */ 77 char *map_address; /* current HTML_MAP address */ 78 79 HTChunk title; /* Grow by 128 */ 80 HTChunk object; /* Grow by 128 */ 81 BOOL object_started; 82 BOOL object_declare; 83 BOOL object_shapes; 84 BOOL object_ismap; 85 char *object_usemap; 86 char *object_id; 87 char *object_title; 88 char *object_data; 89 char *object_type; 90 char *object_classid; 91 char *object_codebase; 92 char *object_codetype; 93 char *object_name; 94 int objects_mixed_open, objects_figged_open; 95 HTChunk option; /* Grow by 128 */ 96 BOOL first_option; /* First OPTION in SELECT? */ 97 char *LastOptionValue; 98 BOOL LastOptionChecked; 99 BOOL select_disabled; 100 HTChunk textarea; /* Grow by 128 */ 101 char *textarea_name; 102 int textarea_name_cs; 103 char *textarea_accept_cs; 104 int textarea_cols; 105 int textarea_rows; 106 int textarea_disabled; 107 int textarea_readonly; 108 char *textarea_id; 109 HTChunk math; /* Grow by 128 */ 110 HTChunk style_block; /* Grow by 128 */ 111 HTChunk script; /* Grow by 128 */ 112 113 /* 114 * Used for nested lists. - FM 115 */ 116 int List_Nesting_Level; /* counter for list nesting level */ 117 int OL_Counter[12]; /* counter for ordered lists */ 118 char OL_Type[12]; /* types for ordered lists */ 119 int Last_OL_Count; /* last count in ordered lists */ 120 char Last_OL_Type; /* last type in ordered lists */ 121 122 int Division_Level; 123 short DivisionAlignments[MAX_NESTING]; 124 int Underline_Level; 125 int Quote_Level; 126 127 BOOL UsePlainSpace; 128 BOOL HiddenValue; 129 int lastraw; 130 131 const char *comment_start; /* for literate programming */ 132 const char *comment_end; 133 134 HTTag *current_tag; 135 BOOL style_change; 136 HTStyle *new_style; 137 HTStyle *old_style; 138 int current_default_alignment; 139 BOOL in_word; /* Have just had a non-white char */ 140 stack_element stack[MAX_NESTING]; 141 stack_element *sp; /* Style stack pointer */ 142 BOOL stack_overrun; /* Was MAX_NESTING exceeded? */ 143 int skip_stack; /* flag to skip next style stack operation */ 144 145 /* 146 * Track if we are in an anchor, paragraph, address, base, etc. 147 */ 148 BOOL inA; 149 BOOL inAPPLET; 150 BOOL inAPPLETwithP; 151 BOOL inBadBASE; 152 BOOL inBadHREF; 153 BOOL inBadHTML; 154 BOOL inBASE; 155 BOOL inBoldA; 156 BOOL inBoldH; 157 BOOL inCAPTION; 158 BOOL inCREDIT; 159 BOOL inFIG; 160 BOOL inFIGwithP; 161 BOOL inFONT; 162 BOOL inFORM; 163 BOOL inLABEL; 164 BOOL inP; 165 BOOL inPRE; 166 BOOL inSELECT; 167 BOOL inTABLE; 168 BOOL inTEXTAREA; 169 BOOL inUnderline; 170 171 BOOL needBoldH; 172 173 char *xinclude; /* if no include strin address passed */ 174 /* 175 * UCI and UCLYhndl give the UCInfo and charset registered for the HTML 176 * parser in the node_anchor's UCStages structure. It indicates what is 177 * fed to the HTML parser as the stream of character data (not necessarily 178 * tags and attributes). It should currently always be set to be the same 179 * as UCI and UCLhndl for the HTEXT stage in the node_anchor's UCStages 180 * structure, since the HTML parser sends its input character data to the 181 * output without further charset translation. 182 */ 183 LYUCcharset *UCI; 184 int UCLYhndl; 185 /* 186 * inUCI and inUCLYhndl indicate the UCInfo and charset which the HTML 187 * parser treats at the input charset. It is normally set to the UCI and 188 * UCLhndl for the SGML parser in the node_anchor's UCStages structure 189 * (which may be a dummy, based on the MIME parser's UCI and UCLhndl in 190 * that structure, when we are handling a local file or non-http(s) 191 * gateway). It could be changed temporarily by the HTML parser, for 192 * conversions of attribute strings, but should be reset once done. - FM 193 */ 194 LYUCcharset *inUCI; 195 int inUCLYhndl; 196 /* 197 * outUCI and outUCLYhndl indicate the UCInfo and charset which the HTML 198 * parser treats as the output charset. It is normally set to its own UCI 199 * and UCLhndl. It could be changed for conversions of attribute strings, 200 * but should be reset once done. - FM 201 */ 202 LYUCcharset *outUCI; 203 int outUCLYhndl; 204 /* 205 * T holds the transformation rules for conversions of strings between the 206 * input and output charsets by the HTML parser. - FM 207 */ 208 UCTransParams T; 209 210 int tag_charset; /* charset for attribute values etc. */ 211 }; 212 213 extern HTStyle *LYstyles(int style_number); 214 extern BOOL LYBadHTML(HTStructured * me); 215 extern void LYShowBadHTML(const char *s); 216 217 /* 218 * Semi-Private functions. - FM 219 */ 220 extern void HTML_put_character(HTStructured * me, int c); 221 extern void HTML_put_string(HTStructured * me, const char *s); 222 extern void HTML_write(HTStructured * me, const char *s, int l); 223 extern int HTML_put_entity(HTStructured * me, int entity_number); 224 extern void actually_set_style(HTStructured * me); 225 226 /* Style buffering avoids dummy paragraph begin/ends. 227 */ 228 #define UPDATE_STYLE if (me->style_change) { actually_set_style(me); } 229 #endif /* Lynx_HTML_Handler */ 230 231 extern void strtolower(char *i); 232 233 /* P U B L I C 234 */ 235 236 /* 237 * HTConverter to present HTML 238 */ 239 extern HTStream *HTMLToPlain(HTPresentation *pres, 240 HTParentAnchor *anchor, 241 HTStream *sink); 242 243 extern HTStream *HTMLParsedPresent(HTPresentation *pres, 244 HTParentAnchor *anchor, 245 HTStream *sink); 246 247 extern HTStream *HTMLToC(HTPresentation *pres, 248 HTParentAnchor *anchor, 249 HTStream *sink); 250 251 extern HTStream *HTMLPresent(HTPresentation *pres, 252 HTParentAnchor *anchor, 253 HTStream *sink); 254 255 extern HTStructured *HTML_new(HTParentAnchor *anchor, 256 HTFormat format_out, 257 HTStream *target); 258 259 /* 260 * Record error message as a hypertext object. 261 * 262 * The error message should be marked as an error so that it can be reloaded 263 * later. This implementation just throws up an error message and leaves the 264 * document unloaded. 265 * 266 * On entry, 267 * sink is a stream to the output device if any 268 * number is the HTTP error number 269 * message is the human readable message. 270 * On exit, 271 * a return code like HT_LOADED if object exists else 60; 0 272 */ 273 extern int HTLoadError(HTStream *sink, 274 int number, 275 const char *message); 276 277 #ifdef __cplusplus 278 } 279 #endif 280 #endif /* HTML_H */ 281