1 /*
2 * $LynxId: HTPlain.c,v 1.51 2013/05/02 11:09:30 tom Exp $
3 *
4 * Plain text object HTWrite.c
5 * =================
6 *
7 * This version of the stream object just writes to a socket.
8 * The socket is assumed open and left open.
9 *
10 * Bugs:
11 * strings written must be less than buffer size.
12 */
13
14 #define HTSTREAM_INTERNAL 1
15
16 #include <HTUtils.h>
17 #include <LYCharVals.h> /* S/390 -- gil -- 0288 */
18
19 #include <HTPlain.h>
20
21 #include <HTChunk.h>
22 #include <HText.h>
23 #include <HTStyle.h>
24 #define Lynx_HTML_Handler
25 #include <HTML.h> /* styles[] */
26
/*
 * Buffer size constant.  It must expand to a bare integer constant: a
 * trailing semicolon in the replacement text would break any use of the
 * macro inside an expression or an array declarator.
 */
#define BUFFER_SIZE 4096	/* Tradeoff */
28
29 #include <HTMLDTD.h>
30 #include <HTCJK.h>
31 #include <UCMap.h>
32 #include <UCDefs.h>
33 #include <UCAux.h>
34
35 #include <LYCharSets.h>
36 #include <LYStrings.h>
37 #include <LYLeaks.h>
38
/*
 * File-scope scanner state shared by HTPlain_put_character() and
 * HTPlain_write().
 *
 * HTPlain_lastraw     last raw input octet that was recorded, or -1 when
 *                     there is none (initial value, and the value stored
 *                     after the LF of a CR LF pair has been swallowed).
 * HTPlain_bs_pending  progress through a backspace/underscore sequence:
 *                     0 nothing pending, 1 backspace seen, 2 underscore
 *                     held back, 3 underscore followed by backspace.
 */
static int HTPlain_lastraw = -1;
static int HTPlain_bs_pending = 0;	/* 1:bs 2:underline 3:underline+bs - kw */
41
42 /* HTML Object
43 * -----------
44 */
45 struct _HTStream {
46 const HTStreamClass *isa;
47 HText *text;
48 /*
49 * The node_anchor UCInfo and handle for the input (PARSER) stage. - FM
50 */
51 LYUCcharset *inUCI;
52 int inUCLYhndl;
53 /*
54 * The node_anchor UCInfo and handle for the output (HTEXT) stage. - FM
55 */
56 LYUCcharset *outUCI;
57 int outUCLYhndl;
58 /*
59 * Counter, value, buffer and pointer for UTF-8 handling. - FM
60 */
61 char utf_count;
62 UCode_t utf_char;
63 char utf_buf[8];
64 char *utf_buf_p;
65 /*
66 * The charset transformation structure. - FM
67 */
68 UCTransParams T;
69 };
70
/*
 * Scratch area for replacement strings produced by the charset
 * translation calls and for the "Uhhh" fallback notation.  Callers in
 * this file limit translations to 60 bytes.
 */
static char replace_buf[64];
72
/*
 * Fill in the stream's input and output charset handles and info pointers
 * from the anchor.
 *
 * A handle that is still negative is resolved first: the input handle by
 * copying the MIME stage info to the PARSER stage, the output handle by
 * falling back to current_char_set when the HTEXT stage has none.
 */
static void HTPlain_getChartransInfo(HTStream *me, HTParentAnchor *anchor)
{
    /* Input (PARSER) side. */
    if (me->inUCLYhndl < 0) {
	HTAnchor_copyUCInfoStage(anchor,
				 UCT_STAGE_PARSER,
				 UCT_STAGE_MIME,
				 UCT_SETBY_PARSER);
	me->inUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER);
    }

    /* Output (HTEXT) side. */
    if (me->outUCLYhndl < 0) {
	int out_hndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);

	if (out_hndl < 0) {
	    /* Nothing recorded for the anchor: use the display charset. */
	    out_hndl = current_char_set;
	    HTAnchor_setUCInfoStage(anchor, out_hndl,
				    UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT);
	}
	/*
	 * NOTE(review): when the fallback above was taken this call repeats
	 * it with the same arguments; kept as is because it is not visible
	 * here whether HTAnchor_setUCInfoStage() is idempotent.
	 */
	HTAnchor_setUCInfoStage(anchor, out_hndl,
				UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT);
	me->outUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);
    }

    me->inUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_PARSER);
    me->outUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT);
}
95
96 /* Write the buffer out to the socket
97 * ----------------------------------
98 */
99
100 /*_________________________________________________________________________
101 *
102 * A C T I O N R O U T I N E S
103 */
104
105 static void HTPlain_write(HTStream *me, const char *s,
106 int l);
107
108 /* Character handling
109 * ------------------
110 */
/*
 * Append a single character to the stream's HText.
 *
 * me	stream whose HText receives the character
 * c	the character; only its low octet is examined
 *
 * With REMOVE_CR_ONLY defined, everything except '\r' is appended
 * unchanged.  Otherwise the LF of a CR LF pair is swallowed, a lone CR
 * becomes a newline, and anything that needs the backspace/underscore
 * state machine or 8-bit/UTF-8 handling is handed to HTPlain_write() as a
 * one-octet buffer.
 */
static void HTPlain_put_character(HTStream *me, int c)
{
#ifdef REMOVE_CR_ONLY
    /*
     * Throw away \rs.
     */
    if (c != '\r') {
	HText_appendCharacter(me->text, c);
    }
#else
    /* One-octet buffer used when delegating to HTPlain_write(). */
    char octet = (char) c;

    /*
     * See HTPlain_write() for explanations of the following code (we've been
     * called via HTPlain_put_string() to do for each character of a terminated
     * string what HTPlain_write() does via a while loop for each character in
     * a stream of given length).  - FM
     */
    if ((HTPlain_lastraw == '\r') && c == '\n') {
	HTPlain_lastraw = -1;
	return;
    }
    if (c == '\b' || c == '_' || HTPlain_bs_pending) {
	/* HTPlain_write() owns the backspace/underscore state machine. */
	HTPlain_write(me, &octet, 1);
	return;
    }
    HTPlain_lastraw = UCH(c);
    if (c == '\r') {
	HText_appendCharacter(me->text, '\n');
    } else if (TOASCII(UCH(c)) >= 127) {	/* S/390 -- gil -- 0305 */
	/*
	 * For now, don't repeat everything here that has been done below - KW
	 *
	 * This branch takes every octet >= 127, so no later test in this
	 * chain can see one; a former "127..160 with HTPassHighCtrlRaw"
	 * branch further down was unreachable and has been removed.
	 */
	HTPlain_write(me, &octet, 1);
    } else if (IS_CJK_TTY) {
	HText_appendCharacter(me->text, c);
#if CH_NBSP < 127
    } else if (UCH(c) == CH_NBSP) {	/* S/390 -- gil -- 0341 */
	HText_appendCharacter(me->text, ' ');
#endif
#if CH_SHY < 127
    } else if (UCH(c) == CH_SHY) {
	return;
#endif
    } else if ((UCH(c) >= ' ' && TOASCII(UCH(c)) < 127) ||
	       c == '\n' || c == '\t') {
	HText_appendCharacter(me->text, c);
    }
    /* Any other control character is dropped. */
#endif /* REMOVE_CR_ONLY */
}
168
169 /* String handling
170 * ---------------
171 *
172 */
/*
 * Append a NUL-terminated string to the stream's HText.
 *
 * With REMOVE_CR_ONLY defined, the string is passed to HText_appendText()
 * as a whole.  Otherwise a NULL string is ignored and each character goes
 * through HTPlain_put_character().
 */
static void HTPlain_put_string(HTStream *me, const char *s)
{
#ifdef REMOVE_CR_ONLY
    HText_appendText(me->text, s);
#else
    const char *cursor = s;

    if (cursor == NULL) {
	return;
    }
    while (*cursor != '\0') {
	HTPlain_put_character(me, *cursor);
	cursor++;
    }
#endif /* REMOVE_CR_ONLY */
}
187
188 /*
189 * Entry function for displayed text/plain and WWW_SOURCE strings. - FM
190 * ---------------------------------------------------------------
191 */
/*
 * Append a counted run of octets to the stream's HText.
 *
 * me	stream whose HText receives the output; its UTF-8 decoder state
 *	(utf_count, utf_char, utf_buf, utf_buf_p) and translation
 *	parameters (T) are read and updated
 * s	first octet to process; need not be NUL-terminated
 * l	number of octets at s
 *
 * With REMOVE_CR_ONLY defined, every octet except '\r' is appended as is.
 * Otherwise each octet passes through, in order:
 *   1. the backspace/underscore state machine (HTPlain_bs_pending),
 *   2. CR / LF / CR LF normalisation (HTPlain_lastraw),
 *   3. optional UTF-8 decoding into a Unicode value,
 *   4. optional translation of the input charset to Unicode,
 *   5. a chain of output decisions ending in raw output, a translated
 *      character, a replacement string, UTF-8 output, or nothing.
 *
 * HTPlain_lastraw and HTPlain_bs_pending are file-scope, so their state
 * carries over from one call to the next.
 */
static void HTPlain_write(HTStream *me, const char *s, int l)
{
    const char *p;
    const char *e = s + l;
    char c;
    unsigned c_unsign;
    BOOL chk;
    UCode_t code, uck = -1;
    char saved_char_in = '\0';

    for (p = s; p < e; p++) {
#ifdef REMOVE_CR_ONLY
	/*
	 * Append the whole string, but remove any \rs.  - FM
	 */
	if (*p != '\r') {
	    HText_appendCharacter(me->text, *p);
	}
#else
	if (*p == '\b') {
	    if (HTPlain_lastraw >= UCH(' ') &&
		HTPlain_lastraw != '\r' && HTPlain_lastraw != '\n') {
		if (!HTPlain_bs_pending) {
		    HTPlain_bs_pending = 1;
		    continue;
		} else if (HTPlain_bs_pending == 2) {
		    HTPlain_bs_pending = 3;
		    continue;
		}
	    }
	    if (HTPlain_bs_pending >= 2)
		HText_appendCharacter(me->text, '_');
	    HTPlain_bs_pending = 0;
	} else if (*p == '_') {
	    if (!HTPlain_bs_pending) {
		HTPlain_bs_pending = 2;
		HTPlain_lastraw = UCH(*p);
		continue;
#if 0
	    } else if (HTPlain_bs_pending != 2) {
		HTPlain_bs_pending--;	/* 1 -> 0, 3 -> 2 */
		HTPlain_lastraw = UCH(*p);
		continue;
#endif
	    }
	}

	/*
	 * Try to handle lone LFs, CRLFs and lone CRs as newline, and to deal
	 * with control, ASCII, and 8-bit characters based on best guesses of
	 * what's appropriate.  - FM
	 */
	if ((HTPlain_lastraw == '\r') && *p == '\n') {
	    HTPlain_lastraw = -1;
	    continue;
	}

	if (HTPlain_bs_pending &&
	    !(UCH(*p) >= ' ' && *p != '\r' && *p != '\n' &&
	      (HTPlain_lastraw == UCH(*p) ||
	       HTPlain_lastraw == UCH('_') ||
	       *p == '_'))) {
	    if (HTPlain_bs_pending >= 2)
		HText_appendCharacter(me->text, '_');
	    HTPlain_bs_pending = 0;
	} else if (HTPlain_bs_pending == 1) {
	    HTPlain_bs_pending = 0;
	    continue;		/* ignore last two of "X\bX" or "X\b_" - kw */
	} else if (HTPlain_bs_pending == 3) {
	    if (*p == '_') {
		HTPlain_bs_pending = 2;
		continue;	/* ignore last two of "_\b_" - kw */
	    } else {
		HTPlain_bs_pending = 0;
		/* ignore first two of "_\bX" - kw */
	    }
	} else if (HTPlain_bs_pending == 2) {
	    HText_appendCharacter(me->text, '_');
	    if (*p == '_')
		continue;	/* keep second of "__" pending - kw */
	    HTPlain_bs_pending = 0;
	} else {
	    HTPlain_bs_pending = 0;
	}
	HTPlain_lastraw = UCH(*p);
	if (*p == '\r') {
	    HText_appendCharacter(me->text, '\n');
	    continue;
	}
	/*
	 * Make sure the character is handled as Unicode whenever that's
	 * appropriate.  - FM
	 */
	c = *p;
	c_unsign = UCH(c);
	code = (UCode_t) c_unsign;
	saved_char_in = '\0';
	/*
	 * Combine any UTF-8 multibytes into Unicode to check for special
	 * characters.  - FM
	 */
	if (me->T.decode_utf8) {
	    /*
	     * Combine UTF-8 into Unicode.  Incomplete characters silently
	     * ignored.  from Linux kernel's console.c - KW
	     */
	    if (TOASCII(c_unsign) > 127) {	/* S/390 -- gil -- 0371 */
		/*
		 * We have an octet from a multibyte character.  - FM
		 */
		if (me->utf_count > 0 && (c & 0xc0) == 0x80) {
		    /*
		     * Adjust the UCode_t value, add the octet to the buffer,
		     * and decrement the byte count.  - FM
		     */
		    me->utf_char = (me->utf_char << 6) | (c & 0x3f);
		    me->utf_count--;
		    *(me->utf_buf_p) = c;
		    (me->utf_buf_p)++;
		    if (me->utf_count == 0) {
			/*
			 * Got a complete multibyte character.
			 */
			*(me->utf_buf_p) = '\0';
			code = me->utf_char;
			if (code > 0 && code < 256) {
			    c = FROMASCII((char) code);
			    c_unsign = UCH(c);
			}
		    } else {
			/*
			 * Get the next byte.  - FM
			 */
			continue;
		    }
		} else {
		    /*
		     * Start handling a new multibyte character.  - FM
		     *
		     * The lead octet always goes to the start of the buffer.
		     * utf_buf_p may still point past an earlier sequence
		     * (it is only rewound on some output paths), so storing
		     * through it would leave a stale lead octet in
		     * utf_buf[0].
		     */
		    me->utf_buf[0] = c;
		    me->utf_buf_p = &me->utf_buf[1];
		    if ((*p & 0xe0) == 0xc0) {
			me->utf_count = 1;
			me->utf_char = (c & 0x1f);
		    } else if ((*p & 0xf0) == 0xe0) {
			me->utf_count = 2;
			me->utf_char = (c & 0x0f);
		    } else if ((*p & 0xf8) == 0xf0) {
			me->utf_count = 3;
			me->utf_char = (c & 0x07);
		    } else if ((*p & 0xfc) == 0xf8) {
			me->utf_count = 4;
			me->utf_char = (c & 0x03);
		    } else if ((*p & 0xfe) == 0xfc) {
			me->utf_count = 5;
			me->utf_char = (c & 0x01);
		    } else {
			/*
			 * We got garbage, so ignore it.  - FM
			 *
			 * Empty the buffer from its first octet, so that the
			 * garbage octet stored above cannot be emitted later.
			 */
			me->utf_count = 0;
			me->utf_buf[0] = '\0';
			me->utf_buf_p = me->utf_buf;
		    }
		    /*
		     * Get the next byte.  - FM
		     */
		    continue;
		}
	    } else if (me->utf_count > 0) {
		/*
		 * Got an ASCII character when expecting UTF-8 multibytes, so
		 * ignore the buffered multibyte characters and fall through
		 * with the current ASCII character.  - FM
		 */
		me->utf_count = 0;
		me->utf_buf[0] = '\0';
		me->utf_buf_p = me->utf_buf;
		code = (UCode_t) c_unsign;
	    } else {
		/*
		 * Got a valid ASCII character, so fall through with it.  - FM
		 */
		code = (UCode_t) c_unsign;
	    }
	}
	/*
	 * Convert characters from non-UTF-8 charsets to Unicode (if
	 * appropriate).  - FM
	 */
	if (!(me->T.decode_utf8 &&
	      UCH(*p) > 127)) {
#ifdef NOTDEFINED
	    if (me->T.strip_raw_char_in)
		saved_char_in = c;
#endif /* NOTDEFINED */
	    if (me->T.trans_to_uni &&
		(TOASCII(code) >= LYlowest_eightbit[me->inUCLYhndl] ||	/* S/390 -- gil -- 0389 */
		 (code < ' ' && code != 0 &&
		  me->T.trans_C0_to_uni))) {
		/*
		 * Convert the octet to Unicode.  - FM
		 */
		code = (UCode_t) UCTransToUni(c, me->inUCLYhndl);
		if (code > 0) {
		    saved_char_in = c;
		    if (code < 256) {
			c = FROMASCII((char) code);
			c_unsign = UCH(c);
		    }
		}
	    } else if (code < 32 && code != 0 &&
		       me->T.trans_C0_to_uni) {
		/*
		 * Quote from SGML.c:
		 * "This else if may be too ugly to keep.  - KW"
		 */
		if (me->T.trans_from_uni &&
		    (((code = UCTransToUni(c, me->inUCLYhndl)) >= 32) ||
		     (me->T.transp &&
		      (code = UCTransToUni(c, me->inUCLYhndl)) > 0))) {
		    saved_char_in = c;
		    if (code < 256) {
			c = FROMASCII((char) code);
			c_unsign = UCH(c);
		    }
		} else {
		    uck = -1;
		    if (me->T.transp) {
			uck = UCTransCharStr(replace_buf, 60, c,
					     me->inUCLYhndl,
					     me->inUCLYhndl, NO);
		    }
		    if (!me->T.transp || uck < 0) {
			uck = UCTransCharStr(replace_buf, 60, c,
					     me->inUCLYhndl,
					     me->outUCLYhndl, YES);
		    }
		    if (uck == 0) {
			continue;
		    } else if (uck < 0) {
			me->utf_buf[0] = '\0';
		    } else {
			c = replace_buf[0];
			if (c && replace_buf[1]) {
			    HText_appendText(me->text, replace_buf);
			    continue;
			}
		    }
		    me->utf_buf[0] = '\0';
		    code = UCH(c);
		}		/*  Next line end of ugly stuff for C0.  - KW */
	    } else {
		me->utf_buf[0] = '\0';
		code = UCH(c);
	    }
	}
	/*
	 * At this point we have either code in Unicode (and c in latin1 if
	 * code is in the latin1 range), or code and c will have to be passed
	 * raw.
	 */

	/*
	 * If CJK mode is on, we'll assume the document matches the user's
	 * display character set, and if not, the user should toggle off
	 * raw/CJK mode to reload.  - FM
	 */
	if (IS_CJK_TTY) {
	    HText_appendCharacter(me->text, c);

#define PASSHICTRL (me->T.transp || \
		    code >= LYlowest_eightbit[me->inUCLYhndl])
#define PASS8859SPECL me->T.pass_160_173_raw
#define PASSHI8BIT (HTPassEightBitRaw || \
		    (me->T.do_8bitraw && !me->T.trans_from_uni))
	    /*
	     * If HTPassHighCtrlRaw is set (e.g., for KOI8-R) assume the
	     * document matches and pass 127-160 8-bit characters.  If it
	     * doesn't match, the user should toggle raw/CJK mode off.  - FM
	     */
	} else if (TOASCII(code) >= 127 && TOASCII(code) < 161 &&	/* S/390 -- gil -- 0427 */
		   PASSHICTRL && PASS8859SPECL) {
	    HText_appendCharacter(me->text, c);
	} else if (code == CH_SHY && PASS8859SPECL) {
	    HText_appendCharacter(me->text, c);
	    /*
	     * If neither HTPassHighCtrlRaw nor CJK is set, play it safe and
	     * treat 160 (nbsp) as an ASCII space (32).  - FM
	     */
	} else if (code == CH_NBSP) {
	    HText_appendCharacter(me->text, ' ');
	    /*
	     * If neither HTPassHighCtrlRaw nor CJK is set, play it safe and
	     * ignore 173 (shy).  - FM
	     * Now only ignore it for color style, which doesn't handle it
	     * anyway.  Otherwise pass it on as LY_SOFT_HYPHEN and let HText
	     * deal with it.  It should be either ignored, or displayed as a
	     * hyphen if it was indeed at the end of a line.  Well it should.
	     * - kw
	     */
	} else if (code == CH_SHY) {
#ifndef USE_COLOR_STYLE
	    HText_appendCharacter(me->text, LY_SOFT_HYPHEN);
#endif
	    continue;
	    /*
	     * If we get to here, pass the displayable ASCII characters.  - FM
	     *
	     * NOTE(review): "c" is a plain char, so where char is signed an
	     * 8-bit value is negative in the comparison against
	     * LYlowest_eightbit[] below; the result therefore depends on the
	     * platform's char signedness.  Left unchanged - confirm the
	     * intended behaviour before switching to an unsigned comparison.
	     */
	} else if ((code >= ' ' && TOASCII(code) < 127) ||
		   (PASSHI8BIT &&
		    c >= LYlowest_eightbit[me->outUCLYhndl]) ||
		   *p == '\n' || *p == '\t') {
	    HText_appendCharacter(me->text, c);
	    /*
	     * Use an ASCII space (32) for ensp, emsp or thinsp.  - FM
	     */
	} else if (code == 8194 || code == 8195 || code == 8201) {
	    HText_appendCharacter(me->text, ' ');
	    /*
	     * If we want the raw character, pass it now.  - FM
	     */
	} else if (me->T.use_raw_char_in && saved_char_in) {
	    HText_appendCharacter(me->text, saved_char_in);
/******************************************************************
 * I.  LATIN-1 OR UCS2 TO DISPLAY CHARSET
 ******************************************************************/
	} else if ((chk = (BOOL) (me->T.trans_from_uni && code >= 160)) &&
		   (uck = UCTransUniChar(code,
					 me->outUCLYhndl)) >= ' ' &&	/* S/390 -- gil -- 0464 */
		   uck < 256) {
	    CTRACE((tfp, "UCTransUniChar returned 0x%.2" PRI_UCode_t
		    ":'%c'.\n",
		    uck, FROMASCII(UCH(uck))));
	    HText_appendCharacter(me->text, ((char) (uck & 0xff)));
	} else if (chk &&
		   (uck == -4 ||
		    (me->T.repl_translated_C0 && uck > 0 && uck < ' ')) &&	/* S/390 -- gil -- 0481 */
	    /*
	     * Not found; look for replacement string.  The assignment is
	     * parenthesised so that uck receives the call's return value
	     * rather than the result of the comparison.
	     */
		   (uck = UCTransUniCharStr(replace_buf, 60, code,
					    me->outUCLYhndl, 0)) >= 0) {
	    /*
	     * No further tests for validity - assume that whoever defined
	     * replacement strings knew what she was doing.
	     */
	    HText_appendText(me->text, replace_buf);
	    /*
	     * If we get to here, and should have translated, translation has
	     * failed so far.
	     */
	} else if (chk && TOASCII(code) > 127 && me->T.output_utf8) {	/* S/390 -- gil -- 0498 */
	    /*
	     * We want UTF-8 output, so do it now.  - FM
	     */
	    if (*me->utf_buf) {
		HText_appendText(me->text, me->utf_buf);
		me->utf_buf[0] = '\0';
		me->utf_buf_p = me->utf_buf;
	    } else if (UCConvertUniToUtf8(code, replace_buf)) {
		HText_appendText(me->text, replace_buf);
	    } else {
		/*
		 * Out of luck, so use the UHHH notation (ugh).  - gil
		 */
		/* S/390 -- gil -- 0517 */
		sprintf(replace_buf, "U%.2lX", (unsigned long) TOASCII(code));
		HText_appendText(me->text, replace_buf);
	    }
#ifdef NOTDEFINED
	} else if (me->T.strip_raw_char_in &&
		   UCH(*p) >= 192 &&
		   UCH(*p) < 255) {
	    /*
	     * KOI special: strip high bit, gives (somewhat) readable ASCII.
	     */
	    HText_appendCharacter(me->text, (char) (*p & 0x7f));
#endif /* NOTDEFINED */
	    /*
	     * If we don't actually want the character, make it safe and output
	     * that now.  - FM
	     */
	} else if ((c_unsign > 0 &&
		    (int) c_unsign < LYlowest_eightbit[me->outUCLYhndl]) ||
		   (me->T.trans_from_uni && !HTPassEightBitRaw)) {
	    /*
	     * If we do not have the "7-bit approximations" as our output
	     * character set (in which case we did it already) seek a
	     * translation for that.  Otherwise, or if the translation fails,
	     * use UHHH notation.  - FM
	     */
	    if ((chk = (BOOL) (me->outUCLYhndl !=
			       UCGetLYhndl_byMIME("us-ascii"))) &&
		(uck = UCTransUniChar(code,
				      UCGetLYhndl_byMIME("us-ascii")))
		>= ' ' && TOASCII(uck) < 127) {		/* S/390 -- gil -- 0535 */
		/*
		 * Got an ASCII character (yippey).  - FM
		 */
		c = FROMASCII((char) uck);
		HText_appendCharacter(me->text, c);
	    } else if ((chk && uck == -4) &&
		       (uck = UCTransUniCharStr(replace_buf,
						60, code,
						UCGetLYhndl_byMIME("us-ascii"),
						0)) >= 0) {
		/*
		 * Got a replacement string (yippey).  - FM
		 */
		HText_appendText(me->text, replace_buf);
	    } else if (code == 8204 || code == 8205) {
		/*
		 * Ignore 8204 (zwnj) or 8205 (zwj), if we get to here.  - FM
		 */
		CTRACE((tfp, "HTPlain_write: Ignoring '%" PRI_UCode_t "'.\n", code));
	    } else if (code == 8206 || code == 8207) {
		/*
		 * Ignore 8206 (lrm) or 8207 (rlm), if we get to here.  - FM
		 */
		CTRACE((tfp, "HTPlain_write: Ignoring '%" PRI_UCode_t "'.\n", code));
	    } else {
		/*
		 * Out of luck, so use the UHHH notation (ugh).  - FM
		 */
		/* do not print UHHH for now
		   sprintf(replace_buf, "U%.2lX", code);
		   HText_appendText(me->text, replace_buf);
		 */
	    }
	    /*
	     * If we get to here and have a monobyte character, pass it.  - FM
	     */
	} else if (c_unsign != 0 && c_unsign < 256) {
	    HText_appendCharacter(me->text, c);
	}
#endif /* REMOVE_CR_ONLY */
    }
}
631
/*	Free a plain-text stream object
 *	-------------------------------
 *
 * Note that only the stream object itself is freed; the HText it has been
 * appending to is not, as it takes on an existence of its own unless
 * explicitly freed.
 */
/*
 * Finish the stream and release it.
 *
 * me	stream to free; its HText is left alone
 *
 * An underscore still held back by the backspace/underscore state machine
 * is written out first.  The pending state is then cleared: it lives in a
 * file-scope variable, so without the reset it would carry over into the
 * next stream created by HTPlainPresent().
 */
static void HTPlain_free(HTStream *me)
{
    if (HTPlain_bs_pending >= 2)
	HText_appendCharacter(me->text, '_');
    HTPlain_bs_pending = 0;
    FREE(me);
}
644
/*	Abort writing
 *
 * Handled exactly like a normal end of stream: see HTPlain_free().
 */
/*
 * Abort handler of the stream class.  The error code is not examined;
 * the stream is simply finished and released through HTPlain_free().
 */
static void HTPlain_abort(HTStream *me,
			  HTError e GCC_UNUSED)
{
    HTPlain_free(me);
}
651
652 /* Structured Object Class
653 * -----------------------
654 */
655 static const HTStreamClass HTPlain =
656 {
657 "PlainPresenter",
658 HTPlain_free,
659 HTPlain_abort,
660 HTPlain_put_character, HTPlain_put_string, HTPlain_write,
661 };
662
663 /* New object
664 * ----------
665 */
/*
 * Create a plain-text presenter stream for an anchor.
 *
 * pres		unused
 * anchor	anchor whose charset stages are consulted and for which a new
 *		HText is created
 * sink		unused
 *
 * Returns the new stream.  Allocation failure is reported through
 * outofmem().
 *
 * Both file-scope scanner variables are reset here, so that state left
 * behind by an earlier stream cannot affect this one.
 */
HTStream *HTPlainPresent(HTPresentation *pres GCC_UNUSED, HTParentAnchor *anchor,
			 HTStream *sink GCC_UNUSED)
{
    /* Zero-filled, so every member starts from a defined value. */
    HTStream *me = (HTStream *) calloc((size_t) 1, sizeof(*me));

    if (me == NULL)
	outofmem(__FILE__, "HTPlain_new");

    assert(me != NULL);

    me->isa = &HTPlain;

    HTPlain_lastraw = -1;
    HTPlain_bs_pending = 0;

    me->utf_count = 0;
    me->utf_char = 0;
    me->utf_buf[0] = me->utf_buf[6] = me->utf_buf[7] = '\0';
    me->utf_buf_p = me->utf_buf;
    me->outUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);
    me->inUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER);
    /* Resolves handles that are still negative and sets inUCI/outUCI. */
    HTPlain_getChartransInfo(me, anchor);
    UCSetTransParams(&me->T,
		     me->inUCLYhndl, me->inUCI,
		     me->outUCLYhndl,
		     HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT));

    me->text = HText_new(anchor);
    HText_setStyle(me->text, LYstyles(HTML_XMP));
    HText_beginAppend(me->text);

    return (HTStream *) me;
}
699