1 /*
2  * $LynxId: HTPlain.c,v 1.51 2013/05/02 11:09:30 tom Exp $
3  *
4  *		Plain text object		HTWrite.c
5  *		=================
6  *
7  *	This version of the stream object just writes to a socket.
8  *	The socket is assumed open and left open.
9  *
10  *	Bugs:
11  *		strings written must be less than buffer size.
12  */
13 
14 #define HTSTREAM_INTERNAL 1
15 
16 #include <HTUtils.h>
17 #include <LYCharVals.h>		/* S/390 -- gil -- 0288 */
18 
19 #include <HTPlain.h>
20 
21 #include <HTChunk.h>
22 #include <HText.h>
23 #include <HTStyle.h>
24 #define Lynx_HTML_Handler
25 #include <HTML.h>		/* styles[] */
26 
/*
 * Nominal buffer size in bytes.  The macro body must not end in a semicolon,
 * otherwise it cannot be used inside an expression or as an array bound.
 */
#define BUFFER_SIZE 4096	/* Tradeoff */
28 
29 #include <HTMLDTD.h>
30 #include <HTCJK.h>
31 #include <UCMap.h>
32 #include <UCDefs.h>
33 #include <UCAux.h>
34 
35 #include <LYCharSets.h>
36 #include <LYStrings.h>
37 #include <LYLeaks.h>
38 
/*
 * State shared by all calls in this file (file scope, so it persists between
 * calls and between stream objects).
 *
 * HTPlain_lastraw is the last raw input octet that was processed, or -1 when
 * there is none (at stream creation, and after the LF of a CRLF pair has been
 * swallowed).  It is used to recognize CRLF and overstrike sequences.
 */
static int HTPlain_lastraw = -1;
/*
 * Progress of a backspace/underscore overstrike sequence; 0 means that
 * nothing is pending.
 */
static int HTPlain_bs_pending = 0;	/* 1:bs 2:underline 3:underline+bs - kw */
41 
/*		Plain text stream object
 *		------------------------
 *
 *	Per-stream state of the plain text presenter created by
 *	HTPlainPresent().
 */
struct _HTStream {
    const HTStreamClass *isa;	/* method table; set to &HTPlain */
    HText *text;		/* text object that receives the output */
    /*
     * The node_anchor UCInfo and handle for the input (PARSER) stage.  - FM
     */
    LYUCcharset *inUCI;
    int inUCLYhndl;
    /*
     * The node_anchor UCInfo and handle for the output (HTEXT) stage.  - FM
     */
    LYUCcharset *outUCI;
    int outUCLYhndl;
    /*
     * Counter, value, buffer and pointer for UTF-8 handling.  - FM
     *
     * utf_count is the number of continuation octets still expected,
     * utf_char accumulates the decoded value, utf_buf collects the raw octets
     * of the sequence (NUL-terminated once complete) and utf_buf_p is the
     * position in utf_buf where the next octet is stored.
     */
    char utf_count;
    UCode_t utf_char;
    char utf_buf[8];
    char *utf_buf_p;
    /*
     * The charset transformation structure.  - FM
     * Filled in by UCSetTransParams() when the stream is created.
     */
    UCTransParams T;
};
70 
/*
 * Scratch buffer for replacement strings.  The translation helpers in this
 * file are called with a limit of 60, which is below the size of the array.
 */
static char replace_buf[64];	/* buffer for replacement strings */
72 
/*
 * Make sure the stream has valid charset handles for both translation stages
 * and cache the matching UCInfo pointers from the anchor.
 *
 * me      stream whose inUCLYhndl/outUCLYhndl/inUCI/outUCI are filled in;
 *         a negative handle on entry means "not yet known".
 * anchor  parent anchor that owns the per-stage charset information.
 */
static void HTPlain_getChartransInfo(HTStream *me, HTParentAnchor *anchor)
{
    /*
     * Input side: if no handle is known, copy the MIME stage information to
     * the PARSER stage and read the handle back from there.
     */
    if (me->inUCLYhndl < 0) {
	HTAnchor_copyUCInfoStage(anchor, UCT_STAGE_PARSER, UCT_STAGE_MIME,
				 UCT_SETBY_PARSER);
	me->inUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER);
    }

    /*
     * Output side: if no handle is known, ask the anchor's HTEXT stage and
     * fall back to the current display character set.
     */
    if (me->outUCLYhndl < 0) {
	int htext_hndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);

	if (htext_hndl < 0) {
	    htext_hndl = current_char_set;
	    HTAnchor_setUCInfoStage(anchor, htext_hndl,
				    UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT);
	}
	/*
	 * NOTE(review): in the fallback case this repeats the call just made
	 * above with the same arguments - presumably harmless; kept as is.
	 */
	HTAnchor_setUCInfoStage(anchor, htext_hndl,
				UCT_STAGE_HTEXT, UCT_SETBY_DEFAULT);
	me->outUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);
    }

    /*
     * Always refresh the cached UCInfo pointers.
     */
    me->inUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_PARSER);
    me->outUCI = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT);
}
95 
96 /*	Write the buffer out to the socket
97  *	----------------------------------
98  */
99 
100 /*_________________________________________________________________________
101  *
102  *			A C T I O N	R O U T I N E S
103  */
104 
105 static void HTPlain_write(HTStream *me, const char *s,
106 			  int l);
107 
108 /*	Character handling
109  *	------------------
110  */
HTPlain_put_character(HTStream * me,int c)111 static void HTPlain_put_character(HTStream *me, int c)
112 {
113 #ifdef REMOVE_CR_ONLY
114     /*
115      * Throw away \rs.
116      */
117     if (c != '\r') {
118 	HText_appendCharacter(me->text, c);
119     }
120 #else
121     /*
122      * See HTPlain_write() for explanations of the following code (we've been
123      * called via HTPlain_put_string() to do for each character of a terminated
124      * string what HTPlain_write() does via a while loop for each character in
125      * a stream of given length).  - FM
126      */
127     if ((HTPlain_lastraw == '\r') && c == '\n') {
128 	HTPlain_lastraw = -1;
129 	return;
130     }
131     if (c == '\b' || c == '_' || HTPlain_bs_pending) {
132 	char temp[1];
133 
134 	temp[0] = (char) c;
135 	HTPlain_write(me, temp, 1);
136 	return;
137     }
138     HTPlain_lastraw = UCH(c);
139     if (c == '\r') {
140 	HText_appendCharacter(me->text, '\n');
141     } else if (TOASCII(UCH(c)) >= 127) {	/* S/390 -- gil -- 0305 */
142 	char temp[1];
143 
144 	temp[0] = (char) c;
145 	/*
146 	 * For now, don't repeat everything here that has been done below - KW
147 	 */
148 	HTPlain_write(me, temp, 1);
149     } else if (IS_CJK_TTY) {
150 	HText_appendCharacter(me->text, c);
151     } else if (TOASCII(UCH(c)) >= 127 && TOASCII(UCH(c)) < 161 &&
152 	       HTPassHighCtrlRaw) {
153 	HText_appendCharacter(me->text, c);
154 #if CH_NBSP < 127
155     } else if (UCH(c) == CH_NBSP) {	/* S/390 -- gil -- 0341 */
156 	HText_appendCharacter(me->text, ' ');
157 #endif
158 #if CH_SHY < 127
159     } else if (UCH(c) == CH_SHY) {
160 	return;
161 #endif
162     } else if ((UCH(c) >= ' ' && TOASCII(UCH(c)) < 127) ||
163 	       c == '\n' || c == '\t') {
164 	HText_appendCharacter(me->text, c);
165     }
166 #endif /* REMOVE_CR_ONLY */
167 }
168 
169 /*	String handling
170  *	---------------
171  *
172  */
HTPlain_put_string(HTStream * me,const char * s)173 static void HTPlain_put_string(HTStream *me, const char *s)
174 {
175 #ifdef REMOVE_CR_ONLY
176     HText_appendText(me->text, s);
177 #else
178     const char *p;
179 
180     if (s == NULL)
181 	return;
182     for (p = s; *p; p++) {
183 	HTPlain_put_character(me, *p);
184     }
185 #endif /* REMOVE_CR_ONLY */
186 }
187 
188 /*
189  *	Entry function for displayed text/plain and WWW_SOURCE strings. - FM
190  *	---------------------------------------------------------------
191  */
HTPlain_write(HTStream * me,const char * s,int l)192 static void HTPlain_write(HTStream *me, const char *s, int l)
193 {
194     const char *p;
195     const char *e = s + l;
196     char c;
197     unsigned c_unsign;
198     BOOL chk;
199     UCode_t code, uck = -1;
200     char saved_char_in = '\0';
201 
202     for (p = s; p < e; p++) {
203 #ifdef REMOVE_CR_ONLY
204 	/*
205 	 * Append the whole string, but remove any \rs.  - FM
206 	 */
207 	if (*p != '\r') {
208 	    HText_appendCharacter(me->text, *p);
209 	}
210 #else
211 	if (*p == '\b') {
212 	    if (HTPlain_lastraw >= UCH(' ') &&
213 		HTPlain_lastraw != '\r' && HTPlain_lastraw != '\n') {
214 		if (!HTPlain_bs_pending) {
215 		    HTPlain_bs_pending = 1;
216 		    continue;
217 		} else if (HTPlain_bs_pending == 2) {
218 		    HTPlain_bs_pending = 3;
219 		    continue;
220 		}
221 	    }
222 	    if (HTPlain_bs_pending >= 2)
223 		HText_appendCharacter(me->text, '_');
224 	    HTPlain_bs_pending = 0;
225 	} else if (*p == '_') {
226 	    if (!HTPlain_bs_pending) {
227 		HTPlain_bs_pending = 2;
228 		HTPlain_lastraw = UCH(*p);
229 		continue;
230 #if 0
231 	    } else if (HTPlain_bs_pending != 2) {
232 		HTPlain_bs_pending--;	/* 1 -> 0, 3 -> 2 */
233 		HTPlain_lastraw = UCH(*p);
234 		continue;
235 #endif
236 	    }
237 	}
238 
239 	/*
240 	 * Try to handle lone LFs, CRLFs and lone CRs as newline, and to deal
241 	 * with control, ASCII, and 8-bit characters based on best guesses of
242 	 * what's appropriate.  - FM
243 	 */
244 	if ((HTPlain_lastraw == '\r') && *p == '\n') {
245 	    HTPlain_lastraw = -1;
246 	    continue;
247 	}
248 
249 	if (HTPlain_bs_pending &&
250 	    !(UCH(*p) >= ' ' && *p != '\r' && *p != '\n' &&
251 	      (HTPlain_lastraw == UCH(*p) ||
252 	       HTPlain_lastraw == UCH('_') ||
253 	       *p == '_'))) {
254 	    if (HTPlain_bs_pending >= 2)
255 		HText_appendCharacter(me->text, '_');
256 	    HTPlain_bs_pending = 0;
257 	} else if (HTPlain_bs_pending == 1) {
258 	    HTPlain_bs_pending = 0;
259 	    continue;		/* ignore last two of "X\bX" or "X\b_" - kw */
260 	} else if (HTPlain_bs_pending == 3) {
261 	    if (*p == '_') {
262 		HTPlain_bs_pending = 2;
263 		continue;	/* ignore last two of "_\b_" - kw */
264 	    } else {
265 		HTPlain_bs_pending = 0;
266 		/* ignore first two of "_\bX" - kw */
267 	    }
268 	} else if (HTPlain_bs_pending == 2) {
269 	    HText_appendCharacter(me->text, '_');
270 	    if (*p == '_')
271 		continue;	/* keep second of "__" pending - kw */
272 	    HTPlain_bs_pending = 0;
273 	} else {
274 	    HTPlain_bs_pending = 0;
275 	}
276 	HTPlain_lastraw = UCH(*p);
277 	if (*p == '\r') {
278 	    HText_appendCharacter(me->text, '\n');
279 	    continue;
280 	}
281 	/*
282 	 * Make sure the character is handled as Unicode whenever that's
283 	 * appropriate.  - FM
284 	 */
285 	c = *p;
286 	c_unsign = UCH(c);
287 	code = (UCode_t) c_unsign;
288 	saved_char_in = '\0';
289 	/*
290 	 * Combine any UTF-8 multibytes into Unicode to check for special
291 	 * characters.  - FM
292 	 */
293 	if (me->T.decode_utf8) {
294 	    /*
295 	     * Combine UTF-8 into Unicode.  Incomplete characters silently
296 	     * ignored.  from Linux kernel's console.c - KW
297 	     */
298 	    if (TOASCII(c_unsign) > 127) {	/* S/390 -- gil -- 0371 */
299 		/*
300 		 * We have an octet from a multibyte character.  - FM
301 		 */
302 		if (me->utf_count > 0 && (c & 0xc0) == 0x80) {
303 		    /*
304 		     * Adjust the UCode_t value, add the octet to the buffer,
305 		     * and decrement the byte count.  - FM
306 		     */
307 		    me->utf_char = (me->utf_char << 6) | (c & 0x3f);
308 		    me->utf_count--;
309 		    *(me->utf_buf_p) = c;
310 		    (me->utf_buf_p)++;
311 		    if (me->utf_count == 0) {
312 			/*
313 			 * Got a complete multibyte character.
314 			 */
315 			*(me->utf_buf_p) = '\0';
316 			code = me->utf_char;
317 			if (code > 0 && code < 256) {
318 			    c = FROMASCII((char) code);
319 			    c_unsign = UCH(c);
320 			}
321 		    } else {
322 			/*
323 			 * Get the next byte.  - FM
324 			 */
325 			continue;
326 		    }
327 		} else {
328 		    /*
329 		     * Start handling a new multibyte character.  - FM
330 		     */
331 		    me->utf_buf_p[0] = c;
332 		    me->utf_buf_p = &me->utf_buf[1];
333 		    if ((*p & 0xe0) == 0xc0) {
334 			me->utf_count = 1;
335 			me->utf_char = (c & 0x1f);
336 		    } else if ((*p & 0xf0) == 0xe0) {
337 			me->utf_count = 2;
338 			me->utf_char = (c & 0x0f);
339 		    } else if ((*p & 0xf8) == 0xf0) {
340 			me->utf_count = 3;
341 			me->utf_char = (c & 0x07);
342 		    } else if ((*p & 0xfc) == 0xf8) {
343 			me->utf_count = 4;
344 			me->utf_char = (c & 0x03);
345 		    } else if ((*p & 0xfe) == 0xfc) {
346 			me->utf_count = 5;
347 			me->utf_char = (c & 0x01);
348 		    } else {
349 			/*
350 			 * We got garbage, so ignore it.  - FM
351 			 */
352 			me->utf_count = 0;
353 			me->utf_buf_p[0] = '\0';
354 			me->utf_buf_p = me->utf_buf;
355 		    }
356 		    /*
357 		     * Get the next byte.  - FM
358 		     */
359 		    continue;
360 		}
361 	    } else if (me->utf_count > 0) {
362 		/*
363 		 * Got an ASCII character when expecting UTF-8 multibytes, so
364 		 * ignore the buffered multibye characters and fall through
365 		 * with the current ASCII character.  - FM
366 		 */
367 		me->utf_count = 0;
368 		me->utf_buf[0] = '\0';
369 		me->utf_buf_p = me->utf_buf;
370 		code = (UCode_t) c_unsign;
371 	    } else {
372 		/*
373 		 * Got a valid ASCII character, so fall through with it.  - FM
374 		 */
375 		code = (UCode_t) c_unsign;
376 	    }
377 	}
378 	/*
379 	 * Convert characters from non-UTF-8 charsets to Unicode (if
380 	 * appropriate).  - FM
381 	 */
382 	if (!(me->T.decode_utf8 &&
383 	      UCH(*p) > 127)) {
384 #ifdef NOTDEFINED
385 	    if (me->T.strip_raw_char_in)
386 		saved_char_in = c;
387 #endif /* NOTDEFINED */
388 	    if (me->T.trans_to_uni &&
389 		(TOASCII(code) >= LYlowest_eightbit[me->inUCLYhndl] ||	/* S/390 -- gil -- 0389 */
390 		 (code < ' ' && code != 0 &&
391 		  me->T.trans_C0_to_uni))) {
392 		/*
393 		 * Convert the octet to Unicode.  - FM
394 		 */
395 		code = (UCode_t) UCTransToUni(c, me->inUCLYhndl);
396 		if (code > 0) {
397 		    saved_char_in = c;
398 		    if (code < 256) {
399 			c = FROMASCII((char) code);
400 			c_unsign = UCH(c);
401 		    }
402 		}
403 	    } else if (code < 32 && code != 0 &&
404 		       me->T.trans_C0_to_uni) {
405 		/*
406 		 * Quote from SGML.c:
407 		 * "This else if may be too ugly to keep.  - KW"
408 		 */
409 		if (me->T.trans_from_uni &&
410 		    (((code = UCTransToUni(c, me->inUCLYhndl)) >= 32) ||
411 		     (me->T.transp &&
412 		      (code = UCTransToUni(c, me->inUCLYhndl)) > 0))) {
413 		    saved_char_in = c;
414 		    if (code < 256) {
415 			c = FROMASCII((char) code);
416 			c_unsign = UCH(c);
417 		    }
418 		} else {
419 		    uck = -1;
420 		    if (me->T.transp) {
421 			uck = UCTransCharStr(replace_buf, 60, c,
422 					     me->inUCLYhndl,
423 					     me->inUCLYhndl, NO);
424 		    }
425 		    if (!me->T.transp || uck < 0) {
426 			uck = UCTransCharStr(replace_buf, 60, c,
427 					     me->inUCLYhndl,
428 					     me->outUCLYhndl, YES);
429 		    }
430 		    if (uck == 0) {
431 			continue;
432 		    } else if (uck < 0) {
433 			me->utf_buf[0] = '\0';
434 		    } else {
435 			c = replace_buf[0];
436 			if (c && replace_buf[1]) {
437 			    HText_appendText(me->text, replace_buf);
438 			    continue;
439 			}
440 		    }
441 		    me->utf_buf[0] = '\0';
442 		    code = UCH(c);
443 		}		/*  Next line end of ugly stuff for C0. - KW */
444 	    } else {
445 		me->utf_buf[0] = '\0';
446 		code = UCH(c);
447 	    }
448 	}
449 	/*
450 	 * At this point we have either code in Unicode (and c in latin1 if
451 	 * code is in the latin1 range), or code and c will have to be passed
452 	 * raw.
453 	 */
454 
455 	/*
456 	 * If CJK mode is on, we'll assume the document matches the user's
457 	 * display character set, and if not, the user should toggle off
458 	 * raw/CJK mode to reload.  - FM
459 	 */
460 	if (IS_CJK_TTY) {
461 	    HText_appendCharacter(me->text, c);
462 
463 #define PASSHICTRL (me->T.transp || \
464 		    code >= LYlowest_eightbit[me->inUCLYhndl])
465 #define PASS8859SPECL me->T.pass_160_173_raw
466 #define PASSHI8BIT (HTPassEightBitRaw || \
467 		    (me->T.do_8bitraw && !me->T.trans_from_uni))
468 	    /*
469 	     * If HTPassHighCtrlRaw is set (e.g., for KOI8-R) assume the
470 	     * document matches and pass 127-160 8-bit characters.  If it
471 	     * doesn't match, the user should toggle raw/CJK mode off.  - FM
472 	     */
473 	} else if (TOASCII(code) >= 127 && TOASCII(code) < 161 &&	/* S/390 -- gil -- 0427 */
474 		   PASSHICTRL && PASS8859SPECL) {
475 	    HText_appendCharacter(me->text, c);
476 	} else if (code == CH_SHY && PASS8859SPECL) {
477 	    HText_appendCharacter(me->text, c);
478 	    /*
479 	     * If neither HTPassHighCtrlRaw nor CJK is set, play it safe and
480 	     * treat 160 (nbsp) as an ASCII space (32).  - FM
481 	     */
482 	} else if (code == CH_NBSP) {
483 	    HText_appendCharacter(me->text, ' ');
484 	    /*
485 	     * If neither HTPassHighCtrlRaw nor CJK is set, play it safe and
486 	     * ignore 173 (shy).  - FM
487 	     * Now only ignore it for color style, which doesn't handle it
488 	     * anyway.  Otherwise pass it on as LY_SOFT_HYPHEN and let HText
489 	     * deal with it.  It should be either ignored, or displayed as a
490 	     * hyphen if it was indeed at the end of a line.  Well it should.
491 	     * - kw
492 	     */
493 	} else if (code == CH_SHY) {
494 #ifndef USE_COLOR_STYLE
495 	    HText_appendCharacter(me->text, LY_SOFT_HYPHEN);
496 #endif
497 	    continue;
498 	    /*
499 	     * If we get to here, pass the displayable ASCII characters.  - FM
500 	     */
501 	} else if ((code >= ' ' && TOASCII(code) < 127) ||
502 		   (PASSHI8BIT &&
503 		    c >= LYlowest_eightbit[me->outUCLYhndl]) ||
504 		   *p == '\n' || *p == '\t') {
505 	    HText_appendCharacter(me->text, c);
506 	    /*
507 	     * Use an ASCII space (32) for ensp, emsp or thinsp.  - FM
508 	     */
509 	} else if (code == 8194 || code == 8195 || code == 8201) {
510 	    HText_appendCharacter(me->text, ' ');
511 	    /*
512 	     * If we want the raw character, pass it now.  - FM
513 	     */
514 	} else if (me->T.use_raw_char_in && saved_char_in) {
515 	    HText_appendCharacter(me->text, saved_char_in);
516 /******************************************************************
517  * I.  LATIN-1 OR UCS2 TO DISPLAY CHARSET
518  ******************************************************************/
519 	} else if ((chk = (BOOL) (me->T.trans_from_uni && code >= 160)) &&
520 		   (uck = UCTransUniChar(code,
521 					 me->outUCLYhndl)) >= ' ' &&	/* S/390 -- gil -- 0464 */
522 		   uck < 256) {
523 	    CTRACE((tfp, "UCTransUniChar returned 0x%.2" PRI_UCode_t
524 		    ":'%c'.\n",
525 		    uck, FROMASCII(UCH(uck))));
526 	    HText_appendCharacter(me->text, ((char) (uck & 0xff)));
527 	} else if (chk &&
528 		   (uck == -4 ||
529 		    (me->T.repl_translated_C0 && uck > 0 && uck < ' ')) &&	/* S/390 -- gil -- 0481 */
530 	    /*
531 	     * Not found; look for replacement string.
532 	     */
533 		   (uck = UCTransUniCharStr(replace_buf, 60, code,
534 					    me->outUCLYhndl, 0) >= 0)) {
535 	    /*
536 	     * No further tests for valididy - assume that whoever defined
537 	     * replacement strings knew what she was doing.
538 	     */
539 	    HText_appendText(me->text, replace_buf);
540 	    /*
541 	     * If we get to here, and should have translated, translation has
542 	     * failed so far.
543 	     */
544 	} else if (chk && TOASCII(code) > 127 && me->T.output_utf8) {	/* S/390 -- gil -- 0498 */
545 	    /*
546 	     * We want UTF-8 output, so do it now.  - FM
547 	     */
548 	    if (*me->utf_buf) {
549 		HText_appendText(me->text, me->utf_buf);
550 		me->utf_buf[0] = '\0';
551 		me->utf_buf_p = me->utf_buf;
552 	    } else if (UCConvertUniToUtf8(code, replace_buf)) {
553 		HText_appendText(me->text, replace_buf);
554 	    } else {
555 		/*
556 		 * Out of luck, so use the UHHH notation (ugh).  - gil
557 		 */
558 		/* S/390 -- gil -- 0517 */
559 		sprintf(replace_buf, "U%.2lX", (unsigned long) TOASCII(code));
560 		HText_appendText(me->text, replace_buf);
561 	    }
562 #ifdef NOTDEFINED
563 	} else if (me->T.strip_raw_char_in &&
564 		   UCH(*p) >= 192 &&
565 		   UCH(*p) < 255) {
566 	    /*
567 	     * KOI special:  strip high bit, gives (somewhat) readable ASCII.
568 	     */
569 	    HText_appendCharacter(me->text, (char) (*p & 0x7f));
570 #endif /* NOTDEFINED */
571 	    /*
572 	     * If we don't actually want the character, make it safe and output
573 	     * that now.  - FM
574 	     */
575 	} else if ((c_unsign > 0 &&
576 		    (int) c_unsign < LYlowest_eightbit[me->outUCLYhndl]) ||
577 		   (me->T.trans_from_uni && !HTPassEightBitRaw)) {
578 	    /*
579 	     * If we do not have the "7-bit approximations" as our output
580 	     * character set (in which case we did it already) seek a
581 	     * translation for that.  Otherwise, or if the translation fails,
582 	     * use UHHH notation.  - FM
583 	     */
584 	    if ((chk = (BOOL) (me->outUCLYhndl !=
585 			       UCGetLYhndl_byMIME("us-ascii"))) &&
586 		(uck = UCTransUniChar(code,
587 				      UCGetLYhndl_byMIME("us-ascii")))
588 		>= ' ' && TOASCII(uck) < 127) {		/* S/390 -- gil -- 0535 */
589 		/*
590 		 * Got an ASCII character (yippey).  - FM
591 		 */
592 		c = FROMASCII((char) uck);
593 		HText_appendCharacter(me->text, c);
594 	    } else if ((chk && uck == -4) &&
595 		       (uck = UCTransUniCharStr(replace_buf,
596 						60, code,
597 						UCGetLYhndl_byMIME("us-ascii"),
598 						0) >= 0)) {
599 		/*
600 		 * Got a repacement string (yippey).  - FM
601 		 */
602 		HText_appendText(me->text, replace_buf);
603 	    } else if (code == 8204 || code == 8205) {
604 		/*
605 		 * Ignore 8204 (zwnj) or 8205 (zwj), if we get to here.  - FM
606 		 */
607 		CTRACE((tfp, "HTPlain_write: Ignoring '%" PRI_UCode_t "'.\n", code));
608 	    } else if (code == 8206 || code == 8207) {
609 		/*
610 		 * Ignore 8206 (lrm) or 8207 (rlm), if we get to here.  - FM
611 		 */
612 		CTRACE((tfp, "HTPlain_write: Ignoring '%" PRI_UCode_t "'.\n", code));
613 	    } else {
614 		/*
615 		 * Out of luck, so use the UHHH notation (ugh).  - FM
616 		 */
617 		/* do not print UHHH for now
618 		   sprintf(replace_buf, "U%.2lX", code);
619 		   HText_appendText(me->text, replace_buf);
620 		 */
621 	    }
622 	    /*
623 	     * If we get to here and have a monobyte character, pass it.  - FM
624 	     */
625 	} else if (c_unsign != 0 && c_unsign < 256) {
626 	    HText_appendCharacter(me->text, c);
627 	}
628 #endif /* REMOVE_CR_ONLY */
629     }
630 }
631 
632 /*	Free an HTML object
633  *	-------------------
634  *
635  *	Note that the SGML parsing context is freed, but the created object is
636  *	not, as it takes on an existence of its own unless explicitly freed.
637  */
HTPlain_free(HTStream * me)638 static void HTPlain_free(HTStream *me)
639 {
640     if (HTPlain_bs_pending >= 2)
641 	HText_appendCharacter(me->text, '_');
642     FREE(me);
643 }
644 
/*	End writing
 *
 *	Aborting a plain text stream is handled exactly like freeing it; the
 *	error code e is not used.
 */
static void HTPlain_abort(HTStream *me, HTError e GCC_UNUSED)
{
    HTPlain_free(me);
}
651 
/*		Structured Object Class
 *		-----------------------
 *
 *	Method table of the plain text presenter.  The initializers are
 *	positional, so their order has to match the declaration of
 *	HTStreamClass (which is not part of this file).
 */
static const HTStreamClass HTPlain =
{
    "PlainPresenter",
    HTPlain_free,
    HTPlain_abort,
    HTPlain_put_character, HTPlain_put_string, HTPlain_write,
};
662 
663 /*		New object
664  *		----------
665  */
HTPlainPresent(HTPresentation * pres GCC_UNUSED,HTParentAnchor * anchor,HTStream * sink GCC_UNUSED)666 HTStream *HTPlainPresent(HTPresentation *pres GCC_UNUSED, HTParentAnchor *anchor,
667 			 HTStream *sink GCC_UNUSED)
668 {
669 
670     HTStream *me = (HTStream *) malloc(sizeof(*me));
671 
672     if (me == NULL)
673 	outofmem(__FILE__, "HTPlain_new");
674 
675     assert(me != NULL);
676 
677     me->isa = &HTPlain;
678 
679     HTPlain_lastraw = -1;
680 
681     me->utf_count = 0;
682     me->utf_char = 0;
683     me->utf_buf[0] = me->utf_buf[6] = me->utf_buf[7] = '\0';
684     me->utf_buf_p = me->utf_buf;
685     me->outUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_HTEXT);
686     me->inUCLYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER);
687     HTPlain_getChartransInfo(me, anchor);
688     UCSetTransParams(&me->T,
689 		     me->inUCLYhndl, me->inUCI,
690 		     me->outUCLYhndl,
691 		     HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT));
692 
693     me->text = HText_new(anchor);
694     HText_setStyle(me->text, LYstyles(HTML_XMP));
695     HText_beginAppend(me->text);
696 
697     return (HTStream *) me;
698 }
699