1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54 package org.w3c.tidy;
55
56 import java.io.IOException;
57 import java.io.OutputStream;
58
59 import org.w3c.tidy.EncodingUtils.PutBytes;
60
61
62 /***
63 * Output implementation. This implementation is from the c version of tidy and it doesn't take advantage of java
64 * writers.
65 * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
66 * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
67 * @author Fabrizio Giustina
68 * @version $Revision: 1.16 $ ($Author: fgiust $)
69 */
70 public class OutImpl implements Out
71 {
72
73 /***
74 * output encoding.
75 */
76 private int encoding;
77
78 /***
79 * actual state for ISO 2022.
80 */
81 private int state;
82
83 /***
84 * output stream.
85 */
86 private OutputStream out;
87
88 /***
89 * putter callback.
90 */
91 private PutBytes putBytes;
92
93 /***
94 * newline bytes.
95 */
96 private byte[] newline;
97
98 /***
99 * Constructor.
100 * @param configuration actual configuration instance (needed for newline configuration)
101 * @param encoding encoding constant
102 * @param out output stream
103 */
104 public OutImpl(Configuration configuration, int encoding, OutputStream out)
105 {
106 this.encoding = encoding;
107 this.state = EncodingUtils.FSM_ASCII;
108 this.out = out;
109
110
111 this.newline = new byte[configuration.newline.length];
112 for (int j = 0; j < configuration.newline.length; j++)
113 {
114 this.newline[j] = (byte) configuration.newline[j];
115 }
116
117 this.putBytes = new PutBytes()
118 {
119
120 private OutImpl impl;
121
122 PutBytes setOut(OutImpl out)
123 {
124 this.impl = out;
125 return this;
126 }
127
128 public void doPut(byte[] buf, int[] count)
129 {
130 impl.outcUTF8Bytes(buf, count);
131 }
132 }
133 .setOut(this);
134 }
135
136 /***
137 * output UTF-8 bytes to output stream.
138 * @param buf array of bytes
139 * @param count number of bytes in buf to write
140 */
141 void outcUTF8Bytes(byte[] buf, int[] count)
142 {
143 try
144 {
145 for (int i = 0; i < count[0]; i++)
146 {
147 out.write(buf[i]);
148 }
149 }
150 catch (IOException e)
151 {
152 System.err.println("OutImpl.outcUTF8Bytes: " + e.toString());
153 }
154 }
155
156 /***
157 * .
158 * @see org.w3c.tidy.Out#outc(byte)
159 */
160 public void outc(byte c)
161 {
162 outc(c & 0xFF);
163 }
164
165 /***
166 * @see org.w3c.tidy.Out#outc(int)
167 */
168 public void outc(int c)
169 {
170 int ch;
171
172 try
173 {
174
175 if (this.encoding == Configuration.MACROMAN)
176 {
177 if (c < 128)
178 {
179 out.write(c);
180 }
181 else
182 {
183 int i;
184
185 for (i = 128; i < 256; i++)
186 {
187 if (EncodingUtils.decodeMacRoman(i - 128) == c)
188 {
189 out.write(i);
190 break;
191 }
192 }
193 }
194 }
195 else
196
197 if (this.encoding == Configuration.WIN1252)
198 {
199 if (c < 128 || (c > 159 && c < 256))
200 {
201 out.write(c);
202 }
203 else
204 {
205 int i;
206
207 for (i = 128; i < 160; i++)
208 {
209 if (EncodingUtils.decodeWin1252(i - 128) == c)
210 {
211 out.write(i);
212 break;
213 }
214 }
215 }
216 }
217 else if (this.encoding == Configuration.UTF8)
218 {
219 int[] count = new int[]{0};
220
221 EncodingUtils.encodeCharToUTF8Bytes(c, null, this.putBytes, count);
222 if (count[0] <= 0)
223 {
224
225
226 out.write(0xEF);
227 out.write(0xBF);
228 out.write(0xBF);
229 }
230 }
231 else if (this.encoding == Configuration.ISO2022)
232 {
233 if (c == 0x1b)
234 {
235 this.state = EncodingUtils.FSM_ESC;
236 }
237 else
238 {
239 switch (this.state)
240 {
241 case EncodingUtils.FSM_ESC :
242 if (c == '$')
243 {
244 this.state = EncodingUtils.FSM_ESCD;
245 }
246 else if (c == '(')
247 {
248 this.state = EncodingUtils.FSM_ESCP;
249 }
250 else
251 {
252 this.state = EncodingUtils.FSM_ASCII;
253 }
254 break;
255
256 case EncodingUtils.FSM_ESCD :
257 if (c == '(')
258 {
259 this.state = EncodingUtils.FSM_ESCDP;
260 }
261 else
262 {
263 this.state = EncodingUtils.FSM_NONASCII;
264 }
265 break;
266
267 case EncodingUtils.FSM_ESCDP :
268 this.state = EncodingUtils.FSM_NONASCII;
269 break;
270
271 case EncodingUtils.FSM_ESCP :
272 this.state = EncodingUtils.FSM_ASCII;
273 break;
274
275 case EncodingUtils.FSM_NONASCII :
276 c &= 0x7F;
277 break;
278
279 default :
280
281 break;
282 }
283 }
284
285 this.out.write(c);
286 }
287 else if (this.encoding == Configuration.UTF16LE
288 || this.encoding == Configuration.UTF16BE
289 || this.encoding == Configuration.UTF16)
290 {
291 int i = 1;
292 int numChars = 1;
293 int[] theChars = new int[2];
294
295 if (c > EncodingUtils.MAX_UTF16_FROM_UCS4)
296 {
297
298
299 c = 0;
300 numChars = 0;
301 }
302 else if (c >= EncodingUtils.UTF16_SURROGATES_BEGIN)
303 {
304
305
306
307 if (((c & 0x0000FFFE) == 0x0000FFFE) || ((c & 0x0000FFFF) == 0x0000FFFF))
308 {
309
310 c = 0;
311 numChars = 0;
312 }
313 else
314 {
315 theChars[0] = (c - EncodingUtils.UTF16_SURROGATES_BEGIN)
316 / 0x400
317 + EncodingUtils.UTF16_LOW_SURROGATE_BEGIN;
318 theChars[1] = (c - EncodingUtils.UTF16_SURROGATES_BEGIN)
319 % 0x400
320 + EncodingUtils.UTF16_HIGH_SURROGATE_BEGIN;
321
322
323 numChars = 2;
324 }
325 }
326 else
327 {
328
329 theChars[0] = c;
330 }
331
332 for (i = 0; i < numChars; i++)
333 {
334 c = theChars[i];
335
336 if (this.encoding == Configuration.UTF16LE)
337 {
338 ch = c & 0xFF;
339 out.write(ch);
340 ch = (c >> 8) & 0xFF;
341 out.write(ch);
342 }
343
344 else if (this.encoding == Configuration.UTF16BE || this.encoding == Configuration.UTF16)
345 {
346 ch = (c >> 8) & 0xFF;
347 out.write(ch);
348 ch = c & 0xFF;
349 out.write(ch);
350 }
351 }
352 }
353
354 else if (this.encoding == Configuration.BIG5 || this.encoding == Configuration.SHIFTJIS)
355 {
356 if (c < 128)
357 {
358 this.out.write(c);
359 }
360 else
361 {
362 ch = (c >> 8) & 0xFF;
363 this.out.write(ch);
364 ch = c & 0xFF;
365 this.out.write(ch);
366 }
367 }
368
369 else
370 {
371 this.out.write(c);
372 }
373 }
374 catch (IOException e)
375 {
376 System.err.println("OutImpl.outc: " + e.toString());
377 }
378 }
379
380 /***
381 * @see org.w3c.tidy.Out#newline()
382 */
383 public void newline()
384 {
385 try
386 {
387 this.out.write(this.newline);
388 this.out.flush();
389 }
390 catch (IOException e)
391 {
392 System.err.println("OutImpl.newline: " + e.toString());
393 }
394 }
395
396 /***
397 * Setter for <code>out</code>.
398 * @param out The out to set.
399 */
400 public void setOut(OutputStream out)
401 {
402 this.out = out;
403 }
404
405 /***
406 * Output a Byte Order Mark.
407 */
408 public void outBOM()
409 {
410 if (this.encoding == Configuration.UTF8
411 || this.encoding == Configuration.UTF16LE
412 || this.encoding == Configuration.UTF16BE
413 || this.encoding == Configuration.UTF16)
414 {
415 outc(EncodingUtils.UNICODE_BOM);
416 }
417 }
418
419 /***
420 * @see org.w3c.tidy.Out#close()
421 */
422 public void close()
423 {
424 try
425 {
426 this.out.flush();
427 this.out.close();
428 }
429 catch (IOException e)
430 {
431 System.err.println("OutImpl.close: " + e.toString());
432 }
433 }
434 }