1 |
| |
2 |
| |
3 |
| |
4 |
| |
5 |
| |
6 |
| |
7 |
| |
8 |
| |
9 |
| |
10 |
| |
11 |
| |
12 |
| |
13 |
| |
14 |
| |
15 |
| |
16 |
| |
17 |
| |
18 |
| |
19 |
| |
20 |
| |
21 |
| |
22 |
| |
23 |
| |
24 |
| |
25 |
| |
26 |
| |
27 |
| |
28 |
| |
29 |
| |
30 |
| |
31 |
| |
32 |
| |
33 |
| |
34 |
| |
35 |
| |
36 |
| |
37 |
| |
38 |
| |
39 |
| |
40 |
| |
41 |
| |
42 |
| |
43 |
| |
44 |
| |
45 |
| |
46 |
| |
47 |
| |
48 |
| |
49 |
| |
50 |
| |
51 |
| |
52 |
| |
53 |
| |
54 |
| package org.w3c.tidy; |
55 |
| |
56 |
| import java.io.IOException; |
57 |
| import java.io.OutputStream; |
58 |
| |
59 |
| import org.w3c.tidy.EncodingUtils.PutBytes; |
60 |
| |
61 |
| |
62 |
| |
63 |
| |
64 |
| |
65 |
| |
66 |
| |
67 |
| |
68 |
| |
69 |
| |
70 |
| public class OutImpl implements Out |
71 |
| { |
72 |
| |
73 |
| |
74 |
| |
75 |
| |
76 |
| private int encoding; |
77 |
| |
78 |
| |
79 |
| |
80 |
| |
81 |
| private int state; |
82 |
| |
83 |
| |
84 |
| |
85 |
| |
86 |
| private OutputStream out; |
87 |
| |
88 |
| |
89 |
| |
90 |
| |
91 |
| private PutBytes putBytes; |
92 |
| |
93 |
| |
94 |
| |
95 |
| |
96 |
| private byte[] newline; |
97 |
| |
98 |
| |
99 |
| |
100 |
| |
101 |
| |
102 |
| |
103 |
| |
104 |
0
| public OutImpl(Configuration configuration, int encoding, OutputStream out)
|
105 |
| { |
106 |
0
| this.encoding = encoding;
|
107 |
0
| this.state = EncodingUtils.FSM_ASCII;
|
108 |
0
| this.out = out;
|
109 |
| |
110 |
| |
111 |
0
| this.newline = new byte[configuration.newline.length];
|
112 |
0
| for (int j = 0; j < configuration.newline.length; j++)
|
113 |
| { |
114 |
0
| this.newline[j] = (byte) configuration.newline[j];
|
115 |
| } |
116 |
| |
117 |
0
| this.putBytes = new PutBytes()
|
118 |
| { |
119 |
| |
120 |
| private OutImpl impl; |
121 |
| |
122 |
0
| PutBytes setOut(OutImpl out)
|
123 |
| { |
124 |
0
| this.impl = out;
|
125 |
0
| return this;
|
126 |
| } |
127 |
| |
128 |
0
| public void doPut(byte[] buf, int[] count)
|
129 |
| { |
130 |
0
| impl.outcUTF8Bytes(buf, count);
|
131 |
| } |
132 |
| } |
133 |
| .setOut(this); |
134 |
| } |
135 |
| |
136 |
| |
137 |
| |
138 |
| |
139 |
| |
140 |
| |
141 |
0
| void outcUTF8Bytes(byte[] buf, int[] count)
|
142 |
| { |
143 |
0
| try
|
144 |
| { |
145 |
0
| for (int i = 0; i < count[0]; i++)
|
146 |
| { |
147 |
0
| out.write(buf[i]);
|
148 |
| } |
149 |
| } |
150 |
| catch (IOException e) |
151 |
| { |
152 |
0
| System.err.println("OutImpl.outcUTF8Bytes: " + e.toString());
|
153 |
| } |
154 |
| } |
155 |
| |
156 |
| |
157 |
| |
158 |
| |
159 |
| |
160 |
0
| public void outc(byte c)
|
161 |
| { |
162 |
0
| outc(c & 0xFF);
|
163 |
| } |
164 |
| |
165 |
| |
166 |
| |
167 |
| |
168 |
0
| public void outc(int c)
|
169 |
| { |
170 |
0
| int ch;
|
171 |
| |
172 |
0
| try
|
173 |
| { |
174 |
| |
175 |
0
| if (this.encoding == Configuration.MACROMAN)
|
176 |
| { |
177 |
0
| if (c < 128)
|
178 |
| { |
179 |
0
| out.write(c);
|
180 |
| } |
181 |
| else |
182 |
| { |
183 |
0
| int i;
|
184 |
| |
185 |
0
| for (i = 128; i < 256; i++)
|
186 |
| { |
187 |
0
| if (EncodingUtils.decodeMacRoman(i - 128) == c)
|
188 |
| { |
189 |
0
| out.write(i);
|
190 |
0
| break;
|
191 |
| } |
192 |
| } |
193 |
| } |
194 |
| } |
195 |
| else |
196 |
| |
197 |
0
| if (this.encoding == Configuration.WIN1252)
|
198 |
| { |
199 |
0
| if (c < 128 || (c > 159 && c < 256))
|
200 |
| { |
201 |
0
| out.write(c);
|
202 |
| } |
203 |
| else |
204 |
| { |
205 |
0
| int i;
|
206 |
| |
207 |
0
| for (i = 128; i < 160; i++)
|
208 |
| { |
209 |
0
| if (EncodingUtils.decodeWin1252(i - 128) == c)
|
210 |
| { |
211 |
0
| out.write(i);
|
212 |
0
| break;
|
213 |
| } |
214 |
| } |
215 |
| } |
216 |
| } |
217 |
0
| else if (this.encoding == Configuration.UTF8)
|
218 |
| { |
219 |
0
| int[] count = new int[]{0};
|
220 |
| |
221 |
0
| EncodingUtils.encodeCharToUTF8Bytes(c, null, this.putBytes, count);
|
222 |
0
| if (count[0] <= 0)
|
223 |
| { |
224 |
| |
225 |
| |
226 |
0
| out.write(0xEF);
|
227 |
0
| out.write(0xBF);
|
228 |
0
| out.write(0xBF);
|
229 |
| } |
230 |
| } |
231 |
0
| else if (this.encoding == Configuration.ISO2022)
|
232 |
| { |
233 |
0
| if (c == 0x1b)
|
234 |
| { |
235 |
0
| this.state = EncodingUtils.FSM_ESC;
|
236 |
| } |
237 |
| else |
238 |
| { |
239 |
0
| switch (this.state)
|
240 |
| { |
241 |
0
| case EncodingUtils.FSM_ESC :
|
242 |
0
| if (c == '$')
|
243 |
| { |
244 |
0
| this.state = EncodingUtils.FSM_ESCD;
|
245 |
| } |
246 |
0
| else if (c == '(')
|
247 |
| { |
248 |
0
| this.state = EncodingUtils.FSM_ESCP;
|
249 |
| } |
250 |
| else |
251 |
| { |
252 |
0
| this.state = EncodingUtils.FSM_ASCII;
|
253 |
| } |
254 |
0
| break;
|
255 |
| |
256 |
0
| case EncodingUtils.FSM_ESCD :
|
257 |
0
| if (c == '(')
|
258 |
| { |
259 |
0
| this.state = EncodingUtils.FSM_ESCDP;
|
260 |
| } |
261 |
| else |
262 |
| { |
263 |
0
| this.state = EncodingUtils.FSM_NONASCII;
|
264 |
| } |
265 |
0
| break;
|
266 |
| |
267 |
0
| case EncodingUtils.FSM_ESCDP :
|
268 |
0
| this.state = EncodingUtils.FSM_NONASCII;
|
269 |
0
| break;
|
270 |
| |
271 |
0
| case EncodingUtils.FSM_ESCP :
|
272 |
0
| this.state = EncodingUtils.FSM_ASCII;
|
273 |
0
| break;
|
274 |
| |
275 |
0
| case EncodingUtils.FSM_NONASCII :
|
276 |
0
| c &= 0x7F;
|
277 |
0
| break;
|
278 |
| |
279 |
0
| default :
|
280 |
| |
281 |
0
| break;
|
282 |
| } |
283 |
| } |
284 |
| |
285 |
0
| this.out.write(c);
|
286 |
| } |
287 |
0
| else if (this.encoding == Configuration.UTF16LE
|
288 |
| || this.encoding == Configuration.UTF16BE |
289 |
| || this.encoding == Configuration.UTF16) |
290 |
| { |
291 |
0
| int i = 1;
|
292 |
0
| int numChars = 1;
|
293 |
0
| int[] theChars = new int[2];
|
294 |
| |
295 |
0
| if (c > EncodingUtils.MAX_UTF16_FROM_UCS4)
|
296 |
| { |
297 |
| |
298 |
| |
299 |
0
| c = 0;
|
300 |
0
| numChars = 0;
|
301 |
| } |
302 |
0
| else if (c >= EncodingUtils.UTF16_SURROGATES_BEGIN)
|
303 |
| { |
304 |
| |
305 |
| |
306 |
| |
307 |
0
| if (((c & 0x0000FFFE) == 0x0000FFFE) || ((c & 0x0000FFFF) == 0x0000FFFF))
|
308 |
| { |
309 |
| |
310 |
0
| c = 0;
|
311 |
0
| numChars = 0;
|
312 |
| } |
313 |
| else |
314 |
| { |
315 |
0
| theChars[0] = (c - EncodingUtils.UTF16_SURROGATES_BEGIN)
|
316 |
| / 0x400 |
317 |
| + EncodingUtils.UTF16_LOW_SURROGATE_BEGIN; |
318 |
0
| theChars[1] = (c - EncodingUtils.UTF16_SURROGATES_BEGIN)
|
319 |
| % 0x400 |
320 |
| + EncodingUtils.UTF16_HIGH_SURROGATE_BEGIN; |
321 |
| |
322 |
| |
323 |
0
| numChars = 2;
|
324 |
| } |
325 |
| } |
326 |
| else |
327 |
| { |
328 |
| |
329 |
0
| theChars[0] = c;
|
330 |
| } |
331 |
| |
332 |
0
| for (i = 0; i < numChars; i++)
|
333 |
| { |
334 |
0
| c = theChars[i];
|
335 |
| |
336 |
0
| if (this.encoding == Configuration.UTF16LE)
|
337 |
| { |
338 |
0
| ch = c & 0xFF;
|
339 |
0
| out.write(ch);
|
340 |
0
| ch = (c >> 8) & 0xFF;
|
341 |
0
| out.write(ch);
|
342 |
| } |
343 |
| |
344 |
0
| else if (this.encoding == Configuration.UTF16BE || this.encoding == Configuration.UTF16)
|
345 |
| { |
346 |
0
| ch = (c >> 8) & 0xFF;
|
347 |
0
| out.write(ch);
|
348 |
0
| ch = c & 0xFF;
|
349 |
0
| out.write(ch);
|
350 |
| } |
351 |
| } |
352 |
| } |
353 |
| |
354 |
0
| else if (this.encoding == Configuration.BIG5 || this.encoding == Configuration.SHIFTJIS)
|
355 |
| { |
356 |
0
| if (c < 128)
|
357 |
| { |
358 |
0
| this.out.write(c);
|
359 |
| } |
360 |
| else |
361 |
| { |
362 |
0
| ch = (c >> 8) & 0xFF;
|
363 |
0
| this.out.write(ch);
|
364 |
0
| ch = c & 0xFF;
|
365 |
0
| this.out.write(ch);
|
366 |
| } |
367 |
| } |
368 |
| |
369 |
| else |
370 |
| { |
371 |
0
| this.out.write(c);
|
372 |
| } |
373 |
| } |
374 |
| catch (IOException e) |
375 |
| { |
376 |
0
| System.err.println("OutImpl.outc: " + e.toString());
|
377 |
| } |
378 |
| } |
379 |
| |
380 |
| |
381 |
| |
382 |
| |
383 |
0
| public void newline()
|
384 |
| { |
385 |
0
| try
|
386 |
| { |
387 |
0
| this.out.write(this.newline);
|
388 |
0
| this.out.flush();
|
389 |
| } |
390 |
| catch (IOException e) |
391 |
| { |
392 |
0
| System.err.println("OutImpl.newline: " + e.toString());
|
393 |
| } |
394 |
| } |
395 |
| |
396 |
| |
397 |
| |
398 |
| |
399 |
| |
400 |
0
| public void setOut(OutputStream out)
|
401 |
| { |
402 |
0
| this.out = out;
|
403 |
| } |
404 |
| |
405 |
| |
406 |
| |
407 |
| |
408 |
0
| public void outBOM()
|
409 |
| { |
410 |
0
| if (this.encoding == Configuration.UTF8
|
411 |
| || this.encoding == Configuration.UTF16LE |
412 |
| || this.encoding == Configuration.UTF16BE |
413 |
| || this.encoding == Configuration.UTF16) |
414 |
| { |
415 |
0
| outc(EncodingUtils.UNICODE_BOM);
|
416 |
| } |
417 |
| } |
418 |
| |
419 |
| |
420 |
| |
421 |
| |
422 |
0
| public void close()
|
423 |
| { |
424 |
0
| try
|
425 |
| { |
426 |
0
| this.out.flush();
|
427 |
0
| this.out.close();
|
428 |
| } |
429 |
| catch (IOException e) |
430 |
| { |
431 |
0
| System.err.println("OutImpl.close: " + e.toString());
|
432 |
| } |
433 |
| } |
434 |
| } |